# Load essential libraries
library(tidyverse) # for data wrangling and plotting
library(data.table) # for fast data import
library(weathermetrics) # for humidity and temperature calculations
library(openxlsx2) # for Excel output
library(zoo) # for date utilities
library(heatwaveR) # for extreme event detection
# ---- Colony metadata ----
# Name and latitude/longitude of the study colony
colony_name <- "Big Green Island"
colony_lat <- -40.1841
colony_lon <- 147.9802

# ---- Breeding season settings ----
# Calendar months covered by monitoring (December through March), kept as strings
monitoring_months <- as.character(c(12, 1, 2, 3))
# Month in which the breeding season actually starts. Dates from this month
# onward are labelled with the current year's season; earlier dates are
# labelled with the previous year's season.
start_month <- 9
# Earliest monitored breeding season
start_season <- 1997
# Most recent monitored breeding season
last_season <- 2024
Hourly Observations (1 to 24 observations per day)
# Import and summarise BOM synoptic data (hourly observations)

# Collapse one day's observations with `fun` (e.g. mean or max), ignoring NAs.
# Returns NA_real_ when every observation is missing. A plain if/else is used
# (rather than if_else(), which evaluates both branches) so that `fun` is never
# called on an all-NA vector; that avoids the NaN from mean() and the -Inf plus
# "no non-missing arguments" warning from max().
summarise_observed <- function(x, fun) {
  if (all(is.na(x))) {
    NA_real_
  } else {
    fun(x, na.rm = TRUE)
  }
}

bom_syno <- fread("BOM/Synoptic/HC06D_Data_099005_9999999910800646.txt") %>%
  # Keep only the variables of interest, under short snake_case names
  dplyr::select(
    year = 'Year',
    month = 'Month',
    day = 'Day',
    hour = 'Hour',
    air_temperature = 'Air temperature in Degrees C',
    dewpoint_temperature = 'Dew point temperature in Degrees C',
    wetbulb_temperature = 'Wet bulb temperature in Degrees C',
    wind_speed = 'Wind speed measured in km/h',
    sea_level_pressure = 'Mean sea level pressure in hPa'
  ) %>%
  # Build a calendar date and the season label: dates from `start_month`
  # onward belong to the current year's season, earlier dates to the previous one
  dplyr::mutate(
    date = make_date(year, month, day),
    season = if_else(month(date) >= start_month, year(date), year(date) - 1)
  ) %>%
  # Force the measurement columns to numeric before aggregating
  dplyr::mutate(across(c(air_temperature, dewpoint_temperature, wetbulb_temperature,
                         wind_speed, sea_level_pressure), as.numeric)) %>%
  # One row per day: daily means, except wet-bulb temperature (daily maximum)
  # and wind speed (both daily mean and daily maximum)
  dplyr::group_by(season, date, year, month, day) %>%
  dplyr::summarise(
    air_temperature = summarise_observed(air_temperature, mean),
    dewpoint_temperature = summarise_observed(dewpoint_temperature, mean),
    wetbulb_temperature = summarise_observed(wetbulb_temperature, max),
    wind_speed_mean = summarise_observed(wind_speed, mean),
    wind_speed_max = summarise_observed(wind_speed, max),
    sea_level_pressure = summarise_observed(sea_level_pressure, mean),
    .groups = "drop"
  )
# Read the BOM daily observation file and derive date, season and solar radiation
bom_daily <- fread("BOM/Daily/DC02D_Data_099005_9999999910804711.txt") %>%
  # Keep the variables of interest under short snake_case names
  dplyr::select(
    year = "Year",
    month = "Month",
    day = "Day",
    air_temperature_min = "Minimum temperature in 24 hours before 9am (local time) in Degrees C",
    air_temperature_max = "Maximum temperature in 24 hours after 9am (local time) in Degrees C",
    precipitation = "Precipitation in the 24 hours before 9am (local time) in mm",
    gust_speed = "Speed of maximum wind gust in km/h",
    gust_direction = "Direction of maximum wind gust in degrees",
    solar_exposure = "Total daily global solar exposure - derived from satellite data in kWh.m-2"
  ) %>%
  dplyr::mutate(
    # Calendar date assembled from its three components
    date = make_date(year, month, day),
    # Dates from `start_month` onward belong to the current year's season,
    # earlier dates to the previous year's season
    season = if_else(month(date) >= start_month, year(date), year(date) - 1),
    # Daily total in kWh/m² expressed as a 24-hour mean in W/m²
    solar_radiation = solar_exposure * 1000 / 24
  ) %>%
  # Move the identifying columns to the front; the rest keep their order
  dplyr::relocate(season, date, year, month, day)
# Combine the daily table with the daily-aggregated synoptic table, keeping
# rows that appear in either source (full outer join on the date keys)
bom_daily <- merge(
  x = bom_daily,
  y = bom_syno,
  by = c("season", "date", "year", "month", "day"),
  all.x = TRUE,
  all.y = TRUE
)
# Reduce to the date keys plus precipitation, coerce `date` to Date, keep the
# first row for each date, and return a plain data.frame sorted by date
bom_daily <- bom_daily %>%
  dplyr::select(season, date, year, month, day, precipitation) %>%
  dplyr::mutate(date = as.Date(date)) %>%
  dplyr::distinct(date, .keep_all = TRUE) %>%
  dplyr::arrange(date) %>%
  as.data.frame()
# Note: We are only interested in precipitation-related indices, as other weather variables are not considered to affect breeding success in burrowing seabirds. The burrows provide a thermal refuge, protecting the birds from most weather elements, including both extreme heat and extreme cold. Additionally, given the topography of these islands and the location of the breeding habitats, the population is unlikely to be affected by wave conditions.
# Count missing values per weather variable within each season; the table is
# used to choose where the climatology baseline can start
bom_daily %>%
  dplyr::group_by(season) %>%
  dplyr::summarise(
    # Every column except the date keys (the grouping column is skipped automatically)
    dplyr::across(!c(date, year, month, day), function(x) sum(is.na(x))),
    .groups = "drop"
  ) %>%
  # Interactive table: 20 rows per page, horizontal scrolling enabled
  DT::datatable(options = list(pageLength = 20, scrollX = TRUE))
The climatology period used to calculate percentile thresholds will be based on the full extent of our available dataset. For example, for extreme weather indices that rely on precipitation, the climatology will be calculated from the 1970 season through to the final monitoring season for the focal population, which is the 2024 season. Only seasons with no more than approximately 10% missing daily data (i.e., at least around 330 days of data per season) will be included to ensure reliability.
By applying consistent criteria for data completeness, we minimise bias and ensure that the percentile thresholds truly reflect the local climatological context. Furthermore, using an extended climatology period enhances the robustness of extreme event detection, providing a solid foundation for comparative analysis across years and for assessing long-term trends.
Wet day = daily precipitation ≥ 1 mm; heavy rain = daily precipitation ≥ 10 mm; very heavy rain = daily precipitation ≥ 30 mm.
# Flag rainfall days by threshold. Each column holds the day's precipitation
# when it meets the threshold and 0 otherwise. Days without data stay NA
# because if_else() returns NA wherever its condition is NA.
bom_daily <- bom_daily %>%
  dplyr::mutate(
    wet_day = if_else(precipitation >= 1, precipitation, 0),
    heavy_rain_day = if_else(precipitation >= 10, precipitation, 0),
    very_heavy_rain_day = if_else(precipitation >= 30, precipitation, 0)
  )
# Bar chart of precipitation by season for the three rainfall categories,
# restricted to the monitoring months and the monitored seasons
bom_daily %>%
  dplyr::filter(month %in% monitoring_months, season >= start_season) %>%
  ggplot(aes(x = factor(season))) +
  # Layers are drawn lightest first so heavier categories sit on top
  geom_col(aes(y = wet_day, fill = "Wet day")) +
  geom_col(aes(y = heavy_rain_day, fill = "Heavy rain day")) +
  geom_col(aes(y = very_heavy_rain_day, fill = "Very heavy rain day")) +
  scale_fill_manual(
    values = c(
      "Wet day" = "#96C9F4",
      "Heavy rain day" = "#3FA2F6",
      "Very heavy rain day" = "#0F67B1"
    )
  ) +
  labs(
    title = "Extreme precipitation days during monitoring period",
    x = "Season",
    y = "Precipitation (mm)"
  ) +
  theme_classic() +
  theme(
    legend.title = element_blank(),
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
When daily precipitation > 99th percentile
# Detect extremely wet days (daily precipitation above the 99th percentile)
# Day-of-year threshold from the climatology period, window half-width of 5 days
ewdp_climatology <- ts2clm(
  data = bom_daily,
  x = date,
  y = precipitation,
  pctile = 99,
  climatologyPeriod = c("1970-05-01", "2025-04-30"),
  windowHalfWidth = 5
)
# A single day above the threshold qualifies as an event (minDuration = 1)
ewdp <- detect_event(
  data = ewdp_climatology,
  x = date,
  y = precipitation,
  minDuration = 1,
  maxGap = 0,
  coldSpells = FALSE
)
# Add a new column to daily weather data: the day's precipitation on event
# days, 0 on other days with data, NA where precipitation is missing.
# `.env$ewdp` forces the lookup of the detect_event() result in the calling
# environment. Without it, re-running this chunk fails because the bare name
# `ewdp` then refers to the data column created below.
bom_daily <- bom_daily %>%
  dplyr::mutate(
    ewdp = if_else(
      date %in% .env$ewdp$climatology$date[.env$ewdp$climatology$event],
      precipitation,
      if_else(!is.na(precipitation), 0, NA_real_)
    )
  )
# Scatter of extremely wet day precipitation by season, restricted to the
# monitoring months and monitored seasons. Zeros (non-event days) are turned
# into NA so that only event days are drawn.
bom_daily %>%
  dplyr::filter(month %in% monitoring_months, season >= start_season) %>%
  dplyr::mutate(ewdp = na_if(ewdp, 0)) %>%
  ggplot(aes(x = factor(season), y = ewdp)) +
  geom_point(color = "#003161", alpha = 0.6) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Extremely wet day precipitation during monitoring period",
    x = "Season",
    y = "Precipitation (mm)"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
When daily precipitation > 95th percentile
# Detect very wet days (daily precipitation above the 95th percentile)
# Day-of-year threshold from the climatology period, window half-width of 5 days
vwdp_climatology <- ts2clm(
  data = bom_daily,
  x = date,
  y = precipitation,
  pctile = 95,
  climatologyPeriod = c("1970-05-01", "2025-04-30"),
  windowHalfWidth = 5
)
# A single day above the threshold qualifies as an event (minDuration = 1)
vwdp <- detect_event(
  data = vwdp_climatology,
  x = date,
  y = precipitation,
  minDuration = 1,
  maxGap = 0,
  coldSpells = FALSE
)
# Add a new column to daily weather data: the day's precipitation on event
# days, 0 on other days with data, NA where precipitation is missing.
# This uses the same type-stable if_else()/%in% logic as the extremely-wet-day
# column, so a date that cannot be matched never turns into a spurious NA.
# `.env$vwdp` forces the lookup of the detect_event() result in the calling
# environment. Without it, re-running this chunk fails because the bare name
# `vwdp` then refers to the data column created below.
bom_daily <- bom_daily %>%
  dplyr::mutate(
    vwdp = if_else(
      date %in% .env$vwdp$climatology$date[.env$vwdp$climatology$event],
      precipitation,
      if_else(!is.na(precipitation), 0, NA_real_)
    )
  )
# Scatter of very wet day precipitation by season, restricted to the
# monitoring months and monitored seasons. Zeros (non-event days) are turned
# into NA so that only event days are drawn.
bom_daily %>%
  dplyr::filter(month %in% monitoring_months, season >= start_season) %>%
  dplyr::mutate(vwdp = na_if(vwdp, 0)) %>%
  ggplot(aes(x = factor(season), y = vwdp)) +
  geom_point(color = "#003161", alpha = 0.6) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Very wet day precipitation during monitoring period",
    x = "Season",
    y = "Precipitation (mm)"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Assign colony name to each record and keep only the output columns.
# The name is read from the `colony_name` constant defined with the colony
# details, rather than repeated as a literal, so the two cannot drift apart.
# `.env$` makes explicit that the value comes from the calling environment
# and not from a data column.
bom_daily <- bom_daily %>%
  dplyr::mutate(colony_name = .env$colony_name) %>%
  dplyr::select(
    colony_name,
    season, date,
    wet_day, heavy_rain_day, very_heavy_rain_day, ewdp, vwdp
  )
# Save the cleaned and annotated data to an Excel file
# Create the output folder first so the write does not fail when it is missing;
# an existing folder is left untouched
dir.create("Breeding_colony_ewes", showWarnings = FALSE, recursive = TRUE)
write_xlsx(
  bom_daily,
  file = "Breeding_colony_ewes/Big_Green_ewes.xlsx",
  col_names = TRUE,
  row_names = FALSE
)
# View dataset summary
summary(bom_daily)
## colony_name season date wet_day
## Length:20210 Min. :1969 Min. :1970-01-01 Min. : 0.00
## Class :character 1st Qu.:1983 1st Qu.:1983-11-01 1st Qu.: 0.00
## Mode :character Median :1996 Median :1997-08-31 Median : 0.00
## Mean :1997 Mean :1997-08-31 Mean : 1.87
## 3rd Qu.:2010 3rd Qu.:2011-07-01 3rd Qu.: 1.20
## Max. :2024 Max. :2025-05-01 Max. :119.40
## NA's :348
## heavy_rain_day very_heavy_rain_day ewdp vwdp
## Min. : 0.000 Min. : 0.000 Min. : 0.0000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 0.0000 Median : 0.000
## Mean : 1.033 Mean : 0.269 Mean : 0.3993 Mean : 1.006
## 3rd Qu.: 0.000 3rd Qu.: 0.000 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :119.400 Max. :119.400 Max. :119.4000 Max. :119.400
## NA's :348 NA's :348 NA's :348 NA's :348