# Load essential libraries
library(tidyverse)      # for data wrangling and plotting
library(data.table)     # for fast data import
library(weathermetrics) # for humidity and temperature calculations
library(openxlsx2)      # for Excel output
library(zoo)            # for date utilities
library(heatwaveR)      # for extreme event detection

1 Breeding Season & Colony Information

# Focal breeding colony: name and coordinates
colony_name <- "Big Green Island"
colony_lat  <- -40.1841
colony_lon  <- 147.9802

# Breeding-season settings
# Months monitored each season (December to March), stored as character
monitoring_months <- as.character(c(12, 1, 2, 3))
# Month used to assign a date to a 'season' across the year boundary
# (actual start of the breeding season)
start_month <- 9
# First and last monitored breeding seasons
start_season <- 1997
last_season  <- 2024

2 Import and Process BOM Weather Data

2.1 Synoptic Data

Hourly Observations (1 to 24 observations per day)

# Import and summarise BOM synoptic data (hourly observations)

# Aggregate one day's observations with `fn` (e.g. mean, max), returning NA when
# every observation for that day is missing. A scalar `if` is used on purpose:
# `if_else()` evaluates both branches, so `max(..., na.rm = TRUE)` would still
# run on all-NA days and emit "no non-missing arguments to max" warnings.
.summarise_or_na <- function(x, fn) {
  if (all(is.na(x))) {
    return(NA_real_)
  }
  fn(x, na.rm = TRUE)
}

bom_syno <- fread("BOM/Synoptic/HC06D_Data_099005_9999999910800646.txt") %>%
  # Keep and rename only the columns used downstream
  dplyr::select(
    year = 'Year',
    month = 'Month',
    day = 'Day',
    hour = 'Hour',
    air_temperature = 'Air temperature in Degrees C',
    dewpoint_temperature = 'Dew point temperature in Degrees C',
    wetbulb_temperature = 'Wet bulb temperature in Degrees C',
    wind_speed = 'Wind speed measured in km/h',
    sea_level_pressure = 'Mean sea level pressure in hPa'
  ) %>%
  dplyr::mutate(
    date = make_date(year, month, day),
    # Dates before `start_month` are assigned to the season of the previous year
    season = if_else(month(date) >= start_month, year(date), year(date) - 1)
  ) %>%
  # Force the measurement columns to numeric before aggregating
  dplyr::mutate(across(c(air_temperature, dewpoint_temperature, wetbulb_temperature,
                         wind_speed, sea_level_pressure), as.numeric)) %>%
  # Collapse the hourly records to one row per day
  dplyr::group_by(season, date, year, month, day) %>%
  dplyr::summarise(
    air_temperature      = .summarise_or_na(air_temperature, mean),
    dewpoint_temperature = .summarise_or_na(dewpoint_temperature, mean),
    wetbulb_temperature  = .summarise_or_na(wetbulb_temperature, max),  # daily maximum
    wind_speed_mean      = .summarise_or_na(wind_speed, mean),
    wind_speed_max       = .summarise_or_na(wind_speed, max),
    sea_level_pressure   = .summarise_or_na(sea_level_pressure, mean),
    .groups = "drop"
  )

2.2 BOM Daily data

# Read the BOM daily observation file, rename the columns of interest, then
# derive the calendar date, the breeding season, and mean solar radiation
bom_daily <- fread("BOM/Daily/DC02D_Data_099005_9999999910804711.txt") %>%
  dplyr::select(
    year = "Year",
    month = "Month",
    day = "Day",
    air_temperature_min = "Minimum temperature in 24 hours before 9am (local time) in Degrees C",
    air_temperature_max = "Maximum temperature in 24 hours after 9am (local time) in Degrees C",
    precipitation = "Precipitation in the 24 hours before 9am (local time) in mm",
    gust_speed = "Speed of maximum wind gust in km/h",
    gust_direction = "Direction of maximum wind gust in degrees",
    solar_exposure = "Total daily global solar exposure - derived from satellite data in kWh.m-2"
  ) %>%
  dplyr::mutate(
    date = make_date(year, month, day),
    # Dates before `start_month` are assigned to the season of the previous year
    season = if_else(month(date) >= start_month, year(date), year(date) - 1),
    # Convert kWh/m² to W/m²
    solar_radiation = solar_exposure * 1000 / 24
  ) %>%
  # Move the identifying columns to the front; the rest keep their order
  dplyr::relocate(season, date, year, month, day)

# Combine daily and synoptic summaries on their shared season/date columns,
# keeping rows that appear in either table (all = TRUE)
join_keys <- c("season", "date", "year", "month", "day")
bom_daily <- merge(x = bom_daily, y = bom_syno, by = join_keys, all = TRUE)

2.3 Finalise Dataset and Generate Summary

# Coerce `date` to Date, keep the first row for each date, order
# chronologically, and retain only the identifying columns plus precipitation
bom_daily <- bom_daily %>%
  dplyr::mutate(date = as.Date(date)) %>%
  dplyr::distinct(date, .keep_all = TRUE) %>%
  dplyr::arrange(date) %>%
  dplyr::select(season, date, year, month, day, precipitation) %>%
  as.data.frame()


# Note: We are only interested in precipitation-related indices, as other weather variables are not considered to affect breeding success in burrowing seabirds. The burrows provide a thermal refuge, protecting the birds from most weather elements, including both extreme heat and extreme cold. Additionally, given the topography of these islands and the location of the breeding habitats, the population is unlikely to be affected by wave conditions.


# Count missing values per weather variable within each season; the table is
# used to identify the starting point of the climatology baseline
bom_daily %>%
  group_by(season) %>%
  summarise(
    across(!c(date, year, month, day), function(x) sum(is.na(x))),
    .groups = "drop"
  ) %>%
  DT::datatable(options = list(pageLength = 20, scrollX = TRUE))

The climatology period used to calculate percentile thresholds will be based on the full extent of our available dataset. For example, for extreme weather indices that rely on precipitation, the climatology will be calculated from the 1970 season through to the final monitoring season for the focal population, which is the 2024 season. Only seasons with no more than approximately 10% missing daily data (i.e., at least around 330 days of data per season) will be included to ensure reliability.

By applying consistent criteria for data completeness, we minimise bias and ensure that the percentile thresholds truly reflect the local climatological context. Furthermore, using an extended climatology period enhances the robustness of extreme event detection, providing a solid foundation for comparative analysis across years and for assessing long-term trends.

3 Calculate Extreme Weather Indices

3.1 Extreme Precipitation Indices

3.1.1 Wet, Heavy, and Very Heavy Rain Days

Wet day = daily precipitation ≥ 1 mm; heavy rain day = daily precipitation ≥ 10 mm; very heavy rain day = daily precipitation ≥ 30 mm.

# For each rainfall class, store the day's precipitation when it reaches the
# class threshold and 0 otherwise; days with no precipitation record stay NA
bom_daily <- bom_daily %>%
  dplyr::mutate(
    wet_day = case_when(
      is.na(precipitation) ~ NA_real_,
      precipitation >= 1 ~ precipitation,
      TRUE ~ 0
    ),
    heavy_rain_day = case_when(
      is.na(precipitation) ~ NA_real_,
      precipitation >= 10 ~ precipitation,
      TRUE ~ 0
    ),
    very_heavy_rain_day = case_when(
      is.na(precipitation) ~ NA_real_,
      precipitation >= 30 ~ precipitation,
      TRUE ~ 0
    )
  )


# Bar chart of the three rainfall classes per season, restricted to the
# monitoring months and to seasons from `start_season` onwards
bom_daily %>%
  dplyr::filter(month %in% monitoring_months, season >= start_season) %>%
  ggplot(aes(x = as.factor(season))) +
  # One layer per rainfall class, drawn in this order so later layers sit on top
  geom_col(aes(y = wet_day, fill = "Wet day")) +
  geom_col(aes(y = heavy_rain_day, fill = "Heavy rain day")) +
  geom_col(aes(y = very_heavy_rain_day, fill = "Very heavy rain day")) +
  scale_fill_manual(
    values = c(
      "Wet day" = "#96C9F4",
      "Heavy rain day" = "#3FA2F6",
      "Very heavy rain day" = "#0F67B1"
    )
  ) +
  labs(
    x = "Season",
    y = "Precipitation (mm)",
    title = "Extreme precipitation days during monitoring period"
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    legend.title = element_blank()
  )

3.1.2 Extremely wet day precipitation

Days on which daily precipitation exceeds the 99th percentile of the climatology.

# Extremely wet days: build the 99th-percentile precipitation climatology,
# then flag every day exceeding it (single days count; no gap bridging)
ewdp_climatology <- ts2clm(
  data = bom_daily,
  x = date,
  y = precipitation,
  pctile = 99,
  climatologyPeriod = c("1970-05-01", "2025-04-30"),
  windowHalfWidth = 5
)

ewdp <- detect_event(
  data = ewdp_climatology,
  x = date,
  y = precipitation,
  minDuration = 1,
  maxGap = 0,
  coldSpells = FALSE
)

# Dates flagged as events in the detection output
ewdp_event_dates <- ewdp$climatology$date[ewdp$climatology$event]

# New column: precipitation on event days, 0 on other days with data,
# NA where precipitation is missing
bom_daily <- bom_daily %>%
  dplyr::mutate(
    ewdp = case_when(
      date %in% ewdp_event_dates ~ precipitation,
      !is.na(precipitation) ~ 0,
      TRUE ~ NA_real_
    )
  )


# Scatter of extremely-wet-day precipitation per season for the monitoring
# months; zeros (non-event days) are turned into NA so they are not drawn
bom_daily %>%
  dplyr::filter(month %in% monitoring_months, season >= start_season) %>%
  ggplot(aes(x = as.factor(season), y = na_if(ewdp, 0))) +
  geom_point(alpha = 0.6, colour = "#003161") +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    x = "Season",
    y = "Precipitation (mm)",
    title = "Extremely wet day precipitation during monitoring period"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

3.1.3 Very wet day precipitation

Days on which daily precipitation exceeds the 95th percentile of the climatology.

# Very wet days: build the 95th-percentile precipitation climatology, then
# flag every day exceeding it (single days count; no gap bridging)
vwdp_climatology <- ts2clm(
  data = bom_daily,
  x = date,
  y = precipitation,
  pctile = 95,
  climatologyPeriod = c("1970-05-01", "2025-04-30"),
  windowHalfWidth = 5
)

vwdp <- detect_event(
  data = vwdp_climatology,
  x = date,
  y = precipitation,
  minDuration = 1,
  maxGap = 0,
  coldSpells = FALSE
)

# Add a new column to daily weather data: precipitation on event days, 0 on
# other days with data, NA where precipitation is missing.
# Event days are looked up with `%in%` and a type-strict `if_else()`, the same
# pattern used for `ewdp`. This avoids comparing a logical with `== TRUE` and
# the untyped NA of base `ifelse()`.
bom_daily <- bom_daily %>%
  dplyr::mutate(
    vwdp = if_else(
      date %in% vwdp$climatology$date[vwdp$climatology$event],
      precipitation,
      if_else(!is.na(precipitation), 0, NA_real_)
    )
  )


# Scatter of very-wet-day precipitation per season for the monitoring months;
# zeros (non-event days) are turned into NA so they are not drawn
bom_daily %>%
  dplyr::filter(month %in% monitoring_months, season >= start_season) %>%
  ggplot(aes(x = as.factor(season), y = na_if(vwdp, 0))) +
  geom_point(alpha = 0.6, colour = "#003161") +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    x = "Season",
    y = "Precipitation (mm)",
    title = "Very wet day precipitation during monitoring period"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

4 Save daily weather data

# Assign colony name to each record and keep only the columns to be exported.
# The label comes from the `colony_name` constant defined with the colony
# details, so the name lives in one place. `.env$` makes explicit that the
# script-level variable is meant, not a data column of the same name.
bom_daily <- bom_daily %>%
  dplyr::mutate(colony_name = .env$colony_name) %>%
  dplyr::select(
    colony_name,
    season, date,
    wet_day, heavy_rain_day, very_heavy_rain_day, ewdp, vwdp
  )


# Export the annotated daily table to Excel, with a header row and no row names
ewes_output_file <- "Breeding_colony_ewes/Big_Green_ewes.xlsx"
write_xlsx(bom_daily, file = ewes_output_file, col_names = TRUE, row_names = FALSE)


# Print per-column summary statistics of the exported table
bom_daily %>% summary()
##  colony_name            season          date               wet_day      
##  Length:20210       Min.   :1969   Min.   :1970-01-01   Min.   :  0.00  
##  Class :character   1st Qu.:1983   1st Qu.:1983-11-01   1st Qu.:  0.00  
##  Mode  :character   Median :1996   Median :1997-08-31   Median :  0.00  
##                     Mean   :1997   Mean   :1997-08-31   Mean   :  1.87  
##                     3rd Qu.:2010   3rd Qu.:2011-07-01   3rd Qu.:  1.20  
##                     Max.   :2024   Max.   :2025-05-01   Max.   :119.40  
##                                                         NA's   :348     
##  heavy_rain_day    very_heavy_rain_day      ewdp               vwdp        
##  Min.   :  0.000   Min.   :  0.000     Min.   :  0.0000   Min.   :  0.000  
##  1st Qu.:  0.000   1st Qu.:  0.000     1st Qu.:  0.0000   1st Qu.:  0.000  
##  Median :  0.000   Median :  0.000     Median :  0.0000   Median :  0.000  
##  Mean   :  1.033   Mean   :  0.269     Mean   :  0.3993   Mean   :  1.006  
##  3rd Qu.:  0.000   3rd Qu.:  0.000     3rd Qu.:  0.0000   3rd Qu.:  0.000  
##  Max.   :119.400   Max.   :119.400     Max.   :119.4000   Max.   :119.400  
##  NA's   :348       NA's   :348         NA's   :348        NA's   :348