# Load libraries for data processing, modelling, and visualisation
library(tidyverse)
library(openxlsx2)
library(MASS)
library(corrplot)
library(DT)
library(climwin)
library(jtools)
library(DHARMa)
library(lavaan)
library(DiagrammeR)
library(lmtest)
library(glmmTMB)
library(scales)
library(performance)

1 Load and Process Data

# Load daily extreme weather event (EWE) data
extreme_weather <- wb_to_df("Breeding_colony_ewes/Albatross_ewes.xlsx") %>%
                   dplyr::mutate(across(4:20, ~ ifelse(is.na(.), 0, .)))  # Replace NA with 0 for analysis


# Create binary version: 1 = event occurred, 0 = no event, NA = missing
extreme_weather_binary <- extreme_weather %>%
                          dplyr::mutate(across(4:20, ~ ifelse(!is.na(.) & . != 0, 1, ifelse(is.na(.), NA, 0))))


# Note:
# Missing values in extreme weather data are replaced with zero. This is critical because the slidingwin method ("method1" and "method2") internally calculates means when NA values are present, which is not suitable when assessing extreme values. We are specifically interested in whether an extreme event occurred, not in average conditions.
# A small number of missing values are present in the dataset, and replacing them with zero ensures consistency without introducing bias in this context.


# Load breeding data
breeding_data <- wb_to_df("Breeding_data/SHAL.xlsx", sheet = "Albatross_Island") %>%
                 dplyr::filter(!is.na(bs))  # Remove seasons without breeding success data


# Record sample size
sample_size <- nrow(breeding_data)


# Assess normality of breeding success
# If p > 0.05, the data does not significantly deviate from normality.
shapiro.test(breeding_data$bs)
## 
##  Shapiro-Wilk normality test
## 
## data:  breeding_data$bs
## W = 0.97617, p-value = 0.8002
# Histogram with density curve
hist(breeding_data$bs,
     main = "Histogram of Breeding success",
     xlab = "Breeding success",
     col = "#a6d6fa",
     border = "white",
     prob = TRUE  
)

# Overlay kernel density estimate
lines(density(breeding_data$bs, na.rm = TRUE), col = "#0D92F4", lwd = 2)

# Q-Q plot
ggplot(breeding_data, aes(sample = bs)) +
  stat_qq() +
  stat_qq_line(colour = "red") +
  labs(title = "Q-Q Plot of Breeding Success",
       x = "Theoretical Quantiles",
       y = "Sample Quantiles") +
  theme_classic()

The Shapiro–Wilk test did not indicate a significant deviation from normality (W = 0.97617, p = 0.8002); therefore, we fail to reject the null hypothesis that the data are normally distributed.

Furthermore, visual assessments, including the histogram and Q–Q plot, also support the assumption that the data approximate a normal distribution.

2 Sliding window analysis

2.1 Actual above threshold values

# Run the sliding window analysis using actual (non-binary) values
output1 <- slidingwin(xvar = list(warm_day               = extreme_weather$warm_day,
                                  warm_night             = extreme_weather$warm_night,
                                  heatwave               = extreme_weather$heatwave,
                                  cool_day               = extreme_weather$cool_day,
                                  cool_night             = extreme_weather$cool_night,
                                  coldwave               = extreme_weather$coldwave,
                                  wet_day                = extreme_weather$wet_day,
                                  heavy_rain_day         = extreme_weather$heavy_rain_day,
                                  very_heavy_rain_day    = extreme_weather$very_heavy_rain_day,
                                  ewdp                   = extreme_weather$ewdp,
                                  vwdp                   = extreme_weather$vwdp,
                                  extreme_wind_day       = extreme_weather$extreme_wind_day,
                                  extreme_wbt_day        = extreme_weather$extreme_wbt_day,
                                  extreme_wbgt_day       = extreme_weather$extreme_wbgt_day,
                                  extreme_at_day         = extreme_weather$extreme_at_day,
                                  extreme_wind_chill_day = extreme_weather$extreme_wind_chill_day),
                      cdate     = extreme_weather$date,                       # Climate date
                      bdate     = breeding_data$date,                         # Biological event date
                      baseline  = lm(bs ~ 1, 
                                     data = breeding_data),                   # Baseline model
                      cohort    = breeding_data$season,                       # Group by season
                      cinterval = "day",                                      # Daily resolution
                      range     = c(182, 0),                                  # Check windows from 1 October to 1 April
                      refday    = c(01, 04),                                  # Reference date: 1 April
                      type      = "absolute",                                 # Absolute window type
                      stat      = "sum",                                      # Sum values within each window
                      func      = "lin"                                       # For linear relationship
                      )

2.2 Binary above threshold values

# Run the sliding window analysis using binary event indicators
output2 <- slidingwin(xvar = list(warm_day_bi               = extreme_weather_binary$warm_day,
                                  warm_night_bi             = extreme_weather_binary$warm_night,
                                  heatwave_bi               = extreme_weather_binary$heatwave,
                                  cool_day_bi               = extreme_weather_binary$cool_day,
                                  cool_night_bi             = extreme_weather_binary$cool_night,
                                  coldwave_bi               = extreme_weather_binary$coldwave,
                                  wet_day_bi                = extreme_weather_binary$wet_day,
                                  heavy_rain_day_bi         = extreme_weather_binary$heavy_rain_day,
                                  very_heavy_rain_day_bi    = extreme_weather_binary$very_heavy_rain_day,
                                  ewdp_bi                   = extreme_weather_binary$ewdp,
                                  vwdp_bi                   = extreme_weather_binary$vwdp,
                                  extreme_wind_day_bi       = extreme_weather_binary$extreme_wind_day,
                                  extreme_wbt_day_bi        = extreme_weather_binary$extreme_wbt_day,
                                  extreme_wbgt_day_bi       = extreme_weather_binary$extreme_wbgt_day,
                                  extreme_at_day_bi         = extreme_weather_binary$extreme_at_day,
                                  extreme_wind_chill_day_bi = extreme_weather_binary$extreme_wind_chill_day),
                      cdate     = extreme_weather_binary$date,                # Climate date
                      bdate     = breeding_data$date,                         # Biological event date
                      baseline  = lm(bs ~ 1, 
                                     data = breeding_data),                   # Baseline model
                      cohort    = breeding_data$season,                       # Group by season
                      cinterval = "day",                                      # Daily resolution
                      range     = c(182, 0),                                  # Check windows from 1 October to 1 April
                      refday    = c(01, 04),                                  # Reference date: 1 April
                      type      = "absolute",                                 # Absolute window type
                      stat      = "sum",                                      # Sum values within each window
                      func      = "lin"                                       # For linear relationship
                      )

2.3 Merge the results

# Combine output from actual and binary sliding window analyses
output <- merge_results(output1, output2)


# View merged model combinations with calculated window duration
datatable(output$combos %>% 
          dplyr::mutate(WindowDuration = WindowOpen - WindowClose + 1),
          options = list(pageLength = 10, orderClasses = TRUE))

2.4 Check best model for each variable

Before running the randomisation process, we need to identify the best-performing model for each extreme weather variable. This ensures that we are testing the most likely biologically relevant window against random expectation.

What we are doing here: For each weather variable (e.g., heavy rain, wet days), we extract the model with:

The lowest AIC value, and

A window duration longer than 14 days, to focus on ecologically meaningful timeframes.

These best models represent the strongest climate–breeding success relationships, and will be used for the randomisation test to assess whether the relationship is likely to have occurred by chance.

2.4.1 Cool day

# Summary of the best model
summary(output[[20]]$BestModel)
## 
## Call:
## lm(formula = yvar ~ climate, data = modeldat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13998 -0.03867 -0.00180  0.04508  0.13995 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.35496    0.02192  16.196 4.54e-14 ***
## climate      0.03746    0.01013   3.697  0.00119 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07147 on 23 degrees of freedom
## Multiple R-squared:  0.3728, Adjusted R-squared:  0.3455 
## F-statistic: 13.67 on 1 and 23 DF,  p-value: 0.001189
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[20]]$Dataset)
## $`Median Window Open`
## [1] 125
## 
## $`Median Window Close`
## [1] 45
# Randomisation test to assess if the detected signal is likely by chance
cool_day_randwin <- randwin(repeats   = 10,
                            window    = "sliding",
                            xvar      = list(cool_day_bi = extreme_weather_binary$cool_day),
                            cdate     = extreme_weather_binary$date,
                            bdate     = breeding_data$date,
                            baseline  = lm(bs ~ 1,
                                           data = breeding_data),
                            cohort    = breeding_data$season,
                            cinterval = "day",
                            range     = c(182, 0),
                            refday    = c(01, 04),
                            type      = "absolute",
                            stat      = c("sum"),
                            func      = c("lin")
                            )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[20]]$Dataset,
                datasetrand = cool_day_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.7500067
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[20]]$Dataset,
                 datasetrand    = cool_day_randwin[[1]],
                 bestmodel      = output[[20]]$BestModel,
                 bestmodeldata  = output[[20]]$BestModelData,
                 arrow          = TRUE
                 )

2.4.2 EWDP***

# Summary of the best model 
summary(output[[26]]$BestModel)
## 
## Call:
## lm(formula = yvar ~ climate, data = modeldat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13786 -0.04715  0.01296  0.05097  0.11407 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.45876    0.01595  28.761  < 2e-16 ***
## climate     -0.10592    0.02302  -4.601 0.000126 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06512 on 23 degrees of freedom
## Multiple R-squared:  0.4792, Adjusted R-squared:  0.4566 
## F-statistic: 21.17 on 1 and 23 DF,  p-value: 0.0001259
# Calculate the median window
medwin(output[[26]]$Dataset)
## $`Median Window Open`
## [1] 158
## 
## $`Median Window Close`
## [1] 105
# Randomisation test to assess if the detected signal is likely by chance
ewdp_randwin <- randwin(repeats   = 10,
                        window    = "sliding",
                        xvar      = list(ewdp_bi = extreme_weather_binary$ewdp),
                        cdate     = extreme_weather_binary$date,
                        bdate     = breeding_data$date,
                        baseline  = lm(bs ~ 1, 
                                       data = breeding_data),
                        cohort    = breeding_data$season,
                        cinterval = "day",
                        range     = c(182, 0),
                        refday    = c(01, 04),
                        type      = "absolute",
                        stat      = "sum",
                        func      = "lin"
                        )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[26]]$Dataset,
                datasetrand = ewdp_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.006334838
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[26]]$Dataset,
                 datasetrand    = ewdp_randwin[[1]],
                 bestmodel      = output[[26]]$BestModel,
                 bestmodeldata  = output[[26]]$BestModelData,
                 arrow          = TRUE
                 )

# k-fold cross-validation allows to improve the accuracy of the R^2 estimate as R^2 estimates using slidingwin can be biased at low sample size and/or effect size
ewdp_k_fold <- slidingwin(k         = 10,
                          xvar      = list(ewdp_bi = extreme_weather_binary$ewdp),
                          cdate     = extreme_weather_binary$date,
                          bdate     = breeding_data$date,
                          baseline  = lm(bs ~ 1, 
                                         data = breeding_data),
                          cohort    = breeding_data$season,
                          cinterval = "day",
                          range     = c(182, 0),
                          refday    = c(01, 04),
                          type      = "absolute",
                          stat      = "sum",
                          func      = "lin"
                          )
# Summary of the best model from k-fold cross-validation
summary(ewdp_k_fold[[1]]$BestModel)
## 
## Call:
## lm(formula = yvar ~ climate, data = modeldat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13786 -0.04715  0.01296  0.05097  0.11407 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.45876    0.01595  28.761  < 2e-16 ***
## climate     -0.10592    0.02302  -4.601 0.000126 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06512 on 23 degrees of freedom
## Multiple R-squared:  0.4792, Adjusted R-squared:  0.4566 
## F-statistic: 21.17 on 1 and 23 DF,  p-value: 0.0001259

2.4.3 Extreme wind chill

# Summary of the best model
summary(output[[32]]$BestModel)
## 
## Call:
## lm(formula = yvar ~ climate, data = modeldat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.135171 -0.045282  0.004504  0.043761  0.124805 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.35015    0.02250   15.56 1.05e-13 ***
## climate      0.04140    0.01093    3.79 0.000947 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0708 on 23 degrees of freedom
## Multiple R-squared:  0.3844, Adjusted R-squared:  0.3576 
## F-statistic: 14.36 on 1 and 23 DF,  p-value: 0.0009474
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[32]]$Dataset)
## $`Median Window Open`
## [1] 134
## 
## $`Median Window Close`
## [1] 40
# Randomisation test to assess if the detected signal is likely by chance
extreme_wind_chill_randwin <- randwin(repeats   = 10,
                                      window    = "sliding",
                                      xvar      = list(extreme_wind_chill_day_bi = extreme_weather_binary$extreme_wind_chill_day),
                                      cdate     = extreme_weather_binary$date,
                                      bdate     = breeding_data$date,
                                      baseline  = lm(bs ~ 1, 
                                                     data = breeding_data),
                                      cohort    = breeding_data$season,
                                      cinterval = "day",
                                      range     = c(182, 0),
                                      refday    = c(01, 04),
                                      type      = "absolute",
                                      stat      = "sum",
                                      func      = "lin"
                                      )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[32]]$Dataset,
                datasetrand = extreme_wind_chill_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.4511766
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[32]]$Dataset,
                 datasetrand    = extreme_wind_chill_randwin[[1]],
                 bestmodel      = output[[32]]$BestModel,
                 bestmodeldata  = output[[32]]$BestModelData,
                 arrow          = TRUE
                 )

2.4.4 Heavy rain

# Summary of the best model 
summary(output[[24]]$BestModel)
## 
## Call:
## lm(formula = yvar ~ climate, data = modeldat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13304 -0.04315  0.00101  0.05163  0.16286 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.34802    0.02250  15.465  1.2e-13 ***
## climate      0.05894    0.01517   3.885 0.000748 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07012 on 23 degrees of freedom
## Multiple R-squared:  0.3962, Adjusted R-squared:   0.37 
## F-statistic: 15.09 on 1 and 23 DF,  p-value: 0.0007481
# Calculate the median window
medwin(output[[24]]$Dataset)
## $`Median Window Open`
## [1] 122
## 
## $`Median Window Close`
## [1] 46
# Randomisation test to assess if the detected signal is likely by chance
heavy_rain_randwin <- randwin(repeats   = 10,
                              window    = "sliding",
                              xvar      = list(heavy_rain_day_bi = extreme_weather_binary$heavy_rain_day),
                              cdate     = extreme_weather_binary$date,
                              bdate     = breeding_data$date,
                              baseline  = lm(bs ~ 1, 
                                             data = breeding_data),
                              cohort    = breeding_data$season,
                              cinterval = "day",
                              range     = c(182, 0),
                              refday    = c(01, 04),
                              type      = "absolute",
                              stat      = "sum",
                              func      = "lin"
                              )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[24]]$Dataset,
                datasetrand = heavy_rain_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.7309673
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[24]]$Dataset,
                 datasetrand    = heavy_rain_randwin[[1]],
                 bestmodel      = output[[24]]$BestModel,
                 bestmodeldata  = output[[24]]$BestModelData,
                 arrow          = TRUE
                 )

2.4.5 Very heavy rain

# Summary of the best model 
summary(output[[25]]$BestModel)
## 
## Call:
## lm(formula = yvar ~ climate, data = modeldat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.18516 -0.04576  0.01148  0.06678  0.12845 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.40014    0.01778  22.511   <2e-16 ***
## climate      0.10160    0.04444   2.286   0.0318 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08146 on 23 degrees of freedom
## Multiple R-squared:  0.1852, Adjusted R-squared:  0.1497 
## F-statistic: 5.227 on 1 and 23 DF,  p-value: 0.03178
# Calculate the median window
medwin(output[[25]]$Dataset)
## $`Median Window Open`
## [1] 129
## 
## $`Median Window Close`
## [1] 59
# Randomisation test to assess if the detected signal is likely by chance
very_heavy_rain_randwin <- randwin(repeats   = 10,
                                   window    = "sliding",
                                   xvar      = list(very_heavy_rain_day_bi = extreme_weather_binary$very_heavy_rain_day),
                                   cdate     = extreme_weather_binary$date,
                                   bdate     = breeding_data$date,
                                   baseline  = lm(bs ~ 1, 
                                                  data = breeding_data),
                                   cohort    = breeding_data$season,
                                   cinterval = "day",
                                   range     = c(182, 0),
                                   refday    = c(1, 4),
                                   type      = "absolute",
                                   stat      = "sum",
                                   func      = "lin"
                                   )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[25]]$Dataset,
                datasetrand = very_heavy_rain_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.80531
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[25]]$Dataset,
                 datasetrand    = very_heavy_rain_randwin[[1]],
                 bestmodel      = output[[25]]$BestModel,
                 bestmodeldata  = output[[25]]$BestModelData,
                 arrow          = TRUE
                 )

2.4.6 Warm day***

# Summary of the best model
summary(output[[17]]$BestModel)
## 
## Call:
## lm(formula = yvar ~ climate, data = modeldat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.168198 -0.021656  0.001045  0.032466  0.113392 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.531455   0.028535   18.62 2.27e-15 ***
## climate     -0.014828   0.003266   -4.54 0.000147 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06553 on 23 degrees of freedom
## Multiple R-squared:  0.4726, Adjusted R-squared:  0.4497 
## F-statistic: 20.61 on 1 and 23 DF,  p-value: 0.0001466
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[17]]$Dataset)
## $`Median Window Open`
## [1] 102
## 
## $`Median Window Close`
## [1] 18
# Randomisation test to assess if the detected signal is likely by chance
warm_day_randwin <- randwin(repeats   = 10,
                            window    = "sliding",
                            xvar      = list(warm_day_bi = extreme_weather_binary$warm_day),
                            cdate     = extreme_weather_binary$date,
                            bdate     = breeding_data$date,
                            baseline  = lm(bs ~ 1,
                                           data = breeding_data),
                            cohort    = breeding_data$season,
                            cinterval = "day",
                            range     = c(182, 0),
                            refday    = c(1, 4),
                            type      = "absolute",
                            stat      = c("sum"),
                            func      = c("lin")
                            )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[17]]$Dataset,
                datasetrand = warm_day_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.01505667
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[17]]$Dataset,
                 datasetrand    = warm_day_randwin[[1]],
                 bestmodel      = output[[17]]$BestModel,
                 bestmodeldata  = output[[17]]$BestModelData,
                 arrow          = TRUE
                 )

# k-fold cross-validation allows to improve the accuracy of the R^2 estimate as R^2 estimates using slidingwin can be biased at low sample size and/or effect size
warm_day_k_fold <- slidingwin(k         = 10,
                              xvar      = list(warm_day_bi = extreme_weather_binary$warm_day),
                              cdate     = extreme_weather_binary$date,
                              bdate     = breeding_data$date,
                              baseline  = lm(bs ~ 1, 
                                             data = breeding_data),
                              cohort    = breeding_data$season,
                              cinterval = "day",
                              range     = c(182, 0),
                              refday    = c(01, 04),
                              type      = "absolute",
                              stat      = "sum",
                              func      = "lin"
                              )
# Summary of the best model from k-fold cross-validation
summary(warm_day_k_fold[[1]]$BestModel)
## 
## Call:
## lm(formula = yvar ~ climate, data = modeldat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.180645 -0.026293  0.003438  0.030824  0.100957 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.529130   0.028448  18.600 2.34e-15 ***
## climate     -0.014834   0.003316  -4.473 0.000173 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06599 on 23 degrees of freedom
## Multiple R-squared:  0.4653, Adjusted R-squared:  0.442 
## F-statistic: 20.01 on 1 and 23 DF,  p-value: 0.0001729

2.4.7 Wet day

# Summary of the best model
summary(output[[23]]$BestModel)
## 
## Call:
## lm(formula = yvar ~ climate, data = modeldat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.136854 -0.048708  0.005442  0.040987  0.134690 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 0.195645   0.061388   3.187  0.00410 **
## climate     0.010413   0.002816   3.698  0.00119 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07146 on 23 degrees of freedom
## Multiple R-squared:  0.3728, Adjusted R-squared:  0.3455 
## F-statistic: 13.67 on 1 and 23 DF,  p-value: 0.001189
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[23]]$Dataset)
## $`Median Window Open`
## [1] 121
## 
## $`Median Window Close`
## [1] 39
# Randomisation test to assess if the detected signal is likely by chance
wet_day_randwin <- randwin(repeats   = 10,
                           window    = "sliding",
                           xvar      = list(wet_day_bi = extreme_weather_binary$wet_day),
                           cdate     = extreme_weather_binary$date,
                           bdate     = breeding_data$date,
                           baseline  = lm(bs ~ 1,
                                          data = breeding_data),
                           cohort    = breeding_data$season,
                           cinterval = "day",
                           range     = c(182, 0),
                           refday    = c(01, 04),
                           type      = "absolute",
                           stat      = c("sum"),
                           func      = c("lin")
                           )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[23]]$Dataset,
                datasetrand = wet_day_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.3638594
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[23]]$Dataset,
                 datasetrand    = wet_day_randwin[[1]],
                 bestmodel      = output[[23]]$BestModel,
                 bestmodeldata  = output[[23]]$BestModelData,
                 arrow          = TRUE
                 )

3 Colinearity

Check for colinearity between the climate signals.

# Add climate signals to the original breeding data
breeding_data <- breeding_data %>%
  mutate(
    ewdp_signal = output[[26]]$BestModelData$climate,
    warm_day_signal = output[[17]]$BestModelData$climate
  )


# Plot correlation matrix
corrplot(breeding_data %>%
         dplyr::select(ewdp_signal, warm_day_signal) %>%
         cor(use = "complete.obs"),
         method = "number",
         type = "upper",
         tl.col = "black",
         tl.srt = 45
         )

4 Full model

# Final model
model <- glm(bs ~ 1 + ewdp_signal + warm_day_signal,
             data = breeding_data)


# Summary of the final model
summary(model)
## 
## Call:
## glm(formula = bs ~ 1 + ewdp_signal + warm_day_signal, data = breeding_data)
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.517336   0.025495  20.292 9.84e-16 ***
## ewdp_signal     -0.068781   0.024369  -2.822  0.00992 ** 
## warm_day_signal -0.009463   0.003435  -2.755  0.01157 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.003296352)
## 
##     Null deviance: 0.18729  on 24  degrees of freedom
## Residual deviance: 0.07252  on 22  degrees of freedom
## AIC: -67.122
## 
## Number of Fisher Scoring iterations: 2

We evaluated multiple extreme weather indices as predictors of pup productivity. In the sliding window model, both ewdp_signal and warm_day_signal were statistically significant predictors (see model output above) and did not appear to be false positives. These predictors showed moderate collinearity (Pearson’s r = 0.55), which is below our pre-defined threshold (r < 0.7) for inclusion of correlated variables within the same model. Thus, both variables were retained in the final model.

5 Final model

# Final model
final_model <- glm(bs ~ 1 + ewdp_signal + warm_day_signal,
                   data = breeding_data)

# Summary of the final model
summary(final_model)
## 
## Call:
## glm(formula = bs ~ 1 + ewdp_signal + warm_day_signal, data = breeding_data)
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.517336   0.025495  20.292 9.84e-16 ***
## ewdp_signal     -0.068781   0.024369  -2.822  0.00992 ** 
## warm_day_signal -0.009463   0.003435  -2.755  0.01157 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.003296352)
## 
##     Null deviance: 0.18729  on 24  degrees of freedom
## Residual deviance: 0.07252  on 22  degrees of freedom
## AIC: -67.122
## 
## Number of Fisher Scoring iterations: 2

5.1 Model diagnostics

# Creates scaled residuals by simulating from the fitted model
simulationOutput <- simulateResiduals(fittedModel = final_model, plot = TRUE)

# Test for over/underdispersion
testDispersion(simulationOutput, plot = TRUE)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.92201, p-value = 0.872
## alternative hypothesis: two.sided

5.2 Visualise the signal

# Plot the fitted effect
effect_plot(final_model, pred = ewdp_signal, interval = TRUE, plot.points = TRUE,
            main.title = "Relationship between Breedin success and Extremely wet precipitation days",
            x.label = "No. of extremely wet precipitation days", 
            y.label = "Breeding success", 
            colors = c("#7B8FA1"),
            line.colors = c("#0D92F4"),
            line.thickness = 1,
            point.size = 2.5, 
            point.alpha = 0.5,
            rug = TRUE) +  
  drop_gridlines() + 
  theme_classic()

# Plot the fitted effect
effect_plot(final_model, pred = warm_day_signal, interval = TRUE, plot.points = TRUE,
            main.title = "Relationship between Breedin success and Warm day",
            x.label = "No. of warm days", 
            y.label = "Breeding success", 
            colors = c("#7B8FA1"),
            line.colors = c("#0D92F4"),
            line.thickness = 1,
            point.size = 2.5, 
            point.alpha = 0.5,
            rug = TRUE) +  
  drop_gridlines() + 
  theme_classic()

5.3 Save data

# Save the final model data with climate signals
write_xlsx(breeding_data, "Output_data/SHAL/SHAL_Albatross_signal.xlsx")


# Save best model dataset for significant climate signals
saveRDS(output[[26]]$Dataset, "Output_data/SHAL/Albatross_ewdp_dataset.rds")
saveRDS(output[[17]]$Dataset, "Output_data/SHAL/Albatross_warm_day_dataset.rds")

7 Structural equation model

# Standardise each numeric column to have mean zero and standard deviation of one
breeding_data_standardised <- breeding_data %>%
  mutate(across(where(is.numeric), ~ ( . - mean(.) ) / sd(.)))


# Extract individual regressions from SEM
m1 <- lm(warm_day_signal ~ season, data = breeding_data_standardised)
m2 <- lm(ewdp_signal ~ season, data = breeding_data_standardised)
m3 <- lm(bs ~ ewdp_signal + warm_day_signal + season, data = breeding_data_standardised)

# Summarise individual regressions
summary(m1)
## 
## Call:
## lm(formula = warm_day_signal ~ season, data = breeding_data_standardised)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.74578 -0.87532  0.09585  0.83111  1.66708 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.729e-15  1.972e-01   0.000    1.000
## season      2.622e-01  2.012e-01   1.303    0.205
## 
## Residual standard error: 0.9858 on 23 degrees of freedom
## Multiple R-squared:  0.06876,    Adjusted R-squared:  0.02827 
## F-statistic: 1.698 on 1 and 23 DF,  p-value: 0.2054
summary(m2)
## 
## Call:
## lm(formula = ewdp_signal ~ season, data = breeding_data_standardised)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0978 -0.6189 -0.4137  0.8053  2.6741 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.906e-15  1.973e-01   0.000    1.000
## season      2.601e-01  2.013e-01   1.292    0.209
## 
## Residual standard error: 0.9864 on 23 degrees of freedom
## Multiple R-squared:  0.06764,    Adjusted R-squared:  0.02711 
## F-statistic: 1.669 on 1 and 23 DF,  p-value: 0.2093
summary(m3)
## 
## Call:
## lm(formula = bs ~ ewdp_signal + warm_day_signal + season, data = breeding_data_standardised)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1397 -0.3863 -0.1516  0.3930  0.9804 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -3.572e-15  1.176e-01   0.000   1.0000  
## ewdp_signal     -3.991e-01  1.456e-01  -2.740   0.0123 *
## warm_day_signal -3.868e-01  1.457e-01  -2.655   0.0148 *
## season          -3.044e-01  1.257e-01  -2.421   0.0246 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5882 on 21 degrees of freedom
## Multiple R-squared:  0.6973, Adjusted R-squared:  0.6541 
## F-statistic: 16.13 on 3 and 21 DF,  p-value: 1.144e-05
# Check model assumptions for individual regressions
par(mfrow = c(2, 2))
plot(m1)

plot(m2)

plot(m3)

# Define the SEM model
sem_model <- '
  # Regress breeding success on climate signals and time
  bs ~ ewdp_signal + warm_day_signal + season
  
  # Regress climate signals on time
  warm_day_signal ~ season
  ewdp_signal ~ season
  
  # Correlation between climate signals
  ewdp_signal ~~ warm_day_signal

  # Define intercept for breeding success
  bs ~ 1
'

# Fit the SEM model with non-parametric bootstrapping with 1000 iterations
set.seed(666)
fit <- lavaan::sem(sem_model, data = breeding_data_standardised, se = "bootstrap", bootstrap = 1000)

# Summarise the model fit
summary(fit, fit.measures = TRUE, rsquare = TRUE)
## lavaan 0.6-20 ended normally after 9 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        12
## 
##   Number of observations                            25
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                                41.311
##   Degrees of freedom                                 6
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)                -84.234
##   Loglikelihood unrestricted model (H1)        -84.234
##                                                       
##   Akaike (AIC)                                 192.469
##   Bayesian (BIC)                               207.095
##   Sample-size adjusted Bayesian (SABIC)        169.882
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             1000
##   Number of successful bootstrap draws            1000
## 
## Regressions:
##                     Estimate  Std.Err  z-value  P(>|z|)
##   bs ~                                                 
##     ewdp_signal       -0.399    0.154   -2.585    0.010
##     warm_day_signl    -0.387    0.147   -2.640    0.008
##     season            -0.304    0.151   -2.009    0.045
##   warm_day_signal ~                                    
##     season             0.262    0.213    1.229    0.219
##   ewdp_signal ~                                        
##     season             0.260    0.172    1.511    0.131
## 
## Covariances:
##                      Estimate  Std.Err  z-value  P(>|z|)
##  .warm_day_signal ~~                                    
##    .ewdp_signal         0.466    0.161    2.889    0.004
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .bs               -0.000    0.119   -0.000    1.000
##    .warm_day_signl    0.000    0.195    0.000    1.000
##    .ewdp_signal       0.000    0.202    0.000    1.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .bs                0.291    0.064    4.525    0.000
##    .warm_day_signl    0.894    0.166    5.382    0.000
##    .ewdp_signal       0.895    0.275    3.252    0.001
## 
## R-Square:
##                    Estimate
##     bs                0.697
##     warm_day_signl    0.069
##     ewdp_signal       0.068
# Extract parameter estimates
params <- lavaan::parameterEstimates(fit)
relationships <- params[params$op == "~", ]

# Create a data frame with selected outputs
results <- data.frame(
  SEM_int = params$est[params$lhs == "bs" & params$op == "~1"], 
  SEM_beta_warm_day = params$est[params$lhs == "bs" & params$rhs == "warm_day_signal"], 
  SEM_beta_ewdp = params$est[params$lhs == "bs" & params$rhs == "ewdp_signal"], 
  SEM_beta_season = params$est[params$lhs == "bs" & params$rhs == "season"], 
  SEM_SE_warm_day = params$se[params$lhs == "bs" & params$rhs == "warm_day_signal"], 
  SEM_SE_ewdp = params$se[params$lhs == "bs" & params$rhs == "ewdp_signal"], 
  SEM_SE_season = params$se[params$lhs == "bs" & params$rhs == "season"],
  Yr_beta_warm_day = params$est[params$lhs == "warm_day_signal" & params$rhs == "season"], 
  Yr_beta_ewdp = params$est[params$lhs == "ewdp_signal" & params$rhs == "season"], 
  Yr_SE_warm_day = params$se[params$lhs == "warm_day_signal" & params$rhs == "season"],
  Yr_SE_ewdp = params$se[params$lhs == "ewdp_signal" & params$rhs == "season"]
) %>%
  mutate(
    EWDP_pathway = abs(SEM_beta_ewdp) * abs(Yr_beta_ewdp),
    warm_day_pathway = abs(SEM_beta_warm_day) * abs(Yr_beta_warm_day),
    Total_effect_season = abs(SEM_beta_season) + abs(EWDP_pathway) + abs(warm_day_pathway),
    change_due_to_EWDP_pathway = (abs(EWDP_pathway) / abs(Total_effect_season)) * 100,
    change_due_to_warm_day_pathway = (abs(warm_day_pathway) / abs(Total_effect_season)) * 100,
    total_change_due_to_EWE_pathways = change_due_to_EWDP_pathway + change_due_to_warm_day_pathway
  )

# Print the results
print(results)
##         SEM_int SEM_beta_warm_day SEM_beta_ewdp SEM_beta_season SEM_SE_warm_day
## 1 -3.451998e-15        -0.3868199    -0.3990807      -0.3043722       0.1465145
##   SEM_SE_ewdp SEM_SE_season Yr_beta_warm_day Yr_beta_ewdp Yr_SE_warm_day
## 1   0.1543841      0.151482         0.262215    0.2600824      0.2133068
##   Yr_SE_ewdp EWDP_pathway warm_day_pathway Total_effect_season
## 1  0.1720963    0.1037939          0.10143           0.5095961
##   change_due_to_EWDP_pathway change_due_to_warm_day_pathway
## 1                   20.36787                         19.904
##   total_change_due_to_EWE_pathways
## 1                         40.27187
# Format relationships for plotting
relationships <- relationships %>%
  mutate(
    color = case_when(
      as.numeric(est) > 0 & as.numeric(pvalue) < 0.05 ~ "#0079FF",  # Positive & significant = blue
      as.numeric(est) < 0 & as.numeric(pvalue) < 0.05 ~ "#FF2929",  # Negative & significant = red
      TRUE ~ "#B7B7B7"  # Non-significant = grey
    ),
    style = ifelse(as.numeric(pvalue) < 0.05, "solid", "dashed"),
    unit = case_when(
      lhs == "bs" & rhs == "season" ~ "/season",  
      lhs == "bs" & rhs == "warm_day_signal" ~ "/warm day",  
      lhs == "bs" & rhs == "ewdp_signal" ~ "/EWDP",  
      lhs == "warm_day_signal" ~ "no./season",  
      lhs == "ewdp_signal" ~ "no./season",
      TRUE ~ "unit"
    ),
    stars = case_when(
      pvalue < 0.001 ~ "***",
      pvalue < 0.01 ~ "**",
      pvalue < 0.05 ~ "*",
      TRUE ~ ""
    )
  )

# Extract R² values
rsq <- inspect(fit, "rsquare")

# Create node labels with R² values
node_labels <- c(
  paste0("Breeding success\nr² = ", round(rsq["bs"], 2)),
  paste0("Warm days\nr² = ", round(rsq["warm_day_signal"], 2)),
  paste0("EWDPs\nr² = ", round(rsq["ewdp_signal"], 2)),
  "Season"
)

# Construct edges for the SEM diagram
edges <- apply(relationships, 1, function(row) {
  paste0(
    "\"", row["rhs"], "\" -> \"", row["lhs"], "\" ",
    "[color=\"", row["color"], "\", style=", row["style"], 
    ", label=\"", round(as.numeric(row["est"]), 2), row["stars"], "\n(", row["unit"], ")\", fontsize=10]"
  )
})

# Assemble DiagrammeR graph
graph_code <- paste0(
  "digraph SEM {",
  "\nrankdir=LR;",
  "\nnode [shape=rectangle, style=filled, fillcolor=white];", 
  "\n", paste(edges, collapse = "\n"), 
  "\n", paste0(
    "\"bs\" [label=\"Breeding success\\nr² = ", round(rsq["bs"], 2), "\", fontsize=12];",
    "\"warm_day_signal\" [label=\"Warm days\\nr² = ", round(rsq["warm_day_signal"], 2), "\", fontsize=12];",
    "\"ewdp_signal\" [label=\"EWDP\\nr² = ", round(rsq["ewdp_signal"], 2), "\", fontsize=12];",
    "\"season\" [label=\"Season\", fontsize=12];"
  ),
  "\n}"
)

# Render the graph
DiagrammeR::grViz(graph_code)