# Load libraries for data processing, modelling, and visualisation
library(tidyverse)
library(openxlsx2)
library(MASS)
library(corrplot)
library(DT)
library(climwin)
library(jtools)
library(DHARMa)
library(lavaan)
library(DiagrammeR)
library(lmtest)
library(glmmTMB)
library(scales)
library(performance)

1 Load and Process Data

# Load daily extreme weather event (EWE) data
extreme_weather <- wb_to_df("Breeding_colony_ewes/Pedra_ewes.xlsx") %>%
                   dplyr::mutate(across(4:20, ~ ifelse(is.na(.), 0, .)))  # Replace NA with 0 for analysis


# Create binary version: 1 = event occurred, 0 = no event, NA = missing
extreme_weather_binary <- extreme_weather %>%
                          dplyr::mutate(across(4:20, ~ ifelse(!is.na(.) & . != 0, 1, ifelse(is.na(.), NA, 0))))


# Note:
# Missing values in extreme weather data are replaced with zero. This is critical because the slidingwin method ("method1" and "method2") internally calculates means when NA values are present, which is not suitable when assessing extreme values. We are specifically interested in whether an extreme event occurred, not in average conditions.
# A small number of missing values are present in the dataset, and replacing them with zero ensures consistency without introducing bias in this context.


# Load breeding data
breeding_data <- wb_to_df("Breeding_data/SHAL.xlsx", sheet = "Pedra_Branca") %>%
                 dplyr::filter(!is.na(chicks))  # Remove seasons without breeding success data


# Record sample size
sample_size <- nrow(breeding_data)


# Assess normality of breeding success
# If p > 0.05, the data does not significantly deviate from normality.
shapiro.test(breeding_data$chicks)
## 
##  Shapiro-Wilk normality test
## 
## data:  breeding_data$chicks
## W = 0.85126, p-value = 0.0009913
# Histogram with density curve
hist(breeding_data$chicks,
     main = "Histogram of Chick Count",
     xlab = "Chick Count",
     col = "#a6d6fa",
     border = "white",
     prob = TRUE  
)

# Overlay kernel density estimate
lines(density(breeding_data$chicks, na.rm = TRUE), col = "#0D92F4", lwd = 2)

# Q-Q plot
ggplot(breeding_data, aes(sample = chicks)) +
  stat_qq() +
  stat_qq_line(colour = "red") +
  labs(title = "Q-Q Plot of Chick count",
       x = "Theoretical Quantiles",
       y = "Sample Quantiles") +
  theme_classic()

# Calculate mean and variance
mean(breeding_data$chicks)
## [1] 56.60714
var(breeding_data$chicks)
## [1] 1645.507

The Shapiro–Wilk test indicated a significant deviation from normality (W = 0.85126, p = 0.0009913), providing sufficient evidence to reject the null hypothesis. Visual assessments, including the histogram and Q–Q plot, also suggest that the data do not closely follow a normal distribution.

Furthermore, the mean (56.60714) and variance (1645.507) differ substantially, indicating overdispersion and suggesting that a Poisson distribution—where the mean and variance are expected to be equal—is not appropriate for this dataset. Therefore, we will use a Negative Binomial distribution to model the data.

2 Sliding window analysis

2.1 Actual above threshold values

# Run the sliding window analysis using actual (non-binary) values
output1 <- slidingwin(xvar = list(warm_day                = extreme_weather$warm_day,
                                  warm_night              = extreme_weather$warm_night,
                                  heatwave                = extreme_weather$heatwave,
                                  cool_day                = extreme_weather$cool_day,
                                  cool_night              = extreme_weather$cool_night,
                                  coldwave                = extreme_weather$coldwave,
                                  wet_day                 = extreme_weather$wet_day,
                                  heavy_rain_day          = extreme_weather$heavy_rain_day,
                                  very_heavy_rain_day     = extreme_weather$very_heavy_rain_day,
                                  ewdp                    = extreme_weather$ewdp,
                                  vwdp                    = extreme_weather$vwdp,
                                  extreme_wind_day        = extreme_weather$extreme_wind_day,
                                  extreme_wbt_day         = extreme_weather$extreme_wbt_day,
                                  extreme_wbgt_day        = extreme_weather$extreme_wbgt_day,
                                  extreme_at_day          = extreme_weather$extreme_at_day,
                                  extreme_wind_chill_day  = extreme_weather$extreme_wind_chill_day,
                                  extreme_wave_energy_day = extreme_weather$extreme_wave_energy_day),
                      cdate     = extreme_weather$date,                       # Climate date
                      bdate     = breeding_data$date,                         # Biological event date
                      baseline  = glm.nb(chicks ~ 1,
                                         link = "log",
                                         data = breeding_data),               # Baseline model
                      cohort    = breeding_data$season,                       # Group by season
                      cinterval = "day",                                      # Daily resolution
                      range     = c(195, 0),                                  # Check windows from 1 October to 14 April
                      refday    = c(14, 04),                                  # Reference date: 14 April
                      type      = "absolute",                                 # Absolute window type
                      stat      = "sum",                                      # Sum values within each window
                      func      = "lin"                                       # For linear relationship
                      )

2.2 Binary above threshold values

# Run the sliding window analysis using binary event indicators
output2 <- slidingwin(xvar = list(warm_day_bi               = extreme_weather_binary$warm_day,
                                  warm_night_bi             = extreme_weather_binary$warm_night,
                                  heatwave_bi               = extreme_weather_binary$heatwave,
                                  cool_day_bi               = extreme_weather_binary$cool_day,
                                  cool_night_bi             = extreme_weather_binary$cool_night,
                                  coldwave_bi               = extreme_weather_binary$coldwave,
                                  wet_day_bi                = extreme_weather_binary$wet_day,
                                  heavy_rain_day_bi         = extreme_weather_binary$heavy_rain_day,
                                  very_heavy_rain_day_bi    = extreme_weather_binary$very_heavy_rain_day,
                                  ewdp_bi                   = extreme_weather_binary$ewdp,
                                  vwdp_bi                   = extreme_weather_binary$vwdp,
                                  extreme_wind_day_bi       = extreme_weather_binary$extreme_wind_day,
                                  extreme_wbt_day_bi        = extreme_weather_binary$extreme_wbt_day,
                                  extreme_wbgt_day_bi       = extreme_weather_binary$extreme_wbgt_day,
                                  extreme_at_day_bi         = extreme_weather_binary$extreme_at_day,
                                  extreme_wind_chill_day_bi = extreme_weather_binary$extreme_wind_chill_day,
                                  extreme_wavenergy_day_bi  = extreme_weather_binary$extreme_wave_energy_day),
                      cdate     = extreme_weather_binary$date,                # Climate date
                      bdate     = breeding_data$date,                         # Biological event date
                      baseline  = glm.nb(chicks ~ 1,
                                         link = "log",
                                         data = breeding_data),               # Baseline model
                      cohort    = breeding_data$season,                       # Group by season
                      cinterval = "day",                                      # Daily resolution
                      range     = c(195, 0),                                  # Check windows from 1 October to 14 April
                      refday    = c(14, 04),                                  # Reference date: 14 April
                      type      = "absolute",                                 # Absolute window type
                      stat      = "sum",                                      # Sum values within each window
                      func      = "lin"                                       # For linear relationship
                      )

2.3 Wave direction

# Calculate the circular mean of directional data (in degrees)
# This function returns the mean direction of angular values (e.g. wave or wind direction), accounting for circularity (i.e. wrap-around at 360°).
circ_mean <- function(x) {
  # Convert degrees to radians
  radians <- x * pi / 180
  
  # Compute mean sine and cosine
  mean_sin <- mean(sin(radians), na.rm = TRUE)
  mean_cos <- mean(cos(radians), na.rm = TRUE)
  
  # Calculate circular mean in radians and convert back to degrees
  mean_angle <- atan2(mean_sin, mean_cos) * 180 / pi
  
  # Ensure result is within 0–360 degrees
  if (mean_angle < 0) mean_angle + 360 else mean_angle
}


# Run the sliding window analysis with daily mean wave direction data
output3 <- slidingwin(xvar      = list(wave_direction = extreme_weather$wave_direction),
                      cdate     = extreme_weather$date,                       # Daily climate record dates
                      bdate     = breeding_data$date,                         # Biological event dates (pup count dates)
                      baseline  = glm.nb(chicks ~ 1,
                                         link = "log",
                                         data = breeding_data),               # Baseline model: Negative Binomial
                      cohort    = breeding_data$season,                       # Group by season
                      cinterval = "day",                                      # Daily resolution
                      range     = c(195, 0),                                  # Test all possible windows within the range
                      refday    = c(14, 04),                                  # Reference date: 14 April
                      type      = "absolute",                                 # Absolute to each biological event date
                      stat      = "circ_mean",                                # Circular mean of daily wave direction values over the window
                      func      = c("lin", "quad"),                           # Test linear and quadratic relationships
                      cmissing  = "method1"
                      )
                

# Examine the tested combinations
datatable(output3$combos %>% 
          dplyr::mutate(WindowDuration = WindowOpen - WindowClose + 1),
          options = list(pageLength = 10, orderClasses = TRUE)
          )

2.4 Merge the results

# Combine output from actual and binary sliding window analyses
output <- merge_results(output1, output2)


# View merged model combinations with calculated window duration
datatable(output$combos %>% 
          dplyr::mutate(WindowDuration = WindowOpen - WindowClose + 1),
          options = list(pageLength = 10, orderClasses = TRUE))

2.5 Check best model for each variable

Before running the randomisation process, we need to identify the best-performing model for each extreme weather variable. This ensures that we are testing the most likely biologically relevant window against random expectation.

What we are doing here: For each weather variable (e.g., heavy rain, wet days), we extract the model with:

The lowest AIC value, and

A window duration longer than 14 days, to focus on ecologically meaningful timeframes.

These best models represent the strongest climate–breeding success relationships, and will be used for the randomisation test to assess whether the relationship is likely to have occurred by chance.

2.5.1 Coldwave

# Summary of the best model
summary(output[[6]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 2.570112498, 
##     link = "log")
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.88604    0.13308  29.201   <2e-16 ***
## climate     -0.07355    0.03549  -2.072   0.0383 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.5701) family taken to be 1)
## 
##     Null deviance: 34.545  on 27  degrees of freedom
## Residual deviance: 29.546  on 26  degrees of freedom
## AIC: 276.2
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.570 
##           Std. Err.:  0.684 
## 
##  2 x log-likelihood:  -270.197
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[6]]$Dataset)
## $`Median Window Open`
## [1] 144
## 
## $`Median Window Close`
## [1] 60
# Randomisation test to assess if the detected signal is likely by chance
coldwave_randwin <- randwin(repeats   = 10,
                            window    = "sliding",
                            xvar      = list(coldwave = extreme_weather$coldwave),
                            cdate     = extreme_weather$date,
                            bdate     = breeding_data$date,
                            baseline  = glm.nb(chicks ~ 1,
                                               link = "log",
                                               data = breeding_data),
                            cohort    = breeding_data$season,
                            cinterval = "day",
                            range     = c(195, 0),
                            refday    = c(14, 04),
                            type      = "absolute",
                            stat      = c("sum"),
                            func      = c("lin")
                            )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[6]]$Dataset,
                datasetrand = coldwave_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.7886899
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[6]]$Dataset,
                 datasetrand    = coldwave_randwin[[1]],
                 bestmodel      = output[[6]]$BestModel,
                 bestmodeldata  = output[[6]]$BestModelData,
                 arrow          = TRUE
                 )

2.5.2 Cool day

# Summary of the best model
summary(output[[21]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 3.453505901, 
##     link = "log")
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.5509     0.1383  25.675  < 2e-16 ***
## climate       0.1673     0.0371   4.511 6.44e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.4535) family taken to be 1)
## 
##     Null deviance: 45.581  on 27  degrees of freedom
## Residual deviance: 28.925  on 26  degrees of freedom
## AIC: 267.49
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.454 
##           Std. Err.:  0.947 
## 
##  2 x log-likelihood:  -261.488
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[21]]$Dataset)
## $`Median Window Open`
## [1] 154
## 
## $`Median Window Close`
## [1] 61
# Randomisation test to assess if the detected signal is likely by chance
cool_day_randwin <- randwin(repeats   = 10,
                            window    = "sliding",
                            xvar      = list(cool_day_bi = extreme_weather_binary$cool_day),
                            cdate     = extreme_weather_binary$date,
                            bdate     = breeding_data$date,
                            baseline  = glm.nb(chicks ~ 1,
                                               link = "log",
                                               data = breeding_data),
                            cohort    = breeding_data$season,
                            cinterval = "day",
                            range     = c(195, 0),
                            refday    = c(14, 04),
                            type      = "absolute",
                            stat      = c("sum"),
                            func      = c("lin")
                            )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[21]]$Dataset,
                datasetrand = cool_day_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.1506704
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[21]]$Dataset,
                 datasetrand    = cool_day_randwin[[1]],
                 bestmodel      = output[[21]]$BestModel,
                 bestmodeldata  = output[[21]]$BestModelData,
                 arrow          = TRUE
                 )

2.5.3 Cool night

# Summary of the best model
summary(output[[22]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 3.716175883, 
##     link = "log")
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.46739    0.14659  23.654  < 2e-16 ***
## climate      0.23360    0.05135   4.549  5.4e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.7162) family taken to be 1)
## 
##     Null deviance: 48.788  on 27  degrees of freedom
## Residual deviance: 28.926  on 26  degrees of freedom
## AIC: 265.53
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.72 
##           Std. Err.:  1.03 
## 
##  2 x log-likelihood:  -259.529
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[22]]$Dataset)
## $`Median Window Open`
## [1] 161
## 
## $`Median Window Close`
## [1] 62
# Randomisation test to assess if the detected signal is likely by chance
cool_night_randwin <- randwin(repeats   = 10,
                              window    = "sliding",
                              xvar      = list(cool_night_bi = extreme_weather_binary$cool_night),
                              cdate     = extreme_weather_binary$date,
                              bdate     = breeding_data$date,
                              baseline  = glm.nb(chicks ~ 1,
                                                 link = "log",
                                                 data = breeding_data),
                              cohort    = breeding_data$season,
                              cinterval = "day",
                              range     = c(195, 0),
                              refday    = c(14, 04),
                              type      = "absolute",
                              stat      = c("sum"),
                              func      = c("lin")
                              )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[22]]$Dataset,
                datasetrand = cool_night_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.1548412
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[22]]$Dataset,
                 datasetrand    = cool_night_randwin[[1]],
                 bestmodel      = output[[22]]$BestModel,
                 bestmodeldata  = output[[22]]$BestModelData,
                 arrow          = TRUE
                 )

2.5.4 EWDP

# Summary of the best model 
summary(output[[27]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 2.833779462, 
##     link = "log")
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   4.2135     0.1389  30.337  < 2e-16 ***
## climate      -0.5050     0.1719  -2.938  0.00331 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.8338) family taken to be 1)
## 
##     Null deviance: 37.881  on 27  degrees of freedom
## Residual deviance: 29.349  on 26  degrees of freedom
## AIC: 273.29
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.834 
##           Std. Err.:  0.762 
## 
##  2 x log-likelihood:  -267.293
# Calculate the median window
medwin(output[[27]]$Dataset)
## $`Median Window Open`
## [1] 136
## 
## $`Median Window Close`
## [1] 58
# Randomisation test to assess if the detected signal is likely by chance
ewdp_randwin <- randwin(repeats   = 10,
                        window    = "sliding",
                        xvar      = list(ewdp_bi = extreme_weather_binary$ewdp),
                        cdate     = extreme_weather_binary$date,
                        bdate     = breeding_data$date,
                        baseline  = glm.nb(chicks ~ 1,
                                           link = "log",
                                           data = breeding_data),
                        cohort    = breeding_data$season,
                        cinterval = "day",
                        range     = c(195, 0),
                        refday    = c(14, 04),
                        type      = "absolute",
                        stat      = c("sum"),
                        func      = c("lin")
                        )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[27]]$Dataset,
                datasetrand = ewdp_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.806704
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[27]]$Dataset,
                 datasetrand    = ewdp_randwin[[1]],
                 bestmodel      = output[[27]]$BestModel,
                 bestmodeldata  = output[[27]]$BestModelData,
                 arrow          = TRUE
                 )

2.5.5 Extreme wave energy***

# Summary of the best model
summary(output[[17]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 3.790385845, 
##     link = "log")
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.8147433  0.2035736  23.651  < 2e-16 ***
## climate     -0.0006305  0.0001293  -4.878 1.07e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.7904) family taken to be 1)
## 
##     Null deviance: 49.687  on 27  degrees of freedom
## Residual deviance: 28.603  on 26  degrees of freedom
## AIC: 264.68
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.79 
##           Std. Err.:  1.04 
## 
##  2 x log-likelihood:  -258.681
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[17]]$Dataset)
## $`Median Window Open`
## [1] 159
## 
## $`Median Window Close`
## [1] 68
# Randomisation test to assess if the detected signal is likely by chance
extreme_wave_energy_randwin <- randwin(repeats   = 10,
                                       window    = "sliding",
                                       xvar      = list(extreme_wave_energy_day = extreme_weather$extreme_wave_energy_day),
                                       cdate     = extreme_weather$date,
                                       bdate     = breeding_data$date,
                                       baseline  = glm.nb(chicks ~ 1,
                                                          link = "log",
                                                          data = breeding_data),
                                       cohort    = breeding_data$season,
                                       cinterval = "day",
                                       range     = c(195, 0),
                                       refday    = c(14, 04),
                                       type      = "absolute",
                                       stat      = c("sum"),
                                       func      = c("lin")
                                       )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[17]]$Dataset,
                datasetrand = extreme_wave_energy_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.04190213
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[17]]$Dataset,
                 datasetrand    = extreme_wave_energy_randwin[[1]],
                 bestmodel      = output[[17]]$BestModel,
                 bestmodeldata  = output[[17]]$BestModelData,
                 arrow          = TRUE
                 )

# k-fold cross-validation allows to improve the accuracy of the R^2 estimate as R^2 estimates using slidingwin can be biased at low sample size and/or effect size
extreme_wave_energy_k_fold <- slidingwin(k       = 10,
                                         xvar      = list(extreme_wave_energy_day = extreme_weather$extreme_wave_energy_day),
                                         cdate     = extreme_weather$date,
                                         bdate     = breeding_data$date,
                                         baseline  = glm.nb(chicks ~ 1,
                                                            link = "log",
                                                            data = breeding_data),
                                         cohort    = breeding_data$season,
                                         cinterval = "day",
                                         range     = c(195, 0),
                                         refday    = c(14, 04),
                                         type      = "absolute",
                                         stat      = c("sum"),
                                         func      = c("lin")
                                         )
# Summary of the best model from k-fold cross-validation
summary(extreme_wave_energy_k_fold[[1]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 3.276021956, 
##     link = "log")
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.8786334  0.2442782  19.972  < 2e-16 ***
## climate     -0.0004803  0.0001154  -4.161 3.16e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.276) family taken to be 1)
## 
##     Null deviance: 43.396  on 27  degrees of freedom
## Residual deviance: 28.864  on 26  degrees of freedom
## AIC: 268.85
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.276 
##           Std. Err.:  0.889 
## 
##  2 x log-likelihood:  -262.849
# Calculate Nagelkerke's R² for the best model from k-fold cross-validation
r2(extreme_wave_energy_k_fold[[1]]$BestModel)
## # R2 for Generalized Linear Regression
##   Nagelkerke's R2: 0.514

2.5.6 Heatwave

# Summary of the best model
summary(output[[20]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 2.398613351, 
##     link = "log")
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   4.0884     0.1306   31.30   <2e-16 ***
## climate      -0.2955     0.1624   -1.82   0.0688 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.3986) family taken to be 1)
## 
##     Null deviance: 32.356  on 27  degrees of freedom
## Residual deviance: 29.609  on 26  degrees of freedom
## AIC: 278.19
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.399 
##           Std. Err.:  0.632 
## 
##  2 x log-likelihood:  -272.195

2.5.7 Very heavy rain

# Summary of the best model 
summary(output[[9]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 2.668867508, 
##     link = "log")
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.177757   0.135744  30.777  < 2e-16 ***
## climate     -0.018916   0.006902  -2.741  0.00613 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.6689) family taken to be 1)
## 
##     Null deviance: 35.799  on 27  degrees of freedom
## Residual deviance: 29.381  on 26  degrees of freedom
## AIC: 274.98
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.669 
##           Std. Err.:  0.711 
## 
##  2 x log-likelihood:  -268.984
# Calculate the median window
medwin(output[[9]]$Dataset)
## $`Median Window Open`
## [1] 137
## 
## $`Median Window Close`
## [1] 54
# Randomisation test to assess if the detected signal is likely by chance
very_heavy_rain_randwin <- randwin(repeats   = 10,
                                   window    = "sliding",
                                   xvar      = list(very_heavy_rain_day = extreme_weather$very_heavy_rain_day),
                                   cdate     = extreme_weather$date,
                                   bdate     = breeding_data$date,
                                   baseline  = glm.nb(chicks ~ 1,
                                                      link = "log",
                                                      data = breeding_data),
                                   cohort    = breeding_data$season,
                                   cinterval = "day",
                                   range     = c(195, 0),
                                   refday    = c(14, 04),
                                   type      = "absolute",
                                   stat      = c("sum"),
                                   func      = c("lin")
                                   )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[9]]$Dataset,
                datasetrand = very_heavy_rain_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.7416909
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[9]]$Dataset,
                 datasetrand    = very_heavy_rain_randwin[[1]],
                 bestmodel      = output[[9]]$BestModel,
                 bestmodeldata  = output[[9]]$BestModelData,
                 arrow          = TRUE
                 )

2.5.8 VWDP

# Summary of the best model
summary(output[[11]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 3.164242932, 
##     link = "log")
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.269514   0.134557  31.730  < 2e-16 ***
## climate     -0.017496   0.004571  -3.828 0.000129 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.1642) family taken to be 1)
## 
##     Null deviance: 42.011  on 27  degrees of freedom
## Residual deviance: 29.038  on 26  degrees of freedom
## AIC: 269.96
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.164 
##           Std. Err.:  0.858 
## 
##  2 x log-likelihood:  -263.965
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[11]]$Dataset)
## $`Median Window Open`
## [1] 137
## 
## $`Median Window Close`
## [1] 51
# Randomisation test to assess if the detected signal is likely by chance
vwdp_randwin <- randwin(repeats   = 10,
                        window    = "sliding",
                        xvar      = list(vwdp = extreme_weather$vwdp),
                        cdate     = extreme_weather$date,
                        bdate     = breeding_data$date,
                        baseline  = glm.nb(chicks ~ 1,
                                           link = "log",
                                           data = breeding_data),
                        cohort    = breeding_data$season,
                        cinterval = "day",
                        range     = c(195, 0),
                        refday    = c(14, 04),
                        type      = "absolute",
                        stat      = c("sum"),
                        func      = c("lin")
                        )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[11]]$Dataset,
                datasetrand = vwdp_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.5223841
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[11]]$Dataset,
                 datasetrand    = vwdp_randwin[[1]],
                 bestmodel      = output[[11]]$BestModel,
                 bestmodeldata  = output[[11]]$BestModelData,
                 arrow          = TRUE
                 )

2.5.9 Warm day

# Summary of the best model
summary(output[[18]]$BestModel)
## 
## Call:
## glm.nb(formula = yvar ~ climate, data = modeldat, init.theta = 2.821312081, 
##     link = "log")
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.47718    0.20245  22.114  < 2e-16 ***
## climate     -0.27287    0.09242  -2.953  0.00315 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.8213) family taken to be 1)
## 
##     Null deviance: 37.724  on 27  degrees of freedom
## Residual deviance: 29.332  on 26  degrees of freedom
## AIC: 273.4
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.821 
##           Std. Err.:  0.758 
## 
##  2 x log-likelihood:  -267.398
# Calculate the median window from models within 95% confidence interval of the best model
medwin(output[[18]]$Dataset)
## $`Median Window Open`
## [1] 135
## 
## $`Median Window Close`
## [1] 58
# Randomisation test to assess if the detected signal is likely by chance
warm_day_randwin <- randwin(repeats   = 10,
                            window    = "sliding",
                            xvar      = list(warm_day_bi = extreme_weather_binary$warm_day),
                            cdate     = extreme_weather_binary$date,
                            bdate     = breeding_data$date,
                            baseline  = glm.nb(chicks ~ 1,
                                               link = "log",
                                               data = breeding_data),
                            cohort    = breeding_data$season,
                            cinterval = "day",
                            range     = c(195, 0),
                            refday    = c(14, 04),
                            type      = "absolute",
                            stat      = c("sum"),
                            func      = c("lin")
                            )
# Calculate the p-value using Climwin Metric C
climwin::pvalue(dataset     = output[[18]]$Dataset,
                datasetrand = warm_day_randwin[[1]],
                metric      = "C",
                sample.size = sample_size
                )
## [1] 0.8849574
# Plot sliding window and randomisation result
climwin::plotall(dataset        = output[[18]]$Dataset,
                 datasetrand    = warm_day_randwin[[1]],
                 bestmodel      = output[[18]]$BestModel,
                 bestmodeldata  = output[[18]]$BestModelData,
                 arrow          = TRUE
                 )

3 Colinearity

Check for colinearity between the climate signals.

# Add climate signals to the original breeding data
breeding_data <- breeding_data %>%
  dplyr::mutate(
                extreme_wave_energy_signal = output[[17]]$BestModelData$climate,
               )

4 Final model

# Final model
final_model <- glm.nb(chicks ~ 1 + extreme_wave_energy_signal ,
                      link = "log",
                      data = breeding_data)


# Summary of the final model
summary(final_model)
## 
## Call:
## glm.nb(formula = chicks ~ 1 + extreme_wave_energy_signal, data = breeding_data, 
##     link = "log", init.theta = 3.790385764)
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 4.8147433  0.2035736  23.651  < 2e-16 ***
## extreme_wave_energy_signal -0.0006305  0.0001293  -4.878 1.07e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.7904) family taken to be 1)
## 
##     Null deviance: 49.687  on 27  degrees of freedom
## Residual deviance: 28.603  on 26  degrees of freedom
## AIC: 264.68
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.79 
##           Std. Err.:  1.04 
## 
##  2 x log-likelihood:  -258.681

4.1 Model diagnostics

# Creates scaled residuals by simulating from the fitted model
simulationOutput <- simulateResiduals(fittedModel = final_model, plot = TRUE)

# Test for over/underdispersion
testDispersion(simulationOutput, plot = TRUE)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1005, p-value = 0.584
## alternative hypothesis: two.sided

4.2 Visualise the signal

# Plot 1
effect_plot(final_model, pred = extreme_wave_energy_signal, interval = TRUE, plot.points = TRUE,
            main.title = "Relationship between Chick count and Extreme wave energy",
            x.label = "Sum of extreme wave energy (kW/m)", 
            y.label = "Chick count", 
            colors = c("#7B8FA1"),
            line.colors = c("#0D92F4"),
            line.thickness = 1,
            point.size = 2.5, 
            point.alpha = 0.5,
            rug = TRUE) +  
  drop_gridlines() + 
  theme_classic()

4.3 Save data

# Save the final model data with climate signals
write_xlsx(breeding_data, "Output_data/SHAL/SHAL_Pedra_relative_signal.xlsx")


# Save 95% confidence set for each climate signal
saveRDS(output[[17]]$Dataset, "Output_data/SHAL/Pedra_extreme_wave_energy_dataset.rds")

6 Structural equation model

# Standardise each numeric column to have mean zero and standard deviation of one
breeding_data_standardised <- breeding_data %>%
  mutate(across(where(is.numeric), ~ ( . - mean(.) ) / sd(.)))


# Extract individual regressions from SEM
m1 <- lm(extreme_wave_energy_signal ~ season, data = breeding_data_standardised)
m2 <- lm(chicks ~ extreme_wave_energy_signal + season, data = breeding_data_standardised)

# Summarise individual regressions
summary(m1)
## 
## Call:
## lm(formula = extreme_wave_energy_signal ~ season, data = breeding_data_standardised)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.19409 -0.61034  0.03468  0.32634  2.20912 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.373e-15  1.543e-01   0.000 1.000000    
## season       5.983e-01  1.571e-01   3.807 0.000771 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8166 on 26 degrees of freedom
## Multiple R-squared:  0.3579, Adjusted R-squared:  0.3332 
## F-statistic: 14.49 on 1 and 26 DF,  p-value: 0.0007715
summary(m2)
## 
## Call:
## lm(formula = chicks ~ extreme_wave_energy_signal + season, data = breeding_data_standardised)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0069 -0.4243  0.1475  0.3246  1.1189 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.045e-15  1.030e-01   0.000    1.000    
## extreme_wave_energy_signal -5.581e-02  1.309e-01  -0.426    0.674    
## season                     -8.168e-01  1.309e-01  -6.239 1.59e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5451 on 25 degrees of freedom
## Multiple R-squared:  0.7248, Adjusted R-squared:  0.7028 
## F-statistic: 32.93 on 2 and 25 DF,  p-value: 9.881e-08
# Check model assumptions for individual regressions
par(mfrow = c(2, 2))
plot(m1)

plot(m2)

# Define the SEM model
sem_model <- '
  # Regress breeding success on climate signals and time
  chicks ~ extreme_wave_energy_signal + season
  
  # Regress climate signals on time
  extreme_wave_energy_signal ~ season

  # Define intercept for breeding success
  chicks ~ 1
'

# Fit the SEM model with non-parametric bootstrapping with 1000 iterations
set.seed(666)
fit <- lavaan::sem(sem_model, data = breeding_data_standardised, se = "bootstrap", bootstrap = 1000)


# Summarise the model fit
summary(fit, fit.measures = TRUE, rsquare=TRUE)
## lavaan 0.6-20 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         7
## 
##   Number of observations                            28
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                                48.537
##   Degrees of freedom                                 3
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)                -54.174
##   Loglikelihood unrestricted model (H1)        -54.174
##                                                       
##   Akaike (AIC)                                 122.347
##   Bayesian (BIC)                               131.673
##   Sample-size adjusted Bayesian (SABIC)        109.909
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             1000
##   Number of successful bootstrap draws            1000
## 
## Regressions:
##                                Estimate  Std.Err  z-value  P(>|z|)
##   chicks ~                                                        
##     extrm_wv_nrgy_               -0.056    0.117   -0.478    0.633
##     season                       -0.817    0.135   -6.037    0.000
##   extreme_wave_energy_signal ~                                    
##     season                        0.598    0.174    3.432    0.001
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .chicks            0.000    0.106    0.000    1.000
##    .extrm_wv_nrgy_   -0.000    0.148   -0.000    1.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .chicks            0.265    0.061    4.338    0.000
##    .extrm_wv_nrgy_    0.619    0.178    3.475    0.001
## 
## R-Square:
##                    Estimate
##     chicks            0.725
##     extrm_wv_nrgy_    0.358
# Extract parameter estimates and standard errors using parameterEstimates()
params <- lavaan::parameterEstimates(fit)
relationships <- params[params$op == "~", ]


# Create a data frame with the desired results
results <- data.frame(
  SEM_int = params$est[params$lhs == "chicks" & params$op == "~1"], 
  SEM_beta_wave_energy = params$est[params$lhs == "chicks" & params$rhs == "extreme_wave_energy_signal"], 
  SEM_beta_season = params$est[params$lhs == "chicks" & params$rhs == "season"], 
  SEM_SE_wave_energy = params$se[params$lhs == "chicks" & params$rhs == "extreme_wave_energy_signal"], 
  SEM_SE_season = params$se[params$lhs == "chicks" & params$rhs == "season"],
  Yr_beta_wave_energy = params$est[params$lhs == "extreme_wave_energy_signal" & params$rhs == "season"], 
  Yr_SE_wave_energy = params$se[params$lhs == "extreme_wave_energy_signal" & params$rhs == "season"]
) %>%
  mutate(
    EWE_pathway = abs(SEM_beta_wave_energy) * abs(Yr_beta_wave_energy),
    Total_effect_season = abs(SEM_beta_season) + abs(EWE_pathway),
    change_due_to_EWE_pathway = (abs(EWE_pathway) / abs(Total_effect_season)) * 100
  )


# Print results
print(results)
##        SEM_int SEM_beta_wave_energy SEM_beta_season SEM_SE_wave_energy
## 1 2.952599e-15          -0.05580952      -0.8168107          0.1168243
##   SEM_SE_season Yr_beta_wave_energy Yr_SE_wave_energy EWE_pathway
## 1      0.135295           0.5982794         0.1743461  0.03338969
##   Total_effect_season change_due_to_EWE_pathway
## 1           0.8502004                  3.927273
# Add arrow colors, line styles, and significance stars based on significance and direction
relationships <- relationships %>%
  mutate(
    color = case_when(
      as.numeric(est) > 0 & as.numeric(pvalue) < 0.05 ~ "#0079FF",  # Positive and significant = blue
      as.numeric(est) < 0 & as.numeric(pvalue) < 0.05 ~ "#FF2929",   # Negative and significant = red
      TRUE ~ "#B7B7B7"  # Non-significant = grey
    ),
    style = ifelse(as.numeric(pvalue) < 0.05, "solid", "dashed"),  # Significant = solid, Non-significant = dashed
    unit = case_when(
      lhs == "chicks" & rhs == "season" ~ "no./season",  
      lhs == "chicks" & rhs == "extreme_wave_energy_signal" ~ "no./kW m⁻¹",  
      lhs == "extreme_wave_energy_signal" ~ "kW m⁻¹/season",  
      TRUE ~ "unit"  # Default unit for other relationships
    ),
    stars = case_when(  # Add stars based on significance level
      pvalue < 0.001 ~ "***",
      pvalue < 0.01 ~ "**",
      pvalue < 0.05 ~ "*",
      TRUE ~ ""
    )
  )


# Extract R² values
rsq <- lavaan::inspect(fit, "rsquare")
node_labels <- c(
  paste0("Chick count\nr² = ", round(rsq["chicks"], 2)),
  paste0("Extreme wave energy\nr² = ", round(rsq["extreme_wave_energy_signal"], 2)),
  "Season"
)

# Construct edges for the plot, adding significance stars in the label
edges <- apply(relationships, 1, function(row) {
  paste0(
    "\"", row["rhs"], "\" -> \"", row["lhs"], "\" ",  # Add double quotes to handle special characters in node names
    "[color=\"", row["color"], "\", style=", row["style"], 
    ", label=\"", round(as.numeric(row["est"]), 2), row["stars"], "\n(", row["unit"], ")\", fontsize=10]"
  )
})


# Generate DiagrammeR graph with rectangular nodes and custom labels
graph_code <- paste0(
  "digraph SEM {",
  "\nnode [shape=rectangle, style=filled, fillcolor=white];", 
  "\n", paste(edges, collapse = "\n"), 
  "\n", paste0(
    "\"chicks\" [label=\"Chick count\\nr² = ", round(rsq["chicks"], 2), "\", fontsize=12];",
    "\"extreme_wave_energy_signal\" [label=\"Extreme wave energy\\nr² = ", round(rsq["extreme_wave_energy_signal"], 2), "\", fontsize=12];",
    "\"season\" [label=\"Season\", fontsize=12];"
  ),
  "\n}"
)


# Render the DiagrammeR graph
DiagrammeR::grViz(graph_code)