## Script to produce figures and stats for Predator's Pick
## J Smith
## 16 Oct 2025

## Packages
library(tidyverse)
library(glmmTMB) # allows zero-inflated model
library(emmeans) # marginal means for pairwise comparisons
library(marginaleffects) # allows incorporation of the zero inflation
library(MASS) # for negative binomial

##### Figure 1 - Predation Rate (live and roe) #####
## Data comes from Exp 1 (live) and Exp 3 (roe)

# Load the Exp 1 (live urchin) data from the ICES rds
data <- readRDS("data_files/df1.RDS")

# Keep the consumption trials for the two urchin species, then label each
# lobster by its collection site (any other Origin is left as NA)
urchins <- data %>%
  filter(Treatment == "Consumption") %>%
  filter(Trial_Species %in% c("Centro", "Helio")) %>%
  mutate(
    Lobster_fam = case_when(
      Origin == "Elephant Rock" ~ "Familiar",
      Origin == "Crayfish Point" ~ "Naïve"
    )
  )

#### BOX PLOT (Fig 1A & B) ####
# Observed predation rate by lobster size class, boxes filled by urchin
# species, one panel per lobster familiarity group.
# NOTE(review): the x-axis labels are unnamed, so they are applied to the
# Size_Class levels by position - confirm the level order is
# Large / Medium / Small.
ggplot(urchins, aes(x = Size_Class, y = Rate_perglob, fill = Trial_Species)) +
  geom_boxplot() +
  facet_wrap(~Lobster_fam, ncol = 1) +
  scale_x_discrete(labels = c("Large", "Medium", "Small")) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  labs(x = "Lobster size class", y = "Predation rate (g/day/glob)") +
  theme_bw(base_size = 14)

# ggsave("plots/Fig1/pred_per_day_glob_box.jpg", height = 6, width = 6)

## STATS
# Zero-inflated Gamma model with a log link, fitted with glmmTMB: the
# zero-inflation part accounts for the many zeros and the Gamma part for the
# positive skew.
# Response: gram of urchin per gram of lobster.
mod_ices_glob <- glmmTMB(
  Rate_perglob ~ Trial_Species + Size_Class + Lobster_fam +
    Trial_Species:Size_Class +
    Trial_Species:Lobster_fam,
  ziformula = ~ Trial_Species + Size_Class + Lobster_fam,
  family = ziGamma(link = "log"),
  data = urchins
)

summary(mod_ices_glob)

# Predict from the model
# Fitted values for the observed rows, on the response scale.
# NOTE(review): the original note says the warning raised here relates to
# random (non-fixed) factors and can be ignored. The model formula contains
# no random effects, so confirm what the warning actually refers to.
preds <- predictions(mod_ices_glob, newdata = urchins, type = "response")

# Average predictions over the three design variables
avg_preds <- avg_predictions(
  mod_ices_glob,
  variables = c("Trial_Species", "Size_Class", "Lobster_fam")
)

# Bars: average prediction. Blue whiskers: conf.low / conf.high as returned by
# avg_predictions(). Black whiskers: estimate +/- one standard error.
# The y-axis label uses the units of the modelled response (Rate_perglob), the
# same units as the box plot of the raw data; it previously read "urchins/day".
# NOTE(review): x-axis labels are applied to the Size_Class levels by position.
as.data.frame(avg_preds) %>%
  ggplot(aes(x = Size_Class, y = estimate, fill = Trial_Species)) +
  geom_col(position = "dodge2") +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0.2, colour = "skyblue2", position = position_dodge(.9)
  ) +
  geom_errorbar(
    aes(ymin = estimate - std.error, ymax = estimate + std.error),
    width = 0.3, colour = "black", linewidth = 0.7,
    position = position_dodge(.9)
  ) +
  facet_wrap(~Lobster_fam, ncol = 1) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  scale_x_discrete(labels = c("Large", "Medium", "Small")) +
  ylab("Modelled predation rate (g/day/glob)") +
  xlab("Lobster size class") +
  theme_bw(base_size = 14)

# Estimated marginal means for lobster size class on the conditional model,
# reported on the response scale
emm_size <- emmeans(mod_ices_glob, specs = ~Size_Class, type = "response")

# All pairwise size-class comparisons
# (these are the stats reported for fig 1)
pairs(emm_size)
plot(emm_size, comparisons = TRUE)

#######--------########
# Data from NN study (Exp3)
roedf <- readRDS("data_files/df2.rds")

#### BAR PLOT (Fig 1C) ####
# Per species and lobster size class: number of rows (n), sum of Eaten, and
# the resulting percentage.
# NOTE(review): sum(Eaten) assumes Eaten is coded 0/1 or TRUE/FALSE - confirm.
# .groups = "drop" returns an ungrouped table, so no grouping by Species is
# left behind for later steps and the summarise() grouping message is avoided.
summary_roedf <- roedf %>%
  group_by(Species, Size) %>%
  summarise(
    n = n(),
    eaten = sum(Eaten),
    perc = eaten / n * 100,
    .groups = "drop"
  ) %>%
  mutate(Lobster_fam = "Naïve")

# Percentage of roe eaten by lobster size class, bars dodged by species.
# NOTE(review): the fill colours/labels and the x-axis labels are unnamed, so
# they are matched to the Species and Size levels by position - confirm the
# level order.
ggplot(summary_roedf, aes(x = Size, y = perc, fill = Species)) +
  geom_col(position = "dodge") +
  facet_wrap(~Lobster_fam, ncol = 1) +
  scale_x_discrete(labels = c("Large", "Medium", "Small")) +
  scale_fill_manual(
    values = c("mediumpurple2", "forestgreen"),
    labels = c("Centro", "Helio")
  ) +
  labs(
    # title = "Percentage of roe consumed (n = 29 lobsters)",
    x = "Lobster size class",
    y = "Roe consumption (% of lobsters)"
  ) +
  theme_bw(base_size = 14)

#ggsave("plots/Fig1/roe_consumed_origin.jpg", height = 3.5, width = 5.5)





#######--------########
##### Figure 2 - Finescale behaviour comparisons (live and roe) #####

# Behaviour records, split on the Interaction code: rows coded "I" go to
# `interactions`, rows coded "P" go to `predations`
df <- readRDS("data_files/df3.rds")
interactions <- filter(df, Interaction == "I")
predations <- filter(df, Interaction == "P")


#### Fig 2A - pc eaten ####
totals <- readRDS("data_files/df4.rds")

# Percentage consumed per species, one panel per experiment;
# rows with a missing Species are dropped first
ggplot(
  filter(totals, !is.na(Species)),
  aes(x = Species, y = pc_eaten, fill = Species)
) +
  geom_col() +
  facet_wrap(~Exp) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  # labs(title = "Urchins/roe consumed")+
  labs(y = "% consumed") +
  theme_bw(base_size = 14)

#ggsave("plots/Fig2_panel/pc_eaten_combined.jpg", height = 4, width = 5)

#### Fig 2B - Duration of interactions (not inc. the predation) ####

# Gamma GLM with a log link: we are dealing with a small sample size and
# skewed data (both violate ANOVA assumptions)
mod1 <- glm(
  duration_min ~ Exp * Species,
  family = Gamma(link = "log"),
  data = interactions
)
summary(mod1)

# Estimated marginal means for every Exp x Species cell
emm <- emmeans(mod1, ~ Exp * Species)
pairs(emm)                       # all pairwise comparisons
summary(emm, type = "response")  # back-transformed means (in minutes)

# Bonferroni-adjusted pairwise contrasts (other adjustments can be chosen)
contrast_results <- contrast(emm, method = "pairwise", adjust = "bonferroni")
summary(contrast_results)

# Turn into a plain data.frame (back-transformed)
emm_df <- as.data.frame(summary(emm, type = "response"))

# Bars: back-transformed means. Blue whiskers: lower.CL / upper.CL.
# Black whiskers: response +/- SE.
# The axis label previously read "Interation"; the spelling is corrected here.
emm_df %>%
  ggplot(aes(x = Species, y = response, fill = Species)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    width = 0.2, colour = "skyblue2"
  ) +
  geom_errorbar(
    aes(ymin = response - SE, ymax = response + SE),
    width = 0.3, colour = "black", linewidth = 0.7
  ) +
  facet_wrap(~Exp) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  #  labs(title = "Average interaction duration")+
  ylab("Interaction duration (mins)") +
  theme_bw(base_size = 14)

# ggsave("plots/Fig2_panel/2B_glm.jpg", height = 4, width = 5)

#### Fig 2C - Interactions count ####

df_count <- readRDS("data_files/df5.rds")

# Negative binomial GLM (MASS::glm.nb) of the interaction counts
mod2 <- glm.nb(n ~ Exp * Species, data = df_count)
summary(mod2)

# Estimated marginal means for every Exp x Species cell
emm2 <- emmeans(mod2, ~ Exp * Species)
pairs(emm2)                       # all pairwise comparisons
summary(emm2, type = "response")  # back-transformed means (counts)

# Bonferroni-adjusted pairwise contrasts (other adjustments can be chosen)
contrast_results <- contrast(emm2, adjust = "bonferroni", method = "pairwise")
summary(contrast_results)

# Plain data.frame of the back-transformed means
emm_df2 <- emm2 %>%
  summary(type = "response") %>%
  as.data.frame()

# Bars: back-transformed means. Blue whiskers: asymp.LCL / asymp.UCL.
# Black whiskers: response +/- SE.
ggplot(emm_df2, aes(x = Species, y = response, fill = Species)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    width = 0.2, colour = "skyblue2"
  ) +
  geom_errorbar(
    aes(ymin = response - SE, ymax = response + SE),
    width = 0.3, colour = "black", linewidth = 0.7
  ) +
  facet_wrap(~Exp) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  #  labs(title = "Interactions before predation")+
  labs(y = "Interactions (n)") +
  theme_bw(base_size = 14)

# ggsave("plots/Fig2_panel/2C_glm.jpg", height = 4, width = 5)

#### Fig 2D - Interactions per hour until a predation ####

# Time to predation for each predation record, with the trial identifiers
time_to <- dplyr::select(predations, Exp, Tank, Night, ID, time_to_pred)

# Attach time_to_pred to the interaction counts, remove the trials where two
# urchins were eaten, then convert to interactions per hour
# (n / time_to_pred * 60, i.e. time_to_pred is treated as minutes).
# NOTE(review): left_join() has no `by`, so it joins on every column the two
# tables share, and slice() removes rows by position. Both depend on the exact
# columns and row order of df_count - confirm rows 2, 3, 15, 16, 22 and 23 are
# still the double-predation trials whenever the input data change.
h <- df_count %>%
  left_join(time_to) %>%
  slice(-c(2, 3, 15, 16, 22, 23)) %>%
  mutate(int_hour = n / time_to_pred * 60)

# Gamma GLM with a log link: we are dealing with a small sample size and
# skewed data (both violate ANOVA assumptions)

mod3 <- glm(
  int_hour ~ Exp * Species,
  data = h,
  family = Gamma(link = "log")
)
summary(mod3)

# Estimated marginal means for every Exp x Species cell
emm3 <- emmeans(mod3, ~ Exp * Species)
pairs(emm3)                       # all pairwise comparisons
summary(emm3, type = "response")  # back-transformed means (interactions / hr)


# Bonferroni-adjusted pairwise contrasts (other adjustments can be chosen)
contrast_results <- contrast(emm3, adjust = "bonferroni", method = "pairwise")
summary(contrast_results)

# Plain data.frame of the back-transformed means
emm_df3 <- emm3 %>%
  summary(type = "response") %>%
  as.data.frame()

# Bars: back-transformed means. Blue whiskers: lower.CL / upper.CL.
# Black whiskers: response +/- SE.
ggplot(emm_df3, aes(x = Species, y = response, fill = Species)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    width = 0.2, colour = "skyblue2"
  ) +
  geom_errorbar(
    aes(ymin = response - SE, ymax = response + SE),
    width = 0.3, colour = "black", linewidth = 0.7
  ) +
  facet_wrap(~Exp) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  #  labs(title = "Interactions per hour")+
  labs(y = "Interactions (n / hr)") +
  theme_bw(base_size = 14)

# ggsave("plots/Fig2_panel/2D_glm.jpg", height = 4, width = 5)

#### Fig 2E - Time to predation ####

# Gamma GLM with a log link: we are dealing with a small sample size and
# skewed data (both violate ANOVA assumptions)

mod4 <- glm(
  time_to_pred ~ Exp * Species,
  data = predations,
  family = Gamma(link = "log")
)
summary(mod4)

# Estimated marginal means for every Exp x Species cell
emm4 <- emmeans(mod4, ~ Exp * Species)
pairs(emm4)                       # all pairwise comparisons
summary(emm4, type = "response")  # back-transformed means (in minutes)

# Bonferroni-adjusted pairwise contrasts (other adjustments can be chosen)
contrast_results <- contrast(emm4, adjust = "bonferroni", method = "pairwise")
summary(contrast_results)

# Plain data.frame of the back-transformed means
emm_df4 <- emm4 %>%
  summary(type = "response") %>%
  as.data.frame()

# Bars: back-transformed means. Blue whiskers: lower.CL / upper.CL.
# Black whiskers: response +/- SE.
ggplot(emm_df4, aes(x = Species, y = response, fill = Species)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    width = 0.2, colour = "skyblue2"
  ) +
  geom_errorbar(
    aes(ymin = response - SE, ymax = response + SE),
    width = 0.3, colour = "black", linewidth = 0.7
  ) +
  facet_wrap(~Exp) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  #  labs(title = "Time to pred")+
  labs(y = "Time until consumption (mins)") +
  theme_bw(base_size = 14)

#ggsave("plots/Fig2_panel/2E_glm.jpg", height = 4, width = 5)






#### Fig 2F - Handling time (duration of the predation event) ####

# Gamma GLM with a log link: we are dealing with a small sample size and
# skewed data (both violate ANOVA assumptions)

mod5 <- glm(
  duration_min ~ Exp * Species,
  data = predations,
  family = Gamma(link = "log")
)
summary(mod5)

# Estimated marginal means for every Exp x Species cell
emm5 <- emmeans(mod5, ~ Exp * Species)
pairs(emm5)                       # all pairwise comparisons
summary(emm5, type = "response")  # back-transformed means (in minutes)

# Bonferroni-adjusted pairwise contrasts (other adjustments can be chosen)
contrast_results <- contrast(emm5, adjust = "bonferroni", method = "pairwise")
summary(contrast_results)

# Plain data.frame of the back-transformed means
emm_df5 <- emm5 %>%
  summary(type = "response") %>%
  as.data.frame()


# Bars: back-transformed means. Blue whiskers: lower.CL / upper.CL.
# Black whiskers: response +/- SE.
ggplot(emm_df5, aes(x = Species, y = response, fill = Species)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    width = 0.2, colour = "skyblue2"
  ) +
  geom_errorbar(
    aes(ymin = response - SE, ymax = response + SE),
    width = 0.3, colour = "black", linewidth = 0.7
  ) +
  facet_wrap(~Exp) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  #  labs(title = "Handling time")+
  labs(y = "Handling time (min)") +
  theme_bw(base_size = 14)

#ggsave("plots/Fig2_panel/2F_glm.jpg", height = 4, width = 5)


#######--------########
##### Figure 3 - handling time and size classes #####

#### Fig 3A - Handling time ####
h_time <- readRDS("data_files/df6.rds")

# Negative binomial GLM of handling time on species, urchin test diameter (TD)
# and their interaction.
# NOTE(review): duration_min is modelled with Gamma GLMs elsewhere in this
# script - confirm that a count model is intended here.
nb_model <- glm.nb(
  duration_min ~ Species * TD,
  data = h_time
)

summary(nb_model)

# Prediction grid: 100 evenly spaced TD values across the observed range,
# crossed with every species
newdata <- expand.grid(
  TD = seq(min(h_time$TD), max(h_time$TD), length.out = 100),
  Species = unique(h_time$Species)
)

# Predictions and standard errors on the link scale
preds_ci <- predict(nb_model, newdata, type = "link", se.fit = TRUE)

# Store the link-scale fit and SE, build the 95% interval (fit +/- 1.96 * SE)
# on the link scale, then exponentiate the interval and the fit back to the
# response scale
newdata <- newdata %>%
  mutate(
    fit = preds_ci$fit,
    se = preds_ci$se.fit,
    ci_lower = exp(fit - 1.96 * se),
    ci_upper = exp(fit + 1.96 * se),
    predicts = exp(fit)
  )

# Raw handling times (points) with the fitted curve per species (lines) and
# its 95% interval (ribbons); the view is restricted to 0-150 on the y-axis
# with coord_cartesian(), which zooms without dropping data
ggplot() +
  geom_point(
    data = h_time,
    aes(x = TD, y = duration_min, colour = Species),
    size = 4, alpha = 0.5
  ) +
  geom_line(
    data = newdata,
    aes(x = TD, y = predicts, colour = Species),
    linewidth = 1.2
  ) +
  geom_ribbon(
    data = newdata,
    aes(x = TD, ymin = ci_lower, ymax = ci_upper, fill = Species),
    alpha = 0.09, colour = NA
  ) +
  scale_colour_manual(values = c("Centro" = "purple", "Helio" = "darkgreen")) +
  scale_fill_manual(values = c("Centro" = "purple", "Helio" = "darkgreen")) +
  labs(
    # title = "Negative Binomial Fit with 95% Confidence Intervals",
    x = "TD",
    y = "Duration (min)"
  ) +
  coord_cartesian(ylim = c(0, 150)) +
  theme_bw(base_size = 14)

#ggsave("plots/Fig3/Fig_3handling_time_TD_species_negbin.png", width = 5, height = 4)

# At which TD does the lower CI of Centro part from the upper CI of Helio?
# Species contrasts are evaluated at every TD from 10 to 60 in steps of 1.
em <- emmeans(
  nb_model,
  ~ Species | TD,
  at = list(TD = seq(10, 60, by = 1))
)
pairs(em)
# Recorded result at TD = 35:
# contrast       estimate    SE  df z.ratio p.value
# Centro - Helio  0.37663 0.191 Inf   1.971  0.0487
# At TD smaller than 35, Centro and Helio handling times are not
# significantly different













#### Fig 3B & 3C - Compare behaviour by urchin size ####

# Fig 3B - Split up handling time in live urchin data by large and small
# urchins: TD above 34 is "Large urchin", everything else is "Small urchin".
# NOTE(review): the surrounding notes give the threshold as TD < 35, which
# only matches this cut-off if TD is recorded in whole numbers - confirm.
# NOTE(review): a missing TD is also classed as "Small urchin" - confirm that
# is intended.

predations_live <- predations %>%
  filter(Exp == "Live") %>%
  mutate(
    Urchin_size = if_else(
      TD > 34,
      "Large urchin",
      "Small urchin",
      missing = "Small urchin"
    )
  )

# Negative binomial GLM of handling time on species and urchin size class
mod5_size <- glm.nb(
  duration_min ~ Species * Urchin_size,
  data = predations_live
)
summary(mod5_size)

# Estimated marginal means for every Species x Urchin_size cell
emm5_size <- emmeans(mod5_size, ~ Species * Urchin_size)
pairs(emm5_size)                       # all pairwise comparisons
summary(emm5_size, type = "response")  # back-transformed means (in minutes)

# Bonferroni-adjusted pairwise contrasts (other adjustments can be chosen)
contrast_results <- contrast(
  emm5_size,
  adjust = "bonferroni",
  method = "pairwise"
)
summary(contrast_results)

# Plain data.frame of the back-transformed means
emm_df5_size <- emm5_size %>%
  summary(type = "response") %>%
  as.data.frame()

# Bars: back-transformed means. Blue whiskers: asymp.LCL / asymp.UCL.
# Black whiskers: response +/- SE.
ggplot(emm_df5_size, aes(x = Species, y = response, fill = Species)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    width = 0.2, colour = "skyblue2"
  ) +
  geom_errorbar(
    aes(ymin = response - SE, ymax = response + SE),
    width = 0.3, colour = "black", linewidth = 0.7
  ) +
  facet_wrap(~Urchin_size) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  #  labs(title = "Handling time (live)")+
  labs(y = "Handling time (mins)") +
  theme_bw(base_size = 14)

#ggsave("plots/Fig3/3B_glm.jpg", height = 4, width = 5)

# Fig 3C - Split up number of interactions

# Live-experiment predations with the urchin size class
# (TD above 34 is "Large urchin", everything else - including a missing TD -
# is "Small urchin") and the trial identifiers needed for the join
time_to_live <- predations %>%
  filter(Exp == "Live") %>%
  mutate(
    Urchin_size = if_else(
      TD > 34,
      "Large urchin",
      "Small urchin",
      missing = "Small urchin"
    )
  ) %>%
  dplyr::select(Tank, Night, time_to_pred, Urchin_size)

# Attach the interaction counts, remove the trials where two urchins were
# eaten, then convert to interactions per hour (n / time_to_pred * 60).
# NOTE(review): the row positions dropped here are the same ones used when
# building `h`, but this table is built differently (live predations on the
# left, df_count on the right) - confirm rows 2, 3, 15, 16, 22 and 23 are the
# double-predation trials in THIS table.
# NOTE(review): left_join() has no `by`, and Exp and ID were not kept above,
# so the join presumably matches on Tank and Night only - confirm this cannot
# pull in df_count rows from the other experiment.
h_live <- time_to_live %>%
  left_join(df_count) %>%
  slice(-c(2, 3, 15, 16, 22, 23)) %>%
  mutate(int_hour = n / time_to_pred * 60)

# Gamma GLM (log link) of interactions per hour on species and urchin size
mod3_size <- glm(
  int_hour ~ Species * Urchin_size,
  data = h_live,
  family = Gamma(link = "log")
)
summary(mod3_size)

# Estimated marginal means for every Species x Urchin_size cell
emm3_size <- emmeans(mod3_size, ~ Species * Urchin_size)
pairs(emm3_size)                       # all pairwise comparisons
summary(emm3_size, type = "response")  # back-transformed means (n / hr)

# Bonferroni-adjusted pairwise contrasts (other adjustments can be chosen)
contrast_results <- contrast(
  emm3_size,
  adjust = "bonferroni",
  method = "pairwise"
)
summary(contrast_results)

# Plain data.frame of the back-transformed means
emm_df3_size <- emm3_size %>%
  summary(type = "response") %>%
  as.data.frame()

# Bars: back-transformed means. Blue whiskers: lower.CL / upper.CL.
# Black whiskers: response +/- SE.
ggplot(emm_df3_size, aes(x = Species, y = response, fill = Species)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    width = 0.2, colour = "skyblue2"
  ) +
  geom_errorbar(
    aes(ymin = response - SE, ymax = response + SE),
    width = 0.3, colour = "black", linewidth = 0.7
  ) +
  facet_wrap(~Urchin_size) +
  scale_fill_manual(
    values = c("Centro" = "mediumpurple2", "Helio" = "forestgreen")
  ) +
  #  labs(title = "Interactions per hour before predation")+
  labs(y = "Interactions (n / hr)") +
  theme_bw(base_size = 14)

#ggsave("plots/Fig3/3C_glm.jpg", height = 4, width = 5)





#######--------########

##### Figure 4 - combining predations from all experiments #####

fig4_df <- readRDS("data_files/df7.rds")

# Model the eaten urchins by size of urchin and lobster:
# keep eaten urchins only, and remove trimmed urchins from the model
eaten <- filter(fig4_df, Eaten == "Yes", Trimmed == "UT")

# Exponential fit TD = a * exp(b * Carapace) by non-linear least squares.
# The starting values are the a and b of the dotted "Ling: theoretical limit"
# curve drawn in the Figure 4 plot.
mod_exp <- nls(
  TD ~ a * exp(b * Carapace),
  start = list(a = 5.12, b = 0.023),
  data = eaten
)
summary(mod_exp)

# Urchin test diameter against lobster carapace length for every trial:
# colour = lobster familiarity, shape = eaten/survived x trimmed/untrimmed.
# NOTE(review): the dashed curve uses hard-coded coefficients and a 1.3
# multiplier rather than coef(mod_exp) - confirm they match the fitted model
# and record what the multiplier represents.
fig4_df %>%
  mutate(Eaten_Trim = interaction(Eaten, Trimmed, sep = "_")) %>%
  ggplot(aes(x = Carapace, y = TD, colour = Lobster, shape = Eaten_Trim)) +
  geom_point(size = 4, alpha = 0.8) +
  # Ling: theoretical limit
  geom_line(
    data = tibble(x = 50:180, y = 5.12 * exp(0.023 * x)),
    mapping = aes(x = x, y = y),
    inherit.aes = FALSE,
    colour = "black",
    linetype = "dotted",
    linewidth = 0.7
  ) +
  # the exp fit for our data for eaten urchins
  geom_line(
    data = tibble(x = 104:180, y = 1.3 * (11.91 * exp(0.0093 * x))),
    mapping = aes(x = x, y = y),
    inherit.aes = FALSE,
    colour = "black",
    linetype = "dashed",
    linewidth = 0.7
  ) +
  scale_shape_manual(
    name = "Urchin spines / survival",
    values = c("Yes_T" = 17, "Yes_UT" = 16, "No_T" = 2, "No_UT" = 1),
    labels = c(
      "Yes_T" = "Trimmed eaten",
      "Yes_UT" = "Untrimmed eaten",
      "No_T" = "Trimmed survived",
      "No_UT" = "Untrimmed survived"
    )
  ) +
  scale_colour_manual(
    name = "Lobster familiarity",
    values = c("Naive" = "forestgreen", "Familiar" = "skyblue3")
  ) +
  # labs(title = "Combining three experiments of captive lobsters predating on Centro") +
  labs(x = "Lobster Carapace Length (mm)", y = "Urchin Test Diameter (mm)") +
  ylim(0, 130) +
  theme_bw(base_size = 14)

#ggsave("plots/Fig4_exp.jpg", width = 8, height = 6) 

#### Summary Statistics ####
# Number of rows (trials) in fig4_df and how many of them have Eaten == "Yes"
summarise(
  fig4_df,
  total = n(),
  eaten = sum(Eaten == "Yes")
)

