library(tidyverse)
library(vegan)
library(indicspecies)

## 0) Define target spider species ####
all_spiders <- c("Erigone.dentipalpis", "Oedothorax.apicatus", "Erigone.atra",
                 "Tenuiphantes.tenuis", "Agyneta.rurestris", "Pardosa.agrestis",
                 "Bathyphantes.gracilis", "Trochosa.ruricola", "Pardosa.spp.",
                 "Pachygnatha.degeeri", "Araeoncus.humilis", "Oedothorax.retusus",
                 "Pardosa.palustris", "Oedothorax.agrestis", "Drassyllus.spp.",
                 "Gnaphosidae.spp.", "Porrhomma.spp.", "Diplostyla.concolor",
                 "Micaria.spp.", "Pardosa.amentata", "Pelecopsis.parallela",
                 "Pardosa.agricolae", "Phrurolithus.minimus", "Xysticus.cochi",
                 "Xysticus.spp.", "Histopona.torpida", "Callilepis.nocturna",
                 "Coelotes.terrestris", "Haplodrassus.minor", "Oedothorax.fuscus",
                 "Ostearius.melanophygius", "Tenuiphantes.spp.", "Tetragnatidae.spp.",
                 "Tibellus.oblungus", "Trochosa.spp.", "Diplocephalus.cristatus",
                 "Robertus.spp.", "Ozyptila.simplex", "Pelecopsis.parallela",
                 "Trachyzelothes.pedestris", "Walckenaeria.vigilax",
                 "Centromerus.cavenarum", "Drassyllus.pusillus")


## 1) Read data and join days_standing ####

# 2021a
df1_s <- read.csv("2021a spiders.csv", sep = ";", header = TRUE) %>%
  dplyr::select(DATE, BLOCK, TREATMENT, ROW, DIRECTION, SURROUNDING,
                intersect(names(.), all_spiders)) %>%
  mutate(DATE = as.Date(DATE)) %>%
  filter(DATE < as.Date("2021-08-01"))

date_mapping_4 <- tibble(
  DATE = as.Date(c("2021-06-17", "2021-07-12")),
  days_standing = c(3, 5)
)

df1_s <- df1_s %>%
  inner_join(date_mapping_4, by = "DATE") %>%
  mutate(TRIAL = "2021.RS")

# 2021b
df2_s <- read.csv("2021b spiders.csv", sep = ";", header = TRUE) %>%
  dplyr::select(DATE, BLOCK, TREATMENT, ROW, DIRECTION, SURROUNDING,
                intersect(names(.), all_spiders)) %>%
  mutate(DATE = as.Date(DATE)) %>%
  filter(DATE < as.Date("2021-08-01"))

date_mapping_5 <- tibble(
  DATE = as.Date(c("2021-06-18", "2021-07-16")),
  days_standing = c(3, 4)
)

df2_s <- df2_s %>%
  inner_join(date_mapping_5, by = "DATE") %>%
  mutate(TRIAL = "2021.MS")

# 2022
df3_s <- read.csv("2022 spiders.csv", sep = ";", header = TRUE) %>%
  dplyr::select(DATE, BLOCK, TREATMENT, ROW,
                intersect(names(.), all_spiders)) %>%
  mutate(DATE = as.Date(DATE))

date_mapping_6 <- tibble(
  DATE = as.Date(c("2022-06-13", "2022-07-11")),
  days_standing = c(3, 4)
)

df3_s <- df3_s %>%
  inner_join(date_mapping_6, by = "DATE") %>%
  mutate(TRIAL = "2022.MS")


## 2) Harmonise columns: add dummy DIRECTION/SURROUNDING for 2022 ####

if (!"DIRECTION"   %in% names(df3_s)) df3_s$DIRECTION   <- "std"
if (!"SURROUNDING" %in% names(df3_s)) df3_s$SURROUNDING <- "mulch"  # only one trap type


## 3) Combine trials and select trap types (mulch traps for WT/S/SIL) ####

combined_df_s <- bind_rows(df1_s, df2_s, df3_s)

combined_df_s <- combined_df_s %>%
  filter(
    TREATMENT == "C" |
      (TREATMENT %in% c("WT", "S", "SIL") & SURROUNDING == "mulch")
  )

meta_cols_s <- c("TRIAL","DATE","BLOCK","TREATMENT","ROW",
                 "DIRECTION","SURROUNDING","days_standing")
species_cols_s <- setdiff(names(combined_df_s), meta_cols_s)


## 4) Long format and remove NA trap records ####

combined_long_s <- combined_df_s %>%
  pivot_longer(cols = all_of(species_cols_s),
               names_to = "species",
               values_to = "count") %>%
  filter(!is.na(count))


## 5) Aggregate to row level and correct for sampling effort ####
# sampling effort = sum of trap-days per row
# activity density = total individuals / total trap-days

row_agg_s <- combined_long_s %>%
  group_by(TRIAL, TREATMENT, BLOCK, ROW, DATE, DIRECTION, SURROUNDING, species) %>%
  summarise(
    count_day     = sum(count),
    days_standing = first(days_standing),
    .groups = "drop"
  ) %>%
  group_by(TRIAL, TREATMENT, BLOCK, ROW, species) %>%
  summarise(
    total_count    = sum(count_day),
    total_trapdays = sum(days_standing),
    .groups = "drop"
  ) %>%
  filter(total_trapdays > 0) %>%
  mutate(activity_density = total_count / total_trapdays)


## 6) Community matrix for NMDS (row level) ####

row_agg_s <- row_agg_s %>%
  unite("Group", TRIAL, TREATMENT, BLOCK, ROW, sep = "_", remove = FALSE)

# remove species with zero total activity across all rows
row_agg_s <- row_agg_s %>%
  group_by(species) %>%
  filter(sum(activity_density) > 0) %>%
  ungroup()

comm_mat_s <- row_agg_s %>%
  dplyr::select(Group, species, activity_density) %>%
  pivot_wider(names_from = species,
              values_from = activity_density,
              values_fill = 0) %>%
  as.data.frame()

lookup_rows_s <- comm_mat_s %>%
  dplyr::select(Group) %>%
  separate(Group, into = c("TRIAL","TREATMENT","BLOCK","ROW"),
           sep = "_", remove = FALSE)

rownames(comm_mat_s) <- comm_mat_s$Group
comm_mat_s <- as.matrix(comm_mat_s[ , -1, drop = FALSE])

# remove empty rows (no species activity)
nonempty_rows <- rowSums(comm_mat_s) > 0
comm_mat_s    <- comm_mat_s[nonempty_rows, , drop = FALSE]
lookup_rows_s <- lookup_rows_s[nonempty_rows, , drop = FALSE]


## 7) NMDS + plotting ####

# 2021.RS / 2021a
sel_RS_2021_s  <- lookup_rows_s$TRIAL == "2021.RS"
mat_RS_2021_s  <- comm_mat_s[sel_RS_2021_s, ]

dist_RS_2021_s <- vegdist(mat_RS_2021_s, method = "bray")
set.seed(23841)
nmds_RS_2021_s <- metaMDS(dist_RS_2021_s)

scores_RS_2021_s <- scores(nmds_RS_2021_s) %>%
  as_tibble(rownames = "Group") %>%
  inner_join(lookup_rows_s, by = "Group")

centroid_RS_2021_s <- scores_RS_2021_s %>%
  group_by(TREATMENT) %>%
  mutate(
    centroid1 = mean(NMDS1),
    centroid2 = mean(NMDS2),
    .groups   = "drop"
  )

scores_RS_2021_s %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = TREATMENT, fill = TREATMENT)) +
  theme_light() +
  stat_ellipse(geom = "polygon", level = 0.7, alpha = 0.1, show.legend = FALSE) +
  geom_point() +
  geom_point(data = centroid_RS_2021_s,
             mapping = aes(x = centroid1, y = centroid2, color = TREATMENT),
             size = 4, shape = 17) +
  labs(x = "NMDS 1", y = "NMDS 2",
       title = "spiders - trial 2021a") +
  theme(
    axis.text.x   = element_text(size = 8),
    axis.text.y   = element_text(size = 8),
    axis.title.x  = element_text(size = 9, colour = "gray30"),
    axis.title.y  = element_text(size = 9, colour = "gray30"),
    plot.title    = element_text(hjust = 0.98, size = 11, vjust = 0),
    panel.border  = element_rect(colour = "lightgray", linewidth = 0.5),
    legend.position = "none"
  ) +
  scale_color_manual(name = "treatment",
                     breaks = c("C", "WT", "S"),
                     labels = c("control", "triticale/vetch", "straw"),
                     values = c("#EE6677", "#CCBB44", "#4477AA")) +
  scale_fill_manual(name = "treatment",
                    breaks = c("C", "WT", "S"),
                    labels = c("control", "triticale/vetch", "straw"),
                    values = c("#EE6677", "#CCBB44", "#4477AA"))

ggsave("NMDS 2021a - spiders.pdf", width = 8, height = 7, units = "cm")


# 2021.MS / 2021b
sel_MS_2021_s  <- lookup_rows_s$TRIAL == "2021.MS"
mat_MS_2021_s  <- comm_mat_s[sel_MS_2021_s, ]

dist_MS_2021_s <- vegdist(mat_MS_2021_s, method = "bray")
set.seed(571903)
nmds_MS_2021_s <- metaMDS(dist_MS_2021_s)

scores_MS_2021_s <- scores(nmds_MS_2021_s) %>%
  as_tibble(rownames = "Group") %>%
  inner_join(lookup_rows_s, by = "Group")

centroid_MS_2021_s <- scores_MS_2021_s %>%
  group_by(TREATMENT) %>%
  mutate(
    centroid1 = mean(NMDS1),
    centroid2 = mean(NMDS2),
    .groups   = "drop"
  )

scores_MS_2021_s %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = TREATMENT, fill = TREATMENT)) +
  theme_light() +
  stat_ellipse(geom = "polygon", level = 0.7, alpha = 0.1, show.legend = FALSE) +
  geom_point() +
  geom_point(data = centroid_MS_2021_s,
             mapping = aes(x = centroid1, y = centroid2, color = TREATMENT),
             size = 4, shape = 17) +
  labs(x = "NMDS 1", y = "NMDS 2",
       title = "spiders - trial 2021b") +
  theme(
    axis.text.x   = element_text(size = 8),
    axis.text.y   = element_text(size = 8),
    axis.title.x  = element_text(size = 9, colour = "gray30"),
    axis.title.y  = element_text(size = 9, colour = "gray30"),
    plot.title    = element_text(hjust = 0.98, size = 11, vjust = 0),
    panel.border  = element_rect(colour = "lightgray", linewidth = 0.5),
    legend.position = "none"
  ) +
  scale_color_manual(name = "treatment",
                     breaks = c("C", "WT", "S"),
                     labels = c("control", "triticale/vetch", "straw"),
                     values = c("#EE6677", "#CCBB44", "#4477AA")) +
  scale_fill_manual(name = "treatment",
                    breaks = c("C", "WT", "S"),
                    labels = c("control", "triticale/vetch", "straw"),
                    values = c("#EE6677", "#CCBB44", "#4477AA"))

ggsave("NMDS 2021b - spiders.pdf", width = 8, height = 7, units = "cm")


# 2022.MS / 2022
sel_MS_2022_s  <- lookup_rows_s$TRIAL == "2022.MS"
mat_MS_2022_s  <- comm_mat_s[sel_MS_2022_s, ]

dist_MS_2022_s <- vegdist(mat_MS_2022_s, method = "bray")
set.seed(98217)
nmds_MS_2022_s <- metaMDS(dist_MS_2022_s)

scores_MS_2022_s <- scores(nmds_MS_2022_s) %>%
  as_tibble(rownames = "Group") %>%
  inner_join(lookup_rows_s, by = "Group")

centroid_MS_2022_s <- scores_MS_2022_s %>%
  group_by(TREATMENT) %>%
  mutate(
    centroid1 = mean(NMDS1),
    centroid2 = mean(NMDS2),
    .groups   = "drop"
  )

scores_MS_2022_s %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = TREATMENT, fill = TREATMENT)) +
  theme_light() +
  stat_ellipse(geom = "polygon", level = 0.7, alpha = 0.1, show.legend = FALSE) +
  geom_point() +
  geom_point(data = centroid_MS_2022_s,
             mapping = aes(x = centroid1, y = centroid2, color = TREATMENT),
             size = 4, shape = 17) +
  labs(x = "NMDS 1", y = "NMDS 2",
       title = "spiders - trial 2022") +
  theme(
    axis.text.x   = element_text(size = 8),
    axis.text.y   = element_text(size = 8),
    axis.title.x  = element_text(size = 9, colour = "gray30"),
    axis.title.y  = element_text(size = 9, colour = "gray30"),
    plot.title    = element_text(hjust = 0.98, size = 11, vjust = 0),
    panel.border  = element_rect(colour = "lightgray", linewidth = 0.5),
    legend.position = "none"
  ) +
  scale_color_manual(name = "treatment",
                     breaks = c("C", "WT", "SIL"),
                     labels = c("control", "triticale/vetch", "silage"),
                     values = c("#EE6677", "#CCBB44", "#228833")) +
  scale_fill_manual(name = "treatment",
                    breaks = c("C", "WT", "SIL"),
                    labels = c("control", "triticale/vetch", "silage"),
                    values = c("#EE6677", "#CCBB44", "#228833"))

ggsave("NMDS 2022 - spiders.pdf", width = 8, height = 7, units = "cm")


## 8) PERMANOVA (adonis2) and dispersion tests ####

pairwise_permanova <- function(dist_mat, meta_df, treat_col = "TREATMENT",
                               block_col = "BLOCK", seed = 446531) {
  treatments <- unique(meta_df[[treat_col]])
  pairs <- combn(treatments, 2, simplify = FALSE)
  
  pairwise_results <- lapply(pairs, function(pair) {
    subset_indices <- meta_df[[treat_col]] %in% pair
    subset_dist <- as.dist(as.matrix(dist_mat)[subset_indices, subset_indices])
    subset_meta <- meta_df[subset_indices, , drop = FALSE]
    
    set.seed(seed)
    result <- adonis2(subset_dist ~ subset_meta[[treat_col]],
                      strata = subset_meta[[block_col]])
    
    data.frame(
      Comparison = paste(pair[1], "vs", pair[2]),
      R2 = result$R2[1],
      F  = result$F[1],
      P  = result$`Pr(>F)`[1]
    )
  })
  
  pairwise_table <- do.call(rbind, pairwise_results)
  pairwise_table$P_adjusted <- p.adjust(pairwise_table$P, method = "bonferroni")
  pairwise_table
}

# 2021.RS / 2021a
dist <- dist_RS_2021_s
lookup <- scores_RS_2021_s %>%
  dplyr::select(Group, TRIAL, TREATMENT, BLOCK, ROW)

adonis2(dist ~ lookup$TREATMENT,
        strata = lookup$BLOCK)


# 2021.MS / 2021b
dist <- dist_MS_2021_s
lookup <- scores_MS_2021_s %>%
  dplyr::select(Group, TRIAL, TREATMENT, BLOCK, ROW)

adonis2(dist ~ lookup$TREATMENT,
        strata = lookup$BLOCK)


# 2022.MS / 2022
dist <- dist_MS_2022_s
lookup <- scores_MS_2022_s %>%
  dplyr::select(Group, TRIAL, TREATMENT, BLOCK, ROW)

adonis2(dist ~ lookup$TREATMENT,
        strata = lookup$BLOCK)

disp <- betadisper(dist, lookup$TREATMENT)
print(disp)
perm_test <- permutest(disp)
print(perm_test)

pairwise_table_MS_2022_s <- pairwise_permanova(dist, lookup,
                                               treat_col = "TREATMENT",
                                               block_col = "BLOCK")
print(pairwise_table_MS_2022_s)


## 9) Diversity calculations (row-level activity densities) ####

comm_df_s <- as.data.frame(comm_mat_s) %>%
  rownames_to_column(var = "Group")

total_activity_s <- comm_df_s %>%
  mutate(total_activity = rowSums(across(-Group))) %>%
  dplyr::select(Group, total_activity)

diversities_s <- comm_df_s %>%
  column_to_rownames("Group") %>%
  as.matrix() %>%
  {
    tibble(
      Group            = rownames(.),
      species_richness = specnumber(.),
      shannon          = diversity(., index = "shannon"),
      simpson          = diversity(., index = "simpson"),
      invsimpson       = 1 / diversity(., index = "simpson"),
      evenness         = shannon / log(species_richness)
    )
  } %>%
  left_join(total_activity_s, by = "Group")

metadata_diversities_s <- diversities_s %>%
  inner_join(lookup_rows_s, by = "Group")

metadata_diversities_s$TRIAL <- factor(metadata_diversities_s$TRIAL, 
                                       levels = c("2021.RS", "2021.MS", "2022.MS"))

trial_labels_s <- c(
  "2021.RS" = "trial 2021a",
  "2021.MS" = "trial 2021b",
  "2022.MS" = "trial 2022"
)


## 10) Plots: species number and Shannon (spiders) ####

ggplot(metadata_diversities_s,
       aes(y = factor(TREATMENT, levels = c("SIL", "S", "WT", "C")),
           color = factor(TREATMENT, levels = c("SIL", "S", "WT", "C")),
           x = species_richness)) +
  geom_jitter(width = 0, height = 0.15, size = 1.5, alpha = 0.5) +
  scale_y_discrete(breaks = c("C", "WT", "SIL", "S"),
                   labels = c("control", "triticale/vetch",
                              "silage", "straw")) +
  scale_x_continuous(breaks = seq(3, 13, by = 2)) + 
  scale_color_manual(values = c("C" = "#EE6677", "WT" = "#CCBB44", 
                                "SIL" = "#228833", "S" = "#4477AA")) +
  stat_summary(fun = mean, geom = "point", 
               shape = 4, size = 2, stroke = 1,  
               color = "black", aes(group = TREATMENT)) +
  theme_light() +
  labs(x = "Number of species", y = NULL) +
  theme(axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 9),
        axis.title.x = element_text(size = 9),
        legend.position = "none") +
  facet_grid(~TRIAL, scales = "free_y", space = "free_y",
             labeller = labeller(TRIAL = trial_labels_s))

ggsave("Speciesnumber jitter - spiders.pdf",
       width = 17, height = 4.5, units = "cm")

ggplot(metadata_diversities_s,
       aes(y = factor(TREATMENT, levels = c("SIL", "S", "WT", "C")),
           color = factor(TREATMENT, levels = c("SIL", "S", "WT", "C")),
           x = shannon)) +
  geom_jitter(width = 0, height = 0.15, size = 1.5, alpha = 0.5) +
  scale_y_discrete(breaks = c("C", "WT", "SIL", "S"),
                   labels = c("control", "triticale/vetch",
                              "silage", "straw")) +
  scale_color_manual(values = c("C" = "#EE6677", "WT" = "#CCBB44", 
                                "SIL" = "#228833", "S" = "#4477AA")) +
  stat_summary(fun = mean, geom = "point", 
               shape = 4, size = 2, stroke = 1,  
               color = "black", aes(group = TREATMENT)) +
  theme_light() +
  labs(x = "Shannon index", y = NULL) +
  theme(axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 9),
        axis.title.x = element_text(size = 9),
        legend.position = "none") +
  facet_grid(~TRIAL, scales = "free_y", space = "free_y",
             labeller = labeller(TRIAL = trial_labels_s)) +
  scale_x_continuous(breaks = c(0.5, 1.0, 1.5, 2.0))

ggsave("Shannon jitter - spiders.pdf",
       width = 17, height = 4.5, units = "cm")


## 11) Indicator species analysis (IndVal) – spiders ####

run_indval <- function(trial_id,
                       comm_mat,
                       lookup_rows,
                       p_adj = "BH",
                       seed  = 1) {
  sel       <- lookup_rows$TRIAL == trial_id
  comm_sub  <- comm_mat[sel, , drop = FALSE]
  meta_sub  <- lookup_rows[sel, , drop = FALSE]
  
  df_sub <- as.data.frame(comm_sub)
  df_sub$TREATMENT <- factor(meta_sub$TREATMENT)
  
  set.seed(seed)
  indval <- multipatt(df_sub %>% dplyr::select(-TREATMENT),
                      df_sub$TREATMENT,
                      duleg   = TRUE,
                      control = how(nperm = 999))
  
  p_values   <- indval$sign$p.value
  p_adjusted <- p.adjust(p_values, method = p_adj)
  indval$sign$p.adjusted <- p_adjusted
  
  indval
}

# 2021.RS / 2021a / 2021a
indval_s_2021_RS <- run_indval("2021.RS",
                                        comm_mat    = comm_mat_s,
                                        lookup_rows = lookup_rows_s,
                                        p_adj = "BH",
                                        seed  = 3654)
summary(indval_s_2021_RS)

# 2021.MS / 2021b / 2021b
indval_s_2021_MS <- run_indval("2021.MS",
                                        comm_mat    = comm_mat_s,
                                        lookup_rows = lookup_rows_s,
                                        p_adj = "BH",
                                        seed  = 76543)
summary(indval_s_2021_MS)

# 2022.MS / 2022 / 2022
indval_s_2022_MS <- run_indval("2022.MS",
                                        comm_mat    = comm_mat_s,
                                        lookup_rows = lookup_rows_s,
                                        p_adj = "BH",
                                        seed  = 98765)
summary(indval_s_2022_MS)
