Purpose

Look at results of the preferred models during model selection: separate GAMs for each station (RD22, STTD, LIB, and RVB) using an interaction between weekly average flow and Year with a cyclic penalized cubic regression spline smooth term for week number to account for seasonality. Create summary figures and tables for the manuscript. The model selection process and diagnostics can be found in the manuscript_synthesis/notebooks/rtm_chl_models_flow_weekly_avg.Rmd document.

Global code and functions

# Load packages
library(tidyverse)
library(scales)
library(mgcv)
library(ggeffects)
library(emmeans)
library(multcomp)
library(gratia)
library(patchwork)
library(here)
library(conflicted)

# Declare package conflict preferences 
conflicts_prefer(dplyr::filter(), dplyr::lag(), dplyr::select())

Display current versions of R and packages used for this analysis:

devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.4.1 (2024-06-14 ucrt)
##  os       Windows 11 x64 (build 26100)
##  system   x86_64, mingw32
##  ui       RTerm
##  language (EN)
##  collate  English_United States.utf8
##  ctype    English_United States.utf8
##  tz       America/Los_Angeles
##  date     2024-12-19
##  pandoc   3.1.11 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package      * version  date (UTC) lib source
##  bslib          0.8.0    2024-07-29 [1] CRAN (R 4.4.1)
##  cachem         1.1.0    2024-05-16 [1] CRAN (R 4.4.1)
##  cli            3.6.3    2024-06-21 [1] CRAN (R 4.4.1)
##  coda           0.19-4.1 2024-01-31 [1] CRAN (R 4.4.0)
##  codetools      0.2-20   2024-03-31 [2] CRAN (R 4.4.1)
##  colorspace     2.1-1    2024-07-26 [1] CRAN (R 4.4.1)
##  conflicted   * 1.2.0    2023-02-01 [1] CRAN (R 4.4.0)
##  devtools       2.4.5    2022-10-11 [1] CRAN (R 4.4.0)
##  digest         0.6.37   2024-08-19 [1] CRAN (R 4.4.1)
##  dplyr        * 1.1.4    2023-11-17 [1] CRAN (R 4.4.0)
##  ellipsis       0.3.2    2021-04-29 [1] CRAN (R 4.4.0)
##  emmeans      * 1.10.4   2024-08-21 [1] CRAN (R 4.4.1)
##  estimability   1.5.1    2024-05-12 [1] CRAN (R 4.4.1)
##  evaluate       0.24.0   2024-06-10 [1] CRAN (R 4.4.1)
##  fansi          1.0.6    2023-12-08 [1] CRAN (R 4.4.0)
##  farver         2.1.2    2024-05-13 [1] CRAN (R 4.4.1)
##  fastmap        1.2.0    2024-05-15 [1] CRAN (R 4.4.1)
##  forcats      * 1.0.0    2023-01-29 [1] CRAN (R 4.4.0)
##  fs             1.6.4    2024-04-25 [1] CRAN (R 4.4.0)
##  generics       0.1.3    2022-07-05 [1] CRAN (R 4.4.0)
##  ggeffects    * 1.7.1    2024-09-01 [1] CRAN (R 4.4.1)
##  ggokabeito     0.1.0    2021-10-18 [1] CRAN (R 4.4.0)
##  ggplot2      * 3.5.1    2024-04-23 [1] CRAN (R 4.4.0)
##  glue           1.7.0    2024-01-09 [1] CRAN (R 4.4.0)
##  gratia       * 0.9.2    2024-06-25 [1] CRAN (R 4.4.1)
##  gtable         0.3.5    2024-04-22 [1] CRAN (R 4.4.0)
##  here         * 1.0.1    2020-12-13 [1] CRAN (R 4.4.0)
##  hms            1.1.3    2023-03-21 [1] CRAN (R 4.4.0)
##  htmltools      0.5.8.1  2024-04-04 [1] CRAN (R 4.4.0)
##  htmlwidgets    1.6.4    2023-12-06 [1] CRAN (R 4.4.0)
##  httpuv         1.6.15   2024-03-26 [1] CRAN (R 4.4.0)
##  insight        0.20.5   2024-10-02 [1] CRAN (R 4.4.2)
##  jquerylib      0.1.4    2021-04-26 [1] CRAN (R 4.4.0)
##  jsonlite       1.8.8    2023-12-04 [1] CRAN (R 4.4.0)
##  knitr          1.48     2024-07-07 [1] CRAN (R 4.4.1)
##  later          1.3.2    2023-12-06 [1] CRAN (R 4.4.0)
##  lattice        0.22-6   2024-03-20 [1] CRAN (R 4.4.0)
##  lifecycle      1.0.4    2023-11-07 [1] CRAN (R 4.4.0)
##  lubridate    * 1.9.3    2023-09-27 [1] CRAN (R 4.4.0)
##  magrittr       2.0.3    2022-03-30 [1] CRAN (R 4.4.0)
##  MASS         * 7.3-61   2024-06-13 [1] CRAN (R 4.4.1)
##  Matrix         1.7-0    2024-03-22 [1] CRAN (R 4.4.0)
##  memoise        2.0.1    2021-11-26 [1] CRAN (R 4.4.0)
##  mgcv         * 1.9-1    2023-12-21 [1] CRAN (R 4.4.0)
##  mime           0.12     2021-09-28 [1] CRAN (R 4.4.0)
##  miniUI         0.1.1.1  2018-05-18 [1] CRAN (R 4.4.0)
##  multcomp     * 1.4-26   2024-07-18 [1] CRAN (R 4.4.1)
##  munsell        0.5.1    2024-04-01 [1] CRAN (R 4.4.0)
##  mvnfast        0.2.8    2023-02-23 [1] CRAN (R 4.4.0)
##  mvtnorm      * 1.3-1    2024-09-03 [1] CRAN (R 4.4.1)
##  nlme         * 3.1-166  2024-08-14 [1] CRAN (R 4.4.1)
##  patchwork    * 1.3.0    2024-09-16 [1] CRAN (R 4.4.2)
##  pillar         1.9.0    2023-03-22 [1] CRAN (R 4.4.0)
##  pkgbuild       1.4.4    2024-03-17 [1] CRAN (R 4.4.0)
##  pkgconfig      2.0.3    2019-09-22 [1] CRAN (R 4.4.0)
##  pkgload        1.4.0    2024-06-28 [1] CRAN (R 4.4.1)
##  profvis        0.3.8    2023-05-02 [1] CRAN (R 4.4.0)
##  promises       1.3.0    2024-04-05 [1] CRAN (R 4.4.0)
##  purrr        * 1.0.2    2023-08-10 [1] CRAN (R 4.4.0)
##  R6             2.5.1    2021-08-19 [1] CRAN (R 4.4.0)
##  Rcpp           1.0.13   2024-07-17 [1] CRAN (R 4.4.1)
##  readr        * 2.1.5    2024-01-10 [1] CRAN (R 4.4.0)
##  remotes        2.5.0    2024-03-17 [1] CRAN (R 4.4.0)
##  rlang          1.1.4    2024-06-04 [1] CRAN (R 4.4.1)
##  rmarkdown      2.28     2024-08-17 [1] CRAN (R 4.4.1)
##  rprojroot      2.0.4    2023-11-05 [1] CRAN (R 4.4.0)
##  rstudioapi     0.16.0   2024-03-24 [1] CRAN (R 4.4.0)
##  sandwich       3.1-0    2023-12-11 [1] CRAN (R 4.4.0)
##  sass           0.4.9    2024-03-15 [1] CRAN (R 4.4.0)
##  scales       * 1.3.0    2023-11-28 [1] CRAN (R 4.4.0)
##  sessioninfo    1.2.2    2021-12-06 [1] CRAN (R 4.4.0)
##  shiny          1.9.1    2024-08-01 [1] CRAN (R 4.4.1)
##  stringi        1.8.4    2024-05-06 [1] CRAN (R 4.4.0)
##  stringr      * 1.5.1    2023-11-14 [1] CRAN (R 4.4.0)
##  survival     * 3.7-0    2024-06-05 [1] CRAN (R 4.4.1)
##  TH.data      * 1.1-2    2023-04-17 [1] CRAN (R 4.4.0)
##  tibble       * 3.2.1    2023-03-20 [1] CRAN (R 4.4.0)
##  tidyr        * 1.3.1    2024-01-24 [1] CRAN (R 4.4.0)
##  tidyselect     1.2.1    2024-03-11 [1] CRAN (R 4.4.0)
##  tidyverse    * 2.0.0    2023-02-22 [1] CRAN (R 4.4.0)
##  timechange     0.3.0    2024-01-18 [1] CRAN (R 4.4.0)
##  tzdb           0.4.0    2023-05-12 [1] CRAN (R 4.4.0)
##  urlchecker     1.0.1    2021-11-30 [1] CRAN (R 4.4.0)
##  usethis        3.0.0    2024-07-29 [1] CRAN (R 4.4.1)
##  utf8           1.2.4    2023-10-22 [1] CRAN (R 4.4.0)
##  vctrs          0.6.5    2023-12-01 [1] CRAN (R 4.4.0)
##  withr          3.0.1    2024-07-31 [1] CRAN (R 4.4.1)
##  xfun           0.47     2024-08-17 [1] CRAN (R 4.4.1)
##  xtable         1.8-4    2019-04-21 [1] CRAN (R 4.4.0)
##  yaml           2.3.10   2024-07-26 [1] CRAN (R 4.4.1)
##  zoo            1.8-12   2023-04-13 [1] CRAN (R 4.4.0)
## 
##  [1] C:/R/win-library/4.4
##  [2] C:/Program Files/R/R-4.4.1/library
## 
## ──────────────────────────────────────────────────────────────────────────────
# Calculate effects of flow on chlorophyll for each year holding the
  # non-focal variables constant - marginal effects/adjusted predictions
calc_flow_yr_marg_eff <- function(df_data, m_gam) {
  # Calculate min and max flows for each year to narrow down x-axis in the
    # effects plots
  df_flow_yr_summ <- df_data %>% 
    summarize(
      Flow_min = min(Flow),
      Flow_max = max(Flow),
      .by = c(Year_fct)
    ) %>% 
    mutate(
      Flow_buffer = (Flow_max - Flow_min) * 0.05,
      Flow_min = Flow_min - Flow_buffer,
      Flow_max = Flow_max + Flow_buffer
    ) %>% 
    select(-Flow_buffer)
  
  # Calculate marginal effects
  as.data.frame(
    predict_response(m_gam, terms = c("Flow", "Year_fct"), margin = "marginalmeans"),
    terms_to_colnames = TRUE
  ) %>% 
  as_tibble() %>% 
  # Narrow down range of flow values for each station
  left_join(df_flow_yr_summ, by = join_by(Year_fct)) %>% 
  filter(Flow >= Flow_min & Flow <= Flow_max) %>% 
  transmute(
    Year_fct,
    Flow,
    # Back calculate model fits and confidence levels
    across(c(predicted, conf.low, conf.high), \(x) exp(x) / 1000)
  )
}

# Create effects plot of flow on chlorophyll by year
plot_eff_flow_by_yr <- function(df_eff, df_data) {
  df_eff %>% 
    ggplot(aes(x = Flow, y = predicted)) +
    geom_point(data = df_data, aes(y = Chla), alpha = 0.6) +
    geom_line(linewidth = 1) +
    geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.25) +
    facet_wrap(vars(Year_fct), scales = "free_x", nrow = 1) +
    theme_bw() +
    labs(
      title = df_data$StationCode[1],
      x = "Flow (cfs)",
      y = expression(Chlorophyll~Fluoresence~(mu*g~L^{-1}))
    ) +
    scale_x_continuous(labels = label_comma(), breaks = breaks_extended()) +
    theme(
      axis.text = element_text(size = 8),
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
    )
}

# Create boxplots of chlorophyll by year showing Tukey post-hoc results
boxplot_cont_yr <- function(em_obj, df_data, xlab = TRUE) {
  # Create table of contrasts and convert it to a tibble for plot
  df_yr_cont <- em_obj %>% 
    cld(sort = FALSE, Letters = letters) %>% 
    as_tibble() %>% 
    mutate(
      group = str_remove_all(.group, fixed(" ")),
      # back transform log-transformed results
      across(c(emmean, lower.CL, upper.CL), ~ exp(.x) / 1000)
    ) %>% 
    # Add min and max values of observed data to the Tukey post-hoc results and
      # calculate vertical positioning of letters
    left_join(
      df_data %>% 
        summarize(
          max_val = max(Chla),
          min_val = min(Chla),
          .by = Year_fct
      ), 
      by = join_by(Year_fct)
    ) %>% 
    mutate(
      max_val = if_else(upper.CL > max_val, upper.CL, max_val),
      y_pos = max_val + (max_val - min_val) / 10,
      # Make all post-hoc contrast letters at same height equal to max of all
      y_pos = max(y_pos)
    ) %>% 
    select(
      Year_fct,
      emmean,
      lower.CL,
      upper.CL,
      group,
      y_pos
    )
  
  # Create boxplot showing Tukey post-hoc results
  df_yr_cont %>% 
    ggplot(
      aes(
        x = Year_fct,
        y = emmean,
        ymin = lower.CL,
        ymax = upper.CL
      )
    ) +
    geom_boxplot(
      data = df_data,
      aes(x = Year_fct, y = Chla),
      inherit.aes = FALSE
    ) +
    geom_crossbar(
      color = "grey82", 
      fill = "grey", 
      alpha = 0.7, 
      linewidth = 0.1
    ) +
    geom_point(color = "red") +
    geom_text(aes(y = y_pos, label = group), size = 3) +
    labs(
      title = df_data$StationCode[1],
      x = "Year",
      y = expression(Chlorophyll~Fluoresence~(mu*g~L^{-1}))
    ) +
    theme_bw() +
    theme(axis.text = element_text(size = 8))
}

# Create smooth of Week plot
plot_smooth_week <- function(m_gam, station) {
  ls_plt <- draw(m_gam, residuals = TRUE, rug = FALSE, wrap = FALSE)
  
  # Extract plot from list and format
  chuck(ls_plt, 1) + 
    theme_bw() + 
    labs(title = station, caption = NULL)
}

Import Data

# Define file path for processed data
fp_data <- here("manuscript_synthesis/data/processed")

# Import weekly average water quality data
df_wq <- readRDS(file.path(fp_data, "wq_week_avg_2013-2019.rds"))

# Import weekly average flow data
df_flow <- readRDS(file.path(fp_data, "flow_week_avg_2013-2019.rds"))

Prepare Data

# Create a vector for the factor order of StationCode
sta_order <- c("RD22", "STTD", "LIB", "RVB")

# We will use LIS flow data as a proxy for STTD
df_flow_c <- df_flow %>% mutate(StationCode = if_else(StationCode == "LIS", "STTD", StationCode))

# Prepare chlorophyll and flow data for analysis
ls_chla <- df_wq %>% 
  select(StationCode, Year, Week, Chla) %>% 
  drop_na(Chla) %>% 
  # Filter to only include representative stations for 4 habitat types (RD22,
    # STTD, LIB, RVB) and only include years 2015-2019
  filter(
    StationCode %in% sta_order,
    Year %in% 2015:2019
  ) %>% 
  # Join flow data to chlorophyll data
  left_join(df_flow_c, by = join_by(StationCode, Year, Week)) %>% 
  # Remove all NA flow values
  drop_na(Flow) %>% 
  mutate(
    # Scale and log transform chlorophyll values
    Chla_log = log(Chla * 1000),
    # Add a column for Year as a factor for the models
    Year_fct = factor(Year)
  ) %>% 
  arrange(StationCode, Year, Week) %>% 
  split(~ StationCode)

# Add 2 lag variables for chlorophyll to be used in the LIB model to help with
  # serial autocorrelation
df_chla_lib <- ls_chla$LIB %>% 
  group_by(Year) %>% 
  mutate(
    lag1 = lag(Chla_log),
    lag2 = lag(Chla_log, 2)
  ) %>% 
  ungroup() %>% 
  drop_na(lag1, lag2)

# Add 1 lag variables for chlorophyll to be used in the RVB model to help with
  # serial autocorrelation
df_chla_rvb <- ls_chla$RVB %>% 
  group_by(Year) %>% 
  mutate(lag1 = lag(Chla_log)) %>% 
  ungroup() %>% 
  drop_na(lag1)

# Combine all data sets into a nested data frame to be used for analysis
ndf_chla <- 
  tibble(
    StationCode = sta_order,
    df_data = list(ls_chla$RD22, ls_chla$STTD, df_chla_lib, df_chla_rvb)
  ) %>% 
  # Apply factor order to StationCode
  mutate(StationCode = factor(StationCode, levels = sta_order)) %>% 
  arrange(StationCode)

Create GAM models

# Define base model formula
f_base <- "Chla_log ~ Year_fct * Flow + s(Week, bs = 'cc', k = 5)"

# Create gam models for each station
ndf_gam <- ndf_chla %>% 
  mutate(
    str_formula = case_when(
      StationCode == "LIB" ~ paste(f_base, "+ lag1 + lag2"),
      StationCode == "RVB" ~ paste(f_base, "+ lag1"),
      TRUE ~ f_base
    ),
    model = map2(
      str_formula,
      df_data,
      \(x, y) gam(
        formula = as.formula(x),
        data = y,
        method = "REML", 
        knots = list(week = c(0, 52))
      )
    )
  )

Model Results

ANOVA Tables

# Create ANOVA tables for each GAM model and extract elements for export
ndf_gam_anova <- ndf_gam %>% 
  mutate(
    an_gam = map(model, anova),
    # Parametric terms
    df_parametric = map(
      an_gam, 
      \(x) as_tibble(chuck(x, "pTerms.table"), rownames = "Term")
    ),
    # Smooth terms
    df_smooth = map(
      an_gam, 
      \(x) as_tibble(chuck(x, "s.table"), rownames = "Term") %>% 
        rename(df = edf) %>% 
        select(-Ref.df)
    ),
    df_anova_tbl = map2(df_parametric, df_smooth, bind_rows)
  )

# Combine ANOVA tables and format for export
df_gam_anova_tbl_c <- ndf_gam_anova %>% 
  select(StationCode, df_anova_tbl) %>% 
  unnest(df_anova_tbl) %>% 
  transmute(
    Station = StationCode,
    Term,
    across(
      c("df", "F"),
      \(x) if_else(
        abs(x) < 0.01,
        formatC(signif(x, 3), digits = 2, format = "e"),
        formatC(signif(x, 3), digits = 3, format = "fg", flag = "#")
      )
    ),
    p_value = if_else(
      `p-value` < 0.001, 
      "< 0.001", 
      formatC(`p-value`, digits = 3, format = "f")
    )
  )

df_gam_anova_tbl_c
## # A tibble: 19 × 5
##    Station Term          df    F      p_value
##    <fct>   <chr>         <chr> <chr>  <chr>  
##  1 RD22    Year_fct      4.00  7.37   < 0.001
##  2 RD22    Flow          1.00  6.96   0.010  
##  3 RD22    Year_fct:Flow 4.00  2.38   0.060  
##  4 RD22    s(Week)       2.33  8.92   < 0.001
##  5 STTD    Year_fct      4.00  69.3   < 0.001
##  6 STTD    Flow          1.00  37.0   < 0.001
##  7 STTD    Year_fct:Flow 4.00  5.09   0.001  
##  8 STTD    s(Week)       0.342 0.137  0.303  
##  9 LIB     Year_fct      4.00  0.648  0.631  
## 10 LIB     Flow          1.00  0.170  0.682  
## 11 LIB     lag1          1.00  42.1   < 0.001
## 12 LIB     lag2          1.00  2.46   0.123  
## 13 LIB     Year_fct:Flow 4.00  0.454  0.769  
## 14 LIB     s(Week)       0.133 0.0473 0.351  
## 15 RVB     Year_fct      4.00  3.52   0.011  
## 16 RVB     Flow          1.00  2.98   0.089  
## 17 RVB     lag1          1.00  18.1   < 0.001
## 18 RVB     Year_fct:Flow 4.00  2.44   0.055  
## 19 RVB     s(Week)       2.24  3.69   0.005

Effect of Flow by Year

Effect Plots

# Create effects plot of flow on chlorophyll by year for each station
ndf_gam_eff_flow_plt <- ndf_gam %>% 
  mutate(
    df_marg_eff = map2(df_data, model, calc_flow_yr_marg_eff),
    plt_marg_eff = map2(df_marg_eff, df_data, plot_eff_flow_by_yr)
  )

# Combine effects plots together using patchwork
plt_gam_eff_flow <- 
  wrap_plots(ndf_gam_eff_flow_plt$plt_marg_eff, ncol = 1) + 
  plot_layout(axes = "collect")

plt_gam_eff_flow

Slopes and p-values

# Calculate slopes and p-values of the modeled effects of flow on chlorophyll by
  # year for each station
ndf_gam_eff_flow_stat <- ndf_gam %>% 
  mutate(
    em_gam_flow = map(model, \(x) emtrends(x, ~ Year_fct, var = "Flow")),
    df_stat = map(em_gam_flow, \(x) as_tibble(test(x)))
  )
## Warning: There were 4 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `em_gam_flow = map(model, function(x) emtrends(x, ~Year_fct, var
##   = "Flow"))`.
## Caused by warning in `model.matrix.default()`:
## ! partial argument match of 'contrasts' to 'contrasts.arg'
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3 remaining warnings.
# Combine slopes and p-values into one data frame for export
df_gam_eff_flow_stat <- ndf_gam_eff_flow_stat %>% 
  select(StationCode, df_stat) %>% 
  unnest(df_stat) %>% 
  # Format for export
  rename(
    Station = StationCode,
    Year = Year_fct,
    Flow_slope = Flow.trend,
    t_ratio = t.ratio,
    p_value = p.value
  ) %>% 
  mutate(
    p_value = if_else(
      p_value < 0.001, 
      "< 0.001", 
      formatC(p_value, digits = 3, format = "f")
    ),
    across(
      where(is.numeric),
      \(x) if_else(
        abs(x) < 0.01,
        formatC(signif(x, 3), digits = 2, format = "e"),
        formatC(signif(x, 3), digits = 3, format = "fg", flag = "#")
      )
    )
  )

df_gam_eff_flow_stat
## # A tibble: 20 × 7
##    Station Year  Flow_slope SE       df    t_ratio p_value
##    <fct>   <fct> <chr>      <chr>    <chr> <chr>   <chr>  
##  1 RD22    2015  -1.39e-03  5.28e-04 70.7  -2.64   0.010  
##  2 RD22    2016  -1.86e-05  6.37e-04 70.7  -0.0291 0.977  
##  3 RD22    2017  0.0294     0.0138   70.7  2.13    0.036  
##  4 RD22    2018  -8.72e-04  5.13e-04 70.7  -1.70   0.094  
##  5 RD22    2019  -1.65e-03  3.74e-04 70.7  -4.42   < 0.001
##  6 STTD    2015  2.73e-03   4.49e-04 60.7  6.08    < 0.001
##  7 STTD    2016  1.25e-03   4.81e-04 60.7  2.60    0.012  
##  8 STTD    2017  0.0171     5.16e-03 60.7  3.31    0.002  
##  9 STTD    2018  2.48e-03   3.72e-04 60.7  6.68    < 0.001
## 10 STTD    2019  1.39e-03   2.64e-04 60.7  5.25    < 0.001
## 11 LIB     2015  -1.11e-04  2.69e-04 49.9  -0.412  0.682  
## 12 LIB     2016  6.02e-04   6.78e-04 49.9  0.889   0.378  
## 13 LIB     2017  2.79e-06   2.77e-04 49.9  0.0101  0.992  
## 14 LIB     2018  5.94e-04   7.32e-04 49.9  0.811   0.421  
## 15 LIB     2019  -1.65e-04  4.30e-04 49.9  -0.385  0.702  
## 16 RVB     2015  1.73e-04   1.00e-04 69.8  1.73    0.089  
## 17 RVB     2016  -1.03e-04  5.03e-05 69.8  -2.04   0.045  
## 18 RVB     2017  -1.99e-05  2.43e-05 69.8  -0.817  0.417  
## 19 RVB     2018  -1.38e-05  2.55e-05 69.8  -0.543  0.589  
## 20 RVB     2019  2.76e-05   2.67e-05 69.8  1.03    0.306

Year Contrasts

Boxplots

# Calculate estimated marginal means of year for each station
ndf_gam_cont_yr <- ndf_gam %>% 
  mutate(em_gam_yr = map(model, \(x) emmeans(x, ~ Year_fct)))

# Create boxplots showing Tukey post-hoc results of year contrasts for each station
ndf_gam_cont_yr_plt <- ndf_gam_cont_yr %>% 
  mutate(plt_cont_yr = map2(em_gam_yr, df_data, boxplot_cont_yr))

# Combine boxplots together using patchwork
plt_gam_cont_yr <- 
  wrap_plots(ndf_gam_cont_yr_plt$plt_cont_yr, ncol = 2) + 
  plot_layout(axes = "collect")

plt_gam_cont_yr

Statistics

# Generate pairwise Tukey post-hoc year contrasts for each station
ndf_gam_cont_yr_stat <- ndf_gam_cont_yr %>% 
  mutate(df_stat = map(em_gam_yr, \(x) as_tibble(pairs(x))))

# Combine pairwise Tukey post-hoc year contrasts into one data frame for export
df_gam_cont_yr_stat <- ndf_gam_cont_yr_stat %>% 
  select(StationCode, df_stat) %>% 
  unnest(df_stat) %>% 
  # Format for export
  rename(
    Station = StationCode,
    Contrast = contrast,
    Estimate = estimate,
    t_ratio = t.ratio,
    p_value = p.value
  ) %>% 
  mutate(
    p_value = if_else(
      p_value < 0.001, 
      "< 0.001", 
      formatC(p_value, digits = 3, format = "f")
    ),
    across(
      where(is.numeric),
      \(x) if_else(
        abs(x) < 0.01,
        formatC(signif(x, 3), digits = 2, format = "e"),
        formatC(signif(x, 3), digits = 3, format = "fg", flag = "#")
      )
    )
  )

print(df_gam_cont_yr_stat, n = Inf)
## # A tibble: 40 × 7
##    Station Contrast                    Estimate SE     df    t_ratio p_value
##    <fct>   <chr>                       <chr>    <chr>  <chr> <chr>   <chr>  
##  1 RD22    Year_fct2015 - Year_fct2016 0.569    0.131  70.7  4.35    < 0.001
##  2 RD22    Year_fct2015 - Year_fct2017 -2.37    1.25   70.7  -1.90   0.327  
##  3 RD22    Year_fct2015 - Year_fct2018 0.351    0.120  70.7  2.92    0.036  
##  4 RD22    Year_fct2015 - Year_fct2019 0.151    0.121  70.7  1.25    0.722  
##  5 RD22    Year_fct2016 - Year_fct2017 -2.94    1.23   70.7  -2.38   0.133  
##  6 RD22    Year_fct2016 - Year_fct2018 -0.218   0.125  70.7  -1.74   0.417  
##  7 RD22    Year_fct2016 - Year_fct2019 -0.418   0.126  70.7  -3.32   0.012  
##  8 RD22    Year_fct2017 - Year_fct2018 2.72     1.24   70.7  2.19    0.196  
##  9 RD22    Year_fct2017 - Year_fct2019 2.52     1.24   70.7  2.03    0.264  
## 10 RD22    Year_fct2018 - Year_fct2019 -0.200   0.116  70.7  -1.73   0.423  
## 11 STTD    Year_fct2015 - Year_fct2016 -0.395   0.0977 60.7  -4.04   0.001  
## 12 STTD    Year_fct2015 - Year_fct2017 -1.09    0.388  60.7  -2.81   0.050  
## 13 STTD    Year_fct2015 - Year_fct2018 0.674    0.0971 60.7  6.94    < 0.001
## 14 STTD    Year_fct2015 - Year_fct2019 1.13     0.0949 60.7  11.9    < 0.001
## 15 STTD    Year_fct2016 - Year_fct2017 -0.696   0.388  60.7  -1.79   0.386  
## 16 STTD    Year_fct2016 - Year_fct2018 1.07     0.102  60.7  10.5    < 0.001
## 17 STTD    Year_fct2016 - Year_fct2019 1.52     0.100  60.7  15.2    < 0.001
## 18 STTD    Year_fct2017 - Year_fct2018 1.77     0.389  60.7  4.54    < 0.001
## 19 STTD    Year_fct2017 - Year_fct2019 2.22     0.389  60.7  5.70    < 0.001
## 20 STTD    Year_fct2018 - Year_fct2019 0.452    0.0995 60.7  4.54    < 0.001
## 21 LIB     Year_fct2015 - Year_fct2016 -0.512   0.255  49.9  -2.01   0.278  
## 22 LIB     Year_fct2015 - Year_fct2017 -0.0289  0.166  49.9  -0.174  1.000  
## 23 LIB     Year_fct2015 - Year_fct2018 1.16     0.444  49.9  2.62    0.082  
## 24 LIB     Year_fct2015 - Year_fct2019 0.0902   0.275  49.9  0.328   0.997  
## 25 LIB     Year_fct2016 - Year_fct2017 0.484    0.230  49.9  2.10    0.237  
## 26 LIB     Year_fct2016 - Year_fct2018 1.68     0.505  49.9  3.32    0.014  
## 27 LIB     Year_fct2016 - Year_fct2019 0.603    0.341  49.9  1.77    0.405  
## 28 LIB     Year_fct2017 - Year_fct2018 1.19     0.442  49.9  2.70    0.068  
## 29 LIB     Year_fct2017 - Year_fct2019 0.119    0.266  49.9  0.448   0.991  
## 30 LIB     Year_fct2018 - Year_fct2019 -1.07    0.480  49.9  -2.24   0.183  
## 31 RVB     Year_fct2015 - Year_fct2016 0.498    0.364  69.8  1.37    0.649  
## 32 RVB     Year_fct2015 - Year_fct2017 0.735    0.367  69.8  2.00    0.275  
## 33 RVB     Year_fct2015 - Year_fct2018 0.795    0.363  69.8  2.19    0.196  
## 34 RVB     Year_fct2015 - Year_fct2019 1.07     0.366  69.8  2.93    0.036  
## 35 RVB     Year_fct2016 - Year_fct2017 0.237    0.113  69.8  2.11    0.230  
## 36 RVB     Year_fct2016 - Year_fct2018 0.297    0.101  69.8  2.94    0.035  
## 37 RVB     Year_fct2016 - Year_fct2019 0.573    0.127  69.8  4.51    < 0.001
## 38 RVB     Year_fct2017 - Year_fct2018 0.0597   0.101  69.8  0.594   0.976  
## 39 RVB     Year_fct2017 - Year_fct2019 0.336    0.113  69.8  2.96    0.033  
## 40 RVB     Year_fct2018 - Year_fct2019 0.276    0.0825 69.8  3.35    0.011

s(Week) Plots

# Create smooth of Week plots for each station
ndf_gam_s_week_plt <- ndf_gam %>% 
  mutate(plt_s_week = map2(model, StationCode, plot_smooth_week))

# Combine smooth of Week plots together using patchwork
plt_gam_s_week <- 
  wrap_plots(ndf_gam_s_week_plt$plt_s_week, ncol = 2) + 
  plot_layout(axis_titles = "collect") +
  plot_annotation(caption = "Basis: Cyclic CRS")

plt_gam_s_week

Export Results

# Define file path for figures and tables for the manuscript
fp_figures <- here("manuscript_synthesis/results/figures")
fp_tables <- here("manuscript_synthesis/results/tables")

# Export effects plot of flow by year for each station
ggsave(
  file.path(fp_figures, "chl_flow_eff_plot_by_year_each_sta.jpg"),
  plot = plt_gam_eff_flow,
  width = 7,
  height = 9,
  units = "in"
)

# Export boxplots of year contrasts for each station
ggsave(
  file.path(fp_figures, "chl_year_contrasts_each_sta.jpg"),
  plot = plt_gam_cont_yr,
  width = 6.5,
  height = 5,
  units = "in"
)

# Export effects plots of smooth of Week
ggsave(
  file.path(fp_figures, "chl_smooth_week.jpg"),
  plot = plt_gam_s_week,
  width = 6.5,
  height = 5,
  units = "in"
)

# Export combined ANOVA tables for GAMs
df_gam_anova_tbl_c %>% 
  mutate(across(c("df", "F", "p_value"), ~ paste0(.x, "##"))) %>% 
  write_csv(file.path(fp_tables, "chl_GAM_anova.csv"))

# Export slopes and p-values of the modeled effects of flow by year for each station
df_gam_eff_flow_stat %>% 
  mutate(across(c("Flow_slope", "SE", "df", "t_ratio", "p_value"), ~ paste0(.x, "##"))) %>% 
  write_csv(file.path(fp_tables, "chl_GAM_eff_flow_stat.csv"))

# Export pairwise Tukey post-hoc year contrasts
df_gam_cont_yr_stat %>% 
  mutate(across(c("Estimate", "SE", "df", "t_ratio", "p_value"), ~ paste0(.x, "##"))) %>% 
  write_csv(file.path(fp_tables, "chl_GAM_year_contrasts_stat.csv"))