Purpose

Look at results of the preferred model during model selection: GAM using smooths for weekly average flow by Station and Year with a cyclic penalized cubic regression spline smooth term for week number to account for seasonality. Create summary figures and tables for the manuscript. The model selection process and diagnostics can be found in the manuscript_synthesis/notebooks/rtm_chl_models_flow_weekly_avg.Rmd document.

Global code and functions

# Load packages
library(tidyverse)
library(scales)
library(mgcv)
library(broom)
library(ggeffects)
library(emmeans)
library(multcomp)
library(gratia)
library(here)
library(conflicted)

# Declare package conflict preferences 
conflicts_prefer(dplyr::filter(), dplyr::lag(), dplyr::select())

Display current versions of R and packages used for this analysis:

devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.2.3 (2023-03-15 ucrt)
##  os       Windows 10 x64 (build 19045)
##  system   x86_64, mingw32
##  ui       RTerm
##  language (EN)
##  collate  English_United States.utf8
##  ctype    English_United States.utf8
##  tz       America/Los_Angeles
##  date     2024-03-01
##  pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package      * version  date (UTC) lib source
##  backports      1.4.1    2021-12-13 [1] CRAN (R 4.2.0)
##  broom        * 1.0.4    2023-03-11 [1] CRAN (R 4.2.2)
##  bslib          0.4.2    2022-12-16 [1] CRAN (R 4.2.2)
##  cachem         1.0.8    2023-05-01 [1] CRAN (R 4.2.3)
##  callr          3.7.3    2022-11-02 [1] CRAN (R 4.2.2)
##  cli            3.6.2    2023-12-11 [1] CRAN (R 4.2.3)
##  coda           0.19-4   2020-09-30 [1] CRAN (R 4.2.1)
##  codetools      0.2-19   2023-02-01 [2] CRAN (R 4.2.3)
##  colorspace     2.1-0    2023-01-23 [1] CRAN (R 4.2.2)
##  conflicted   * 1.2.0    2023-02-01 [1] CRAN (R 4.2.2)
##  crayon         1.5.2    2022-09-29 [1] CRAN (R 4.2.1)
##  devtools       2.4.5    2022-10-11 [1] CRAN (R 4.2.1)
##  digest         0.6.33   2023-07-07 [1] CRAN (R 4.2.3)
##  dplyr        * 1.1.4    2023-11-17 [1] CRAN (R 4.2.3)
##  ellipsis       0.3.2    2021-04-29 [1] CRAN (R 4.2.1)
##  emmeans      * 1.8.5    2023-03-08 [1] CRAN (R 4.2.2)
##  estimability   1.4.1    2022-08-05 [1] CRAN (R 4.2.1)
##  evaluate       0.21     2023-05-05 [1] CRAN (R 4.2.3)
##  fansi          1.0.6    2023-12-08 [1] CRAN (R 4.2.3)
##  fastmap        1.1.1    2023-02-24 [1] CRAN (R 4.2.2)
##  forcats      * 1.0.0    2023-01-29 [1] CRAN (R 4.2.2)
##  fs             1.6.3    2023-07-20 [1] CRAN (R 4.2.3)
##  generics       0.1.3    2022-07-05 [1] CRAN (R 4.2.1)
##  ggeffects    * 1.3.3    2023-12-15 [1] CRAN (R 4.2.3)
##  ggplot2      * 3.4.3    2023-08-14 [1] CRAN (R 4.2.3)
##  glue           1.7.0    2024-01-09 [1] CRAN (R 4.2.3)
##  gratia       * 0.8.1    2023-02-02 [1] CRAN (R 4.2.2)
##  gtable         0.3.4    2023-08-21 [1] CRAN (R 4.2.3)
##  here         * 1.0.1    2020-12-13 [1] CRAN (R 4.2.1)
##  hms            1.1.3    2023-03-21 [1] CRAN (R 4.2.3)
##  htmltools      0.5.5    2023-03-23 [1] CRAN (R 4.2.3)
##  htmlwidgets    1.6.2    2023-03-17 [1] CRAN (R 4.2.3)
##  httpuv         1.6.9    2023-02-14 [1] CRAN (R 4.2.2)
##  jquerylib      0.1.4    2021-04-26 [1] CRAN (R 4.2.1)
##  jsonlite       1.8.7    2023-06-29 [1] CRAN (R 4.2.3)
##  knitr          1.42     2023-01-25 [1] CRAN (R 4.2.2)
##  later          1.3.0    2021-08-18 [1] CRAN (R 4.2.1)
##  lattice        0.21-8   2023-04-05 [1] CRAN (R 4.2.3)
##  lifecycle      1.0.4    2023-11-07 [1] CRAN (R 4.2.3)
##  lubridate    * 1.9.3    2023-09-27 [1] CRAN (R 4.2.3)
##  magrittr       2.0.3    2022-03-30 [1] CRAN (R 4.2.1)
##  MASS         * 7.3-58.2 2023-01-23 [2] CRAN (R 4.2.3)
##  Matrix         1.5-4    2023-04-04 [1] CRAN (R 4.2.3)
##  memoise        2.0.1    2021-11-26 [1] CRAN (R 4.2.1)
##  mgcv         * 1.8-42   2023-03-02 [1] CRAN (R 4.2.2)
##  mime           0.12     2021-09-28 [1] CRAN (R 4.2.0)
##  miniUI         0.1.1.1  2018-05-18 [1] CRAN (R 4.2.1)
##  multcomp     * 1.4-23   2023-03-09 [1] CRAN (R 4.2.2)
##  munsell        0.5.0    2018-06-12 [1] CRAN (R 4.2.1)
##  mvnfast        0.2.8    2023-02-23 [1] CRAN (R 4.2.2)
##  mvtnorm      * 1.1-3    2021-10-08 [1] CRAN (R 4.2.0)
##  nlme         * 3.1-162  2023-01-31 [2] CRAN (R 4.2.3)
##  patchwork      1.1.2    2022-08-19 [1] CRAN (R 4.2.1)
##  pillar         1.9.0    2023-03-22 [1] CRAN (R 4.2.3)
##  pkgbuild       1.4.2    2023-06-26 [1] CRAN (R 4.2.3)
##  pkgconfig      2.0.3    2019-09-22 [1] CRAN (R 4.2.1)
##  pkgload        1.3.2.1  2023-07-08 [1] CRAN (R 4.2.3)
##  prettyunits    1.2.0    2023-09-24 [1] CRAN (R 4.2.3)
##  processx       3.8.2    2023-06-30 [1] CRAN (R 4.2.3)
##  profvis        0.3.7    2020-11-02 [1] CRAN (R 4.2.1)
##  promises       1.2.0.1  2021-02-11 [1] CRAN (R 4.2.1)
##  ps             1.7.5    2023-04-18 [1] CRAN (R 4.2.3)
##  purrr        * 1.0.2    2023-08-10 [1] CRAN (R 4.2.3)
##  R6             2.5.1    2021-08-19 [1] CRAN (R 4.2.1)
##  Rcpp           1.0.11   2023-07-06 [1] CRAN (R 4.2.3)
##  readr        * 2.1.5    2024-01-10 [1] CRAN (R 4.2.3)
##  remotes        2.4.2    2021-11-30 [1] CRAN (R 4.2.1)
##  rlang          1.1.3    2024-01-10 [1] CRAN (R 4.2.3)
##  rmarkdown      2.21     2023-03-26 [1] CRAN (R 4.2.3)
##  rprojroot      2.0.3    2022-04-02 [1] CRAN (R 4.2.1)
##  rstudioapi     0.14     2022-08-22 [1] CRAN (R 4.2.1)
##  sandwich       3.0-2    2022-06-15 [1] CRAN (R 4.2.1)
##  sass           0.4.6    2023-05-03 [1] CRAN (R 4.2.3)
##  scales       * 1.2.1    2022-08-20 [1] CRAN (R 4.2.1)
##  sessioninfo    1.2.2    2021-12-06 [1] CRAN (R 4.2.1)
##  shiny          1.7.4    2022-12-15 [1] CRAN (R 4.2.2)
##  stringi        1.8.3    2023-12-11 [1] CRAN (R 4.2.3)
##  stringr      * 1.5.1    2023-11-14 [1] CRAN (R 4.2.3)
##  survival     * 3.5-5    2023-03-12 [1] CRAN (R 4.2.3)
##  TH.data      * 1.1-2    2023-04-17 [1] CRAN (R 4.2.3)
##  tibble       * 3.2.1    2023-03-20 [1] CRAN (R 4.2.3)
##  tidyr        * 1.3.1    2024-01-24 [1] CRAN (R 4.2.3)
##  tidyselect     1.2.0    2022-10-10 [1] CRAN (R 4.2.1)
##  tidyverse    * 2.0.0    2023-02-22 [1] CRAN (R 4.2.2)
##  timechange     0.3.0    2024-01-18 [1] CRAN (R 4.2.3)
##  tzdb           0.4.0    2023-05-12 [1] CRAN (R 4.2.3)
##  urlchecker     1.0.1    2021-11-30 [1] CRAN (R 4.2.1)
##  usethis        2.1.6    2022-05-25 [1] CRAN (R 4.2.1)
##  utf8           1.2.3    2023-01-31 [1] CRAN (R 4.2.2)
##  vctrs          0.6.5    2023-12-01 [1] CRAN (R 4.2.3)
##  withr          3.0.0    2024-01-16 [1] CRAN (R 4.2.3)
##  xfun           0.39     2023-04-20 [1] CRAN (R 4.2.3)
##  xtable         1.8-4    2019-04-21 [1] CRAN (R 4.2.1)
##  yaml           2.3.7    2023-01-23 [1] CRAN (R 4.2.2)
##  zoo            1.8-12   2023-04-13 [1] CRAN (R 4.2.3)
## 
##  [1] C:/R/win-library/4.2
##  [2] C:/Program Files/R/R-4.2.3/library
## 
## ──────────────────────────────────────────────────────────────────────────────

Import Data

# Define file path for processed data
fp_data <- here("manuscript_synthesis/data/processed")

# Import weekly average water quality data
df_wq <- readRDS(file.path(fp_data, "wq_week_avg_2013-2019.rds"))

# Import weekly average flow data
df_flow <- readRDS(file.path(fp_data, "flow_week_avg_2013-2019.rds"))

Prepare Data

# Create a vector for the factor order of StationCode
sta_order <- c("RD22", "STTD", "LIB", "RVB")

# We will use LIS flow data as a proxy for STTD
df_flow_c <- df_flow %>% mutate(StationCode = if_else(StationCode == "LIS", "STTD", StationCode))

# Prepare chlorophyll and flow data for analysis
df_chla_c <- df_wq %>% 
  select(StationCode, Year, Week, Chla) %>% 
  drop_na(Chla) %>% 
  # Filter to only include representative stations for 4 habitat types (RD22,
    # STTD, LIB, RVB) and only include years 2015-2019
  filter(
    StationCode %in% sta_order,
    Year %in% 2015:2019
  ) %>% 
  # Join flow data to chlorophyll data
  left_join(df_flow_c, by = join_by(StationCode, Year, Week)) %>% 
  # Remove all NA flow values
  drop_na(Flow) %>% 
  mutate(
    # Scale and log transform chlorophyll values
    Chla_log = log(Chla * 1000),
    # Apply factor order to StationCode
    StationCode = factor(StationCode, levels = sta_order),
    # Add a column for Year as a factor for the model
    Year_fct = factor(Year)
  ) %>% 
  arrange(StationCode, Year, Week) %>% 
  # Add 2 lag variables for chlorophyll to be used in the model to help with
    # serial autocorrelation
  group_by(StationCode, Year) %>% 
  mutate(
    lag1 = lag(Chla_log),
    lag2 = lag(Chla_log, 2)
  ) %>% 
  ungroup() %>% 
  drop_na(lag1, lag2)

Create GAM model

m_gam_sflow_lag2 <- gam(
  Chla_log ~ s(Flow, by = StationCode) + s(Flow, by = Year_fct) + Year_fct * StationCode + s(Week, bs = "cc", k = 5) + lag1 + lag2, 
  data = df_chla_c,
  method = "REML", 
  knots = list(week = c(0, 52))
)

Model Results

ANOVA table

# Create ANOVA table for GAM
an_gam_sflow_lag2 <- anova(m_gam_sflow_lag2)
an_gam_sflow_lag2
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Chla_log ~ s(Flow, by = StationCode) + s(Flow, by = Year_fct) + 
##     Year_fct * StationCode + s(Week, bs = "cc", k = 5) + lag1 + 
##     lag2
## 
## Parametric Terms:
##                      df      F  p-value
## Year_fct              4  2.543   0.0403
## StationCode           3  0.406   0.7490
## lag1                  1 82.411  < 2e-16
## lag2                  1  5.407   0.0209
## Year_fct:StationCode 12  5.961 4.38e-09
## 
## Approximate significance of smooth terms:
##                            edf Ref.df      F  p-value
## s(Flow):StationCodeRD22 1.0000 1.0000  0.003    0.953
## s(Flow):StationCodeSTTD 0.9546 1.1487  0.440    0.640
## s(Flow):StationCodeLIB  1.0000 1.0000  0.000    0.999
## s(Flow):StationCodeRVB  1.0000 1.0000  0.000    0.998
## s(Flow):Year_fct2015    1.3234 1.5644  0.065    0.926
## s(Flow):Year_fct2016    1.0095 1.0189  0.000    0.999
## s(Flow):Year_fct2017    1.0000 1.0000  0.000    0.998
## s(Flow):Year_fct2018    1.0000 1.0000  0.000    0.998
## s(Flow):Year_fct2019    1.0001 1.0003  0.000    1.000
## s(Week)                 2.5102 3.0000 10.067 2.17e-07
# Extract elements from ANOVA table for export
# Parametric terms
df_an_gam_ptable <- an_gam_sflow_lag2$pTerms.table %>% as_tibble(rownames = "Term")

# Smooth terms
df_an_gam_stable <- an_gam_sflow_lag2$s.table %>% 
  as_tibble(rownames = "Term") %>% 
  rename(df = edf) %>% 
  select(-Ref.df)

# Combine and format for export
df_an_gam_sflow_lag2 <- 
  bind_rows(df_an_gam_ptable, df_an_gam_stable) %>% 
  transmute(
    Term,
    across(
      c("df", "F"),
      ~ if_else(
        abs(.x) < 0.1,
        formatC(signif(.x, 4), digits = 3, format = "e"),
        formatC(signif(.x, 4), digits = 4, format = "fg", flag = "#")
      )
    ),
    p_value = if_else(
      `p-value` < 0.001, 
      "< 0.001", 
      formatC(`p-value`, digits = 3, format = "f")
    )
  ) %>% 
  mutate(across(c("df", "F", "p_value"), ~ paste0(.x, "##")))

Effect of Flow by Station

# Calculate min and max flows for each station to narrow down x-axis in the plot
df_chla_flow_sta_summ <- df_chla_c %>% 
  summarize(
    Flow_min = min(Flow),
    Flow_max = max(Flow),
    .by = c(StationCode)
  ) %>% 
  mutate(
    Flow_buffer = (Flow_max - Flow_min) * 0.05,
    Flow_min = Flow_min - Flow_buffer,
    Flow_max = Flow_max + Flow_buffer
  )

# Calculate effects of flow on chlorophyll for each station holding the
  # non-focal variables constant - marginal effects/adjusted predictions
df_gam_flow_sta_eff <- 
  as.data.frame(
    ggemmeans(m_gam_sflow_lag2, terms = c("Flow", "StationCode")),
    terms_to_colnames = TRUE
  ) %>% 
  as_tibble() %>% 
  # Narrow down range of flow values for each station
  left_join(df_chla_flow_sta_summ, by = join_by(StationCode)) %>% 
  filter(Flow >= Flow_min & Flow <= Flow_max) %>% 
  transmute(
    StationCode,
    Flow,
    # Back calculate model fits and confidence levels
    across(c("predicted", "conf.low", "conf.high"), ~ exp(.x) / 1000)
  )

# Create effects plot
plt_gam_flow_sta_eff <- df_gam_flow_sta_eff %>% 
  ggplot(aes(x = Flow, y = predicted)) +
  geom_point(
    data = df_chla_c,
    aes(y = Chla, color = Year_fct),
    alpha = 0.6
  ) +
  geom_line(linewidth = 1) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.25) +
  facet_wrap(vars(StationCode), scales = "free") +
  theme_bw() +
  labs(
    x = "Flow (cfs)",
    y = expression(Chlorophyll~Fluoresence~(mu*g~L^{-1}))
  ) +
  scale_x_continuous(breaks = breaks_extended(6)) +
  scale_color_viridis_d(name = "Year", option = "C")

plt_gam_flow_sta_eff

Effect of Flow by Year

# Calculate min and max flows for each station to narrow down x-axis in the plot
df_chla_flow_yr_summ <- df_chla_c %>% 
  summarize(
    Flow_min = min(Flow),
    Flow_max = max(Flow),
    .by = c(Year_fct)
  ) %>% 
  mutate(
    Flow_buffer = (Flow_max - Flow_min) * 0.05,
    Flow_min = Flow_min - Flow_buffer,
    Flow_max = Flow_max + Flow_buffer
  )

# Calculate effects of flow on chlorophyll for each year holding the
  # non-focal variables constant - marginal effects/adjusted predictions
df_gam_flow_yr_eff <- 
  as.data.frame(
    ggemmeans(m_gam_sflow_lag2, terms = c("Flow", "Year_fct")),
    terms_to_colnames = TRUE
  ) %>% 
  as_tibble() %>% 
  # Narrow down range of flow values for each station
  left_join(df_chla_flow_yr_summ, by = join_by(Year_fct)) %>% 
  filter(Flow >= Flow_min & Flow <= Flow_max) %>% 
  transmute(
    Year_fct,
    Flow,
    # Back calculate model fits and confidence levels
    across(c("predicted", "conf.low", "conf.high"), ~ exp(.x) / 1000)
  )

# Create effects plot
plt_gam_flow_yr_eff <- df_gam_flow_yr_eff %>% 
  ggplot(aes(x = Flow, y = predicted)) +
  geom_point(
    data = df_chla_c,
    aes(y = Chla, color = StationCode),
    alpha = 0.6
  ) +
  geom_line(linewidth = 1) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.25) +
  facet_wrap(vars(Year_fct), scales = "free") +
  theme_bw() +
  labs(
    x = "Flow (cfs)",
    y = expression(Chlorophyll~Fluoresence~(mu*g~L^{-1}))
  ) +
  scale_x_continuous(breaks = breaks_extended(6)) +
  scale_color_viridis_d(name = "Station", option = "C") +
  theme(
    legend.margin = margin(0, 0, 0, 0),
    legend.position = c(0.8, 0.3)
  )

plt_gam_flow_yr_eff

Station by Year Contrasts

# Estimated marginal means for station by year
em_gam_sta_yr <- emmeans(m_gam_sflow_lag2, ~ StationCode | Year_fct)

# Tukey post-hoc contrasts
pairs(em_gam_sta_yr)
## Year_fct = 2015:
##  contrast    estimate    SE  df t.ratio p.value
##  RD22 - STTD    3.922 4.145 240   0.946  0.7799
##  RD22 - LIB    -0.492 0.676 240  -0.729  0.8856
##  RD22 - RVB    -0.553 0.639 240  -0.865  0.8228
##  STTD - LIB    -4.414 4.145 240  -1.065  0.7113
##  STTD - RVB    -4.475 4.193 240  -1.067  0.7098
##  LIB - RVB     -0.061 0.798 240  -0.076  0.9998
## 
## Year_fct = 2016:
##  contrast    estimate    SE  df t.ratio p.value
##  RD22 - STTD    3.398 4.140 240   0.821  0.8446
##  RD22 - LIB    -1.323 0.679 240  -1.949  0.2104
##  RD22 - RVB    -1.544 0.721 240  -2.139  0.1437
##  STTD - LIB    -4.721 4.160 240  -1.135  0.6684
##  STTD - RVB    -4.941 4.163 240  -1.187  0.6356
##  LIB - RVB     -0.221 0.857 240  -0.257  0.9940
## 
## Year_fct = 2017:
##  contrast    estimate    SE  df t.ratio p.value
##  RD22 - STTD    3.689 4.136 240   0.892  0.8092
##  RD22 - LIB    -0.849 0.635 240  -1.337  0.5404
##  RD22 - RVB    -0.633 0.587 240  -1.080  0.7023
##  STTD - LIB    -4.538 4.147 240  -1.094  0.6933
##  STTD - RVB    -4.322 4.128 240  -1.047  0.7220
##  LIB - RVB      0.216 0.600 240   0.360  0.9840
## 
## Year_fct = 2018:
##  contrast    estimate    SE  df t.ratio p.value
##  RD22 - STTD    4.030 4.141 240   0.973  0.7648
##  RD22 - LIB     0.478 0.598 240   0.798  0.8553
##  RD22 - RVB    -0.601 0.480 240  -1.252  0.5942
##  STTD - LIB    -3.553 4.147 240  -0.857  0.8270
##  STTD - RVB    -4.632 4.127 240  -1.122  0.6761
##  LIB - RVB     -1.079 0.504 240  -2.143  0.1427
## 
## Year_fct = 2019:
##  contrast    estimate    SE  df t.ratio p.value
##  RD22 - STTD    4.449 4.118 240   1.080  0.7018
##  RD22 - LIB    -0.352 0.594 240  -0.592  0.9345
##  RD22 - RVB     0.119 0.488 240   0.243  0.9949
##  STTD - LIB    -4.801 4.127 240  -1.163  0.6505
##  STTD - RVB    -4.330 4.090 240  -1.059  0.7150
##  LIB - RVB      0.470 0.521 240   0.904  0.8029
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
# Create table of contrasts and convert it to a tibble for plot
df_gam_sta_yr <- em_gam_sta_yr %>% 
  cld(sort = FALSE, Letters = letters) %>% 
  as_tibble() %>% 
  mutate(
    group = str_remove_all(.group, fixed(" ")),
    # back transform log-transformed results
    across(c(emmean, lower.CL, upper.CL), ~ exp(.x) / 1000)
  ) %>% 
  # Add min and max values of observed data to the Tukey post-hoc results and
    # calculate vertical positioning of letters
  left_join(
    df_chla_c %>% 
      summarize(
        max_val = max(Chla),
        min_val = min(Chla),
        .by = Year_fct
    ), 
    by = join_by(Year_fct)
  ) %>% 
  mutate(max_val = if_else(upper.CL > max_val, upper.CL, max_val)) %>% 
  group_by(Year_fct) %>% 
  mutate(max_val = max(max_val)) %>% 
  ungroup() %>% 
  mutate(y_pos = max_val + (max_val - min_val) / 10) %>% 
  select(
    StationCode,
    Year_fct,
    emmean,
    lower.CL,
    upper.CL,
    group,
    y_pos
  )

# Create boxplot showing Tukey post-hoc results
plt_gam_sta_yr <- df_gam_sta_yr %>% 
  ggplot(
    aes(
      x = StationCode,
      y = emmean,
      ymin = lower.CL,
      ymax = upper.CL
    )
  ) +
  geom_boxplot(
    data = df_chla_c,
    aes(x = StationCode, y = Chla),
    inherit.aes = FALSE
  ) +
  geom_crossbar(color = "grey82", fill = "grey", alpha = 0.7, linewidth = 0.1) +
  geom_point(color = "red") +
  geom_text(aes(y = y_pos, label = group), size = 3.5) +
  facet_wrap(vars(Year_fct), scales = "free_y") +
  xlab("Station") +
  ylab(expression(Chlorophyll~Fluoresence~(mu*g~L^{-1}))) +
  theme_bw()

plt_gam_sta_yr

Year by Station Contrasts

# Estimated marginal means for year by station
em_gam_yr_sta <- emmeans(m_gam_sflow_lag2, ~ Year_fct | StationCode)

# Tukey post-hoc contrasts
pairs(em_gam_yr_sta)
## StationCode = RD22:
##  contrast                    estimate    SE  df t.ratio p.value
##  Year_fct2015 - Year_fct2016   0.4746 0.333 240   1.427  0.6108
##  Year_fct2015 - Year_fct2017   0.2222 0.312 240   0.713  0.9533
##  Year_fct2015 - Year_fct2018   0.1650 0.305 240   0.541  0.9829
##  Year_fct2015 - Year_fct2019  -0.0565 0.303 240  -0.186  0.9997
##  Year_fct2016 - Year_fct2017  -0.2524 0.193 240  -1.306  0.6875
##  Year_fct2016 - Year_fct2018  -0.3096 0.186 240  -1.661  0.4604
##  Year_fct2016 - Year_fct2019  -0.5311 0.188 240  -2.820  0.0412
##  Year_fct2017 - Year_fct2018  -0.0572 0.142 240  -0.404  0.9944
##  Year_fct2017 - Year_fct2019  -0.2787 0.144 240  -1.935  0.3017
##  Year_fct2018 - Year_fct2019  -0.2215 0.134 240  -1.648  0.4685
## 
## StationCode = STTD:
##  contrast                    estimate    SE  df t.ratio p.value
##  Year_fct2015 - Year_fct2016  -0.0493 0.345 240  -0.143  0.9999
##  Year_fct2015 - Year_fct2017  -0.0111 0.332 240  -0.034  1.0000
##  Year_fct2015 - Year_fct2018   0.2736 0.326 240   0.839  0.9181
##  Year_fct2015 - Year_fct2019   0.4706 0.331 240   1.424  0.6129
##  Year_fct2016 - Year_fct2017   0.0382 0.207 240   0.184  0.9997
##  Year_fct2016 - Year_fct2018   0.3229 0.198 240   1.632  0.4786
##  Year_fct2016 - Year_fct2019   0.5199 0.205 240   2.536  0.0861
##  Year_fct2017 - Year_fct2018   0.2848 0.168 240   1.692  0.4411
##  Year_fct2017 - Year_fct2019   0.4817 0.176 240   2.743  0.0507
##  Year_fct2018 - Year_fct2019   0.1970 0.150 240   1.316  0.6813
## 
## StationCode = LIB:
##  contrast                    estimate    SE  df t.ratio p.value
##  Year_fct2015 - Year_fct2016  -0.3561 0.585 240  -0.609  0.9737
##  Year_fct2015 - Year_fct2017  -0.1351 0.529 240  -0.256  0.9991
##  Year_fct2015 - Year_fct2018   1.1348 0.534 240   2.126  0.2124
##  Year_fct2015 - Year_fct2019   0.0840 0.530 240   0.158  0.9999
##  Year_fct2016 - Year_fct2017   0.2210 0.293 240   0.755  0.9429
##  Year_fct2016 - Year_fct2018   1.4909 0.333 240   4.481  0.0001
##  Year_fct2016 - Year_fct2019   0.4401 0.314 240   1.400  0.6284
##  Year_fct2017 - Year_fct2018   1.2699 0.215 240   5.914  <.0001
##  Year_fct2017 - Year_fct2019   0.2190 0.202 240   1.084  0.8148
##  Year_fct2018 - Year_fct2019  -1.0508 0.213 240  -4.923  <.0001
## 
## StationCode = RVB:
##  contrast                    estimate    SE  df t.ratio p.value
##  Year_fct2015 - Year_fct2016  -0.5157 0.559 240  -0.922  0.8882
##  Year_fct2015 - Year_fct2017   0.1420 0.437 240   0.325  0.9976
##  Year_fct2015 - Year_fct2018   0.1168 0.359 240   0.325  0.9976
##  Year_fct2015 - Year_fct2019   0.6154 0.376 240   1.635  0.4763
##  Year_fct2016 - Year_fct2017   0.6577 0.563 240   1.168  0.7694
##  Year_fct2016 - Year_fct2018   0.6325 0.506 240   1.250  0.7220
##  Year_fct2016 - Year_fct2019   1.1311 0.518 240   2.185  0.1890
##  Year_fct2017 - Year_fct2018  -0.0252 0.372 240  -0.068  1.0000
##  Year_fct2017 - Year_fct2019   0.4734 0.387 240   1.225  0.7370
##  Year_fct2018 - Year_fct2019   0.4986 0.293 240   1.704  0.4332
## 
## P value adjustment: tukey method for comparing a family of 5 estimates
# Create table of contrasts and convert it to a tibble for plot
df_gam_yr_sta <- em_gam_yr_sta %>% 
  cld(sort = FALSE, Letters = letters) %>% 
  as_tibble() %>% 
  mutate(
    group = str_remove_all(.group, fixed(" ")),
    # back transform log-transformed results
    across(c(emmean, lower.CL, upper.CL), ~ exp(.x) / 1000)
  ) %>% 
  # Add min and max values of observed data to the Tukey post-hoc results and
    # calculate vertical positioning of letters
  left_join(
    df_chla_c %>% 
      summarize(
        max_val = max(Chla),
        min_val = min(Chla),
        .by = StationCode
    ), 
    by = join_by(StationCode)
  ) %>% 
  mutate(max_val = if_else(upper.CL > max_val, upper.CL, max_val)) %>% 
  group_by(StationCode) %>% 
  mutate(max_val = max(max_val)) %>% 
  ungroup() %>% 
  mutate(y_pos = max_val + (max_val - min_val) / 10) %>% 
  select(
    StationCode,
    Year_fct,
    emmean,
    lower.CL,
    upper.CL,
    group,
    y_pos
  )

# Create boxplot showing Tukey post-hoc results
plt_gam_yr_sta <- df_gam_yr_sta %>% 
  ggplot(
    aes(
      x = Year_fct,
      y = emmean,
      ymin = lower.CL,
      ymax = upper.CL
    )
  ) +
  geom_boxplot(
    data = df_chla_c,
    aes(x = Year_fct, y = Chla),
    inherit.aes = FALSE
  ) +
  geom_crossbar(color = "grey82", fill = "grey", alpha = 0.7, linewidth = 0.1) +
  geom_point(color = "red") +
  geom_text(aes(y = y_pos, label = group), size = 3.5) +
  facet_wrap(vars(StationCode), scales = "free_y") +
  xlab("Year") +
  ylab(expression(Chlorophyll~Fluoresence~(mu*g~L^{-1}))) +
  theme_bw()

plt_gam_yr_sta

s(Week) Figure

plt_gam_s_week <- m_gam_sflow_lag2 %>% 
  draw(select = 10, residuals = TRUE, rug = FALSE) +
  theme_bw() +
  ggtitle(NULL)

plt_gam_s_week

Export Results

# Define file path for figures for the manuscript
fp_figures <- here("manuscript_synthesis/results/figures")

# Export effects plot of flow by station
ggsave(
  file.path(fp_figures, "chl_flow_effects_plot_by_station.jpg"),
  plot = plt_gam_flow_sta_eff,
  width = 6.5,
  height = 5,
  units = "in"
)

# Export effects plot of flow by year
ggsave(
  file.path(fp_figures, "chl_flow_effects_plot_by_year.jpg"),
  plot = plt_gam_flow_yr_eff,
  width = 7,
  height = 5,
  units = "in"
)

# Export boxplot of station contrasts by year
ggsave(
  file.path(fp_figures, "chl_station_contrasts_by_year.jpg"),
  plot = plt_gam_sta_yr,
  width = 7,
  height = 5,
  units = "in"
)

# Export boxplot of station contrasts by year
ggsave(
  file.path(fp_figures, "chl_year_contrasts_by_station.jpg"),
  plot = plt_gam_yr_sta,
  width = 6.5,
  height = 5,
  units = "in"
)

# Export effects plot of smooth of Week
ggsave(
  file.path(fp_figures, "chl_smooth_week.jpg"),
  plot = plt_gam_s_week,
  width = 4.5,
  height = 3.5,
  units = "in"
)

# Export ANOVA table for GAM
df_an_gam_sflow_lag2 %>% write_csv(here("manuscript_synthesis/results/tables/chl_GAM_anova.csv"))