Purpose

Explore the relationship between continuous chlorophyll data and the modelled percent flow pulse water to see if it should be included in the NDFS synthesis manuscript.

Global code and functions

# Load packages
library(tidyverse)
library(scales)
library(knitr)
library(here)
library(conflicted)

# Tell conflicted to resolve any call to filter() to the dplyr version
conflicts_prefer(dplyr::filter)

Display current versions of R and packages used for this analysis:

# Print the R version, platform details, and package versions for this session
# so the rendered document records the environment it was built in
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.2.3 (2023-03-15 ucrt)
##  os       Windows 10 x64 (build 19045)
##  system   x86_64, mingw32
##  ui       RTerm
##  language (EN)
##  collate  English_United States.utf8
##  ctype    English_United States.utf8
##  tz       America/Los_Angeles
##  date     2024-02-28
##  pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package     * version date (UTC) lib source
##  bslib         0.4.2   2022-12-16 [1] CRAN (R 4.2.2)
##  cachem        1.0.8   2023-05-01 [1] CRAN (R 4.2.3)
##  callr         3.7.3   2022-11-02 [1] CRAN (R 4.2.2)
##  cli           3.6.2   2023-12-11 [1] CRAN (R 4.2.3)
##  colorspace    2.1-0   2023-01-23 [1] CRAN (R 4.2.2)
##  conflicted  * 1.2.0   2023-02-01 [1] CRAN (R 4.2.2)
##  crayon        1.5.2   2022-09-29 [1] CRAN (R 4.2.1)
##  devtools      2.4.5   2022-10-11 [1] CRAN (R 4.2.1)
##  digest        0.6.33  2023-07-07 [1] CRAN (R 4.2.3)
##  dplyr       * 1.1.4   2023-11-17 [1] CRAN (R 4.2.3)
##  ellipsis      0.3.2   2021-04-29 [1] CRAN (R 4.2.1)
##  evaluate      0.21    2023-05-05 [1] CRAN (R 4.2.3)
##  fansi         1.0.6   2023-12-08 [1] CRAN (R 4.2.3)
##  fastmap       1.1.1   2023-02-24 [1] CRAN (R 4.2.2)
##  forcats     * 1.0.0   2023-01-29 [1] CRAN (R 4.2.2)
##  fs            1.6.3   2023-07-20 [1] CRAN (R 4.2.3)
##  generics      0.1.3   2022-07-05 [1] CRAN (R 4.2.1)
##  ggplot2     * 3.4.3   2023-08-14 [1] CRAN (R 4.2.3)
##  glue          1.7.0   2024-01-09 [1] CRAN (R 4.2.3)
##  gtable        0.3.4   2023-08-21 [1] CRAN (R 4.2.3)
##  here        * 1.0.1   2020-12-13 [1] CRAN (R 4.2.1)
##  hms           1.1.3   2023-03-21 [1] CRAN (R 4.2.3)
##  htmltools     0.5.5   2023-03-23 [1] CRAN (R 4.2.3)
##  htmlwidgets   1.6.2   2023-03-17 [1] CRAN (R 4.2.3)
##  httpuv        1.6.9   2023-02-14 [1] CRAN (R 4.2.2)
##  jquerylib     0.1.4   2021-04-26 [1] CRAN (R 4.2.1)
##  jsonlite      1.8.7   2023-06-29 [1] CRAN (R 4.2.3)
##  knitr       * 1.42    2023-01-25 [1] CRAN (R 4.2.2)
##  later         1.3.0   2021-08-18 [1] CRAN (R 4.2.1)
##  lifecycle     1.0.4   2023-11-07 [1] CRAN (R 4.2.3)
##  lubridate   * 1.9.3   2023-09-27 [1] CRAN (R 4.2.3)
##  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.2.1)
##  memoise       2.0.1   2021-11-26 [1] CRAN (R 4.2.1)
##  mime          0.12    2021-09-28 [1] CRAN (R 4.2.0)
##  miniUI        0.1.1.1 2018-05-18 [1] CRAN (R 4.2.1)
##  munsell       0.5.0   2018-06-12 [1] CRAN (R 4.2.1)
##  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.2.3)
##  pkgbuild      1.4.2   2023-06-26 [1] CRAN (R 4.2.3)
##  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.2.1)
##  pkgload       1.3.2.1 2023-07-08 [1] CRAN (R 4.2.3)
##  prettyunits   1.2.0   2023-09-24 [1] CRAN (R 4.2.3)
##  processx      3.8.2   2023-06-30 [1] CRAN (R 4.2.3)
##  profvis       0.3.7   2020-11-02 [1] CRAN (R 4.2.1)
##  promises      1.2.0.1 2021-02-11 [1] CRAN (R 4.2.1)
##  ps            1.7.5   2023-04-18 [1] CRAN (R 4.2.3)
##  purrr       * 1.0.2   2023-08-10 [1] CRAN (R 4.2.3)
##  R6            2.5.1   2021-08-19 [1] CRAN (R 4.2.1)
##  Rcpp          1.0.11  2023-07-06 [1] CRAN (R 4.2.3)
##  readr       * 2.1.5   2024-01-10 [1] CRAN (R 4.2.3)
##  remotes       2.4.2   2021-11-30 [1] CRAN (R 4.2.1)
##  rlang         1.1.3   2024-01-10 [1] CRAN (R 4.2.3)
##  rmarkdown     2.21    2023-03-26 [1] CRAN (R 4.2.3)
##  rprojroot     2.0.3   2022-04-02 [1] CRAN (R 4.2.1)
##  rstudioapi    0.14    2022-08-22 [1] CRAN (R 4.2.1)
##  sass          0.4.6   2023-05-03 [1] CRAN (R 4.2.3)
##  scales      * 1.2.1   2022-08-20 [1] CRAN (R 4.2.1)
##  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.2.1)
##  shiny         1.7.4   2022-12-15 [1] CRAN (R 4.2.2)
##  stringi       1.8.3   2023-12-11 [1] CRAN (R 4.2.3)
##  stringr     * 1.5.1   2023-11-14 [1] CRAN (R 4.2.3)
##  tibble      * 3.2.1   2023-03-20 [1] CRAN (R 4.2.3)
##  tidyr       * 1.3.1   2024-01-24 [1] CRAN (R 4.2.3)
##  tidyselect    1.2.0   2022-10-10 [1] CRAN (R 4.2.1)
##  tidyverse   * 2.0.0   2023-02-22 [1] CRAN (R 4.2.2)
##  timechange    0.3.0   2024-01-18 [1] CRAN (R 4.2.3)
##  tzdb          0.4.0   2023-05-12 [1] CRAN (R 4.2.3)
##  urlchecker    1.0.1   2021-11-30 [1] CRAN (R 4.2.1)
##  usethis       2.1.6   2022-05-25 [1] CRAN (R 4.2.1)
##  utf8          1.2.3   2023-01-31 [1] CRAN (R 4.2.2)
##  vctrs         0.6.5   2023-12-01 [1] CRAN (R 4.2.3)
##  withr         3.0.0   2024-01-16 [1] CRAN (R 4.2.3)
##  xfun          0.39    2023-04-20 [1] CRAN (R 4.2.3)
##  xtable        1.8-4   2019-04-21 [1] CRAN (R 4.2.1)
##  yaml          2.3.7   2023-01-23 [1] CRAN (R 4.2.2)
## 
##  [1] C:/R/win-library/4.2
##  [2] C:/Program Files/R/R-4.2.3/library
## 
## ──────────────────────────────────────────────────────────────────────────────

Import Data

# Directory (relative to the project root) that holds the processed data files
fp_data <- here("manuscript_synthesis/data/processed")

# Daily average water quality data (file name indicates 2013-2019)
df_wq <- fp_data %>%
  file.path("wq_daily_avg_2013-2019.rds") %>%
  readRDS()

# Daily average percent flow pulse water data (file name indicates 2016-2019)
df_pfp <- fp_data %>%
  file.path("perc_flow_pulse_daily_avg_2016-2019.rds") %>%
  readRDS()

Prepare Data

# Station codes in the order they should appear as factor levels
sta_order <- c("LIS", "STTD", "LIB", "RYI", "RVB")

# Build the combined chlorophyll / percent flow pulse water data set that the
# rest of the exploration works from
df_chla_c <- df_wq %>%
  # Keep only the station, date, and chlorophyll columns, and only the days
  # that have a chlorophyll value
  select(StationCode, Date, Chla) %>%
  drop_na(Chla) %>%
  # Keep station-days that also appear in the flow pulse data, then discard any
  # matched rows whose percent flow pulse value is itself missing
  inner_join(df_pfp, by = join_by(StationCode, Date)) %>%
  drop_na(PercFlowPulseAvg) %>%
  # Calendar year of each observation
  mutate(Year = year(Date)) %>%
  # Order stations according to sta_order
  mutate(StationCode = factor(StationCode, levels = sta_order)) %>%
  # Rescale percent values to proportions so that scales::label_percent()
  # formats them correctly on plot axes
  mutate(PercFlowPulseAvg = PercFlowPulseAvg / 100) %>%
  arrange(StationCode, Date)

Explore sample counts by Station

# Tabulate the date range and number of daily values for every
# station-year combination
df_chla_c %>%
  group_by(StationCode, Year) %>%
  summarize(
    min_date = min(Date),
    max_date = max(Date),
    num_samples = n(),
    .groups = "drop"
  ) %>%
  arrange(StationCode, Year) %>%
  kable()
StationCode Year min_date max_date num_samples
LIS 2016 2016-07-15 2016-09-16 64
LIS 2017 2017-09-09 2017-11-03 47
LIS 2018 2018-08-28 2018-11-11 76
LIS 2019 2019-08-27 2019-11-06 72
STTD 2016 2016-07-17 2016-09-16 62
STTD 2017 2017-09-20 2017-09-25 6
STTD 2018 2018-08-30 2018-10-15 47
STTD 2019 2019-08-29 2019-11-06 70
LIB 2016 2016-07-22 2016-09-16 57
LIB 2017 2017-10-01 2017-10-23 23
LIB 2018 2018-09-04 2018-11-11 69
LIB 2019 2019-09-05 2019-11-06 56
RYI 2016 2016-07-22 2016-09-16 57
RYI 2018 2018-09-06 2018-11-11 67
RYI 2019 2019-09-02 2019-11-06 66
RVB 2016 2016-07-23 2016-09-16 47
RVB 2017 2017-10-10 2017-10-18 9
RVB 2018 2018-09-07 2018-11-11 66
RVB 2019 2019-09-03 2019-11-06 65

Except for RYI missing data for 2017 and STTD and RVB having limited data for 2017, it looks like we have pretty good data coverage.

Plots

Let’s explore the data with some plots. First, let’s plot the data in scatterplots of chlorophyll and percent flow pulse water faceted by Station and grouping all years together.

# Scatterplots of chlorophyll against percent flow pulse water, one panel per
# station with all years pooled; each panel gets its own axis ranges
df_chla_c %>%
  ggplot(aes(x = PercFlowPulseAvg, y = Chla)) +
  geom_point() +
  # Pass the smoother formula as a formula object (the documented type) rather
  # than a string, so it does not rely on the fitting function coercing it
  geom_smooth(formula = y ~ x) +
  facet_wrap(vars(StationCode), scales = "free") +
  theme_bw() +
  ylab("Chlorophyll (ug/L)") +
  # PercFlowPulseAvg is stored as a proportion, so label_percent() renders it
  # as a percentage
  scale_x_continuous(name = "Percent Flow Pulse Water", labels = label_percent())

There are some strange patterns in the data. Let’s break these scatterplots apart by year to see if they can be explained by annual differences.

# Same scatterplots as above, but split into one panel per station-year
# combination to check whether annual differences explain the patterns
df_chla_c %>%
  ggplot(aes(x = PercFlowPulseAvg, y = Chla)) +
  geom_point() +
  # Pass the smoother formula as a formula object (the documented type) rather
  # than a string, so it does not rely on the fitting function coercing it
  geom_smooth(formula = y ~ x) +
  facet_wrap(
    vars(StationCode, Year),
    ncol = 4,
    scales = "free",
    # Put station and year on a single strip line instead of two
    labeller = labeller(.multi_line = FALSE)
  ) +
  theme_bw() +
  ylab("Chlorophyll (ug/L)") +
  # PercFlowPulseAvg is stored as a proportion, so label_percent() renders it
  # as a percentage
  scale_x_continuous(name = "Percent Flow Pulse Water", labels = label_percent()) +
  # Angle the x-axis labels so they do not overlap in the narrow panels
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Hmmm, a lot of these relationships look complicated. I’m not sure we should try using the daily average percent flow pulse water as a continuous predictor to model chlorophyll concentrations.