Purpose

Process the data for the WQ parameters for the long-term WQ publication. Parameters include: Water Temperature, Salinity, Secchi depth, Dissolved Ammonia, Dissolved Nitrate + Nitrite, Dissolved Ortho-phosphate, and Chlorophyll. Data is from the discretewq EDI data package, version 731.7.

Global code and functions

# Load packages
library(tidyverse)
library(dtplyr)
library(hms)
library(scales)
# Make sure we are using `deltamapr` version 1.0.0, commit d0a6f9c22aa074f906176e99a0ed70f97f26fffd
# install.packages("devtools")
# devtools::install_github("InteragencyEcologicalProgram/deltamapr", ref = "d0a6f9c22aa074f906176e99a0ed70f97f26fffd")
library(deltamapr)
library(sf)
library(leaflet)
library(here)
library(contentid)
library(qs)
library(conflicted)

# Source global data processing functions
source(here("src/data_processing/global_data_proc_func.R"))
# Declare package conflict preferences
conflicts_prefer(dplyr::filter(), hms::hms())
## [conflicted] Will prefer dplyr::filter over any other package.
## [conflicted] Will prefer hms::hms over any other package.
# Check if we are in the correct working directory
i_am("src/data_processing/process_data_wq_nutr_chla.Rmd")
## here() starts at C:/Repositories/04_IEP_Org/WQ-LT-Publication
# Run session info to display package versions
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.2.3 (2023-03-15 ucrt)
##  os       Windows 10 x64 (build 19044)
##  system   x86_64, mingw32
##  ui       RTerm
##  language (EN)
##  collate  English_United States.utf8
##  ctype    English_United States.utf8
##  tz       America/Los_Angeles
##  date     2024-01-03
##  pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  ! package       * version date (UTC) lib source
##    askpass         1.2.0   2023-09-03 [1] CRAN (R 4.2.3)
##    bslib           0.4.2   2022-12-16 [1] CRAN (R 4.2.2)
##    cachem          1.0.8   2023-05-01 [1] CRAN (R 4.2.3)
##    callr           3.7.3   2022-11-02 [1] CRAN (R 4.2.2)
##    class           7.3-21  2023-01-23 [2] CRAN (R 4.2.3)
##    classInt        0.4-9   2023-02-28 [1] CRAN (R 4.2.2)
##    cli             3.6.1   2023-03-23 [1] CRAN (R 4.2.3)
##    colorspace      2.1-0   2023-01-23 [1] CRAN (R 4.2.2)
##    conflicted    * 1.2.0   2023-02-01 [1] CRAN (R 4.2.2)
##    contentid     * 0.0.17  2023-04-21 [1] CRAN (R 4.2.3)
##    crayon          1.5.2   2022-09-29 [1] CRAN (R 4.2.1)
##    crosstalk       1.2.0   2021-11-04 [1] CRAN (R 4.2.1)
##    curl            5.1.0   2023-10-02 [1] CRAN (R 4.2.3)
##    data.table      1.14.8  2023-02-17 [1] CRAN (R 4.2.2)
##    DBI             1.1.3   2022-06-18 [1] CRAN (R 4.2.1)
##    deltamapr     * 1.0.0   2021-06-18 [1] Github (InteragencyEcologicalProgram/deltamapr@d0a6f9c)
##    devtools        2.4.5   2022-10-11 [1] CRAN (R 4.2.1)
##    digest          0.6.33  2023-07-07 [1] CRAN (R 4.2.3)
##    dplyr         * 1.1.3   2023-09-03 [1] CRAN (R 4.2.3)
##    dtplyr        * 1.3.1   2023-03-22 [1] CRAN (R 4.2.3)
##    e1071           1.7-13  2023-02-01 [1] CRAN (R 4.2.2)
##    ellipsis        0.3.2   2021-04-29 [1] CRAN (R 4.2.1)
##    evaluate        0.21    2023-05-05 [1] CRAN (R 4.2.3)
##    fansi           1.0.4   2023-01-22 [1] CRAN (R 4.2.2)
##    fastmap         1.1.1   2023-02-24 [1] CRAN (R 4.2.2)
##    forcats       * 1.0.0   2023-01-29 [1] CRAN (R 4.2.2)
##    fs              1.6.3   2023-07-20 [1] CRAN (R 4.2.3)
##    generics        0.1.3   2022-07-05 [1] CRAN (R 4.2.1)
##    ggplot2       * 3.4.3   2023-08-14 [1] CRAN (R 4.2.3)
##    glue            1.6.2   2022-02-24 [1] CRAN (R 4.2.1)
##    gtable          0.3.4   2023-08-21 [1] CRAN (R 4.2.3)
##    here          * 1.0.1   2020-12-13 [1] CRAN (R 4.2.1)
##    hms           * 1.1.3   2023-03-21 [1] CRAN (R 4.2.3)
##    htmltools       0.5.5   2023-03-23 [1] CRAN (R 4.2.3)
##    htmlwidgets     1.6.2   2023-03-17 [1] CRAN (R 4.2.3)
##    httpuv          1.6.9   2023-02-14 [1] CRAN (R 4.2.2)
##    httr            1.4.7   2023-08-15 [1] CRAN (R 4.2.3)
##    jquerylib       0.1.4   2021-04-26 [1] CRAN (R 4.2.1)
##    jsonlite        1.8.7   2023-06-29 [1] CRAN (R 4.2.3)
##    KernSmooth      2.23-20 2021-05-03 [2] CRAN (R 4.2.3)
##    knitr           1.42    2023-01-25 [1] CRAN (R 4.2.2)
##    later           1.3.0   2021-08-18 [1] CRAN (R 4.2.1)
##    leaflet       * 2.1.2   2023-03-10 [1] CRAN (R 4.2.2)
##    lifecycle       1.0.3   2022-10-07 [1] CRAN (R 4.2.1)
##    lubridate     * 1.9.3   2023-09-27 [1] CRAN (R 4.2.3)
##    magrittr        2.0.3   2022-03-30 [1] CRAN (R 4.2.1)
##    memoise         2.0.1   2021-11-26 [1] CRAN (R 4.2.1)
##    mime            0.12    2021-09-28 [1] CRAN (R 4.2.0)
##    miniUI          0.1.1.1 2018-05-18 [1] CRAN (R 4.2.1)
##    munsell         0.5.0   2018-06-12 [1] CRAN (R 4.2.1)
##    openssl         2.1.1   2023-09-25 [1] CRAN (R 4.2.3)
##    pillar          1.9.0   2023-03-22 [1] CRAN (R 4.2.3)
##    pkgbuild        1.4.2   2023-06-26 [1] CRAN (R 4.2.3)
##    pkgconfig       2.0.3   2019-09-22 [1] CRAN (R 4.2.1)
##    pkgload         1.3.2.1 2023-07-08 [1] CRAN (R 4.2.3)
##    prettyunits     1.2.0   2023-09-24 [1] CRAN (R 4.2.3)
##    processx        3.8.2   2023-06-30 [1] CRAN (R 4.2.3)
##    profvis         0.3.7   2020-11-02 [1] CRAN (R 4.2.1)
##    promises        1.2.0.1 2021-02-11 [1] CRAN (R 4.2.1)
##    proxy           0.4-27  2022-06-09 [1] CRAN (R 4.2.1)
##    ps              1.7.5   2023-04-18 [1] CRAN (R 4.2.3)
##    purrr         * 1.0.2   2023-08-10 [1] CRAN (R 4.2.3)
##    qs            * 0.25.5  2023-02-22 [1] CRAN (R 4.2.2)
##    R6              2.5.1   2021-08-19 [1] CRAN (R 4.2.1)
##    RApiSerialize   0.1.2   2022-08-25 [1] CRAN (R 4.2.1)
##    Rcpp            1.0.11  2023-07-06 [1] CRAN (R 4.2.3)
##  D RcppParallel    5.1.7   2023-02-27 [1] CRAN (R 4.2.3)
##    readr         * 2.1.4   2023-02-10 [1] CRAN (R 4.2.2)
##    remotes         2.4.2   2021-11-30 [1] CRAN (R 4.2.1)
##    rlang           1.1.1   2023-04-28 [1] CRAN (R 4.2.3)
##    rmarkdown       2.21    2023-03-26 [1] CRAN (R 4.2.3)
##    rprojroot       2.0.3   2022-04-02 [1] CRAN (R 4.2.1)
##    rstudioapi      0.14    2022-08-22 [1] CRAN (R 4.2.1)
##    sass            0.4.6   2023-05-03 [1] CRAN (R 4.2.3)
##    scales        * 1.2.1   2022-08-20 [1] CRAN (R 4.2.1)
##    sessioninfo     1.2.2   2021-12-06 [1] CRAN (R 4.2.1)
##    sf            * 1.0-12  2023-03-19 [1] CRAN (R 4.2.3)
##    shiny           1.7.4   2022-12-15 [1] CRAN (R 4.2.2)
##    stringfish      0.15.7  2022-04-13 [1] CRAN (R 4.2.1)
##    stringi         1.7.12  2023-01-11 [1] CRAN (R 4.2.2)
##    stringr       * 1.5.0   2022-12-02 [1] CRAN (R 4.2.2)
##    tibble        * 3.2.1   2023-03-20 [1] CRAN (R 4.2.3)
##    tidyr         * 1.3.0   2023-01-24 [1] CRAN (R 4.2.2)
##    tidyselect      1.2.0   2022-10-10 [1] CRAN (R 4.2.1)
##    tidyverse     * 2.0.0   2023-02-22 [1] CRAN (R 4.2.2)
##    timechange      0.2.0   2023-01-11 [1] CRAN (R 4.2.2)
##    tzdb            0.4.0   2023-05-12 [1] CRAN (R 4.2.3)
##    units           0.8-1   2022-12-10 [1] CRAN (R 4.2.2)
##    urlchecker      1.0.1   2021-11-30 [1] CRAN (R 4.2.1)
##    usethis         2.1.6   2022-05-25 [1] CRAN (R 4.2.1)
##    utf8            1.2.3   2023-01-31 [1] CRAN (R 4.2.2)
##    vctrs           0.6.3   2023-06-14 [1] CRAN (R 4.2.3)
##    withr           2.5.1   2023-09-26 [1] CRAN (R 4.2.3)
##    xfun            0.39    2023-04-20 [1] CRAN (R 4.2.3)
##    xtable          1.8-4   2019-04-21 [1] CRAN (R 4.2.1)
##    yaml            2.3.7   2023-01-23 [1] CRAN (R 4.2.2)
## 
##  [1] C:/R/win-library/4.2
##  [2] C:/Program Files/R/R-4.2.3/library
## 
##  D ── DLL MD5 mismatch, broken installation.
## 
## ──────────────────────────────────────────────────────────────────────────────

Load globally-used data:

# Years covered by the publication and the minimum number of those years (75%)
# that a subregion must have data for to be included in the long-term averages
lt_yrs <- 1975:2021
num_yrs_threshold <- round(0.75 * length(lt_yrs))

# Region assignments
df_regions <- read_csv(file = here("data/raw/region_assignments.csv"))

# Year assignments, keyed by adjusted calendar year
df_yr_type <- rename(
  read_csv(file = here("data/raw/year_assignments.csv")),
  YearAdj = Year
)

# Delta subregion polygons from Brian; drop the subregions at and west of
# Carquinez Strait and keep only the SubRegion attribute
sf_delta <- R_EDSM_Subregions_Mahardja_FLOAT %>%
  filter(
    !(SubRegion %in% c(
      "Carquinez Strait",
      "Lower Napa River",
      "San Francisco Bay",
      "San Pablo Bay",
      "South Bay",
      "Upper Napa River"
    ))
  ) %>%
  select(SubRegion)

# Every possible combination of year, season, and region
df_yr_seas_reg <- expand_grid(
  YearAdj = lt_yrs,
  Season = c("Winter", "Spring", "Summer", "Fall"),
  Region = unique(df_regions$Region)
)

Create globally-used functions:

# Filter data so that there is only one sample per station-day by choosing the
# data point closest to noon
#
# Args:
#   df: data frame with columns Source, Station, Date, and Datetime (any other
#     columns are carried through unchanged)
#
# Returns:
#   `df` with station-days that had a single record left untouched, followed by
#   the single retained record for each station-day that had more than one.
#   Within a duplicated station-day, records without a Datetime are dropped
#   because their distance from noon cannot be calculated.
filt_daily_dups <- function(df) {
  # A station-day is identified by survey, station, and date. The same key is
  # used for finding, resolving, and removing duplicates so that two surveys
  # sharing a station name can never be collapsed into each other.
  dup_keys <- c("Source", "Station", "Date")

  # Look for any instances when more than 1 data point was collected at a station-day
  df_dups <- df %>%
    count(Source, Station, Date) %>%
    filter(n > 1) %>%
    select(-n)

  # Fix duplicates
  df_dups_fixed <- df %>%
    inner_join(df_dups, by = dup_keys) %>%
    drop_na(Datetime) %>%
    mutate(
      # Create variable for time
      Time = as_hms(Datetime),
      # Calculate difference from noon for each data point for later filtering
      Noon_diff = abs(hms(hours = 12) - Time)
    ) %>%
    # Use dtplyr to speed up operations
    lazy_dt() %>%
    group_by(Source, Station, Date) %>%
    # Select only 1 data point per station and date, choose data closest to noon
    filter(Noon_diff == min(Noon_diff)) %>%
    # When points are equidistant from noon, select earlier point
    filter(Time == min(Time)) %>%
    ungroup() %>%
    # End dtplyr operation
    as_tibble() %>%
    select(-c(Time, Noon_diff))

  # Add back the fixed duplicates
  df %>%
    anti_join(df_dups, by = dup_keys) %>%
    bind_rows(df_dups_fixed)
}

# Tile plot of sampling effort (number of samples) for each Station and
# adjusted year
#
# Args:
#   df: data frame with one row per sample and columns Station and YearAdj
#
# Returns:
#   A ggplot object with YearAdj on the x-axis, Station on the y-axis, and tiles
#   filled by the number of samples
plot_samp_effort_sta <- function(df) {
  # Tally samples per station-year; the station factor is reversed so that the
  # first station in sort order is drawn at the top of the y-axis
  df_effort <- df %>%
    count(Station, YearAdj, name = "num_samples") %>%
    mutate(Station = fct_rev(factor(Station)))

  # Fixed x-axis so that plots for different surveys are directly comparable
  x_scale <- scale_x_continuous(
    limits = c(1974, 2022),
    breaks = breaks_pretty(20),
    expand = expansion()
  )

  ggplot(df_effort, aes(x = YearAdj, y = Station, fill = num_samples)) +
    geom_tile() +
    x_scale +
    scale_fill_viridis_c(name = "Number of Samples") +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
      legend.position = "top"
    )
}

# Tile plot of sampling effort (number of samples) for each SubRegion and
# adjusted year, with one facet per Season
#
# Args:
#   df: data frame with one row per sample and columns SubRegion, YearAdj, and
#     Season
#
# Returns:
#   A ggplot object faceted by Season (2 rows) with YearAdj on the x-axis,
#   SubRegion on the y-axis, and tiles filled by the number of samples
plot_samp_effort_subreg <- function(df) {
  season_order <- c("Winter", "Spring", "Summer", "Fall")

  df_effort <- df %>%
    count(SubRegion, YearAdj, Season, name = "num_samples") %>%
    mutate(
      # Reversed so that the first subregion in sort order is drawn at the top
      SubRegion = fct_rev(factor(SubRegion)),
      Season = factor(Season, levels = season_order)
    )

  # Fixed x-axis so that plots for different parameters are directly comparable
  x_scale <- scale_x_continuous(
    limits = c(1974, 2022),
    breaks = breaks_pretty(20),
    expand = expansion(mult = 0.02)
  )

  ggplot(df_effort, aes(x = YearAdj, y = SubRegion, fill = num_samples)) +
    geom_tile() +
    facet_wrap(vars(Season), nrow = 2) +
    x_scale +
    # Keep every subregion on the y-axis of every facet
    scale_y_discrete(drop = FALSE) +
    scale_fill_viridis_c(name = "Number of Samples") +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
      legend.position = "top"
    )
}

# Flag data points whose absolute Z-score exceeds `threshold`
#
# Args:
#   df: data frame with a numeric Result column; if `df` is grouped, the mean
#     and standard deviation are calculated within each group
#   threshold: Z-score above which a data point is flagged
#
# Returns:
#   `df` with two added columns: Zscore (absolute Z-score, NA when the standard
#   deviation is zero or undefined) and Zscore_flag (TRUE when Zscore is
#   greater than `threshold`, otherwise FALSE)
flag_zscore <- function(df, threshold) {
  df_scored <- mutate(
    df,
    tmp_center = mean(Result),
    tmp_spread = sd(Result),
    # A zero standard deviation means all values are identical, so no Z-score
    # can be calculated
    Zscore = if_else(
      tmp_spread == 0,
      NA_real_,
      abs((Result - tmp_center) / tmp_spread)
    ),
    # An undefined comparison is never flagged
    Zscore_flag = coalesce(Zscore > threshold, FALSE)
  )

  # Remove the temporary helper columns
  select(df_scored, !starts_with("tmp_"))
}

# Flag data points whose absolute modified Z-score exceeds `threshold`
#
# Args:
#   df: data frame with a numeric Result column; if `df` is grouped, the median
#     and MAD are calculated within each group
#   threshold: modified Z-score above which a data point is flagged
#
# Returns:
#   `df` with two added columns: ModZscore (absolute modified Z-score, NA when
#   the MAD is zero or undefined) and ModZscore_flag (TRUE when ModZscore is
#   greater than `threshold`, otherwise FALSE)
flag_modzscore <- function(df, threshold) {
  df_scored <- mutate(
    df,
    tmp_center = median(Result),
    tmp_spread = mad(Result),
    # A zero MAD means at least half of the values equal the median, so no
    # modified Z-score can be calculated
    ModZscore = if_else(
      tmp_spread == 0,
      NA_real_,
      abs(0.6745 * (Result - tmp_center) / tmp_spread)
    ),
    # An undefined comparison is never flagged
    ModZscore_flag = coalesce(ModZscore > threshold, FALSE)
  )

  # Remove the temporary helper columns
  select(df_scored, !starts_with("tmp_"))
}

# Flag <RL values with high reporting limits (greater than a specified
# percentile of the data)
#
# Args:
#   df: data frame with a numeric Result column and a Sign column in which "<"
#     marks values below the reporting limit (Result then holds the RL)
#   perc_thresh: single probability between 0 and 1 passed to `quantile()`
#
# Returns:
#   `df` with an added logical HighRL_flag column that is TRUE for "<" records
#   whose Result is greater than the `perc_thresh` quantile of Result.
#
# The quantile is calculated inside `mutate()` so that it follows the grouping
# of `df`, as the Z-score flagging functions do: one threshold for ungrouped
# data, one per group for grouped data. Calculating it in a separate
# `summarize()` returned one value per group, which was then recycled
# element-wise against Result.
flag_high_rl <- function(df, perc_thresh) {
  df %>%
    mutate(
      HighRL_flag = Sign == "<" &
        Result > quantile(Result, probs = perc_thresh, names = FALSE)
    )
}

# Replace values below the reporting limit with simulated values between
# `min_val` and the RL
#
# Args:
#   df: data frame with a numeric Result column and a Sign column in which "<"
#     marks values below the reporting limit (Result then holds the RL)
#   min_val: lower bound of the simulated values
#   seed: random seed used for the simulation so that results are reproducible
#
# Returns:
#   All records that are not below the RL (unchanged, including records with a
#   missing Sign), followed by the below-RL records with Result replaced by a
#   uniform random draw between `min_val` and the RL, rounded to 6 decimals.
replace_blw_rl <- function(df, min_val = 0, seed = 1) {
  # Pull out values that are below the RL
  df_blw_rl <- df %>% filter(Sign == "<")

  # Replace below RL values with simulated ones; the seed is only set for the
  # duration of the simulation and the previous RNG state is restored afterwards
  df_blw_rl_sim <- withr::with_seed(
    seed = seed,
    df_blw_rl %>%
      # `n()` keeps the number of draws equal to the number of rows being
      # filled, which also holds within groups if `df` is grouped
      mutate(Result = round(runif(n(), min = min_val, max = Result), 6))
  )

  # Add simulated values back to main data frame. `filter()` drops rows where
  # the condition is NA, so records with a missing Sign are kept explicitly
  # instead of being silently removed.
  df %>%
    filter(is.na(Sign) | Sign != "<") %>%
    bind_rows(df_blw_rl_sim)
}

# Calculate seasonal-regional averages of raw data for each adjusted year
#
# Args:
#   df: data frame with columns Month, Season, Region, YearAdj, and a numeric
#     Result column
#
# Returns:
#   Data frame with one row for every combination of the Season, Region, and
#   YearAdj values present in `df`, and a Result column holding the mean of the
#   monthly means. Combinations without any data have an NA Result.
calc_seas_reg_avg <- function(df) {
  # Step 1: average within each month first so that months with more samples
  # don't dominate the seasonal average
  df_monthly <- df %>%
    group_by(Month, Season, Region, YearAdj) %>%
    summarize(monthly_mean = mean(Result), .groups = "drop")

  # Step 2: add NA rows for any Season-Region-YearAdj combinations without
  # data so that all of them are represented in the output, then average the
  # monthly means within each combination
  df_monthly %>%
    complete(Season, Region, YearAdj) %>%
    group_by(Season, Region, YearAdj) %>%
    summarize(Result = mean(monthly_mean), .groups = "drop")
}

Import and Prepare Data

# Register a contentid for the WQ data from the discretewq EDI data package
# This only needs to be done once
# register(
#   "https://portal.edirepository.org/nis/dataviewer?packageid=edi.731.7&entityid=6c5f35b1d316e39c8de0bfadfb3c9692"
# )

# Define contentid for the WQ data from the discretewq EDI data package
# (a sha256 content hash, so it identifies one exact version of the file)
id_dwq <- "hash://sha256/c5397df66c7c0e407c0bcd422711e3aab2713023a4aa3d24ff80de58a68f0cf9"

# Resolve the contentid for the WQ data from the discretewq EDI data package -
  # storing a local copy for faster import
file_dwq <- resolve(id_dwq, store = TRUE)

# Import WQ data from the discretewq EDI data package from the local copy using
  # its contentid
df_dwq <- read_csv(
  file = file_dwq,
  # Select a subset of columns
  col_select = c(
    Source,
    Station,
    Latitude,
    Longitude,
    Date,
    Datetime,
    Temperature,
    Salinity,
    Secchi,
    # `contains()` matches by substring, so this brings in both the value
    # column and the matching `_Sign` column for each of these four parameters
    contains(c("Chlorophyll", "DissAmmonia", "DissNitrateNitrite", "DissOrthophos"))
  ) 
)
## Rows: 353850 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): Source, Station, Chlorophyll_Sign, DissAmmonia_Sign, DissNitrateNi...
## dbl  (9): Latitude, Longitude, Temperature, Salinity, Secchi, Chlorophyll, D...
## dttm (1): Datetime
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Prepare data for parameters of interest: assign each record to a SubRegion,
# drop records outside the subregions of interest, add adjusted year, month,
# and season, and restrict to the years used in the publication
df_dwq_c <- df_dwq %>% 
  # Convert Datetime to PST
  # ("Etc/GMT+8" is a fixed UTC-8 offset without daylight saving time; the sign
  # in the "Etc/GMT" zone names is inverted relative to the usual UTC offset)
  mutate(Datetime = with_tz(Datetime, tzone = "Etc/GMT+8")) %>% 
  # Remove records without lat-long coordinates
  drop_na(Latitude, Longitude) %>% 
  # Assign SubRegions to the stations
  # (points are created in WGS84 and transformed to the CRS of `sf_delta` before
  # the spatial join; `remove = FALSE` keeps the Latitude and Longitude columns)
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE) %>%
  st_transform(crs = st_crs(sf_delta)) %>%
  st_join(sf_delta, join = st_intersects) %>%
  # Remove any data outside our subregions of interest
  # (points that intersect no polygon in `sf_delta` have a missing SubRegion)
  filter(!is.na(SubRegion)) %>%
  st_drop_geometry() %>% 
  # Add variables for adjusted calendar year, month, and season
    # Adjusted calendar year: December-November, with December of the previous calendar year
    # included with the following year
  mutate(
    Month = month(Date),
    YearAdj = if_else(Month == 12, year(Date) + 1, year(Date)),
    Season = case_when(
      Month %in% 3:5 ~ "Spring",
      Month %in% 6:8 ~ "Summer",
      Month %in% 9:11 ~ "Fall",
      Month %in% c(12, 1, 2) ~ "Winter"
    )
  ) %>% 
  # Restrict data to 1975-2021
  filter(YearAdj %in% lt_yrs)

Temporal Scale of all Surveys

Let’s look at which surveys we can use for the long-term WQ publication. First, we’ll look at the temporal scale of all of the surveys available.

# Number of distinct adjusted years with data for each survey, longest first
df_dwq_c %>%
  group_by(Source) %>%
  summarize(NumYears = n_distinct(YearAdj)) %>%
  arrange(desc(NumYears))
## # A tibble: 16 × 2
##    Source     NumYears
##    <chr>         <int>
##  1 EMP              47
##  2 FMWT             47
##  3 STN              47
##  4 USGS_CAWSC       47
##  5 DJFMP            46
##  6 Suisun           43
##  7 Baystudy         42
##  8 USGS_SFBS        42
##  9 20mm             27
## 10 YBFMP            24
## 11 NCRO             23
## 12 SDO              23
## 13 SKT              20
## 14 SLS              13
## 15 USBR              8
## 16 EDSM              5
# First and last sampling date for each survey, earliest start first
df_dwq_c %>%
  group_by(Source) %>%
  summarize(
    across(Date, list(min_date = min, max_date = max), .names = "{.fn}")
  ) %>%
  arrange(min_date)
## # A tibble: 16 × 3
##    Source     min_date   max_date  
##    <chr>      <date>     <date>    
##  1 USGS_CAWSC 1974-12-11 2021-11-30
##  2 EMP        1975-01-07 2021-11-16
##  3 USGS_SFBS  1975-01-15 2021-11-04
##  4 STN        1975-06-30 2021-08-19
##  5 FMWT       1975-09-17 2021-11-16
##  6 DJFMP      1976-05-13 2021-11-29
##  7 Suisun     1979-05-16 2021-11-18
##  8 Baystudy   1980-02-08 2021-11-03
##  9 20mm       1995-04-24 2021-07-16
## 10 SDO        1997-08-04 2021-09-10
## 11 YBFMP      1998-01-19 2021-11-30
## 12 NCRO       1999-03-17 2021-11-30
## 13 SKT        2002-01-07 2021-04-29
## 14 SLS        2009-01-05 2021-03-17
## 15 USBR       2012-05-08 2019-10-22
## 16 EDSM       2016-12-15 2021-11-26

Overall, for all parameters, it looks like all surveys except for SLS, USBR, and EDSM have collected at least 20 years of data. We will assume that these surveys have adequate temporal coverage for the long-term analysis.

# Drop the three surveys without adequate temporal coverage (fewer than 20
# years of data)
df_dwq_lt <- filter(df_dwq_c, !(Source %in% c("SLS", "USBR", "EDSM")))

All Stations Map

Next, let’s take a look at a map of all stations.

# Unique station locations as an sf point layer (WGS84), keeping the Latitude
# and Longitude columns
sf_stations <- st_as_sf(
  distinct(df_dwq_lt, Source, Station, Latitude, Longitude),
  coords = c("Longitude", "Latitude"),
  crs = 4326,
  remove = FALSE
)

# Color palette with one color per survey
color_pal_survey <- colorFactor(palette = "viridis", domain = sf_stations$Source)

# Interactive map of all stations, colored by survey, with a hover label that
# identifies the survey and station
leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    data = sf_stations,
    radius = 5,
    fillColor = ~color_pal_survey(Source),
    fillOpacity = 0.8,
    weight = 0.5,
    color = "black",
    opacity = 1,
    label = ~paste0("Survey: ", Source, ", Station: ", Station)
  ) %>%
  addLegend(
    position = "topright",
    pal = color_pal_survey,
    values = sf_stations$Source,
    title = "Survey:"
  )

Some of the stations from the Suisun Marsh survey are located in small backwater channels and dead-end sloughs, which represent a much different habitat than the sampling locations from the other surveys, which tend to be in larger, open-water channel habitat. We’ll only keep the Suisun Marsh survey stations located in Suisun, Montezuma, and Nurse Sloughs, since they seem to be in the larger channels in the area.

Also, there are a few questionable sampling locations from SKT and YBFMP, but I don’t want to dig too deep with these for now.

# Keep all stations from the other surveys, but for the Suisun Marsh survey
# only keep the stations in Suisun (SU), Montezuma (MZ), and Nurse (NS) Sloughs.
# The station codes are grouped in the regular expression so that the "Suisun "
# prefix applies to all three of them; without the grouping, "MZ" and "NS"
# would match anywhere in the station name.
df_dwq_lt_filt <- df_dwq_lt %>%
  filter(!(Source == "Suisun" & !str_detect(Station, "Suisun\\s(SU|MZ|NS)")))

Water Quality Measurements

Next, we’ll process the water quality measurement data: Water Temperature, Salinity, and Secchi depth.

# Build a nested data frame with one row per water quality measurement
# parameter and a list-column holding the prepared data for that parameter
ndf_wq_meas <-
  tibble(Parameter = c("Temperature", "Salinity", "Secchi")) %>%
  mutate(
    df_data = map(
      Parameter,
      \(param) df_dwq_lt_filt %>%
        # Only keep records that have a value for this parameter
        drop_na(all_of(param)) %>%
        # Keep the identifying columns plus the parameter itself
        select(
          Source,
          Station,
          Latitude,
          Longitude,
          SubRegion,
          YearAdj,
          Month,
          Season,
          Date,
          Datetime,
          all_of(param)
        ) %>%
        # Filter data so that there is only one sample per station-day
        filt_daily_dups()
    )
  )

# Check for remaining duplicates: each parameter should return zero
# station-days with more than one sample
ndf_wq_meas$df_data %>%
  map(\(df_param) filter(count(df_param, Source, Station, Date), n > 1))
## [[1]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
## 
## [[2]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
## 
## [[3]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
# Rename each parameter's value column to a common `Result` column so that the
# nested data frames share the same columns, then stack them into a long format
df_wq_meas_c1 <- ndf_wq_meas %>%
  mutate(
    df_data = map2(
      df_data,
      Parameter,
      \(df_param, param) rename(df_param, Result = all_of(param))
    )
  ) %>%
  unnest(cols = df_data)

Temporal Coverage

Now let’s take a closer look at the temporal data coverage for each Station and parameter.

Sampling Effort by Station

# One sampling effort by station plot for every Parameter-Source combination,
# then nest again so that there is one row per Parameter holding its surveys
ndf_wq_meas_se_sta_plt <- df_wq_meas_c1 %>%
  nest(.by = c(Parameter, Source), .key = "df_data") %>%
  mutate(plt = map(df_data, \(df_survey) plot_samp_effort_sta(df_survey))) %>%
  nest(.by = Parameter, .key = "ndf_data_source")

Temperature

20mm

Baystudy

DJFMP

EMP

FMWT

NCRO

SDO

SKT

STN

Suisun

USGS_CAWSC

USGS_SFBS

YBFMP

Salinity

20mm

Baystudy

DJFMP

EMP

FMWT

NCRO

SDO

SKT

STN

Suisun

USGS_CAWSC

USGS_SFBS

YBFMP

Secchi

20mm

Baystudy

DJFMP

EMP

FMWT

NCRO

SDO

SKT

STN

Suisun

YBFMP

Remove Sparse Surveys

Salinity data from DJFMP is only available for the past three years and NCRO only sampled Secchi depth for the past four years, so we won’t include these survey-parameter combinations in the analyses. For the USGS-CAWSC survey, only station 11447650 (Sacramento River at Freeport) was sampled on a long-term basis for Water Temperature and Salinity, so we’ll only include this station from the USGS-CAWSC survey.

df_wq_meas_c2 <- df_wq_meas_c1 %>%
  # From USGS-CAWSC, only keep station 11447650 (Sacramento River at Freeport)
  filter(Source != "USGS_CAWSC" | str_detect(Station, "USGS-11447650$")) %>%
  # Remove the sparsely sampled survey-parameter combinations
  filter(Parameter != "Salinity" | Source != "DJFMP") %>%
  filter(Parameter != "Secchi" | Source != "NCRO")

Filter Subregions

Not all of the subregions were sampled consistently from 1975 to 2021. To make sure that we only include the subregions that were sampled adequately, we will require a subregion to have data for at least 75% of the 47 years from 1975 to 2021 (35 years) in each of the four seasons.

df_wq_meas_c3 <- df_wq_meas_c2 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(
    # SubRegions to keep for each parameter
    df_subreg_seas = map(
      df_data,
      \(df_param) df_param %>%
        # Number of years with data for each SubRegion and Season
        distinct(SubRegion, YearAdj, Season) %>%
        count(SubRegion, Season, name = "NumYears") %>%
        # The least-sampled season of a SubRegion has to meet the threshold
        filter(min(NumYears) >= num_yrs_threshold, .by = SubRegion) %>%
        # All four seasons have to be present for the SubRegion
        count(SubRegion) %>%
        filter(n == 4)
    ),
    # Restrict each parameter's data to its retained SubRegions
    df_data_filt = map2(
      df_data,
      df_subreg_seas,
      \(df_param, df_keep) filter(df_param, SubRegion %in% df_keep$SubRegion)
    )
  ) %>%
  select(Parameter, df_data_filt) %>%
  unnest(cols = df_data_filt)

View Results

Let’s take a look at the sampling effort for the remaining subregions for each season after filtering for each water quality measurement parameter.

# One sampling effort by SubRegion and Season plot for each Parameter
ndf_wq_meas_se_subreg_plt <- df_wq_meas_c3 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(plt = map(df_data, \(df_param) plot_samp_effort_subreg(df_param)))

Temperature

Salinity

Secchi

Remove Outliers

First, we’ll look at the min-max ranges of each of the water quality measurement parameters, to see if there are any obvious outliers to exclude from the data set.

# Minimum and maximum value of each parameter
df_wq_meas_c3 %>%
  summarize(
    across(Result, list(min_val = min, max_val = max), .names = "{.fn}"),
    .by = Parameter
  )
## # A tibble: 3 × 3
##   Parameter   min_val max_val
##   <chr>         <dbl>   <dbl>
## 1 Temperature       2   116  
## 2 Salinity          0    44.1
## 3 Secchi            0   457

All water quality measurement parameters have questionable minimum values and Temperature and Salinity have questionable maximum values. Let’s take a closer look at these to see if we should omit them from the data set.

# Drop columns that aren't needed when inspecting extreme values so that the
# printed tibbles display better
vars_rm_view <- c("Source", "Latitude", "Longitude", "Month", "Datetime")

df_wq_meas_c3_view <- select(df_wq_meas_c3, !any_of(vars_rm_view))

# Ten lowest Temperature values; ties are kept, so more than 10 rows may be
# returned
df_wq_meas_c3_view %>%
  filter(Parameter == "Temperature") %>%
  slice_min(order_by = Result, n = 10, with_ties = TRUE)
## # A tibble: 11 × 7
##    Parameter   Station                SubRegion YearAdj Season Date       Result
##    <chr>       <chr>                  <chr>       <dbl> <chr>  <date>      <dbl>
##  1 Temperature EMP S42                Suisun M…    1983 Spring 1983-05-03   2   
##  2 Temperature Suisun MZ1             Suisun M…    1989 Winter 1989-02-16   4   
##  3 Temperature NCRO Paradise Cut      Grant Li…    2018 Winter 2018-02-13   4.68
##  4 Temperature NCRO Middle River at … Grant Li…    2014 Winter 2013-12-12   4.7 
##  5 Temperature Suisun MZ1             Suisun M…    1989 Winter 1988-12-30   5   
##  6 Temperature Suisun SU1             Suisun M…    1986 Winter 1985-12-19   5   
##  7 Temperature Suisun SU4             Suisun M…    1988 Fall   1988-10-30   5   
##  8 Temperature USGS_SFBS 653          Sacramen…    1991 Winter 1991-01-07   5.14
##  9 Temperature USGS_SFBS 657          Sacramen…    1991 Winter 1991-01-07   5.28
## 10 Temperature Suisun NS2             Suisun M…    2013 Winter 2013-01-16   5.3 
## 11 Temperature Suisun SU1             Suisun M…    1989 Winter 1988-12-28   5.3
# The 2 degree value at EMP S42 in Suisun Marsh looks suspicious. For context,
# print all Temperature data collected in that SubRegion in Spring 1983.
df_wq_meas_c3_view %>%
  filter(
    Parameter == "Temperature" &
      SubRegion == "Suisun Marsh" &
      YearAdj == 1983 &
      Season == "Spring"
  ) %>%
  arrange(Date) %>%
  print(n = 25)
## # A tibble: 24 × 7
##    Parameter   Station    SubRegion    YearAdj Season Date       Result
##    <chr>       <chr>      <chr>          <dbl> <chr>  <date>      <dbl>
##  1 Temperature Suisun SU1 Suisun Marsh    1983 Spring 1983-03-15   13.9
##  2 Temperature Suisun SU2 Suisun Marsh    1983 Spring 1983-03-15   14.5
##  3 Temperature Suisun SU3 Suisun Marsh    1983 Spring 1983-03-15   14.7
##  4 Temperature Suisun SU4 Suisun Marsh    1983 Spring 1983-03-15   14  
##  5 Temperature Suisun MZ1 Suisun Marsh    1983 Spring 1983-03-16   10.5
##  6 Temperature Suisun MZ2 Suisun Marsh    1983 Spring 1983-03-16   10.5
##  7 Temperature Suisun MZ7 Suisun Marsh    1983 Spring 1983-03-16   10.5
##  8 Temperature Suisun MZ8 Suisun Marsh    1983 Spring 1983-03-16   10.5
##  9 Temperature Suisun SU1 Suisun Marsh    1983 Spring 1983-03-29   17  
## 10 Temperature Suisun MZ1 Suisun Marsh    1983 Spring 1983-04-14   13  
## 11 Temperature Suisun MZ7 Suisun Marsh    1983 Spring 1983-04-14   13  
## 12 Temperature Suisun SU3 Suisun Marsh    1983 Spring 1983-04-14   13  
## 13 Temperature Suisun SU4 Suisun Marsh    1983 Spring 1983-04-14   11  
## 14 Temperature Suisun SU1 Suisun Marsh    1983 Spring 1983-04-15   13.2
## 15 Temperature Suisun SU2 Suisun Marsh    1983 Spring 1983-04-15   14.8
## 16 Temperature EMP S42    Suisun Marsh    1983 Spring 1983-05-03    2  
## 17 Temperature Suisun MZ1 Suisun Marsh    1983 Spring 1983-05-18   18  
## 18 Temperature Suisun MZ2 Suisun Marsh    1983 Spring 1983-05-18   17.5
## 19 Temperature Suisun MZ7 Suisun Marsh    1983 Spring 1983-05-18   17.2
## 20 Temperature Suisun MZ8 Suisun Marsh    1983 Spring 1983-05-18   20.8
## 21 Temperature Suisun SU3 Suisun Marsh    1983 Spring 1983-05-18   19.5
## 22 Temperature Suisun SU4 Suisun Marsh    1983 Spring 1983-05-18   22  
## 23 Temperature Suisun SU1 Suisun Marsh    1983 Spring 1983-05-19   19  
## 24 Temperature Suisun SU2 Suisun Marsh    1983 Spring 1983-05-19   19
# Ten highest Temperature values (ties are kept)
df_wq_meas_c3_view %>%
  filter(Parameter == "Temperature") %>%
  slice_max(order_by = Result, n = 10, with_ties = TRUE)
## # A tibble: 10 × 7
##    Parameter   Station                SubRegion YearAdj Season Date       Result
##    <chr>       <chr>                  <chr>       <dbl> <chr>  <date>      <dbl>
##  1 Temperature USGS_CAWSC USGS-11447… Middle S…    2017 Spring 2017-03-23  116  
##  2 Temperature SDO lt41               San Joaq…    2006 Summer 2006-07-24   29.7
##  3 Temperature STN 912                San Joaq…    1984 Summer 1984-07-17   29.4
##  4 Temperature SDO lt43               San Joaq…    2006 Summer 2006-07-24   29.3
##  5 Temperature SDO tb                 San Joaq…    2006 Summer 2006-07-24   29.2
##  6 Temperature FMWT 501               Mid Suis…    2002 Fall   2002-09-11   29  
##  7 Temperature STN 912                San Joaq…    2014 Summer 2014-06-30   28.9
##  8 Temperature DJFMP SJ054M           Upper Sa…    2013 Summer 2013-07-03   28.9
##  9 Temperature STN 910                San Joaq…    1984 Summer 1984-07-17   28.9
## 10 Temperature NCRO Holland Cut at H… Old River    2021 Summer 2021-07-14   28.8
# Ten lowest Salinity values (ties are kept)
df_wq_meas_c3_view %>%
  filter(Parameter == "Salinity") %>%
  slice_min(order_by = Result, n = 10, with_ties = TRUE)
## # A tibble: 10 × 7
##    Parameter Station       SubRegion           YearAdj Season Date        Result
##    <chr>     <chr>         <chr>                 <dbl> <chr>  <date>       <dbl>
##  1 Salinity  USGS_SFBS 651 Lower Sacramento R…    1980 Summer 1980-08-05 0      
##  2 Salinity  USGS_SFBS 652 Lower Sacramento R…    1978 Fall   1978-11-09 0      
##  3 Salinity  USGS_SFBS 653 Sacramento River n…    1980 Summer 1980-08-05 0      
##  4 Salinity  USGS_SFBS 655 Sacramento River n…    1980 Summer 1980-08-05 0      
##  5 Salinity  USGS_SFBS 657 Sacramento River n…    1979 Summer 1979-08-14 0      
##  6 Salinity  USGS_SFBS 657 Sacramento River n…    1980 Summer 1980-06-18 0      
##  7 Salinity  USGS_SFBS 657 Sacramento River n…    1980 Summer 1980-07-17 0      
##  8 Salinity  20mm 520      Confluence             2020 Spring 2020-03-16 0.00114
##  9 Salinity  Suisun SU4    Suisun Marsh           1982 Spring 1982-04-13 0.00452
## 10 Salinity  Suisun MZ1    Suisun Marsh           1982 Spring 1982-04-13 0.00510
# Ten highest Salinity values (ties are kept)
df_wq_meas_c3_view %>%
  filter(Parameter == "Salinity") %>%
  slice_max(order_by = Result, n = 10, with_ties = TRUE)
## # A tibble: 10 × 7
##    Parameter Station     SubRegion              YearAdj Season Date       Result
##    <chr>     <chr>       <chr>                    <dbl> <chr>  <date>      <dbl>
##  1 Salinity  STN 520     Confluence                1991 Summer 1991-07-05   44.1
##  2 Salinity  EMP D12     Lower San Joaquin Riv…    1999 Summer 1999-07-12   25.5
##  3 Salinity  EMP D6      West Suisun Bay           1978 Winter 1977-12-07   24.1
##  4 Salinity  EMP D6      West Suisun Bay           1977 Fall   1977-10-21   24.0
##  5 Salinity  FMWT 410    West Suisun Bay           1977 Fall   1977-11-11   23.9
##  6 Salinity  FMWT 409    West Suisun Bay           1977 Fall   1977-11-10   23.2
##  7 Salinity  EMP D6      West Suisun Bay           1977 Fall   1977-11-17   22.8
##  8 Salinity  USGS_SFBS 7 West Suisun Bay           2014 Winter 2013-12-03   22.7
##  9 Salinity  EMP D6      West Suisun Bay           1977 Summer 1977-06-29   22.6
## 10 Salinity  EMP D6      West Suisun Bay           2014 Fall   2014-10-07   22.4
# The top Salinity value (STN 520, Confluence SubRegion) looks suspicious, so
# list every Salinity record from that SubRegion for Summer 1991 in date order
# to compare it against its neighbors.
df_wq_meas_c3_view %>%
  filter(
    Parameter == "Salinity" & SubRegion == "Confluence",
    YearAdj == 1991 & Season == "Summer"
  ) %>%
  arrange(Date) %>%
  # Show up to 30 rows so the full result is printed instead of being truncated
  print(n = 30)
## # A tibble: 28 × 7
##    Parameter Station      SubRegion  YearAdj Season Date       Result
##    <chr>     <chr>        <chr>        <dbl> <chr>  <date>      <dbl>
##  1 Salinity  USGS_SFBS 2  Confluence    1991 Summer 1991-06-05   2.16
##  2 Salinity  USGS_SFBS 3  Confluence    1991 Summer 1991-06-05   2.6 
##  3 Salinity  EMP D10      Confluence    1991 Summer 1991-06-07   4.72
##  4 Salinity  Baystudy 535 Confluence    1991 Summer 1991-06-11   4.67
##  5 Salinity  STN 513      Confluence    1991 Summer 1991-06-19   3.74
##  6 Salinity  STN 801      Confluence    1991 Summer 1991-06-19   1.98
##  7 Salinity  EMP D10      Confluence    1991 Summer 1991-06-21   6.60
##  8 Salinity  STN 508      Confluence    1991 Summer 1991-06-21   4.20
##  9 Salinity  STN 520      Confluence    1991 Summer 1991-06-21   2.86
## 10 Salinity  STN 513      Confluence    1991 Summer 1991-07-03   5.66
## 11 Salinity  STN 801      Confluence    1991 Summer 1991-07-03   3.00
## 12 Salinity  STN 508      Confluence    1991 Summer 1991-07-05   6.21
## 13 Salinity  STN 520      Confluence    1991 Summer 1991-07-05  44.1 
## 14 Salinity  Baystudy 535 Confluence    1991 Summer 1991-07-09   3.43
## 15 Salinity  EMP D10      Confluence    1991 Summer 1991-07-09   7.10
## 16 Salinity  STN 513      Confluence    1991 Summer 1991-07-17   4.48
## 17 Salinity  STN 801      Confluence    1991 Summer 1991-07-17   1.63
## 18 Salinity  STN 508      Confluence    1991 Summer 1991-07-19   5.01
## 19 Salinity  STN 520      Confluence    1991 Summer 1991-07-19   3.80
## 20 Salinity  STN 801      Confluence    1991 Summer 1991-07-31   4.34
## 21 Salinity  USGS_SFBS 2  Confluence    1991 Summer 1991-08-01   2.18
## 22 Salinity  USGS_SFBS 3  Confluence    1991 Summer 1991-08-01   3.21
## 23 Salinity  STN 508      Confluence    1991 Summer 1991-08-02   5.44
## 24 Salinity  STN 513      Confluence    1991 Summer 1991-08-02   4.75
## 25 Salinity  STN 520      Confluence    1991 Summer 1991-08-02   4.81
## 26 Salinity  Baystudy 535 Confluence    1991 Summer 1991-08-06   4.33
## 27 Salinity  EMP D10      Confluence    1991 Summer 1991-08-06   6.85
## 28 Salinity  EMP D10      Confluence    1991 Summer 1991-08-23   6.05
# Twenty lowest Secchi depth results; rows tied at the cutoff are retained
df_wq_meas_c3_view %>%
  filter(Parameter == "Secchi") %>%
  slice_min(order_by = Result, n = 20, with_ties = TRUE)
## # A tibble: 20 × 7
##    Parameter Station      SubRegion             YearAdj Season Date       Result
##    <chr>     <chr>        <chr>                   <dbl> <chr>  <date>      <dbl>
##  1 Secchi    FMWT 073     Middle Sacramento Ri…    1995 Winter 1995-01-12   0   
##  2 Secchi    FMWT 601     Mid Suisun Bay           1999 Fall   1999-09-09   0   
##  3 Secchi    FMWT 602     Grizzly Bay              1999 Fall   1999-09-09   0   
##  4 Secchi    FMWT 604     Grizzly Bay              1999 Fall   1999-09-09   0   
##  5 Secchi    FMWT 704     Lower Sacramento Riv…    1995 Spring 1995-03-13   0   
##  6 Secchi    FMWT 705     Lower Sacramento Riv…    1995 Spring 1995-03-13   0   
##  7 Secchi    FMWT 706     Sacramento River nea…    1995 Spring 1995-03-13   0   
##  8 Secchi    FMWT 707     Sacramento River nea…    1995 Spring 1995-03-13   0   
##  9 Secchi    FMWT 708     Sacramento River nea…    1995 Spring 1995-03-13   0   
## 10 Secchi    FMWT 709     Sacramento River nea…    1995 Spring 1995-03-13   0   
## 11 Secchi    FMWT 710     Sacramento River nea…    1995 Spring 1995-03-13   0   
## 12 Secchi    FMWT 711     Sacramento River nea…    1995 Spring 1995-03-13   0   
## 13 Secchi    FMWT 735     Middle Sacramento Ri…    1995 Winter 1995-01-12   0   
## 14 Secchi    FMWT 736     Middle Sacramento Ri…    1995 Winter 1995-01-12   0   
## 15 Secchi    FMWT 919     Lower Mokelumne River    1995 Winter 1995-01-13   0   
## 16 Secchi    FMWT 920     Lower Mokelumne River    1995 Winter 1995-01-13   0   
## 17 Secchi    SKT 801      Confluence               2005 Spring 2005-04-20   0   
## 18 Secchi    Suisun MZ1   Suisun Marsh             1995 Winter 1995-01-21   3   
## 19 Secchi    DJFMP SB018M Confluence               1995 Winter 1995-01-14   4.00
## 20 Secchi    DJFMP SB018N Confluence               1995 Winter 1995-01-14   4.00

The following values are obviously out of range of reasonable limits for the parameter and will be excluded from the data set:

  • Temperature value of 2 collected on 5/3/1983 at S42
  • Temperature value of 116 collected on 3/23/2017 at USGS-11447650
  • Salinity value of 44.1 collected on 7/5/1991 at STN 520
  • Salinity values equal to zero
# Drop results outside the accepted range for each parameter:
#   Temperature: Result <= 2 or Result > 30
#   Salinity:    Result <= 0 or Result > 30
# All other parameters pass through unchanged.
df_wq_meas_c4 <- df_wq_meas_c3 %>%
  filter(
    !(Parameter == "Temperature" & (Result <= 2 | Result > 30)),
    !(Parameter == "Salinity" & (Result <= 0 | Result > 30))
  )

Next, we’ll look for outliers by using a Z-score test, flagging data points that are more than 15 standard deviations (SDs) away from the mean for each parameter within each subregion.

# Run the Z-score test separately for every Parameter-SubRegion combination,
# using a threshold of 15, then drop the grouping
df_wq_meas_flag <- ungroup(
  flag_zscore(
    group_by(df_wq_meas_c4, Parameter, SubRegion),
    threshold = 15
  )
)

# Trimmed copy for display: remove whichever of the `vars_rm_view` columns exist
df_wq_meas_flag_view <- select(df_wq_meas_flag, !any_of(vars_rm_view))
# Show only the rows flagged by the Z-score test
filter(df_wq_meas_flag_view, Zscore_flag)
## # A tibble: 11 × 9
##    Parameter Station  SubRegion         YearAdj Season Date        Result Zscore
##    <chr>     <chr>    <chr>               <dbl> <chr>  <date>       <dbl>  <dbl>
##  1 Salinity  EMP D12  Lower San Joaqui…    1999 Summer 1999-07-12  25.5     21.3
##  2 Salinity  EMP D22  Sacramento River…    2001 Winter 2000-12-06  22.4     35.8
##  3 Salinity  FMWT 710 Sacramento River…    2001 Winter 2000-12-07  11.0     17.4
##  4 Salinity  FMWT 711 Sacramento River…    2001 Winter 2000-12-07  10.7     16.9
##  5 Salinity  FMWT 735 Middle Sacrament…    2006 Fall   2006-11-16   0.609   18.2
##  6 Salinity  FMWT 908 San Joaquin Rive…    1995 Spring 1995-03-14   1.78    19.0
##  7 Salinity  FMWT 919 Lower Mokelumne …    2001 Fall   2001-11-15   1.06    16.8
##  8 Salinity  FMWT 920 Lower Mokelumne …    2001 Fall   2001-11-15   0.973   15.3
##  9 Salinity  FMWT 923 Lower Mokelumne …    2001 Fall   2001-11-15   1.17    18.7
## 10 Salinity  STN 918  Victoria Canal       2008 Summer 2008-07-28   2.51    21.1
## 11 Secchi    FMWT 518 Honker Bay           2020 Winter 2019-12-02 360       16.4
## # ℹ 1 more variable: Zscore_flag <lgl>
# Context for the flagged EMP D12 value: all Salinity records in the
# Lower San Joaquin River SubRegion for Summer 1999, in date order
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity" & SubRegion == "Lower San Joaquin River",
    YearAdj == 1999 & Season == "Summer"
  ) %>%
  arrange(Date)
## # A tibble: 20 × 9
##    Parameter Station      SubRegion     YearAdj Season Date        Result Zscore
##    <chr>     <chr>        <chr>           <dbl> <chr>  <date>       <dbl>  <dbl>
##  1 Salinity  20mm 804     Lower San Jo…    1999 Summer 1999-06-02  0.145   0.668
##  2 Salinity  Baystudy 837 Lower San Jo…    1999 Summer 1999-06-07  0.135   0.677
##  3 Salinity  Baystudy 853 Lower San Jo…    1999 Summer 1999-06-07  0.135   0.677
##  4 Salinity  20mm 804     Lower San Jo…    1999 Summer 1999-06-09  0.0951  0.711
##  5 Salinity  EMP D12      Lower San Jo…    1999 Summer 1999-06-10  0.0821  0.722
##  6 Salinity  20mm 804     Lower San Jo…    1999 Summer 1999-06-14  0.0865  0.719
##  7 Salinity  20mm 804     Lower San Jo…    1999 Summer 1999-06-26  0.102   0.705
##  8 Salinity  Baystudy 837 Lower San Jo…    1999 Summer 1999-07-06  0.190   0.629
##  9 Salinity  Baystudy 853 Lower San Jo…    1999 Summer 1999-07-06  0.180   0.637
## 10 Salinity  20mm 804     Lower San Jo…    1999 Summer 1999-07-08  0.227   0.597
## 11 Salinity  STN 804      Lower San Jo…    1999 Summer 1999-07-08  0.153   0.661
## 12 Salinity  EMP D12      Lower San Jo…    1999 Summer 1999-07-12 25.5    21.3  
## 13 Salinity  STN 804      Lower San Jo…    1999 Summer 1999-07-20  0.348   0.492
## 14 Salinity  20mm 804     Lower San Jo…    1999 Summer 1999-07-24  0.253   0.575
## 15 Salinity  STN 804      Lower San Jo…    1999 Summer 1999-08-03  0.230   0.594
## 16 Salinity  EMP D12      Lower San Jo…    1999 Summer 1999-08-09  0.269   0.561
## 17 Salinity  Baystudy 837 Lower San Jo…    1999 Summer 1999-08-12  0.171   0.645
## 18 Salinity  Baystudy 853 Lower San Jo…    1999 Summer 1999-08-12  0.162   0.653
## 19 Salinity  Baystudy 837 Lower San Jo…    1999 Summer 1999-08-30  0.781   0.117
## 20 Salinity  Baystudy 853 Lower San Jo…    1999 Summer 1999-08-30  0.358   0.483
## # ℹ 1 more variable: Zscore_flag <lgl>
# Context for the flagged values: Salinity records in the Sacramento River near
# Rio Vista SubRegion from 1 Nov 2000 through 28 Feb 2001 (inclusive)
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity",
    SubRegion == "Sacramento River near Rio Vista",
    Date >= as.Date("2000-11-01"),
    Date <= as.Date("2001-02-28")
  ) %>%
  arrange(Date) %>%
  # Show up to 50 rows so the full result is printed instead of being truncated
  print(n = 50)
## # A tibble: 48 × 9
##    Parameter Station       SubRegion   YearAdj Season Date        Result  Zscore
##    <chr>     <chr>         <chr>         <dbl> <chr>  <date>       <dbl>   <dbl>
##  1 Salinity  EMP D22       Sacramento…    2000 Fall   2000-11-06  1.11   1.35e+0
##  2 Salinity  USGS_SFBS 657 Sacramento…    2000 Fall   2000-11-07  0.09   3.06e-1
##  3 Salinity  Baystudy 751  Sacramento…    2000 Fall   2000-11-13  0.443  2.66e-1
##  4 Salinity  Baystudy 752  Sacramento…    2000 Fall   2000-11-13  0.501  3.58e-1
##  5 Salinity  Baystudy 760  Sacramento…    2000 Fall   2000-11-13  0.153  2.04e-1
##  6 Salinity  Baystudy 761  Sacramento…    2000 Fall   2000-11-13  0.144  2.19e-1
##  7 Salinity  FMWT 706      Sacramento…    2000 Fall   2000-11-14  0.802  8.45e-1
##  8 Salinity  FMWT 707      Sacramento…    2000 Fall   2000-11-14  0.589  5.02e-1
##  9 Salinity  FMWT 708      Sacramento…    2000 Fall   2000-11-14  0.259  3.31e-2
## 10 Salinity  FMWT 709      Sacramento…    2000 Fall   2000-11-14  0.104  2.84e-1
## 11 Salinity  FMWT 710      Sacramento…    2000 Fall   2000-11-14  0.0860 3.13e-1
## 12 Salinity  FMWT 711      Sacramento…    2000 Fall   2000-11-14  0.0749 3.30e-1
## 13 Salinity  EMP D22       Sacramento…    2001 Winter 2000-12-06 22.4    3.58e+1
## 14 Salinity  Baystudy 751  Sacramento…    2001 Winter 2000-12-07  1.19   1.48e+0
## 15 Salinity  Baystudy 752  Sacramento…    2001 Winter 2000-12-07  0.945  1.08e+0
## 16 Salinity  Baystudy 760  Sacramento…    2001 Winter 2000-12-07  0.245  5.47e-2
## 17 Salinity  Baystudy 761  Sacramento…    2001 Winter 2000-12-07  0.158  1.96e-1
## 18 Salinity  FMWT 706      Sacramento…    2001 Winter 2000-12-07  0.505  3.66e-1
## 19 Salinity  FMWT 707      Sacramento…    2001 Winter 2000-12-07  0.460  2.93e-1
## 20 Salinity  FMWT 708      Sacramento…    2001 Winter 2000-12-07  0.265  2.26e-2
## 21 Salinity  FMWT 709      Sacramento…    2001 Winter 2000-12-07  0.135  2.33e-1
## 22 Salinity  FMWT 710      Sacramento…    2001 Winter 2000-12-07 11.0    1.74e+1
## 23 Salinity  FMWT 711      Sacramento…    2001 Winter 2000-12-07 10.7    1.69e+1
## 24 Salinity  USGS_SFBS 657 Sacramento…    2001 Winter 2000-12-12  0.34   9.84e-2
## 25 Salinity  Baystudy 760  Sacramento…    2001 Winter 2001-01-04  0.236  6.91e-2
## 26 Salinity  Baystudy 761  Sacramento…    2001 Winter 2001-01-04  0.153  2.04e-1
## 27 Salinity  EMP D22       Sacramento…    2001 Winter 2001-01-05  2.83   4.13e+0
## 28 Salinity  FMWT 706      Sacramento…    2001 Winter 2001-01-05  1.54   2.04e+0
## 29 Salinity  FMWT 707      Sacramento…    2001 Winter 2001-01-05  0.928  1.05e+0
## 30 Salinity  FMWT 708      Sacramento…    2001 Winter 2001-01-05  1.03   1.21e+0
## 31 Salinity  FMWT 709      Sacramento…    2001 Winter 2001-01-05  0.454  2.82e-1
## 32 Salinity  FMWT 710      Sacramento…    2001 Winter 2001-01-05  0.205  1.19e-1
## 33 Salinity  FMWT 711      Sacramento…    2001 Winter 2001-01-05  0.0941 2.99e-1
## 34 Salinity  Baystudy 751  Sacramento…    2001 Winter 2001-01-08  0.771  7.95e-1
## 35 Salinity  Baystudy 752  Sacramento…    2001 Winter 2001-01-08  1.20   1.49e+0
## 36 Salinity  Baystudy 760  Sacramento…    2001 Winter 2001-01-31  0.135  2.34e-1
## 37 Salinity  Baystudy 761  Sacramento…    2001 Winter 2001-01-31  0.125  2.49e-1
## 38 Salinity  Baystudy 751  Sacramento…    2001 Winter 2001-02-01  0.144  2.19e-1
## 39 Salinity  Baystudy 752  Sacramento…    2001 Winter 2001-02-01  0.153  2.04e-1
## 40 Salinity  EMP D22       Sacramento…    2001 Winter 2001-02-05  0.281  3.12e-3
## 41 Salinity  USGS_SFBS 657 Sacramento…    2001 Winter 2001-02-06  0.13   2.41e-1
## 42 Salinity  FMWT 706      Sacramento…    2001 Winter 2001-02-20  0.184  1.53e-1
## 43 Salinity  FMWT 707      Sacramento…    2001 Winter 2001-02-20  0.121  2.56e-1
## 44 Salinity  FMWT 708      Sacramento…    2001 Winter 2001-02-20  0.114  2.67e-1
## 45 Salinity  FMWT 709      Sacramento…    2001 Winter 2001-02-20  0.112  2.70e-1
## 46 Salinity  FMWT 710      Sacramento…    2001 Winter 2001-02-20  0.116  2.63e-1
## 47 Salinity  FMWT 711      Sacramento…    2001 Winter 2001-02-20  0.114  2.67e-1
## 48 Salinity  USGS_SFBS 657 Sacramento…    2001 Winter 2001-02-26  0.11   2.74e-1
## # ℹ 1 more variable: Zscore_flag <lgl>
# Context for the flagged FMWT 735 value: Salinity records in the Middle
# Sacramento River SubRegion from 1 Aug through 31 Dec 2006 (inclusive)
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity",
    SubRegion == "Middle Sacramento River",
    Date >= as.Date("2006-08-01"),
    Date <= as.Date("2006-12-31")
  ) %>%
  arrange(Date)
## # A tibble: 19 × 9
##    Parameter Station          SubRegion YearAdj Season Date       Result  Zscore
##    <chr>     <chr>            <chr>       <dbl> <chr>  <date>      <dbl>   <dbl>
##  1 Salinity  USGS_CAWSC USGS… Middle S…    2006 Summer 2006-08-01 0.0663  0.313 
##  2 Salinity  EMP C3A          Middle S…    2006 Summer 2006-08-14 0.0677  0.267 
##  3 Salinity  EMP C3A          Middle S…    2006 Fall   2006-09-13 0.0807  0.178 
##  4 Salinity  USGS_CAWSC USGS… Middle S…    2006 Fall   2006-09-19 0.0740  0.0511
##  5 Salinity  EMP C3A          Middle S…    2006 Fall   2006-10-06 0.0639  0.395 
##  6 Salinity  USGS_CAWSC USGS… Middle S…    2006 Fall   2006-10-12 0.0577  0.607 
##  7 Salinity  FMWT 073         Middle S…    2006 Fall   2006-10-19 0.0682  0.247 
##  8 Salinity  FMWT 735         Middle S…    2006 Fall   2006-10-19 0.0716  0.133 
##  9 Salinity  EMP C3A          Middle S…    2006 Fall   2006-11-07 0.0778  0.0799
## 10 Salinity  FMWT 073         Middle S…    2006 Fall   2006-11-16 0.0869  0.391 
## 11 Salinity  FMWT 735         Middle S…    2006 Fall   2006-11-16 0.609  18.2   
## 12 Salinity  FMWT 736         Middle S…    2006 Fall   2006-11-16 0.0821  0.227 
## 13 Salinity  USGS_CAWSC USGS… Middle S…    2006 Fall   2006-11-20 0.0802  0.162 
## 14 Salinity  EMP C3A          Middle S…    2007 Winter 2006-12-06 0.0817  0.212 
## 15 Salinity  FMWT 073         Middle S…    2007 Winter 2006-12-15 0.0826  0.244 
## 16 Salinity  FMWT 735         Middle S…    2007 Winter 2006-12-15 0.0884  0.440 
## 17 Salinity  FMWT 736         Middle S…    2007 Winter 2006-12-15 0.0869  0.391 
## 18 Salinity  USGS_CAWSC USGS… Middle S…    2007 Winter 2006-12-18 0.0697  0.198 
## 19 Salinity  USGS_CAWSC USGS… Middle S…    2007 Winter 2006-12-27 0.0817  0.211 
## # ℹ 1 more variable: Zscore_flag <lgl>
# Context for the flagged FMWT 908 value: Salinity records in the San Joaquin
# River at Prisoners Pt SubRegion from 1 Feb through 31 May 1995 (inclusive)
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity",
    SubRegion == "San Joaquin River at Prisoners Pt",
    Date >= as.Date("1995-02-01"),
    Date <= as.Date("1995-05-31")
  ) %>%
  arrange(Date) %>%
  # Show up to 40 rows so the full result is printed instead of being truncated
  print(n = 40)
## # A tibble: 36 × 9
##    Parameter Station      SubRegion     YearAdj Season Date       Result  Zscore
##    <chr>     <chr>        <chr>           <dbl> <chr>  <date>      <dbl>   <dbl>
##  1 Salinity  EMP D26      San Joaquin …    1995 Winter 1995-02-07 0.0726  0.858 
##  2 Salinity  Baystudy 864 San Joaquin …    1995 Winter 1995-02-08 0.135   0.137 
##  3 Salinity  Baystudy 865 San Joaquin …    1995 Winter 1995-02-08 0.135   0.137 
##  4 Salinity  FMWT 814     San Joaquin …    1995 Winter 1995-02-08 0.558   4.78  
##  5 Salinity  FMWT 815     San Joaquin …    1995 Winter 1995-02-08 0.568   4.90  
##  6 Salinity  FMWT 904     San Joaquin …    1995 Winter 1995-02-08 0.554   4.73  
##  7 Salinity  FMWT 905     San Joaquin …    1995 Winter 1995-02-08 0.621   5.52  
##  8 Salinity  FMWT 906     San Joaquin …    1995 Winter 1995-02-16 1.14   11.6   
##  9 Salinity  FMWT 908     San Joaquin …    1995 Winter 1995-02-16 0.973   9.61  
## 10 Salinity  Baystudy 864 San Joaquin …    1995 Spring 1995-03-06 0.121   0.294 
## 11 Salinity  Baystudy 865 San Joaquin …    1995 Spring 1995-03-06 0.153   0.0773
## 12 Salinity  FMWT 814     San Joaquin …    1995 Spring 1995-03-08 0.689   6.30  
## 13 Salinity  FMWT 815     San Joaquin …    1995 Spring 1995-03-08 0.671   6.09  
## 14 Salinity  FMWT 904     San Joaquin …    1995 Spring 1995-03-08 0.680   6.19  
## 15 Salinity  FMWT 905     San Joaquin …    1995 Spring 1995-03-08 0.762   7.15  
## 16 Salinity  FMWT 906     San Joaquin …    1995 Spring 1995-03-14 1.32   13.7   
## 17 Salinity  FMWT 908     San Joaquin …    1995 Spring 1995-03-14 1.78   19.0   
## 18 Salinity  EMP D26      San Joaquin …    1995 Spring 1995-03-23 0.137   0.114 
## 19 Salinity  Baystudy 864 San Joaquin …    1995 Spring 1995-04-03 0.162   0.185 
## 20 Salinity  Baystudy 865 San Joaquin …    1995 Spring 1995-04-03 0.162   0.185 
## 21 Salinity  FMWT 814     San Joaquin …    1995 Spring 1995-04-05 0.225   0.914 
## 22 Salinity  FMWT 815     San Joaquin …    1995 Spring 1995-04-05 0.227   0.931 
## 23 Salinity  FMWT 904     San Joaquin …    1995 Spring 1995-04-05 0.245   1.15  
## 24 Salinity  FMWT 905     San Joaquin …    1995 Spring 1995-04-05 0.277   1.52  
## 25 Salinity  FMWT 906     San Joaquin …    1995 Spring 1995-04-05 0.253   1.24  
## 26 Salinity  FMWT 908     San Joaquin …    1995 Spring 1995-04-05 0.278   1.53  
## 27 Salinity  EMP D26      San Joaquin …    1995 Spring 1995-04-20 0.0754  0.825 
## 28 Salinity  20mm 815     San Joaquin …    1995 Spring 1995-04-25 0.0582  1.02  
## 29 Salinity  20mm 906     San Joaquin …    1995 Spring 1995-04-25 0.0965  0.579 
## 30 Salinity  Baystudy 864 San Joaquin …    1995 Spring 1995-05-01 0.135   0.137 
## 31 Salinity  Baystudy 865 San Joaquin …    1995 Spring 1995-05-01 0.135   0.137 
## 32 Salinity  EMP D26      San Joaquin …    1995 Spring 1995-05-08 0.0793  0.780 
## 33 Salinity  20mm 815     San Joaquin …    1995 Spring 1995-05-09 0.0606  0.997 
## 34 Salinity  20mm 906     San Joaquin …    1995 Spring 1995-05-09 0.0606  0.997 
## 35 Salinity  20mm 815     San Joaquin …    1995 Spring 1995-05-23 0.0687  0.902 
## 36 Salinity  20mm 906     San Joaquin …    1995 Spring 1995-05-23 0.0702  0.886 
## # ℹ 1 more variable: Zscore_flag <lgl>
# Context for the flagged FMWT 919/920/923 values: Salinity records in the
# Lower Mokelumne River SubRegion from 1 Aug through 31 Dec 2001 (inclusive)
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity",
    SubRegion == "Lower Mokelumne River",
    Date >= as.Date("2001-08-01"),
    Date <= as.Date("2001-12-31")
  ) %>%
  arrange(Date)
## # A tibble: 11 × 9
##    Parameter Station  SubRegion          YearAdj Season Date       Result Zscore
##    <chr>     <chr>    <chr>                <dbl> <chr>  <date>      <dbl>  <dbl>
##  1 Salinity  FMWT 903 Lower Mokelumne R…    2001 Fall   2001-09-11 0.109   0.375
##  2 Salinity  FMWT 919 Lower Mokelumne R…    2001 Fall   2001-09-11 0.115   0.492
##  3 Salinity  FMWT 920 Lower Mokelumne R…    2001 Fall   2001-09-11 0.110   0.392
##  4 Salinity  FMWT 903 Lower Mokelumne R…    2001 Fall   2001-11-14 0.107   0.342
##  5 Salinity  FMWT 919 Lower Mokelumne R…    2001 Fall   2001-11-15 1.06   16.8  
##  6 Salinity  FMWT 920 Lower Mokelumne R…    2001 Fall   2001-11-15 0.973  15.3  
##  7 Salinity  FMWT 923 Lower Mokelumne R…    2001 Fall   2001-11-15 1.17   18.7  
##  8 Salinity  FMWT 903 Lower Mokelumne R…    2002 Winter 2001-12-12 0.0965  0.167
##  9 Salinity  FMWT 919 Lower Mokelumne R…    2002 Winter 2001-12-12 0.105   0.308
## 10 Salinity  FMWT 920 Lower Mokelumne R…    2002 Winter 2001-12-12 0.146   1.02 
## 11 Salinity  FMWT 923 Lower Mokelumne R…    2002 Winter 2001-12-12 0.0970  0.175
## # ℹ 1 more variable: Zscore_flag <lgl>
# Context for the flagged STN 918 value: all Salinity records in the
# Victoria Canal SubRegion for Summer 2008, in date order
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity" & SubRegion == "Victoria Canal",
    YearAdj == 2008 & Season == "Summer"
  ) %>%
  arrange(Date)
## # A tibble: 16 × 9
##    Parameter Station          SubRegion YearAdj Season Date       Result  Zscore
##    <chr>     <chr>            <chr>       <dbl> <chr>  <date>      <dbl>   <dbl>
##  1 Salinity  STN 918          Victoria…    2008 Summer 2008-06-02  0.221  0.295 
##  2 Salinity  NCRO Victoria C… Victoria…    2008 Summer 2008-06-06  0.202  0.125 
##  3 Salinity  20mm 918         Victoria…    2008 Summer 2008-06-09  0.250  0.560 
##  4 Salinity  STN 918          Victoria…    2008 Summer 2008-06-16  0.268  0.723 
##  5 Salinity  NCRO Middle Riv… Victoria…    2008 Summer 2008-06-20  0.178  0.0920
##  6 Salinity  20mm 918         Victoria…    2008 Summer 2008-06-23  0.280  0.836 
##  7 Salinity  NCRO Victoria C… Victoria…    2008 Summer 2008-06-27  0.169  0.174 
##  8 Salinity  STN 918          Victoria…    2008 Summer 2008-06-30  0.309  1.10  
##  9 Salinity  NCRO Middle Riv… Victoria…    2008 Summer 2008-07-03  0.150  0.352 
## 10 Salinity  20mm 918         Victoria…    2008 Summer 2008-07-07  0.163  0.232 
## 11 Salinity  NCRO Victoria C… Victoria…    2008 Summer 2008-07-18  0.123  0.594 
## 12 Salinity  NCRO Middle Riv… Victoria…    2008 Summer 2008-07-23  0.124  0.582 
## 13 Salinity  STN 918          Victoria…    2008 Summer 2008-07-28  2.51  21.1   
## 14 Salinity  NCRO Victoria C… Victoria…    2008 Summer 2008-08-08  0.146  0.382 
## 15 Salinity  STN 918          Victoria…    2008 Summer 2008-08-11  0.244  0.506 
## 16 Salinity  NCRO Middle Riv… Victoria…    2008 Summer 2008-08-15  0.146  0.384 
## # ℹ 1 more variable: Zscore_flag <lgl>
# Secchi depth in Honker Bay from Oct 2019 - Feb 2020
# Context for the flagged FMWT 518 value. The window is half-open (ends before
# 1 March) so that 29 Feb 2020 is included - 2020 is a leap year, and an upper
# bound of 28 Feb would silently drop any sample collected on the leap day.
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Secchi",
    SubRegion == "Honker Bay",
    Date >= as.Date("2019-10-01"),
    Date < as.Date("2020-03-01")
  ) %>%
  arrange(Date)
## # A tibble: 17 × 9
##    Parameter Station      SubRegion  YearAdj Season Date       Result  Zscore
##    <chr>     <chr>        <chr>        <dbl> <chr>  <date>      <dbl>   <dbl>
##  1 Secchi    FMWT 505     Honker Bay    2019 Fall   2019-10-07     70  1.49  
##  2 Secchi    Baystudy 534 Honker Bay    2019 Fall   2019-10-08     40  0.0530
##  3 Secchi    Baystudy 534 Honker Bay    2019 Fall   2019-11-05     50  0.461 
##  4 Secchi    FMWT 507     Honker Bay    2019 Fall   2019-11-08     23  0.927 
##  5 Secchi    FMWT 518     Honker Bay    2019 Fall   2019-11-08     30  0.567 
##  6 Secchi    FMWT 519     Honker Bay    2019 Fall   2019-11-08     34  0.361 
##  7 Secchi    FMWT 505     Honker Bay    2019 Fall   2019-11-21     67  1.33  
##  8 Secchi    FMWT 507     Honker Bay    2020 Winter 2019-12-02     57  0.821 
##  9 Secchi    FMWT 518     Honker Bay    2020 Winter 2019-12-02    360 16.4   
## 10 Secchi    FMWT 519     Honker Bay    2020 Winter 2019-12-02     46  0.255 
## 11 Secchi    Baystudy 534 Honker Bay    2020 Winter 2019-12-03     50  0.461 
## 12 Secchi    FMWT 505     Honker Bay    2020 Winter 2019-12-10     40  0.0530
## 13 Secchi    SKT 519      Honker Bay    2020 Winter 2019-12-17     42  0.0498
## 14 Secchi    Baystudy 534 Honker Bay    2020 Winter 2020-01-13     30  0.567 
## 15 Secchi    SKT 519      Honker Bay    2020 Winter 2020-01-14     44  0.153 
## 16 Secchi    SKT 519      Honker Bay    2020 Winter 2020-02-14     34  0.361 
## 17 Secchi    Baystudy 534 Honker Bay    2020 Winter 2020-02-24     30  0.567 
## # ℹ 1 more variable: Zscore_flag <lgl>

After inspecting the data flagged by the Z-score test, a few of the values appear to be valid based on best professional judgment:

  • Salinity value collected at FMWT 908 on 3/14/1995
  • Salinity values collected at FMWT 919, FMWT 920, and FMWT 923 on 11/15/2001

We will exclude the remaining values flagged by the Z-score test.

# Keep the flagged values judged to be valid (Salinity at FMWT 908 on
# 1995-03-14, and at FMWT 919, FMWT 920, and FMWT 923 on 2001-11-15) and drop
# every other record flagged by the Z-score test. The Zscore helper columns
# are removed afterwards because they are no longer needed.
df_wq_meas_c5 <- df_wq_meas_flag %>%
  filter(
    # Effective flag: FALSE for the validated records, otherwise the original
    # Zscore_flag; rows whose effective flag is TRUE are removed
    !case_when(
      Parameter == "Salinity" &
        Station == "FMWT 908" &
        Date == as.Date("1995-03-14") ~ FALSE,
      Parameter == "Salinity" &
        Station %in% c("FMWT 919", "FMWT 920", "FMWT 923") &
        Date == as.Date("2001-11-15") ~ FALSE,
      .default = Zscore_flag
    )
  ) %>%
  select(!starts_with("Zscore"))

Calculate Averages

Finally, we’ll calculate seasonal-regional averages for each adjusted water year for each water quality measurement parameter.

# Attach the Region for each SubRegion; this object holds the cleaned raw WQ
# measurement data to be exported
raw_wq_meas <- df_wq_meas_c5 %>%
  left_join(df_regions, by = "SubRegion") %>%
  # Column order: Region directly before SubRegion, Parameter directly before
  # Result
  relocate(Region, .before = SubRegion) %>%
  relocate(Parameter, .before = Result)

# Seasonal-regional averages: apply calc_seas_reg_avg() to the data of each
# Parameter separately, then spread the results into one column per Parameter
df_wq_meas_avg <- raw_wq_meas %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(df_data = map(df_data, \(df_param) calc_seas_reg_avg(df_param))) %>%
  unnest(cols = df_data) %>%
  pivot_wider(names_from = Parameter, values_from = Result)

# Start from the full set of Year-Season-Region combinations so that each one
# is represented, then add the Year Type info and the averages. Both joins use
# the columns the tables have in common (natural join).
lt_avg_wq_meas <- df_yr_seas_reg %>%
  left_join(df_yr_type) %>%
  left_join(df_wq_meas_avg)

Nutrients

Now we’ll process the nutrient data: Dissolved Ammonia, Dissolved Nitrate + Nitrite, and Dissolved Ortho-phosphate. First, we’ll clean up a few issues with the Reporting Limits.

# A few non-detect values in the EMP data set (Sign is "<") have no reporting
# limit. For now, 0.01 is used as their reporting limit, as suggested by
# Sarah Perry.
df_nutr_c1 <- df_dwq_lt_filt %>%
  # Keep only the nutrient-related columns by dropping the other WQ parameters
  select(!c(Temperature, Salinity, Secchi, starts_with("Chlorophyll"))) %>%
  mutate(
    DissAmmonia = if_else(
      is.na(DissAmmonia) & DissAmmonia_Sign == "<",
      0.01,
      DissAmmonia
    ),
    DissNitrateNitrite = if_else(
      is.na(DissNitrateNitrite) & DissNitrateNitrite_Sign == "<",
      0.01,
      DissNitrateNitrite
    ),
    DissOrthophos = if_else(
      is.na(DissOrthophos) & DissOrthophos_Sign == "<",
      0.01,
      DissOrthophos
    )
  ) %>%
  # Keep records that have a value for at least one nutrient parameter
  filter(
    if_any(c(DissAmmonia, DissNitrateNitrite, DissOrthophos), ~ !is.na(.x))
  ) %>%
  # The _Sign variables are all NA for the USGS_SFBS data, so fill in "=" for
  # any missing _Sign value for now
  mutate(across(ends_with("_Sign"), ~ coalesce(.x, "=")))

# USGS_SFBS survey: when at least one nutrient parameter is reported for a
# record, the parameters without a value are assumed to have been below the
# reporting limit (RL). USGS provided RL values for 2006-present; these are
# assumed to be constant over the whole monitoring program, including years
# before 2006. The RL is 0.05 umol/L for all three parameters, converted to
# mg/L using the molar mass of N or P:
#   dissolved ammonia and dissolved nitrate + nitrite:
#     0.05 * 14.0067 / 1000 = 0.0007 mg/L
#   dissolved ortho-phosphate:
#     0.05 * 30.973761 / 1000 = 0.0015 mg/L
df_nutr_sfbs_blw_rl <- df_nutr_c1 %>%
  # USGS_SFBS records that are missing at least one nutrient value
  filter(
    Source == "USGS_SFBS",
    if_any(c(DissAmmonia, DissNitrateNitrite, DissOrthophos), is.na)
  ) %>%
  # Step 1: mark the missing values as non-detects. This must happen before
  # the values are filled in because it relies on them still being NA.
  mutate(
    DissAmmonia_Sign = if_else(is.na(DissAmmonia), "<", DissAmmonia_Sign),
    DissNitrateNitrite_Sign = if_else(
      is.na(DissNitrateNitrite),
      "<",
      DissNitrateNitrite_Sign
    ),
    DissOrthophos_Sign = if_else(is.na(DissOrthophos), "<", DissOrthophos_Sign)
  ) %>%
  # Step 2: set every value marked "<" to the RL of its parameter
  mutate(
    DissAmmonia = if_else(DissAmmonia_Sign == "<", 0.0007, DissAmmonia),
    DissNitrateNitrite = if_else(
      DissNitrateNitrite_Sign == "<",
      0.0007,
      DissNitrateNitrite
    ),
    DissOrthophos = if_else(DissOrthophos_Sign == "<", 0.0015, DissOrthophos)
  )

# Swap in the RL-filled USGS_SFBS records: remove their original versions
# (matched on Source, Station, and Datetime) and append the updated rows
df_nutr_c2 <- bind_rows(
  anti_join(
    df_nutr_c1,
    df_nutr_sfbs_blw_rl,
    by = join_by(Source, Station, Datetime)
  ),
  df_nutr_sfbs_blw_rl
)

Next, we’ll filter the data for each nutrient parameter so there is only one sample collected per day at a station, and we’ll restructure the data for continued processing.

# Build a nested data frame with one row per nutrient Parameter. Each row's
# `df_data` holds the records that have a value for that Parameter, reduced to
# the station/date columns plus the columns whose names contain the Parameter
# name (the value and its _Sign column).
ndf_nutr <-
  tibble(
    Parameter = c("DissAmmonia", "DissNitrateNitrite", "DissOrthophos")
  ) %>%
  mutate(
    df_data = map(
      Parameter,
      \(param) {
        df_nutr_c2 %>%
          # Records without a value for this Parameter are not needed
          drop_na(all_of(param)) %>%
          select(
            Source,
            Station,
            Latitude,
            Longitude,
            SubRegion,
            YearAdj,
            Month,
            Season,
            Date,
            Datetime,
            contains(param)
          ) %>%
          # Filter data so that there is only one sample per station-day
          filt_daily_dups()
      }
    )
  )

# Sanity check: for each parameter, list any Source-Station-Date combination
# that still has more than one sample (each result should have zero rows)
map(
  ndf_nutr$df_data,
  \(df_param) filter(count(df_param, Source, Station, Date), n > 1)
)
## [[1]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
## 
## [[2]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
## 
## [[3]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
# Convert to a long format: give the parameter-specific value and _Sign
# columns the common names `Result` and `Sign` so the per-parameter data
# frames share the same columns, then unnest them into one data frame
df_nutr_c3 <- ndf_nutr %>%
  mutate(
    df_data = map2(
      df_data,
      Parameter,
      \(df_param, param) {
        rename(df_param, Result = all_of(param), Sign = ends_with("_Sign"))
      }
    )
  ) %>%
  unnest(cols = df_data)

Temporal Coverage

Now let’s take a closer look at the temporal data coverage for each Station and parameter.

Sampling Effort by Station

# Build one sampling-effort-by-station plot per Parameter-Source combination
# with plot_samp_effort_sta(), then group the results by Parameter so each
# Parameter row holds a nested data frame of its Sources
ndf_nutr_se_sta_plt <- df_nutr_c3 %>%
  nest(.by = c(Parameter, Source), .key = "df_data") %>%
  mutate(plt = map(df_data, \(df_src) plot_samp_effort_sta(df_src))) %>%
  nest(.by = Parameter, .key = "ndf_data_source")

DissAmmonia

EMP

NCRO

USGS_CAWSC

USGS_SFBS

DissNitrateNitrite

EMP

NCRO

USGS_CAWSC

USGS_SFBS

DissOrthophos

EMP

NCRO

USGS_CAWSC

USGS_SFBS

Remove Sparse Surveys

For the USGS-CAWSC survey, only station 11447650 (Sacramento River at Freeport) was sampled on a long-term basis for nutrients, so we’ll only include this station from the USGS-CAWSC survey.

# Keep every record from the other surveys; from USGS_CAWSC keep only stations
# whose name ends in "USGS-11447650"
df_nutr_c4 <- df_nutr_c3 %>%
  filter(Source != "USGS_CAWSC" | str_detect(Station, "USGS-11447650$"))

Filter Subregions

Not all of the subregions were sampled consistently from 1975 to 2021. To make sure that we only include the subregions that were sampled adequately, we will require that a subregion has data for at least 75% of the 47 years from 1975 to 2021 (35 years) in each season.

# For each Parameter, keep only the SubRegions that were sampled in at least
# `num_yrs_threshold` years in every one of the four seasons
df_nutr_c5 <- df_nutr_c4 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(
    # SubRegions that pass the sampling requirement for this Parameter
    df_subreg_seas = map(
      df_data,
      \(df_param) {
        df_param %>%
          # Number of years with data for each SubRegion-Season
          distinct(SubRegion, YearAdj, Season) %>%
          count(SubRegion, Season, name = "NumYears") %>%
          # The least-sampled season of a SubRegion must meet the threshold
          group_by(SubRegion) %>%
          filter(min(NumYears) >= num_yrs_threshold) %>%
          ungroup() %>%
          # A season with no data has no row, so also require all 4 seasons
          count(SubRegion) %>%
          filter(n == 4)
      }
    ),
    # Restrict the data to the SubRegions that passed
    df_data_filt = map2(
      df_data,
      df_subreg_seas,
      \(df_param, df_keep) filter(df_param, SubRegion %in% df_keep$SubRegion)
    )
  ) %>%
  select(Parameter, df_data_filt) %>%
  unnest(cols = df_data_filt)

View Results

Let’s take a look at the sampling effort for the remaining subregions for each season after filtering for each nutrient parameter.

# Build one sampling-effort-by-SubRegion plot for each Parameter from the
# subregion-filtered data
ndf_nutr_se_subreg_plt <- df_nutr_c5 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(plt = map(df_data, \(df) plot_samp_effort_subreg(df)))

DissAmmonia

DissNitrateNitrite

DissOrthophos

Remove Outliers

First, we’ll look at the min-max ranges of each of the nutrient parameters, to see if there are any obvious outliers to exclude from the data set.

# Smallest and largest Result for each Parameter
df_nutr_c5 %>%
  summarize(
    .by = Parameter,
    min_val = min(Result),
    max_val = max(Result)
  )
## # A tibble: 3 × 3
##   Parameter          min_val max_val
##   <chr>                <dbl>   <dbl>
## 1 DissAmmonia         0         2.94
## 2 DissNitrateNitrite  0.0007   15.2 
## 3 DissOrthophos       0.0015    2

Let’s take a closer look at the minimum DissAmmonia values and the maximum values of all nutrient parameters to see if we should omit them from the data set.

# Drop the columns listed in `vars_rm_view` so the printed tables are narrower
df_nutr_c5_view <- df_nutr_c5 %>% select(-any_of(vars_rm_view))

# The 10 lowest DissAmmonia results (ties included)
df_nutr_c5_view %>%
  filter(Parameter == "DissAmmonia") %>%
  slice_min(order_by = Result, n = 10)
## # A tibble: 10 × 8
##    Parameter   Station         SubRegion YearAdj Season Date       Sign   Result
##    <chr>       <chr>           <chr>       <dbl> <chr>  <date>     <chr>   <dbl>
##  1 DissAmmonia USGS_CAWSC USG… Middle S…    1980 Winter 1980-01-17 =     0      
##  2 DissAmmonia USGS_SFBS 6     Mid Suis…    1988 Fall   1988-10-06 =     1.40e-4
##  3 DissAmmonia USGS_SFBS 6     Mid Suis…    1979 Summer 1979-07-10 =     5.60e-4
##  4 DissAmmonia USGS_SFBS 2     Confluen…    1977 Spring 1977-05-11 <     7   e-4
##  5 DissAmmonia USGS_SFBS 3     Confluen…    1980 Spring 1980-05-22 <     7   e-4
##  6 DissAmmonia USGS_SFBS 6     Mid Suis…    1980 Spring 1980-05-22 <     7   e-4
##  7 DissAmmonia USGS_SFBS 657   Sacramen…    2009 Winter 2009-01-13 <     7   e-4
##  8 DissAmmonia USGS_SFBS 7     West Sui…    1977 Summer 1977-07-11 <     7   e-4
##  9 DissAmmonia USGS_SFBS 6     Mid Suis…    1979 Fall   1979-09-18 =     1.54e-3
## 10 DissAmmonia USGS_SFBS 6     Mid Suis…    1980 Summer 1980-08-19 =     1.68e-3
# The 10 highest DissAmmonia results (ties included)
df_nutr_c5_view %>%
  filter(Parameter == "DissAmmonia") %>%
  slice_max(order_by = Result, n = 10)
## # A tibble: 10 × 8
##    Parameter   Station SubRegion          YearAdj Season Date       Sign  Result
##    <chr>       <chr>   <chr>                <dbl> <chr>  <date>     <chr>  <dbl>
##  1 DissAmmonia EMP P8  San Joaquin River…    2004 Winter 2004-02-26 =       2.94
##  2 DissAmmonia EMP P8  San Joaquin River…    2000 Winter 1999-12-16 =       2.4 
##  3 DissAmmonia EMP P8  San Joaquin River…    2001 Winter 2001-01-03 =       2.4 
##  4 DissAmmonia EMP P8  San Joaquin River…    2006 Winter 2005-12-20 =       2.3 
##  5 DissAmmonia EMP P8  San Joaquin River…    1991 Spring 1991-03-06 =       2   
##  6 DissAmmonia EMP P8  San Joaquin River…    1994 Winter 1994-02-03 =       2   
##  7 DissAmmonia EMP P8  San Joaquin River…    2001 Winter 2001-02-01 =       2   
##  8 DissAmmonia EMP P8  San Joaquin River…    1991 Spring 1991-03-22 =       1.7 
##  9 DissAmmonia EMP P8  San Joaquin River…    2003 Winter 2002-12-10 =       1.7 
## 10 DissAmmonia EMP P8  San Joaquin River…    2004 Winter 2004-01-15 =       1.61
# The 10 highest DissNitrateNitrite results (ties included)
df_nutr_c5_view %>%
  filter(Parameter == "DissNitrateNitrite") %>%
  slice_max(order_by = Result, n = 10)
## # A tibble: 10 × 8
##    Parameter          Station  SubRegion  YearAdj Season Date       Sign  Result
##    <chr>              <chr>    <chr>        <dbl> <chr>  <date>     <chr>  <dbl>
##  1 DissNitrateNitrite EMP D28A Old River     2017 Summer 2017-07-14 =      15.2 
##  2 DissNitrateNitrite EMP D4   Lower Sac…    2019 Winter 2019-01-10 =       6.07
##  3 DissNitrateNitrite EMP P8   San Joaqu…    2015 Winter 2015-02-12 =       4.57
##  4 DissNitrateNitrite EMP P8   San Joaqu…    2014 Winter 2014-02-06 =       3.94
##  5 DissNitrateNitrite EMP P8   San Joaqu…    2009 Winter 2009-02-18 =       3.7 
##  6 DissNitrateNitrite EMP D4   Lower Sac…    2017 Summer 2017-06-14 =       3.68
##  7 DissNitrateNitrite EMP P8   San Joaqu…    2014 Spring 2014-03-07 =       3.68
##  8 DissNitrateNitrite EMP P8   San Joaqu…    2009 Winter 2008-12-04 =       3.6 
##  9 DissNitrateNitrite EMP P8   San Joaqu…    2012 Spring 2012-03-15 =       3.6 
## 10 DissNitrateNitrite EMP P8   San Joaqu…    2019 Winter 2019-01-15 =       3.6
# The 10 highest DissOrthophos results (ties included)
df_nutr_c5_view %>%
  filter(Parameter == "DissOrthophos") %>%
  slice_max(order_by = Result, n = 10)
## # A tibble: 10 × 8
##    Parameter     Station  SubRegion       YearAdj Season Date       Sign  Result
##    <chr>         <chr>    <chr>             <dbl> <chr>  <date>     <chr>  <dbl>
##  1 DissOrthophos EMP D7   Grizzly Bay        2019 Fall   2019-11-07 <      2    
##  2 DissOrthophos EMP P8   San Joaquin Ri…    1989 Spring 1989-03-28 =      0.56 
##  3 DissOrthophos EMP P8   San Joaquin Ri…    1986 Winter 1986-02-12 =      0.51 
##  4 DissOrthophos EMP P8   San Joaquin Ri…    2013 Fall   2013-09-24 =      0.5  
##  5 DissOrthophos EMP P8   San Joaquin Ri…    1991 Spring 1991-04-08 =      0.49 
##  6 DissOrthophos EMP P8   San Joaquin Ri…    2004 Winter 2004-02-26 =      0.48 
##  7 DissOrthophos EMP P8   San Joaquin Ri…    2015 Winter 2015-02-12 =      0.48 
##  8 DissOrthophos EMP P8   San Joaquin Ri…    2020 Fall   2020-09-09 =      0.474
##  9 DissOrthophos EMP MD10 Disappointment…    1979 Winter 1979-01-16 =      0.46 
## 10 DissOrthophos EMP P8   San Joaquin Ri…    2020 Fall   2020-10-08 =      0.456

A few of these values look questionable. For now, we will exclude the one DissAmmonia value equal to zero from the data set, and see if the other values are flagged by the modified Z-score test.

# Keep only strictly positive results (removes the zero-valued record)
df_nutr_c6 <- filter(df_nutr_c5, Result > 0)

A few of the values reported as less than the reporting limit have reporting limits that are very high compared to the range of the values for the parameter (> 75th percentile). This includes the highest DissOrthophos value in the data set. We will flag and take a closer look at these values for possible removal from the data set.

# Apply flag_high_rl() separately to each Parameter's data with
# perc_thresh = 0.75, then recombine into one data frame
df_nutr_high_rl_flag <- df_nutr_c6 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(df_data = map(df_data, \(df) flag_high_rl(df, perc_thresh = 0.75))) %>%
  unnest(cols = df_data)

# Print the flagged records (up to 40 rows) without the `vars_rm_view` columns
df_nutr_high_rl_flag %>%
  filter(HighRL_flag) %>%
  select(-any_of(vars_rm_view)) %>%
  print(n = 40)
## # A tibble: 34 × 9
##    Parameter          Station   SubRegion YearAdj Season Date       Sign  Result
##    <chr>              <chr>     <chr>       <dbl> <chr>  <date>     <chr>  <dbl>
##  1 DissAmmonia        EMP D10   Confluen…    2019 Fall   2019-10-07 <       0.2 
##  2 DissAmmonia        EMP D19   Franks T…    2019 Fall   2019-10-03 <       0.2 
##  3 DissAmmonia        EMP D19   Franks T…    2019 Fall   2019-11-04 <       0.2 
##  4 DissAmmonia        EMP D19   Franks T…    2020 Winter 2020-01-15 <       0.25
##  5 DissAmmonia        EMP D19   Franks T…    2020 Spring 2020-03-02 <       0.25
##  6 DissAmmonia        EMP D26   San Joaq…    2019 Fall   2019-10-04 <       0.2 
##  7 DissAmmonia        EMP D26   San Joaq…    2019 Fall   2019-11-05 <       0.2 
##  8 DissAmmonia        EMP D28A  Old River    2019 Fall   2019-10-03 <       0.2 
##  9 DissAmmonia        EMP D28A  Old River    2019 Fall   2019-11-04 <       0.2 
## 10 DissAmmonia        EMP D28A  Old River    2020 Winter 2020-01-15 <       0.25
## 11 DissAmmonia        EMP D28A  Old River    2020 Spring 2020-03-02 <       0.25
## 12 DissAmmonia        EMP D6    West Sui…    2019 Fall   2019-10-07 <       0.2 
## 13 DissAmmonia        EMP D7    Grizzly …    2019 Fall   2019-11-07 <       0.2 
## 14 DissAmmonia        EMP D8    Mid Suis…    2019 Fall   2019-10-07 <       0.2 
## 15 DissAmmonia        EMP D8    Mid Suis…    2019 Fall   2019-11-06 <       0.2 
## 16 DissAmmonia        EMP EZ2   Confluen…    2019 Fall   2019-10-07 <       0.2 
## 17 DissAmmonia        EMP EZ2   Lower Sa…    2019 Fall   2019-11-06 <       0.2 
## 18 DissAmmonia        EMP EZ6   Confluen…    2019 Fall   2019-10-07 <       0.2 
## 19 DissAmmonia        EMP MD10A Disappoi…    2019 Fall   2019-10-04 <       0.2 
## 20 DissAmmonia        EMP MD10A Disappoi…    2019 Fall   2019-11-05 <       0.2 
## 21 DissAmmonia        EMP MD10A Disappoi…    2020 Winter 2020-01-16 <       0.25
## 22 DissAmmonia        EMP MD10A Disappoi…    2020 Winter 2020-02-14 <       0.25
## 23 DissAmmonia        EMP MD10A Disappoi…    2020 Spring 2020-03-03 <       0.25
## 24 DissAmmonia        EMP P8    San Joaq…    2019 Fall   2019-10-04 <       0.2 
## 25 DissAmmonia        EMP P8    San Joaq…    2019 Fall   2019-11-05 <       0.2 
## 26 DissAmmonia        EMP P8    San Joaq…    2020 Winter 2020-02-14 <       0.25
## 27 DissNitrateNitrite EMP D10   Confluen…    2019 Fall   2019-10-07 <       0.55
## 28 DissNitrateNitrite EMP D6    West Sui…    2019 Fall   2019-10-07 <       2.8 
## 29 DissNitrateNitrite EMP D7    Grizzly …    2019 Fall   2019-10-08 <       2.8 
## 30 DissNitrateNitrite EMP D7    Grizzly …    2019 Fall   2019-11-07 <       2.8 
## 31 DissNitrateNitrite EMP D8    Mid Suis…    2019 Fall   2019-10-07 <       1.1 
## 32 DissNitrateNitrite EMP EZ6   Confluen…    2019 Fall   2019-10-07 <       0.55
## 33 DissOrthophos      EMP D4    Lower Sa…    2019 Fall   2019-11-06 <       0.4 
## 34 DissOrthophos      EMP D7    Grizzly …    2019 Fall   2019-11-07 <       2   
## # ℹ 1 more variable: HighRL_flag <lgl>
# Five-number summary (min, quartiles, max) of Result for each Parameter
df_nutr_c6 %>%
  summarize(
    .by = Parameter,
    min_val = min(Result),
    first_quantile = quantile(Result, probs = 0.25),
    median = median(Result),
    third_quantile = quantile(Result, probs = 0.75),
    max_val = max(Result)
  )
## # A tibble: 3 × 6
##   Parameter           min_val first_quantile median third_quantile max_val
##   <chr>                 <dbl>          <dbl>  <dbl>          <dbl>   <dbl>
## 1 DissAmmonia        0.000140           0.03 0.0633           0.12    2.94
## 2 DissNitrateNitrite 0.0007             0.19 0.31             0.46   15.2 
## 3 DissOrthophos      0.0015             0.05 0.07             0.09    2

Upon closer inspection, all values that are less than the reporting limit with reporting limits that are greater than the 75th percentile of the values for the parameter should be removed from the data set.

# Drop the records flagged for a high reporting limit, then discard the flag
# column since it is no longer needed
df_nutr_c7 <- df_nutr_high_rl_flag %>%
  filter(!HighRL_flag) %>%
  mutate(HighRL_flag = NULL)

Next, we’ll look for outliers by using a modified Z-score test, calculated separately for each parameter and subregion, and flagging data points with scores greater than 15.

# Run flag_modzscore() with threshold = 15 within each Parameter-SubRegion
# group, then remove the grouping
df_nutr_modzscore_flag <- df_nutr_c7 %>%
  group_by(Parameter, SubRegion) %>%
  flag_modzscore(threshold = 15) %>%
  ungroup()

# Narrower copy for printing (without the `vars_rm_view` columns), followed by
# the flagged data points
df_nutr_modzscore_flag_view <- df_nutr_modzscore_flag %>%
  select(-any_of(vars_rm_view))
filter(df_nutr_modzscore_flag_view, ModZscore_flag)
## # A tibble: 5 × 10
##   Parameter   Station SubRegion YearAdj Season Date       Sign  Result ModZscore
##   <chr>       <chr>   <chr>       <dbl> <chr>  <date>     <chr>  <dbl>     <dbl>
## 1 DissAmmonia EMP D2… Old River    1996 Winter 1996-01-25 =       1         22.1
## 2 DissAmmonia EMP P8  San Joaq…    2004 Winter 2004-02-26 =       2.94      18.4
## 3 DissNitrat… EMP D2… Old River    2017 Summer 2017-07-14 =      15.2       45.2
## 4 DissNitrat… EMP D4  Lower Sa…    2017 Summer 2017-06-14 =       3.68      15.3
## 5 DissNitrat… EMP D4  Lower Sa…    2019 Winter 2019-01-10 =       6.07      26.2
## # ℹ 1 more variable: ModZscore_flag <lgl>
# DissAmmonia in Old River from 1995-11-01 through 1996-03-31 (inclusive), in
# chronological order
df_nutr_modzscore_flag_view %>%
  filter(
    Parameter == "DissAmmonia",
    SubRegion == "Old River",
    between(Date, as.Date("1995-11-01"), as.Date("1996-03-31"))
  ) %>%
  arrange(Date)
## # A tibble: 5 × 10
##   Parameter   Station SubRegion YearAdj Season Date       Sign  Result ModZscore
##   <chr>       <chr>   <chr>       <dbl> <chr>  <date>     <chr>  <dbl>     <dbl>
## 1 DissAmmonia EMP D2… Old River    1995 Fall   1995-11-14 =       0.02     0.227
## 2 DissAmmonia EMP D2… Old River    1996 Winter 1995-12-14 =       0.07     0.910
## 3 DissAmmonia EMP D2… Old River    1996 Winter 1996-01-25 =       1       22.1  
## 4 DissAmmonia EMP D2… Old River    1996 Winter 1996-02-08 =       0.15     2.73 
## 5 DissAmmonia EMP D2… Old River    1996 Spring 1996-03-12 =       0.04     0.227
## # ℹ 1 more variable: ModZscore_flag <lgl>
# DissAmmonia in San Joaquin River near Stockton from 2003-11-01 through
# 2004-04-30 (inclusive), in chronological order
df_nutr_modzscore_flag_view %>%
  filter(
    Parameter == "DissAmmonia",
    SubRegion == "San Joaquin River near Stockton",
    between(Date, as.Date("2003-11-01"), as.Date("2004-04-30"))
  ) %>%
  arrange(Date)
## # A tibble: 6 × 10
##   Parameter   Station SubRegion YearAdj Season Date       Sign  Result ModZscore
##   <chr>       <chr>   <chr>       <dbl> <chr>  <date>     <chr>  <dbl>     <dbl>
## 1 DissAmmonia EMP P8  San Joaq…    2003 Fall   2003-11-17 =       0.2      0.585
## 2 DissAmmonia EMP P8  San Joaq…    2004 Winter 2003-12-16 =       1.22     7.21 
## 3 DissAmmonia EMP P8  San Joaq…    2004 Winter 2004-01-15 =       1.61     9.75 
## 4 DissAmmonia EMP P8  San Joaq…    2004 Winter 2004-02-26 =       2.94    18.4  
## 5 DissAmmonia EMP P8  San Joaq…    2004 Spring 2004-03-15 =       0.89     5.07 
## 6 DissAmmonia EMP P8  San Joaq…    2004 Spring 2004-04-12 =       0.09     0.130
## # ℹ 1 more variable: ModZscore_flag <lgl>
# DissNitrateNitrite in Old River from 2017-05-01 through 2017-09-30
# (inclusive), in chronological order
df_nutr_modzscore_flag_view %>%
  filter(
    Parameter == "DissNitrateNitrite",
    SubRegion == "Old River",
    between(Date, as.Date("2017-05-01"), as.Date("2017-09-30"))
  ) %>%
  arrange(Date)
## # A tibble: 5 × 10
##   Parameter   Station SubRegion YearAdj Season Date       Sign  Result ModZscore
##   <chr>       <chr>   <chr>       <dbl> <chr>  <date>     <chr>  <dbl>     <dbl>
## 1 DissNitrat… EMP D2… Old River    2017 Spring 2017-05-17 =       0.28    0.0910
## 2 DissNitrat… EMP D2… Old River    2017 Summer 2017-06-15 =       0.11    0.607 
## 3 DissNitrat… EMP D2… Old River    2017 Summer 2017-07-14 =      15.2    45.2   
## 4 DissNitrat… EMP D2… Old River    2017 Summer 2017-08-14 =       0.12    0.576 
## 5 DissNitrat… EMP D2… Old River    2017 Fall   2017-09-12 =       0.14    0.516 
## # ℹ 1 more variable: ModZscore_flag <lgl>
# DissNitrateNitrite in Lower Sacramento River from 2017-04-01 through
# 2017-08-31 (inclusive), in chronological order
df_nutr_modzscore_flag_view %>%
  filter(
    Parameter == "DissNitrateNitrite",
    SubRegion == "Lower Sacramento River",
    between(Date, as.Date("2017-04-01"), as.Date("2017-08-31"))
  ) %>%
  arrange(Date)
## # A tibble: 10 × 10
##    Parameter  Station SubRegion YearAdj Season Date       Sign  Result ModZscore
##    <chr>      <chr>   <chr>       <dbl> <chr>  <date>     <chr>  <dbl>     <dbl>
##  1 DissNitra… EMP D4  Lower Sa…    2017 Spring 2017-04-05 =     0.23       0.409
##  2 DissNitra… USGS_S… Lower Sa…    2017 Spring 2017-04-18 =     0.0901     1.05 
##  3 DissNitra… EMP D4  Lower Sa…    2017 Spring 2017-05-19 =     0.12       0.910
##  4 DissNitra… EMP D4  Lower Sa…    2017 Summer 2017-06-14 =     3.68      15.3  
##  5 DissNitra… USGS_S… Lower Sa…    2017 Summer 2017-06-22 =     0.194      0.573
##  6 DissNitra… EMP D4  Lower Sa…    2017 Summer 2017-07-13 =     0.07       1.14 
##  7 DissNitra… USGS_S… Lower Sa…    2017 Summer 2017-07-25 =     0.223      0.441
##  8 DissNitra… EMP D4  Lower Sa…    2017 Summer 2017-08-10 =     0.26       0.273
##  9 DissNitra… EMP EZ2 Lower Sa…    2017 Summer 2017-08-16 =     0.26       0.273
## 10 DissNitra… USGS_S… Lower Sa…    2017 Summer 2017-08-22 =     0.187      0.605
## # ℹ 1 more variable: ModZscore_flag <lgl>
# DissNitrateNitrite in Lower Sacramento River from 2018-11-01 through
# 2019-03-31 (inclusive), in chronological order
df_nutr_modzscore_flag_view %>%
  filter(
    Parameter == "DissNitrateNitrite",
    SubRegion == "Lower Sacramento River",
    between(Date, as.Date("2018-11-01"), as.Date("2019-03-31"))
  ) %>%
  arrange(Date)
## # A tibble: 10 × 10
##    Parameter  Station SubRegion YearAdj Season Date       Sign  Result ModZscore
##    <chr>      <chr>   <chr>       <dbl> <chr>  <date>     <chr>  <dbl>     <dbl>
##  1 DissNitra… EMP D4  Lower Sa…    2018 Fall   2018-11-13 =      0.429     0.496
##  2 DissNitra… USGS_S… Lower Sa…    2019 Winter 2018-12-05 =      0.477     0.715
##  3 DissNitra… EMP D4  Lower Sa…    2019 Winter 2018-12-11 =      0.53      0.955
##  4 DissNitra… EMP EZ6 Lower Sa…    2019 Winter 2018-12-11 =      0.532     0.964
##  5 DissNitra… EMP D4  Lower Sa…    2019 Winter 2019-01-10 =      6.07     26.2  
##  6 DissNitra… EMP EZ2 Lower Sa…    2019 Winter 2019-01-10 =      0.53      0.955
##  7 DissNitra… EMP D4  Lower Sa…    2019 Winter 2019-02-12 =      0.44      0.546
##  8 DissNitra… USGS_S… Lower Sa…    2019 Winter 2019-02-20 =      0.277     0.195
##  9 DissNitra… EMP D4  Lower Sa…    2019 Spring 2019-03-12 =      0.27      0.227
## 10 DissNitra… USGS_S… Lower Sa…    2019 Spring 2019-03-20 =      0.188     0.601
## # ℹ 1 more variable: ModZscore_flag <lgl>

After inspecting the data flagged by the modified Z-score test, the DissAmmonia values appear to be valid based on best professional judgment, so we will only exclude the DissNitrateNitrite values flagged by the modified Z-score test.

# Keep every DissAmmonia record regardless of its modified Z-score flag, drop
# the flagged records of the other parameters, then remove the columns whose
# names start with "ModZscore"
df_nutr_c8 <- df_nutr_modzscore_flag %>%
  mutate(
    ModZscore_flag = if_else(Parameter != "DissAmmonia", ModZscore_flag, FALSE)
  ) %>%
  filter(!ModZscore_flag) %>%
  select(-starts_with("ModZscore"))

Calculate Averages

Finally, we’ll calculate seasonal-regional averages for each adjusted water year for each nutrient parameter. Before calculating the averages, we will need to replace values measured below the analytical reporting limit with a random number drawn from a uniform distribution between zero and the reporting limit.

# Attach the columns of `df_regions` by SubRegion and reorder columns so that
# Region sits just before SubRegion and Parameter just before Sign; this raw
# nutrient data object is kept for export
raw_nutr <- df_nutr_c8 %>%
  left_join(df_regions, by = "SubRegion") %>%
  relocate(Region, .before = SubRegion) %>%
  relocate(Parameter, .before = Sign)

# For each Parameter separately: substitute the <RL values with
# replace_blw_rl(), then compute seasonal-regional averages with
# calc_seas_reg_avg(). Finally spread the Parameters into one column each.
df_nutr_avg <- raw_nutr %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(
    df_data = map(df_data, \(df) calc_seas_reg_avg(replace_blw_rl(df)))
  ) %>%
  unnest(cols = df_data) %>%
  pivot_wider(names_from = Parameter, values_from = Result)

# Start from the full Year-Season-Region table so that every combination is
# represented, then add the Year Type info and the nutrient averages. No `by`
# is given, so each join uses the columns the two tables have in common.
lt_avg_nutr <- df_yr_seas_reg %>%
  left_join(df_yr_type) %>%
  left_join(df_nutr_avg) %>%
  # Remove Suisun Marsh Region since all values are NA
  filter(Region != "Suisun Marsh")

Chlorophyll

Lastly, we’ll process the discrete Chlorophyll data.

# Prepare the Chlorophyll data for continued processing
df_chla_c1 <- df_dwq_lt_filt %>%
  # Station/time identifiers plus every column whose name contains "Chlorophyll"
  select(
    Source, Station, Latitude, Longitude, SubRegion,
    YearAdj, Month, Season, Date, Datetime,
    contains("Chlorophyll")
  ) %>%
  # Records without a Chlorophyll value are not needed
  drop_na(Chlorophyll) %>%
  # A missing Chlorophyll_Sign is recorded as "="
  mutate(Chlorophyll_Sign = coalesce(Chlorophyll_Sign, "=")) %>%
  # Reduce to one sample per station-day
  filt_daily_dups() %>%
  # Generic Result/Sign names are what the later functions work with
  rename(Result = Chlorophyll, Sign = Chlorophyll_Sign)

# Sanity check: list any Source-Station-Date combination that still has two or
# more samples (an empty result means no duplicates remain)
df_chla_c1 %>%
  count(Source, Station, Date) %>%
  filter(n >= 2)
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>

Temporal Coverage

Now let’s take a closer look at the temporal data coverage for each Station.

Sampling Effort by Station

# Build one sampling-effort-by-station plot for each Source
ndf_chla_se_sta_plt <- df_chla_c1 %>%
  nest(.by = Source, .key = "df_data") %>%
  mutate(plt = map(df_data, \(df) plot_samp_effort_sta(df)))

EMP

NCRO

USGS_CAWSC

USGS_SFBS

Remove Sparse Surveys

For the USGS-CAWSC survey, chlorophyll data is only available from 2015-2021 for most of the stations, so we’ll exclude this survey from the chlorophyll analyses.

# Exclude the USGS_CAWSC survey from the Chlorophyll data
df_chla_c2 <- filter(df_chla_c1, Source != "USGS_CAWSC")

Filter Subregions

Not all of the subregions were sampled consistently from 1975-2021. To make sure that we only include the subregions that were sampled adequately, we will require that a subregion has data for at least 75% of the 47 years from 1975 to 2021 (35 years) for each season.

# SubRegions in which every season has Chlorophyll data in at least
# `num_yrs_threshold` distinct adjusted years. The shared threshold object is
# used (rather than a hard-coded number of years) so that this filter stays
# consistent with the nutrient subregion filter, which uses the same object.
df_chla_subreg_seas <- df_chla_c2 %>%
  distinct(SubRegion, YearAdj, Season) %>%
  count(SubRegion, Season, name = "NumYears") %>%
  group_by(SubRegion) %>%
  # drop the whole SubRegion when its sparsest season is below the threshold
  filter(min(NumYears) >= num_yrs_threshold) %>%
  ungroup() %>%
  # make sure each season meets the threshold for each SubRegion
  count(SubRegion) %>%
  filter(n == 4)

# Keep only the Chlorophyll records from the SubRegions that passed
df_chla_c3 <- df_chla_c2 %>%
  filter(SubRegion %in% df_chla_subreg_seas$SubRegion)

View Results

Let’s take a look at the sampling effort for the remaining subregions for each season after filtering.

# Sampling effort by SubRegion for the subregion-filtered Chlorophyll data
df_chla_c3 %>% plot_samp_effort_subreg()

Remove Outliers

First, we’ll look at the min-max ranges for Chlorophyll, to see if there are any obvious outliers to exclude from the data set.

# Min, quartiles, mean, and max of the Chlorophyll results
summary(df_chla_c3[["Result"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.020   1.540   2.690   7.946   5.980 364.330

Let’s take a closer look at the maximum Chlorophyll values to see if we should omit them from the data set.

# The 20 highest Chlorophyll results (ties included), shown without the
# `vars_rm_view` columns
df_chla_c3 %>%
  select(-any_of(vars_rm_view)) %>%
  slice_max(order_by = Result, n = 20)
## # A tibble: 20 × 7
##    Station                      SubRegion YearAdj Season Date       Sign  Result
##    <chr>                        <chr>       <dbl> <chr>  <date>     <chr>  <dbl>
##  1 EMP P12                      Grant Li…    1976 Spring 1976-05-07 =       364.
##  2 EMP P12                      Grant Li…    1977 Spring 1977-05-09 =       352.
##  3 NCRO Middle River at Undine… Grant Li…    2008 Summer 2008-06-04 =       340.
##  4 NCRO Old River below Headwa… Grant Li…    2009 Summer 2009-07-14 =       325 
##  5 EMP P12                      Grant Li…    1976 Summer 1976-06-04 =       297.
##  6 NCRO Old River near Head     Grant Li…    2003 Summer 2003-08-20 =       288 
##  7 EMP P12                      Grant Li…    1976 Spring 1976-05-21 =       278.
##  8 NCRO Middle River at Undine… Grant Li…    2008 Summer 2008-06-11 =       276.
##  9 EMP P12                      Grant Li…    1977 Spring 1977-04-26 =       275.
## 10 NCRO Doughty Cut near Grant… Grant Li…    2007 Summer 2007-07-03 =       269 
## 11 NCRO Old River below Headwa… Grant Li…    2007 Summer 2007-07-03 =       268 
## 12 NCRO Old River below Headwa… Grant Li…    2007 Summer 2007-08-01 =       267 
## 13 EMP P12                      Grant Li…    1976 Summer 1976-06-18 =       266.
## 14 EMP P12                      Grant Li…    1977 Summer 1977-06-23 =       266.
## 15 NCRO Middle River at Undine… Grant Li…    2007 Summer 2007-07-03 =       265 
## 16 NCRO Middle River at Undine… Grant Li…    2009 Summer 2009-06-24 =       264 
## 17 EMP P12                      Grant Li…    1977 Spring 1977-04-11 =       262.
## 18 NCRO Doughty Cut near Grant… Grant Li…    2009 Summer 2009-06-25 =       256 
## 19 NCRO Grantline Canal above … Grant Li…    2009 Summer 2009-06-25 =       244 
## 20 NCRO Doughty Cut near Grant… Grant Li…    2010 Summer 2010-07-15 =       241

The maximum Chlorophyll values appear to be valid. Next, we’ll look for values that are less than the reporting limit with reporting limits that are very high compared to the range of the values for the parameter (> 75th percentile).

# Apply flag_high_rl() to the Chlorophyll data with perc_thresh = 0.75
df_chla_high_rl_flag <- df_chla_c3 %>% flag_high_rl(perc_thresh = 0.75)

# View flagged data points
filter(df_chla_high_rl_flag, HighRL_flag)
## # A tibble: 0 × 13
## # ℹ 13 variables: Source <chr>, Station <chr>, Latitude <dbl>, Longitude <dbl>,
## #   SubRegion <chr>, YearAdj <dbl>, Month <dbl>, Season <chr>, Date <date>,
## #   Datetime <dttm>, Sign <chr>, Result <dbl>, HighRL_flag <lgl>

None of the Chlorophyll values less than the reporting limit have RL values that are greater than the 75th percentile of the data. Next, we’ll look for outliers by using a modified Z-score test flagging data points with scores greater than 15 grouped by subregion.

# Run flag_modzscore() with threshold = 15 within each SubRegion group, then
# remove the grouping
df_chla_modzscore_flag <- df_chla_c3 %>%
  group_by(SubRegion) %>%
  flag_modzscore(threshold = 15) %>%
  ungroup()

# Narrower copy for printing (without the `vars_rm_view` columns)
df_chla_modzscore_flag_view <- df_chla_modzscore_flag %>%
  select(-any_of(vars_rm_view))

# Flagged data points, highest Result first within each SubRegion (prints up to
# 180 rows)
df_chla_modzscore_flag_view %>%
  filter(ModZscore_flag) %>%
  arrange(SubRegion, desc(Result)) %>%
  print(n = 180)
## # A tibble: 176 × 9
##     Station           SubRegion YearAdj Season Date       Sign  Result ModZscore
##     <chr>             <chr>       <dbl> <chr>  <date>     <chr>  <dbl>     <dbl>
##   1 USGS_SFBS 3       Confluen…    2016 Spring 2016-05-18 =       81.3      35.9
##   2 USGS_SFBS 2       Confluen…    2016 Spring 2016-05-18 =       64.5      28.3
##   3 EMP D10           Confluen…    1979 Summer 1979-07-06 =       49.4      21.4
##   4 EMP D10           Confluen…    1982 Summer 1982-07-15 =       46.5      20.1
##   5 EMP D10           Confluen…    1979 Summer 1979-08-17 =       46.3      20.0
##   6 EMP D10           Confluen…    1978 Summer 1978-08-24 =       43.8      18.9
##   7 EMP D10           Confluen…    1982 Summer 1982-06-30 =       41.8      18.0
##   8 EMP D10           Confluen…    1979 Fall   1979-09-13 =       40.9      17.6
##   9 EMP EZ6           Confluen…    2013 Spring 2013-03-22 =       38.8      16.6
##  10 EMP D10           Confluen…    1979 Summer 1979-07-19 =       37.9      16.2
##  11 NCRO Fisherman's… Franks T…    2015 Spring 2015-04-15 =       83.3      39.8
##  12 EMP D19           Franks T…    1988 Summer 1988-06-08 =       78.7      37.5
##  13 NCRO Fisherman's… Franks T…    2014 Fall   2014-10-01 =       74.2      35.3
##  14 EMP D19           Franks T…    1982 Summer 1982-06-29 =       71.9      34.2
##  15 EMP D19           Franks T…    2016 Spring 2016-05-11 =       62.4      29.5
##  16 EMP D19           Franks T…    1986 Summer 1986-06-18 =       54.6      25.8
##  17 EMP D19           Franks T…    1982 Summer 1982-07-14 =       52.3      24.6
##  18 EMP D19           Franks T…    1993 Spring 1993-05-17 =       48.8      22.9
##  19 EMP D19           Franks T…    1984 Summer 1984-06-06 =       42.0      19.6
##  20 NCRO False River… Franks T…    2015 Spring 2015-04-15 =       42.0      19.5
##  21 NCRO False River… Franks T…    2015 Spring 2015-04-01 =       40.4      18.8
##  22 EMP D19           Franks T…    1985 Spring 1985-05-14 =       40.3      18.7
##  23 EMP D19           Franks T…    1976 Spring 1976-04-07 =       39.4      18.3
##  24 EMP D19           Franks T…    1981 Spring 1981-05-13 =       38.4      17.8
##  25 EMP D19           Franks T…    1976 Spring 1976-03-25 =       37.0      17.1
##  26 EMP D19           Franks T…    1976 Spring 1976-04-21 =       37.0      17.1
##  27 EMP P12           Grant Li…    1976 Spring 1976-05-07 =      364.       33.0
##  28 EMP P12           Grant Li…    1977 Spring 1977-05-09 =      352.       31.9
##  29 NCRO Middle Rive… Grant Li…    2008 Summer 2008-06-04 =      340.       30.7
##  30 NCRO Old River b… Grant Li…    2009 Summer 2009-07-14 =      325        29.4
##  31 EMP P12           Grant Li…    1976 Summer 1976-06-04 =      297.       26.8
##  32 NCRO Old River n… Grant Li…    2003 Summer 2003-08-20 =      288        26.0
##  33 EMP P12           Grant Li…    1976 Spring 1976-05-21 =      278.       25.0
##  34 NCRO Middle Rive… Grant Li…    2008 Summer 2008-06-11 =      276.       24.8
##  35 EMP P12           Grant Li…    1977 Spring 1977-04-26 =      275.       24.8
##  36 NCRO Doughty Cut… Grant Li…    2007 Summer 2007-07-03 =      269        24.2
##  37 NCRO Old River b… Grant Li…    2007 Summer 2007-07-03 =      268        24.1
##  38 NCRO Old River b… Grant Li…    2007 Summer 2007-08-01 =      267        24.0
##  39 EMP P12           Grant Li…    1976 Summer 1976-06-18 =      266.       23.9
##  40 EMP P12           Grant Li…    1977 Summer 1977-06-23 =      266.       23.9
##  41 NCRO Middle Rive… Grant Li…    2007 Summer 2007-07-03 =      265        23.8
##  42 NCRO Middle Rive… Grant Li…    2009 Summer 2009-06-24 =      264        23.8
##  43 EMP P12           Grant Li…    1977 Spring 1977-04-11 =      262.       23.6
##  44 NCRO Doughty Cut… Grant Li…    2009 Summer 2009-06-25 =      256        23.0
##  45 NCRO Grantline C… Grant Li…    2009 Summer 2009-06-25 =      244        21.9
##  46 NCRO Doughty Cut… Grant Li…    2010 Summer 2010-07-15 =      241        21.6
##  47 NCRO Middle Rive… Grant Li…    2007 Summer 2007-07-19 =      239        21.4
##  48 NCRO Old River b… Grant Li…    2004 Summer 2004-07-13 =      239        21.4
##  49 NCRO Old River b… Grant Li…    2008 Summer 2008-06-11 =      239.       21.4
##  50 NCRO Doughty Cut… Grant Li…    2016 Summer 2016-07-28 =      237.       21.2
##  51 NCRO Middle Rive… Grant Li…    2008 Summer 2008-06-24 =      236.       21.2
##  52 NCRO Doughty Cut… Grant Li…    2009 Summer 2009-07-14 =      233        20.9
##  53 NCRO Old River b… Grant Li…    2009 Summer 2009-06-25 =      232        20.8
##  54 NCRO Grant Ln Ca… Grant Li…    2009 Summer 2009-06-24 =      224        20.1
##  55 NCRO Old River b… Grant Li…    2010 Summer 2010-07-15 =      223        20.0
##  56 NCRO Old River @… Grant Li…    2004 Summer 2004-07-13 =      222        19.9
##  57 NCRO Old River @… Grant Li…    2007 Spring 2007-04-10 =      221        19.8
##  58 EMP P12           Grant Li…    1977 Summer 1977-07-07 =      219.       19.6
##  59 NCRO Middle Rive… Grant Li…    2003 Summer 2003-08-19 =      219        19.6
##  60 NCRO Old River b… Grant Li…    2003 Summer 2003-08-19 =      218        19.5
##  61 NCRO Doughty Cut… Grant Li…    2003 Summer 2003-08-19 =      217        19.4
##  62 NCRO Old River b… Grant Li…    2012 Summer 2012-07-06 =      216        19.3
##  63 EMP P12           Grant Li…    1976 Summer 1976-08-02 =      213.       19.1
##  64 NCRO Old River b… Grant Li…    2004 Summer 2004-06-29 =      212        19.0
##  65 NCRO Old River a… Grant Li…    2013 Summer 2013-07-05 =      210.       18.8
##  66 NCRO Old River a… Grant Li…    2009 Summer 2009-06-23 =      209        18.7
##  67 NCRO Old River @… Grant Li…    2008 Summer 2008-07-23 =      208.       18.6
##  68 EMP P12           Grant Li…    1977 Spring 1977-05-25 =      204.       18.2
##  69 NCRO Doughty Cut… Grant Li…    2016 Summer 2016-06-21 =      193.       17.2
##  70 NCRO Old River a… Grant Li…    2007 Spring 2007-04-24 =      193        17.2
##  71 NCRO Middle Rive… Grant Li…    2004 Summer 2004-07-13 =      192        17.1
##  72 NCRO Grantline C… Grant Li…    2008 Summer 2008-06-11 =      192.       17.1
##  73 NCRO Grantline C… Grant Li…    2007 Summer 2007-07-06 =      191        17.0
##  74 NCRO Middle Rive… Grant Li…    2016 Summer 2016-06-17 =      191.       17.0
##  75 NCRO Old River b… Grant Li…    2008 Summer 2008-06-25 =      190.       16.9
##  76 NCRO Doughty Cut… Grant Li…    2008 Summer 2008-06-11 =      187.       16.7
##  77 NCRO Old River @… Grant Li…    2009 Winter 2008-12-03 =      186.       16.6
##  78 NCRO Doughty Cut… Grant Li…    2004 Summer 2004-07-13 =      186        16.6
##  79 EMP P12           Grant Li…    1977 Summer 1977-06-07 =      185.       16.5
##  80 NCRO Doughty Cut… Grant Li…    2009 Summer 2009-06-24 =      185        16.5
##  81 NCRO Grantline C… Grant Li…    2007 Summer 2007-07-03 =      185        16.5
##  82 NCRO Old River b… Grant Li…    2008 Summer 2008-07-23 =      184.       16.4
##  83 NCRO Middle Rive… Grant Li…    2007 Summer 2007-06-29 =      184        16.4
##  84 NCRO Old River a… Grant Li…    2008 Summer 2008-06-04 =      184.       16.3
##  85 NCRO Grant Ln Ca… Grant Li…    2008 Summer 2008-06-11 =      178        15.8
##  86 NCRO Old River a… Grant Li…    2014 Fall   2014-10-02 =      176.       15.7
##  87 NCRO Grantline C… Grant Li…    2010 Summer 2010-07-15 =      176        15.6
##  88 NCRO Grant Line … Grant Li…    2016 Summer 2016-06-21 =      174.       15.4
##  89 NCRO Middle Rive… Grant Li…    2016 Summer 2016-07-12 =      171.       15.2
##  90 NCRO Middle Rive… Grant Li…    2003 Summer 2003-08-29 =      170        15.1
##  91 EMP D7            Grizzly …    1980 Summer 1980-07-17 =       68.8      29.5
##  92 EMP D7            Grizzly …    1978 Fall   1978-09-27 =       60.5      25.8
##  93 EMP D7            Grizzly …    1978 Fall   1978-10-12 =       58.7      25.0
##  94 EMP D7            Grizzly …    1980 Summer 1980-08-06 =       45.8      19.4
##  95 EMP D7            Grizzly …    1979 Summer 1979-06-19 =       44.5      18.8
##  96 EMP D7            Grizzly …    1978 Fall   1978-09-13 =       43.2      18.2
##  97 EMP D7            Grizzly …    1979 Summer 1979-06-07 =       42.6      17.9
##  98 EMP D7            Grizzly …    1984 Fall   1984-09-05 =       41.4      17.4
##  99 EMP D7            Grizzly …    1980 Fall   1980-09-04 =       40.1      16.8
## 100 EMP D7            Grizzly …    1979 Summer 1979-07-06 =       38.9      16.3
## 101 EMP D9            Honker B…    1979 Summer 1979-07-06 =       64.2      21.6
## 102 EMP D9            Honker B…    1982 Summer 1982-07-14 =       55.9      18.7
## 103 EMP D9            Honker B…    1978 Summer 1978-08-23 =       53.1      17.7
## 104 USGS_SFBS 4       Honker B…    1979 Summer 1979-08-14 =       52.5      17.5
## 105 EMP D9            Honker B…    1982 Summer 1982-06-29 =       47.1      15.6
## 106 USGS_SFBS 4       Honker B…    1979 Fall   1979-09-18 =       46.8      15.5
## 107 USGS_SFBS 649     Lower Sa…    2016 Spring 2016-05-18 =       66.9      25.1
## 108 EMP D4            Lower Sa…    2016 Spring 2016-05-13 =       57.3      21.4
## 109 EMP EZ2           Lower Sa…    2016 Spring 2016-05-13 =       54.2      20.1
## 110 USGS_SFBS 649     Lower Sa…    2013 Spring 2013-03-26 =       51.6      19.1
## 111 EMP D4            Lower Sa…    1982 Fall   1982-10-21 =       45.7      16.8
## 112 EMP D4            Lower Sa…    1982 Summer 1982-07-14 =       44.3      16.3
## 113 EMP D11           Lower Sa…    1993 Spring 1993-05-18 =       42.4      15.6
## 114 EMP D14A          Lower Sa…    1986 Summer 1986-06-18 =       62.4      23.1
## 115 EMP D12           Lower Sa…    2016 Spring 2016-05-11 =       59.2      21.9
## 116 NCRO San Joaquin… Lower Sa…    2010 Spring 2010-05-19 =       55.5      20.5
## 117 EMP D14A          Lower Sa…    1981 Spring 1981-05-14 =       54.8      20.2
## 118 EMP D12           Lower Sa…    1986 Summer 1986-06-18 =       54.8      20.2
## 119 EMP D14A          Lower Sa…    1985 Spring 1985-05-15 =       53.4      19.7
## 120 EMP D12           Lower Sa…    1993 Spring 1993-05-18 =       51.6      19.0
## 121 EMP D14A          Lower Sa…    1993 Spring 1993-05-18 =       45.9      16.8
## 122 EMP D12           Lower Sa…    1985 Spring 1985-05-15 =       44.3      16.2
## 123 EMP D12           Lower Sa…    1982 Summer 1982-07-15 =       42.1      15.3
## 124 EMP D8            Mid Suis…    1985 Summer 1985-07-12 =       49.4      22.4
## 125 USGS_SFBS 5       Mid Suis…    1978 Fall   1978-10-12 =       42        18.9
## 126 USGS_SFBS 5       Mid Suis…    1979 Fall   1979-09-18 =       41.4      18.6
## 127 EMP D8            Mid Suis…    1979 Summer 1979-06-19 =       40.1      18.0
## 128 EMP D8            Mid Suis…    1979 Summer 1979-07-06 =       40.1      18.0
## 129 USGS_SFBS 6       Mid Suis…    1980 Summer 1980-08-19 =       39.1      17.5
## 130 EMP D8            Mid Suis…    1980 Summer 1980-08-21 =       38.3      17.1
## 131 USGS_SFBS 6       Mid Suis…    1978 Fall   1978-10-12 =       38.1      17.0
## 132 EMP D8            Mid Suis…    1978 Summer 1978-08-11 =       36.4      16.2
## 133 USGS_SFBS 6       Mid Suis…    1979 Fall   1979-09-18 =       36.3      16.2
## 134 USGS_SFBS 6       Mid Suis…    1978 Fall   1978-09-20 =       35.9      16.0
## 135 EMP D8            Mid Suis…    1978 Fall   1978-09-28 =       35.2      15.6
## 136 EMP D8            Mid Suis…    1978 Fall   1978-10-13 =       35.2      15.6
## 137 EMP C3            Middle S…    1977 Spring 1977-03-28 =       38.6      20.7
## 138 EMP C3            Middle S…    1977 Spring 1977-04-12 =       30.9      16.3
## 139 EMP D28A          Old River    1986 Summer 1986-06-17 =       65.6      22.7
## 140 EMP D28A          Old River    1985 Spring 1985-05-13 =       50.3      17.2
## 141 EMP D28A          Old River    1982 Summer 1982-06-28 =       50.0      17.1
## 142 EMP D28A          Old River    2016 Spring 2016-05-11 =       49.7      17.0
## 143 EMP D28A          Old River    1976 Spring 1976-03-22 =       47.1      16.1
## 144 EMP D28A          Old River    1984 Fall   1984-10-16 =       47.1      16.1
## 145 EMP D22           Sacramen…    2012 Fall   2012-10-08 =       61.3      27.7
## 146 EMP D22           Sacramen…    2016 Spring 2016-05-13 =       58.8      26.5
## 147 EMP EZ2           Sacramen…    2012 Fall   2012-10-08 =       46.9      20.9
## 148 NCRO Three Mile … Sacramen…    2016 Spring 2016-05-17 =       40.7      18.0
## 149 EMP D26           San Joaq…    1983 Fall   1983-09-28 =      155.       74.3
## 150 EMP D26           San Joaq…    1992 Spring 1992-05-11 =       67.4      31.7
## 151 EMP D26           San Joaq…    2016 Spring 2016-05-12 =       66.9      31.5
## 152 EMP D26           San Joaq…    1993 Spring 1993-05-17 =       47.4      22.0
## 153 EMP D26           San Joaq…    1986 Summer 1986-06-17 =       38.6      17.8
## 154 EMP D26           San Joaq…    1988 Spring 1988-05-25 =       36.1      16.6
## 155 EMP D26           San Joaq…    2015 Spring 2015-04-10 =       34.5      15.8
## 156 EMP D26           San Joaq…    1988 Summer 1988-06-08 =       34.4      15.8
## 157 EMP D26           San Joaq…    1981 Spring 1981-05-13 =       33.0      15.1
## 158 EMP D16           San Joaq…    2016 Spring 2016-05-12 =       72.7      26.2
## 159 EMP D15           San Joaq…    1986 Summer 1986-06-18 =       71.9      25.9
## 160 EMP D16           San Joaq…    1993 Spring 1993-05-17 =       53.2      19.0
## 161 EMP D15           San Joaq…    1981 Spring 1981-05-14 =       48.6      17.3
## 162 EMP D15           San Joaq…    1985 Spring 1985-05-15 =       47.1      16.7
## 163 EMP D16           San Joaq…    1986 Summer 1986-06-17 =       46.8      16.6
## 164 EMP D16           San Joaq…    1988 Summer 1988-06-08 =       46        16.3
## 165 EMP D16           San Joaq…    1985 Spring 1985-05-14 =       45.7      16.2
## 166 EMP D15           San Joaq…    1993 Spring 1993-05-18 =       44.0      15.5
## 167 EMP P8            San Joaq…    1976 Spring 1976-05-06 =      130.       24.5
## 168 EMP C9            Victoria…    1985 Spring 1985-05-09 =       90.5      36.8
## 169 NCRO Middle Rive… Victoria…    2007 Fall   2007-11-27 =       58        23.3
## 170 NCRO Middle Rive… Victoria…    2014 Fall   2014-10-02 =       50.9      20.3
## 171 EMP C9            Victoria…    1981 Spring 1981-05-12 =       45.4      18.0
## 172 EMP C9            Victoria…    1984 Summer 1984-06-05 =       41.2      16.3
## 173 NCRO Middle Rive… Victoria…    2013 Spring 2013-05-01 =       38.7      15.2
## 174 EMP C9            Victoria…    1976 Spring 1976-05-07 =       38.6      15.2
## 175 USGS_SFBS 7       West Sui…    1979 Summer 1979-07-10 =       32.3      17.7
## 176 USGS_SFBS 7       West Sui…    1978 Fall   1978-09-20 =       31.4      17.1
## # ℹ 1 more variable: ModZscore_flag <lgl>
# Chlorophyll records for the San Joaquin River at Prisoners Pt subregion in
# adjusted years 1982-1984, printed in chronological order
df_chla_modzscore_flag_view %>%
  filter(SubRegion == "San Joaquin River at Prisoners Pt") %>%
  filter(YearAdj %in% 1982:1984) %>%
  arrange(Date) %>%
  # n = 50 is large enough to print every matching row
  print(n = 50)
## # A tibble: 43 × 9
##    Station SubRegion            YearAdj Season Date       Sign  Result ModZscore
##    <chr>   <chr>                  <dbl> <chr>  <date>     <chr>  <dbl>     <dbl>
##  1 EMP D26 San Joaquin River a…    1982 Winter 1981-12-09 =       0.93    0.445 
##  2 EMP D26 San Joaquin River a…    1982 Winter 1982-02-18 =       1.85    0     
##  3 EMP D26 San Joaquin River a…    1982 Spring 1982-03-03 =       1.08    0.373 
##  4 EMP D26 San Joaquin River a…    1982 Spring 1982-03-17 =       1.7     0.0726
##  5 EMP D26 San Joaquin River a…    1982 Spring 1982-04-15 =       2.01    0.0774
##  6 EMP D26 San Joaquin River a…    1982 Spring 1982-05-18 =       3.09    0.600 
##  7 EMP D26 San Joaquin River a…    1982 Summer 1982-06-15 =       6.48    2.24  
##  8 EMP D26 San Joaquin River a…    1982 Summer 1982-06-29 =      20.7     9.12  
##  9 EMP D26 San Joaquin River a…    1982 Summer 1982-07-14 =      10.3     4.11  
## 10 EMP D26 San Joaquin River a…    1982 Summer 1982-07-28 =       3.09    0.600 
## 11 EMP D26 San Joaquin River a…    1982 Summer 1982-08-11 =       9.11    3.51  
## 12 EMP D26 San Joaquin River a…    1982 Summer 1982-08-25 =      20.8     9.19  
## 13 EMP D26 San Joaquin River a…    1982 Fall   1982-09-13 =       6.18    2.10  
## 14 EMP D26 San Joaquin River a…    1982 Fall   1982-09-22 =      13.1     5.45  
## 15 EMP D26 San Joaquin River a…    1982 Fall   1982-10-07 =       4.48    1.27  
## 16 EMP D26 San Joaquin River a…    1982 Fall   1982-10-21 =       5.56    1.80  
## 17 EMP D26 San Joaquin River a…    1982 Fall   1982-11-09 =       1.09    0.368 
## 18 EMP D26 San Joaquin River a…    1983 Winter 1982-12-08 =       1.08    0.373 
## 19 EMP D26 San Joaquin River a…    1983 Winter 1983-01-26 =       2.32    0.227 
## 20 EMP D26 San Joaquin River a…    1983 Winter 1983-02-23 =       1.08    0.373 
## 21 EMP D26 San Joaquin River a…    1983 Spring 1983-03-23 =       2.32    0.227 
## 22 EMP D26 San Joaquin River a…    1983 Spring 1983-04-06 =       3.86    0.973 
## 23 EMP D26 San Joaquin River a…    1983 Spring 1983-05-04 =       4.01    1.05  
## 24 EMP D26 San Joaquin River a…    1983 Summer 1983-06-16 =       1.39    0.223 
## 25 EMP D26 San Joaquin River a…    1983 Summer 1983-07-19 =       2.16    0.150 
## 26 EMP D26 San Joaquin River a…    1983 Summer 1983-08-17 =       2.78    0.450 
## 27 EMP D26 San Joaquin River a…    1983 Fall   1983-09-28 =     155.     74.3   
## 28 EMP D26 San Joaquin River a…    1983 Fall   1983-10-12 =       0.77    0.523 
## 29 EMP D26 San Joaquin River a…    1983 Fall   1983-11-09 =       0.31    0.745 
## 30 EMP D26 San Joaquin River a…    1984 Winter 1983-12-14 =       0.15    0.823 
## 31 EMP D26 San Joaquin River a…    1984 Winter 1984-01-25 =       2.47    0.300 
## 32 EMP D26 San Joaquin River a…    1984 Winter 1984-02-09 =       0.15    0.823 
## 33 EMP D26 San Joaquin River a…    1984 Spring 1984-04-11 =       0.15    0.823 
## 34 EMP D26 San Joaquin River a…    1984 Spring 1984-05-09 =       4.48    1.27  
## 35 EMP D26 San Joaquin River a…    1984 Summer 1984-06-06 =      24.6    11.0   
## 36 EMP D26 San Joaquin River a…    1984 Summer 1984-07-09 =       2.32    0.227 
## 37 EMP D26 San Joaquin River a…    1984 Summer 1984-07-19 =       2.79    0.455 
## 38 EMP D26 San Joaquin River a…    1984 Summer 1984-08-06 =       6.95    2.47  
## 39 EMP D26 San Joaquin River a…    1984 Summer 1984-08-20 =      17.3     7.47  
## 40 EMP D26 San Joaquin River a…    1984 Fall   1984-09-05 =      11.9     4.86  
## 41 EMP D26 San Joaquin River a…    1984 Fall   1984-09-19 =       7.26    2.62  
## 42 EMP D26 San Joaquin River a…    1984 Fall   1984-10-03 =       5.56    1.80  
## 43 EMP D26 San Joaquin River a…    1984 Fall   1984-11-01 =       5.87    1.95  
## # ℹ 1 more variable: ModZscore_flag <lgl>
# Chlorophyll records for the San Joaquin River near Stockton subregion in
# adjusted years 1975-1977, printed in chronological order
df_chla_modzscore_flag_view %>%
  filter(SubRegion == "San Joaquin River near Stockton") %>%
  filter(YearAdj %in% 1975:1977) %>%
  arrange(Date) %>%
  # n = 60 is large enough to print every matching row
  print(n = 60)
## # A tibble: 57 × 9
##    Station SubRegion            YearAdj Season Date       Sign  Result ModZscore
##    <chr>   <chr>                  <dbl> <chr>  <date>     <chr>  <dbl>     <dbl>
##  1 EMP P8  San Joaquin River n…    1975 Winter 1975-02-03 =      20.4     3.09  
##  2 EMP P8  San Joaquin River n…    1975 Spring 1975-03-18 =       7.41    0.543 
##  3 EMP P8  San Joaquin River n…    1975 Spring 1975-04-01 =      12.4     1.51  
##  4 EMP P8  San Joaquin River n…    1975 Spring 1975-04-16 =      20.1     3.03  
##  5 EMP P8  San Joaquin River n…    1975 Spring 1975-05-01 =      40.1     6.96  
##  6 EMP P8  San Joaquin River n…    1975 Spring 1975-05-15 =      40.9     7.11  
##  7 EMP P8  San Joaquin River n…    1975 Summer 1975-06-03 =      34.0     5.75  
##  8 EMP P8  San Joaquin River n…    1975 Summer 1975-06-17 =      15.4     2.12  
##  9 EMP P8  San Joaquin River n…    1975 Summer 1975-07-01 =      13.1     1.66  
## 10 EMP P8  San Joaquin River n…    1975 Summer 1975-07-15 =      40.9     7.11  
## 11 EMP P8  San Joaquin River n…    1975 Summer 1975-08-12 =      67.9    12.4   
## 12 EMP P8  San Joaquin River n…    1975 Summer 1975-08-25 =      37.0     6.36  
## 13 EMP P8  San Joaquin River n…    1975 Fall   1975-09-11 =      34.0     5.75  
## 14 EMP P8  San Joaquin River n…    1975 Fall   1975-09-26 =      17.8     2.57  
## 15 EMP P8  San Joaquin River n…    1975 Fall   1975-10-09 =      42.4     7.41  
## 16 EMP P8  San Joaquin River n…    1975 Fall   1975-10-23 =      17.6     2.54  
## 17 EMP P8  San Joaquin River n…    1975 Fall   1975-11-26 =      20.7     3.15  
## 18 EMP P8  San Joaquin River n…    1976 Winter 1975-12-23 =       2.28    0.463 
## 19 EMP P8  San Joaquin River n…    1976 Winter 1976-01-22 =      15.8     2.18  
## 20 EMP P8  San Joaquin River n…    1976 Winter 1976-02-19 =       3.83    0.159 
## 21 EMP P8  San Joaquin River n…    1976 Spring 1976-03-09 =      12.4     1.51  
## 22 EMP P8  San Joaquin River n…    1976 Spring 1976-03-22 =      20.8     3.18  
## 23 EMP P8  San Joaquin River n…    1976 Spring 1976-04-06 =      56.4    10.1   
## 24 EMP P8  San Joaquin River n…    1976 Spring 1976-04-20 =      74.1    13.6   
## 25 EMP P8  San Joaquin River n…    1976 Spring 1976-05-06 =     130.     24.5   
## 26 EMP P8  San Joaquin River n…    1976 Spring 1976-05-20 =      40.9     7.11  
## 27 EMP P8  San Joaquin River n…    1976 Summer 1976-06-03 =      44       7.72  
## 28 EMP P8  San Joaquin River n…    1976 Summer 1976-06-22 =      13.1     1.66  
## 29 EMP P8  San Joaquin River n…    1976 Summer 1976-07-07 =      21.6     3.33  
## 30 EMP P8  San Joaquin River n…    1976 Summer 1976-07-20 =      20.1     3.03  
## 31 EMP P8  San Joaquin River n…    1976 Summer 1976-08-03 =       7.72    0.604 
## 32 EMP P8  San Joaquin River n…    1976 Summer 1976-08-27 =       9.11    0.877 
## 33 EMP P8  San Joaquin River n…    1976 Fall   1976-09-14 =      11.7     1.39  
## 34 EMP P8  San Joaquin River n…    1976 Fall   1976-09-27 =      11.0     1.24  
## 35 EMP P8  San Joaquin River n…    1976 Fall   1976-10-13 =      32.4     5.45  
## 36 EMP P8  San Joaquin River n…    1976 Fall   1976-10-28 =      21.6     3.33  
## 37 EMP P8  San Joaquin River n…    1976 Fall   1976-11-10 =      20.1     3.03  
## 38 EMP P8  San Joaquin River n…    1977 Winter 1976-12-14 =       3.89    0.147 
## 39 EMP P8  San Joaquin River n…    1977 Winter 1977-01-25 =       4.2     0.0863
## 40 EMP P8  San Joaquin River n…    1977 Winter 1977-02-23 =       5.87    0.241 
## 41 EMP P8  San Joaquin River n…    1977 Spring 1977-03-10 =       5.56    0.180 
## 42 EMP P8  San Joaquin River n…    1977 Spring 1977-03-28 =       8.03    0.665 
## 43 EMP P8  San Joaquin River n…    1977 Spring 1977-04-12 =      40.9     7.11  
## 44 EMP P8  San Joaquin River n…    1977 Spring 1977-04-26 =      27.8     4.54  
## 45 EMP P8  San Joaquin River n…    1977 Spring 1977-05-10 =      27.0     4.39  
## 46 EMP P8  San Joaquin River n…    1977 Spring 1977-05-25 =      12.8     1.60  
## 47 EMP P8  San Joaquin River n…    1977 Summer 1977-06-06 =       8.65    0.786 
## 48 EMP P8  San Joaquin River n…    1977 Summer 1977-06-23 =       7.26    0.514 
## 49 EMP P8  San Joaquin River n…    1977 Summer 1977-07-08 =      10.3     1.12  
## 50 EMP P8  San Joaquin River n…    1977 Summer 1977-07-21 =      12.8     1.60  
## 51 EMP P8  San Joaquin River n…    1977 Summer 1977-08-09 =      14.7     1.97  
## 52 EMP P8  San Joaquin River n…    1977 Summer 1977-08-22 =       7.72    0.604 
## 53 EMP P8  San Joaquin River n…    1977 Fall   1977-09-02 =       3.24    0.275 
## 54 EMP P8  San Joaquin River n…    1977 Fall   1977-09-20 =      10.8     1.21  
## 55 EMP P8  San Joaquin River n…    1977 Fall   1977-10-04 =      12.8     1.60  
## 56 EMP P8  San Joaquin River n…    1977 Fall   1977-10-18 =       8.18    0.694 
## 57 EMP P8  San Joaquin River n…    1977 Fall   1977-11-15 =      15.8     2.18  
## # ℹ 1 more variable: ModZscore_flag <lgl>

After inspecting the data flagged by the modified Z-score test, the Chlorophyll values appear to be valid based on best professional judgment, so we won’t exclude any of the flagged values.

Calculate Averages

Finally, we’ll calculate seasonal-regional averages for each adjusted water year. Before calculating the averages, we need to replace each value measured below the analytical reporting limit with a random number drawn from a uniform distribution between zero and the reporting limit.

# Attach the Region for each SubRegion and label the parameter; this raw
# chlorophyll data frame is kept as its own object so it can be exported
raw_chla <- df_chla_c3 %>%
  left_join(df_regions, by = "SubRegion") %>%
  relocate(Region, .before = SubRegion) %>%
  mutate(Parameter = "Chlorophyll", .before = Sign)

# Substitute random numbers from a uniform distribution for the <RL values,
# then calculate the seasonal-regional averages
df_chla_avg <- raw_chla %>%
  replace_blw_rl() %>%
  calc_seas_reg_avg() %>%
  rename(Chlorophyll = Result)

# Left-join onto the Year-Season-Region combinations so that each one is
# represented, adding the Year Type info along the way. The join keys are left
# for dplyr to infer from the shared column names.
lt_avg_chla <- df_yr_seas_reg %>%
  left_join(df_yr_type) %>%
  left_join(df_chla_avg) %>%
  # All values in the Suisun Marsh Region are NA, so drop that Region
  filter(Region != "Suisun Marsh")

Summarize Reporting Limits

Create a summary table of the reporting limits in the final QC’ed nutrient and chlorophyll raw data for the Supplemental Information.

# Summary of reporting limit (RL) values in the raw nutrient and chlorophyll
# data: one row per survey, parameter, and RL value with its record count
df_rl_vals_nutr_chla <- list(raw_nutr, raw_chla) %>%
  bind_rows() %>%
  # Keep only the records flagged with a "<" sign
  filter(Sign == "<") %>%
  # Count the flagged records for each Source, Parameter, and Result value
  count(Source, Parameter, Result, name = "Num_blw_RL") %>%
  # Bring in the parameter and survey names from their crosswalk tables
  left_join(df_param_cw, by = "Parameter") %>%
  left_join(df_survey_cw, by = "Source") %>%
  # Keep only the summary columns under their final names
  transmute(
    Survey = Survey_name,
    Parameter = Parameter_publ,
    # Store RL values as text so Excel won't convert them to scientific
    # notation
    RL = format(Result, drop0trailing = TRUE),
    Num_blw_RL
  ) %>%
  arrange(Survey, Parameter, RL)

Export Data

Export the raw data for all parameters as .qs files, and export the long-term average data as both .csv and .rds files for the analyses. Also export the summary table of the reporting limits as a .csv file.

# Gather the raw data frames into a list; lst() names each element after the
# object supplied, and those names become the output file names
ls_data_raw <- lst(raw_wq_meas, raw_nutr, raw_chla)

# Write each raw data frame to data/interim as <name>.qs
iwalk(
  ls_data_raw,
  \(df, nm) qsave(df, file = here("data/interim", paste0(nm, ".qs")))
)

# Gather the long-term average data frames into a named list the same way
ls_data_lt_avg <- lst(lt_avg_wq_meas, lt_avg_nutr, lt_avg_chla)

# Write each long-term average data frame to data/processed/wq as <name>.csv
iwalk(
  ls_data_lt_avg,
  \(df, nm) {
    write_csv(df, file = here("data/processed/wq", paste0(nm, ".csv")))
  }
)

# Write each long-term average data frame to data/processed/wq as <name>.rds
iwalk(
  ls_data_lt_avg,
  \(df, nm) {
    saveRDS(df, file = here("data/processed/wq", paste0(nm, ".rds")))
  }
)

# Write the reporting limit summary table to results/tables as a csv file
write_csv(
  df_rl_vals_nutr_chla,
  file = here("results/tables/rl_summary_table.csv")
)