Process the data for the WQ parameters for the long-term WQ
publication. Parameters include: Water Temperature, Salinity, Secchi
depth, Dissolved Ammonia, Dissolved Nitrate + Nitrite, Dissolved
Ortho-phosphate, and Chlorophyll. Data is from the discretewq
EDI data package, version 731.7.
# Load packages
library(tidyverse)
library(dtplyr)
library(hms)
library(scales)
# Make sure we are using `deltamapr` version 1.0.0, commit d0a6f9c22aa074f906176e99a0ed70f97f26fffd
# install.packages("devtools")
# devtools::install_github("InteragencyEcologicalProgram/deltamapr", ref = "d0a6f9c22aa074f906176e99a0ed70f97f26fffd")
library(deltamapr)
library(sf)
library(leaflet)
library(here)
library(contentid)
library(qs)
library(conflicted)
# Source global data processing functions
source(here("src/data_processing/global_data_proc_func.R"))
# Declare package conflict preferences
conflicts_prefer(dplyr::filter(), hms::hms())
## [conflicted] Will prefer dplyr::filter over any other package.
## [conflicted] Will prefer hms::hms over any other package.
# Check if we are in the correct working directory: declare this file's path
# relative to the project root so that `here()` resolves paths from that root
# NOTE(review): `i_am()` is expected to stop with an error when the file is not
# found at this location - confirm against the `here` documentation
i_am("src/data_processing/process_data_wq_nutr_chla.Rmd")
## here() starts at C:/Repositories/04_IEP_Org/WQ-LT-Publication
# Run session info to display package versions, so the package environment is
# recorded alongside the results. Called with `::` because `devtools` is not
# attached by any of the `library()` calls above.
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.3 (2023-03-15 ucrt)
## os Windows 10 x64 (build 19044)
## system x86_64, mingw32
## ui RTerm
## language (EN)
## collate English_United States.utf8
## ctype English_United States.utf8
## tz America/Los_Angeles
## date 2024-01-03
## pandoc 3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## ! package * version date (UTC) lib source
## askpass 1.2.0 2023-09-03 [1] CRAN (R 4.2.3)
## bslib 0.4.2 2022-12-16 [1] CRAN (R 4.2.2)
## cachem 1.0.8 2023-05-01 [1] CRAN (R 4.2.3)
## callr 3.7.3 2022-11-02 [1] CRAN (R 4.2.2)
## class 7.3-21 2023-01-23 [2] CRAN (R 4.2.3)
## classInt 0.4-9 2023-02-28 [1] CRAN (R 4.2.2)
## cli 3.6.1 2023-03-23 [1] CRAN (R 4.2.3)
## colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.2.2)
## conflicted * 1.2.0 2023-02-01 [1] CRAN (R 4.2.2)
## contentid * 0.0.17 2023-04-21 [1] CRAN (R 4.2.3)
## crayon 1.5.2 2022-09-29 [1] CRAN (R 4.2.1)
## crosstalk 1.2.0 2021-11-04 [1] CRAN (R 4.2.1)
## curl 5.1.0 2023-10-02 [1] CRAN (R 4.2.3)
## data.table 1.14.8 2023-02-17 [1] CRAN (R 4.2.2)
## DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.1)
## deltamapr * 1.0.0 2021-06-18 [1] Github (InteragencyEcologicalProgram/deltamapr@d0a6f9c)
## devtools 2.4.5 2022-10-11 [1] CRAN (R 4.2.1)
## digest 0.6.33 2023-07-07 [1] CRAN (R 4.2.3)
## dplyr * 1.1.3 2023-09-03 [1] CRAN (R 4.2.3)
## dtplyr * 1.3.1 2023-03-22 [1] CRAN (R 4.2.3)
## e1071 1.7-13 2023-02-01 [1] CRAN (R 4.2.2)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.1)
## evaluate 0.21 2023-05-05 [1] CRAN (R 4.2.3)
## fansi 1.0.4 2023-01-22 [1] CRAN (R 4.2.2)
## fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.2.2)
## forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.2.2)
## fs 1.6.3 2023-07-20 [1] CRAN (R 4.2.3)
## generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.1)
## ggplot2 * 3.4.3 2023-08-14 [1] CRAN (R 4.2.3)
## glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.1)
## gtable 0.3.4 2023-08-21 [1] CRAN (R 4.2.3)
## here * 1.0.1 2020-12-13 [1] CRAN (R 4.2.1)
## hms * 1.1.3 2023-03-21 [1] CRAN (R 4.2.3)
## htmltools 0.5.5 2023-03-23 [1] CRAN (R 4.2.3)
## htmlwidgets 1.6.2 2023-03-17 [1] CRAN (R 4.2.3)
## httpuv 1.6.9 2023-02-14 [1] CRAN (R 4.2.2)
## httr 1.4.7 2023-08-15 [1] CRAN (R 4.2.3)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.2.1)
## jsonlite 1.8.7 2023-06-29 [1] CRAN (R 4.2.3)
## KernSmooth 2.23-20 2021-05-03 [2] CRAN (R 4.2.3)
## knitr 1.42 2023-01-25 [1] CRAN (R 4.2.2)
## later 1.3.0 2021-08-18 [1] CRAN (R 4.2.1)
## leaflet * 2.1.2 2023-03-10 [1] CRAN (R 4.2.2)
## lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.1)
## lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.2.3)
## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.1)
## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.2.1)
## mime 0.12 2021-09-28 [1] CRAN (R 4.2.0)
## miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.2.1)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.1)
## openssl 2.1.1 2023-09-25 [1] CRAN (R 4.2.3)
## pillar 1.9.0 2023-03-22 [1] CRAN (R 4.2.3)
## pkgbuild 1.4.2 2023-06-26 [1] CRAN (R 4.2.3)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.1)
## pkgload 1.3.2.1 2023-07-08 [1] CRAN (R 4.2.3)
## prettyunits 1.2.0 2023-09-24 [1] CRAN (R 4.2.3)
## processx 3.8.2 2023-06-30 [1] CRAN (R 4.2.3)
## profvis 0.3.7 2020-11-02 [1] CRAN (R 4.2.1)
## promises 1.2.0.1 2021-02-11 [1] CRAN (R 4.2.1)
## proxy 0.4-27 2022-06-09 [1] CRAN (R 4.2.1)
## ps 1.7.5 2023-04-18 [1] CRAN (R 4.2.3)
## purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.2.3)
## qs * 0.25.5 2023-02-22 [1] CRAN (R 4.2.2)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.1)
## RApiSerialize 0.1.2 2022-08-25 [1] CRAN (R 4.2.1)
## Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.2.3)
## D RcppParallel 5.1.7 2023-02-27 [1] CRAN (R 4.2.3)
## readr * 2.1.4 2023-02-10 [1] CRAN (R 4.2.2)
## remotes 2.4.2 2021-11-30 [1] CRAN (R 4.2.1)
## rlang 1.1.1 2023-04-28 [1] CRAN (R 4.2.3)
## rmarkdown 2.21 2023-03-26 [1] CRAN (R 4.2.3)
## rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.2.1)
## rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.1)
## sass 0.4.6 2023-05-03 [1] CRAN (R 4.2.3)
## scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.1)
## sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.1)
## sf * 1.0-12 2023-03-19 [1] CRAN (R 4.2.3)
## shiny 1.7.4 2022-12-15 [1] CRAN (R 4.2.2)
## stringfish 0.15.7 2022-04-13 [1] CRAN (R 4.2.1)
## stringi 1.7.12 2023-01-11 [1] CRAN (R 4.2.2)
## stringr * 1.5.0 2022-12-02 [1] CRAN (R 4.2.2)
## tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.2.3)
## tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.2.2)
## tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.1)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.2.2)
## timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.2)
## tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.2.3)
## units 0.8-1 2022-12-10 [1] CRAN (R 4.2.2)
## urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.2.1)
## usethis 2.1.6 2022-05-25 [1] CRAN (R 4.2.1)
## utf8 1.2.3 2023-01-31 [1] CRAN (R 4.2.2)
## vctrs 0.6.3 2023-06-14 [1] CRAN (R 4.2.3)
## withr 2.5.1 2023-09-26 [1] CRAN (R 4.2.3)
## xfun 0.39 2023-04-20 [1] CRAN (R 4.2.3)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.2.1)
## yaml 2.3.7 2023-01-23 [1] CRAN (R 4.2.2)
##
## [1] C:/R/win-library/4.2
## [2] C:/Program Files/R/R-4.2.3/library
##
## D ── DLL MD5 mismatch, broken installation.
##
## ──────────────────────────────────────────────────────────────────────────────
Load globally-used data:
# Region assignments lookup table
df_regions <- here("data/raw/region_assignments.csv") %>% read_csv()

# Delta subregion polygons: keep only the SubRegion name (plus geometry) and
# drop the SubRegions at or west of Carquinez Strait
sf_delta <- R_EDSM_Subregions_Mahardja_FLOAT %>%
  select(SubRegion) %>%
  filter(
    !(SubRegion %in% c(
      "Carquinez Strait",
      "Lower Napa River",
      "San Francisco Bay",
      "San Pablo Bay",
      "South Bay",
      "Upper Napa River"
    ))
  )

# Year assignments lookup table, keyed by adjusted calendar year
df_yr_type <- here("data/raw/year_assignments.csv") %>%
  read_csv() %>%
  rename(YearAdj = Year)

# Years covered by the publication
lt_yrs <- 1975:2021

# Every possible combination of year, season, and region
df_yr_seas_reg <- expand_grid(
  YearAdj = lt_yrs,
  Season = c("Winter", "Spring", "Summer", "Fall"),
  Region = unique(df_regions$Region)
)

# Minimum number of years of data a subregion needs in order to be included in
# the long-term averages (75% of the study period, rounded)
num_yrs_threshold <- round(0.75 * length(lt_yrs))
Create globally-used functions:
# Filter data so that there is only one sample per station-day by choosing the
# data point closest to noon
#
# Arguments:
#   df: data frame with (at least) the columns `Source`, `Station`, `Date`,
#     and `Datetime`
# Returns:
#   A data frame with the same columns as `df`. Station-days that had a single
#   record are passed through unchanged; station-days with multiple records
#   are reduced to the record closest to noon and appended after the
#   unchanged rows (so the original row order is not preserved).
# Notes:
#   - Within a duplicated station-day, records with a missing `Datetime` are
#     dropped because they cannot be ranked by their distance from noon.
#   - Records that share the exact same time of day are all retained.
filt_daily_dups <- function(df) {
  # Look for any instances when more than 1 data point was collected at a
  # station-day
  df_dups <- df %>%
    count(Source, Station, Date) %>%
    filter(n > 1) %>%
    select(-n)

  # Fix duplicates
  df_dups_fixed <- df %>%
    inner_join(df_dups, by = c("Source", "Station", "Date")) %>%
    drop_na(Datetime) %>%
    mutate(
      # Create variable for time
      Time = as_hms(Datetime),
      # Calculate difference from noon for each data point for later filtering
      Noon_diff = abs(hms(hours = 12) - Time)
    ) %>%
    # Use dtplyr to speed up operations
    lazy_dt() %>%
    # Group by the same keys that were used to identify the duplicates so that
    # identically named stations from different sources are never compared
    # against each other
    group_by(Source, Station, Date) %>%
    # Select only 1 data point per station and date, choose data closest to noon
    filter(Noon_diff == min(Noon_diff)) %>%
    # When points are equidistant from noon, select earlier point
    filter(Time == min(Time)) %>%
    ungroup() %>%
    # End dtplyr operation
    as_tibble() %>%
    select(-c(Time, Noon_diff))

  # Add back the fixed duplicates
  df %>%
    anti_join(df_dups, by = c("Source", "Station", "Date")) %>%
    bind_rows(df_dups_fixed)
}
# Plot sampling effort by Station
#
# Draws a tile plot of the number of samples collected at each Station in each
# adjusted year. `df` must contain the columns `Station` and `YearAdj`.
# Returns a ggplot object.
plot_samp_effort_sta <- function(df) {
  # Tally samples per Station and year, then reverse the default factor level
  # order of Station for display on the y-axis
  df_effort <- count(df, Station, YearAdj, name = "num_samples")
  df_effort <- mutate(df_effort, Station = fct_rev(factor(Station)))

  plt_tiles <- ggplot(
    df_effort,
    aes(x = YearAdj, y = Station, fill = num_samples)
  ) +
    geom_tile()

  plt_tiles +
    scale_x_continuous(
      limits = c(1974, 2022),
      breaks = breaks_pretty(20),
      expand = expansion()
    ) +
    scale_fill_viridis_c(name = "Number of Samples") +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
      legend.position = "top"
    )
}
# Plot sampling effort by SubRegion and Season
#
# Draws a tile plot of the number of samples collected in each SubRegion and
# adjusted year, with one facet per Season. `df` must contain the columns
# `SubRegion`, `YearAdj`, and `Season`. Returns a ggplot object.
plot_samp_effort_subreg <- function(df) {
  season_order <- c("Winter", "Spring", "Summer", "Fall")

  # Tally samples per SubRegion, year, and Season; fix the facet order of the
  # Seasons and reverse the default level order of SubRegion for the y-axis
  df_effort <- count(df, SubRegion, YearAdj, Season, name = "num_samples")
  df_effort <- mutate(
    df_effort,
    SubRegion = fct_rev(factor(SubRegion)),
    Season = factor(Season, levels = season_order)
  )

  plt_tiles <- ggplot(
    df_effort,
    aes(x = YearAdj, y = SubRegion, fill = num_samples)
  ) +
    geom_tile() +
    facet_wrap(vars(Season), nrow = 2)

  plt_tiles +
    scale_x_continuous(
      limits = c(1974, 2022),
      breaks = breaks_pretty(20),
      expand = expansion(mult = 0.02)
    ) +
    # Keep every SubRegion level on the y-axis of each facet
    scale_y_discrete(drop = FALSE) +
    scale_fill_viridis_c(name = "Number of Samples") +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
      legend.position = "top"
    )
}
# Flag data points with Z-scores greater than a specified threshold
#
# Arguments:
#   df: data frame with a numeric `Result` column; when `df` is grouped, the
#     mean and SD are calculated within each group
#   threshold: Z-score above which a data point is flagged
# Returns:
#   `df` with two added columns: `Zscore` (absolute Z-score; NA when the SD is
#   zero) and `Zscore_flag` (TRUE only when `Zscore` exceeds `threshold`).
#   Any column whose name starts with "tmp_" is removed from the output.
flag_zscore <- function(df, threshold) {
  df_scored <- mutate(
    df,
    tmp_mean = mean(Result),
    tmp_sd = sd(Result),
    # A Z-score is undefined when there is no spread in the data
    Zscore = if_else(
      tmp_sd != 0,
      abs((Result - tmp_mean) / tmp_sd),
      NA_real_
    ),
    # Missing comparisons (e.g. an NA Z-score) are never flagged
    Zscore_flag = coalesce(Zscore > threshold, FALSE)
  )

  # Drop the temporary helper columns
  select(df_scored, !starts_with("tmp_"))
}
# Flag data points with modified z-scores greater than a specified threshold
#
# The modified Z-score is calculated as
#   0.6745 * (Result - median(Result)) / MAD
# where MAD is the raw (unscaled) median absolute deviation.
#
# Arguments:
#   df: data frame with a numeric `Result` column; when `df` is grouped, the
#     median and MAD are calculated within each group
#   threshold: modified Z-score above which a data point is flagged
# Returns:
#   `df` with two added columns: `ModZscore` (absolute modified Z-score; NA
#   when the MAD is zero) and `ModZscore_flag` (TRUE only when `ModZscore`
#   exceeds `threshold`). Any column whose name starts with "tmp_" is removed
#   from the output.
flag_modzscore <- function(df, threshold) {
  df %>%
    mutate(
      tmp_median = median(Result),
      # `stats::mad()` multiplies the raw MAD by 1.4826 by default. Because the
      # 0.6745 factor below already applies that consistency correction
      # (0.6745 is approximately 1 / 1.4826), the raw MAD must be requested
      # with `constant = 1`; otherwise the correction is applied twice and the
      # modified Z-scores are understated by a factor of about 1.48.
      tmp_mad = mad(Result, constant = 1),
      ModZscore = if_else(
        tmp_mad == 0,
        NA_real_,
        abs(0.6745 * (Result - tmp_median) / tmp_mad)
      ),
      ModZscore_flag = case_when(
        is.na(ModZscore) ~ FALSE,
        ModZscore > threshold ~ TRUE,
        TRUE ~ FALSE
      )
    ) %>%
    # Drop the temporary helper columns
    select(!starts_with("tmp_"))
}
# Flag <RL values with high reporting limits (greater than a specified
# percentile of the data)
#
# Arguments:
#   df: data frame with the columns `Result` and `Sign`
#   perc_thresh: probability (0-1) passed to `quantile()` to define the cutoff
# Returns:
#   `df` with an added logical column `HighRL_flag`, which is TRUE for records
#   marked as below the reporting limit (`Sign == "<"`) whose `Result` is
#   greater than the cutoff
flag_high_rl <- function(df, perc_thresh) {
  # Cutoff value: the requested percentile of all Result values
  df_cutoff <- summarize(df, quant = quantile(Result, probs = perc_thresh))
  threshold <- pull(df_cutoff, quant)

  mutate(
    df,
    HighRL_flag = if_else(Sign == "<" & Result > threshold, TRUE, FALSE)
  )
}
# Replace values below the reporting limit with simulated values between
# `min_val` and the RL
#
# Arguments:
#   df: data frame with the columns `Result` and `Sign`; for records with
#     `Sign == "<"`, `Result` holds the reporting limit
#   min_val: lower bound of the simulated values (default 0)
#   seed: random seed used for the simulation (default 1)
# Returns:
#   A data frame with the same columns as `df`. Records that are not below the
#   RL (including those with a missing `Sign`) come first and are unchanged,
#   followed by the below-RL records with `Result` replaced by a uniform
#   random draw between `min_val` and the RL, rounded to 6 decimal places.
# NOTE(review): `min_val` is assumed to be no greater than every reporting
# limit; `runif()` does not return valid draws when `min > max`.
replace_blw_rl <- function(df, min_val = 0, seed = 1) {
  # Pull out values that are below the RL
  df_blw_rl <- df %>% filter(Sign == "<")

  # Replace below RL values with simulated ones. The seed is set only for the
  # duration of this call for reproducibility
  df_blw_rl_sim <- withr::with_seed(
    seed,
    df_blw_rl %>%
      mutate(Result = round(runif(n(), min = min_val, max = Result), 6))
  )

  # Add simulated values back to main data frame. A missing Sign is not a
  # below-RL record, so keep it explicitly: `Sign != "<"` evaluates to NA for
  # those rows and `filter()` would otherwise silently drop them
  df %>%
    filter(is.na(Sign) | Sign != "<") %>%
    bind_rows(df_blw_rl_sim)
}
# Calculate seasonal-regional averages of raw data
#
# Arguments:
#   df: data frame with the columns `Month`, `Season`, `Region`, `YearAdj`,
#     and `Result`
# Returns:
#   A tibble with one row per Season-Region-YearAdj combination and the
#   average of the monthly means in `Result`; combinations without any data
#   have an NA `Result`
calc_seas_reg_avg <- function(df) {
  # Step 1: monthly mean for each region
  df_monthly <- df %>%
    group_by(Month, Season, Region, YearAdj) %>%
    summarize(Result_month_mean = mean(Result), .groups = "drop")

  # Step 2: add rows (with NA monthly means) for any missing combination of
  # Season, Region, and YearAdj so that all of them are represented
  df_monthly_full <- complete(df_monthly, Season, Region, YearAdj)

  # Step 3: average the monthly means within each season, region, and year
  df_monthly_full %>%
    group_by(Season, Region, YearAdj) %>%
    summarize(Result = mean(Result_month_mean), .groups = "drop")
}
# One-time setup: register a contentid for the WQ data from the discretewq EDI
# data package
# register(
# "https://portal.edirepository.org/nis/dataviewer?packageid=edi.731.7&entityid=6c5f35b1d316e39c8de0bfadfb3c9692"
# )

# contentid (sha256 hash) of the WQ data from the discretewq EDI data package
id_dwq <- "hash://sha256/c5397df66c7c0e407c0bcd422711e3aab2713023a4aa3d24ff80de58a68f0cf9"

# Resolve the contentid to a file, storing a local copy for faster import
file_dwq <- resolve(id_dwq, store = TRUE)

# Import the WQ data from the local copy, reading only the station metadata,
# the three WQ measurement parameters, and every column whose name contains
# one of the nutrient or chlorophyll parameter names
df_dwq <- read_csv(
  file_dwq,
  col_select = c(
    Source, Station, Latitude, Longitude, Date, Datetime,
    Temperature, Salinity, Secchi,
    contains(c("Chlorophyll", "DissAmmonia", "DissNitrateNitrite", "DissOrthophos"))
  )
)
## Rows: 353850 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Source, Station, Chlorophyll_Sign, DissAmmonia_Sign, DissNitrateNi...
## dbl (9): Latitude, Longitude, Temperature, Salinity, Secchi, Chlorophyll, D...
## dttm (1): Datetime
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Prepare data for parameters of interest
df_dwq_c <- df_dwq %>%
  # Express Datetime in PST
  mutate(Datetime = with_tz(Datetime, tzone = "Etc/GMT+8")) %>%
  # Records without coordinates cannot be assigned to a SubRegion
  drop_na(Latitude, Longitude) %>%
  # Spatially join the stations to the SubRegion polygons
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE) %>%
  st_transform(crs = st_crs(sf_delta)) %>%
  st_join(sf_delta, join = st_intersects) %>%
  st_drop_geometry() %>%
  # Keep only the data that fall within our subregions of interest
  filter(!is.na(SubRegion)) %>%
  # Add month, adjusted calendar year, and season. The adjusted calendar year
  # runs December-November: December is counted with the following year.
  mutate(
    Month = month(Date),
    YearAdj = if_else(Month != 12, year(Date), year(Date) + 1),
    Season = case_when(
      Month %in% c(12, 1, 2) ~ "Winter",
      Month %in% 3:5 ~ "Spring",
      Month %in% 6:8 ~ "Summer",
      Month %in% 9:11 ~ "Fall"
    )
  ) %>%
  # Restrict data to the years used in the publication (1975-2021)
  filter(YearAdj %in% lt_yrs)
Let’s look at which surveys we can use for the long-term WQ publication. First, we’ll look at the temporal scale of all of the surveys available.
# Number of distinct adjusted years with data for each survey, longest first
df_dwq_c %>%
  group_by(Source) %>%
  summarize(NumYears = n_distinct(YearAdj)) %>%
  arrange(desc(NumYears))
## # A tibble: 16 × 2
## Source NumYears
## <chr> <int>
## 1 EMP 47
## 2 FMWT 47
## 3 STN 47
## 4 USGS_CAWSC 47
## 5 DJFMP 46
## 6 Suisun 43
## 7 Baystudy 42
## 8 USGS_SFBS 42
## 9 20mm 27
## 10 YBFMP 24
## 11 NCRO 23
## 12 SDO 23
## 13 SKT 20
## 14 SLS 13
## 15 USBR 8
## 16 EDSM 5
# First and last sampling date for each survey, ordered by start date
df_dwq_c %>%
  group_by(Source) %>%
  summarize(across(Date, list(min = min, max = max), .names = "{.fn}_date")) %>%
  arrange(min_date)
## # A tibble: 16 × 3
## Source min_date max_date
## <chr> <date> <date>
## 1 USGS_CAWSC 1974-12-11 2021-11-30
## 2 EMP 1975-01-07 2021-11-16
## 3 USGS_SFBS 1975-01-15 2021-11-04
## 4 STN 1975-06-30 2021-08-19
## 5 FMWT 1975-09-17 2021-11-16
## 6 DJFMP 1976-05-13 2021-11-29
## 7 Suisun 1979-05-16 2021-11-18
## 8 Baystudy 1980-02-08 2021-11-03
## 9 20mm 1995-04-24 2021-07-16
## 10 SDO 1997-08-04 2021-09-10
## 11 YBFMP 1998-01-19 2021-11-30
## 12 NCRO 1999-03-17 2021-11-30
## 13 SKT 2002-01-07 2021-04-29
## 14 SLS 2009-01-05 2021-03-17
## 15 USBR 2012-05-08 2019-10-22
## 16 EDSM 2016-12-15 2021-11-26
Overall, for all parameters, it looks like all surveys except for SLS, USBR, and EDSM have collected at least 20 years of data. We will assume that these surveys have adequate temporal coverage for the long-term analysis.
# Drop the surveys without adequate temporal coverage
df_dwq_lt <- filter(df_dwq_c, !(Source %in% c("SLS", "USBR", "EDSM")))
Next, let’s take a look at a map of all stations.
# One point per unique survey-station-coordinate combination
sf_stations <- df_dwq_lt %>%
distinct(Source, Station, Latitude, Longitude) %>%
# Convert to sf object
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
# Define color palette for Surveys
color_pal_survey <- colorFactor(palette = "viridis", domain = sf_stations$Source)
# Create map using leaflet: one circle marker per station, filled by survey,
# with a hover label giving the survey and station names
leaflet() %>%
addTiles() %>%
addCircleMarkers(
data = sf_stations,
radius = 5,
fillColor = ~color_pal_survey(Source),
fillOpacity = 0.8,
weight = 0.5,
color = "black",
opacity = 1,
label = paste0("Survey: ", sf_stations$Source, ", Station: ", sf_stations$Station)
) %>%
# Legend uses the same palette as the marker fill
addLegend(
position = "topright",
pal = color_pal_survey,
values = sf_stations$Source,
title = "Survey:"
)
Some of the stations from the Suisun Marsh survey are located in small backwater channels and dead-end sloughs which represent a much different habitat than the sampling locations from the other surveys which tend to be in larger, open water channel habitat. We’ll keep the stations located in Suisun, Montezuma, and Nurse Sloughs from the Suisun Marsh survey, since they seem to be in the larger channels in the area.
Also, there are a few questionable sampling locations from SKT and YBFMP, but I don’t want to dig too deep with these for now.
# From the Suisun Marsh survey, keep only the stations whose codes start with
# SU, MZ, or NS; all other surveys are kept as is.
# The alternation is wrapped in a group so that the "Suisun " prefix applies
# to all three station codes. Without the group, the pattern reads as
# "Suisun SU" OR "MZ" OR "NS" and would keep any Suisun Marsh station that
# merely contains "MZ" or "NS" somewhere in its name.
df_dwq_lt_filt <- df_dwq_lt %>%
  filter(!(Source == "Suisun" & !str_detect(Station, "Suisun\\s(SU|MZ|NS)")))
Next, we’ll process the water quality measurement data: Water Temperature, Salinity, and Secchi depth.
# Build a nested data frame with one row per Parameter, where `df_data` holds
# the data prepared for that Parameter
ndf_wq_meas <-
  tibble(Parameter = c("Temperature", "Salinity", "Secchi")) %>%
  mutate(
    df_data = map(
      Parameter,
      \(param) {
        df_dwq_lt_filt %>%
          # Keep only the records that have a value for this Parameter
          drop_na(all_of(param)) %>%
          select(
            Source, Station, Latitude, Longitude, SubRegion,
            YearAdj, Month, Season, Date, Datetime,
            all_of(param)
          ) %>%
          # Filter data so that there is only one sample per station-day
          filt_daily_dups()
      }
    )
  )
# Sanity check: list any station-days that still have more than one sample for
# each parameter (every element should have zero rows)
map(
  ndf_wq_meas$df_data,
  \(df_param) df_param %>% count(Source, Station, Date) %>% filter(n > 1)
)
## [[1]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
##
## [[2]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
##
## [[3]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
# Rename each parameter's value column to a common `Result` column, then
# unnest the nested data frame into a long format
df_wq_meas_c1 <- ndf_wq_meas %>%
  mutate(
    df_data = map2(
      df_data,
      Parameter,
      \(df_param, param) rename(df_param, Result = all_of(param))
    )
  ) %>%
  unnest(df_data)
Now let’s take a closer look at the temporal data coverage for each Station and parameter.
# Sampling effort by station: one plot per Parameter-Source combination,
# nested a second time so that each Parameter has its own set of Source plots
ndf_wq_meas_se_sta_plt <- df_wq_meas_c1 %>%
  nest(.by = c(Parameter, Source), .key = "df_data") %>%
  mutate(plt = map(df_data, \(df_src) plot_samp_effort_sta(df_src))) %>%
  nest(.by = Parameter, .key = "ndf_data_source")
Salinity data from DJFMP is only available for the past three years and NCRO only sampled Secchi depth for the past four years, so we won’t include these survey-parameter combinations in the analyses. For the USGS-CAWSC survey, only station 11447650 (Sacramento River at Freeport) was sampled on a long-term basis for Water Temperature and Salinity, so we’ll only include this station from the USGS-CAWSC survey.
# Remove the survey-parameter combinations without long-term coverage
df_wq_meas_c2 <- df_wq_meas_c1 %>%
  filter(
    # From USGS-CAWSC, keep only station 11447650
    Source != "USGS_CAWSC" | str_detect(Station, "USGS-11447650$"),
    # Exclude Salinity from DJFMP
    Parameter != "Salinity" | Source != "DJFMP",
    # Exclude Secchi depth from NCRO
    Parameter != "Secchi" | Source != "NCRO"
  )
Not all of the subregions were sampled consistently from 1975-2021. To make sure that we only include the subregions that were sampled adequately, we will require that a subregion needs to have data for at least 75% of the 47 years between 1975 to 2021 (35 years) for each season.
# For each Parameter, keep only the SubRegions that have at least
# `num_yrs_threshold` years of data in each of the four seasons
df_wq_meas_c3 <- df_wq_meas_c2 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(
    # SubRegions that meet the threshold
    df_subreg_seas = map(
      df_data,
      \(df_param) {
        df_param %>%
          # Number of years with data for each SubRegion and Season
          distinct(SubRegion, YearAdj, Season) %>%
          count(SubRegion, Season, name = "NumYears") %>%
          # Every sampled season must meet the threshold...
          group_by(SubRegion) %>%
          filter(all(NumYears >= num_yrs_threshold)) %>%
          ungroup() %>%
          # ...and all four seasons must have been sampled
          count(SubRegion) %>%
          filter(n == 4)
      }
    ),
    # Data restricted to those SubRegions
    df_data_filt = map2(
      df_data,
      df_subreg_seas,
      \(df_param, df_keep) filter(df_param, SubRegion %in% df_keep$SubRegion)
    )
  ) %>%
  select(Parameter, df_data_filt) %>%
  unnest(df_data_filt)
Let’s take a look at the sampling effort for the remaining subregions for each season after filtering for each water quality measurement parameter.
# Sampling effort by SubRegion and Season: one plot per Parameter
ndf_wq_meas_se_subreg_plt <- df_wq_meas_c3 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(plt = map(df_data, \(df_param) plot_samp_effort_subreg(df_param)))
First, we’ll look at the min-max ranges of each of the water quality measurement parameters, to see if there are any obvious outliers to exclude from the data set.
# Minimum and maximum value of each parameter
df_wq_meas_c3 %>%
  summarize(
    across(Result, list(min_val = min, max_val = max), .names = "{.fn}"),
    .by = Parameter
  )
## # A tibble: 3 × 3
## Parameter min_val max_val
## <chr> <dbl> <dbl>
## 1 Temperature 2 116
## 2 Salinity 0 44.1
## 3 Secchi 0 457
All water quality measurement parameters have questionable minimum values and Temperature and Salinity have questionable maximum values. Let’s take a closer look at these to see if we should omit them from the data set.
# Remove some columns so that the data displays better when printed
vars_rm_view <- c("Source", "Latitude", "Longitude", "Month", "Datetime")
df_wq_meas_c3_view <- select(df_wq_meas_c3, !any_of(vars_rm_view))

# Ten lowest Temperature values (ties included)
df_wq_meas_c3_view %>%
  filter(Parameter == "Temperature") %>%
  slice_min(order_by = Result, n = 10)
## # A tibble: 11 × 7
## Parameter Station SubRegion YearAdj Season Date Result
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl>
## 1 Temperature EMP S42 Suisun M… 1983 Spring 1983-05-03 2
## 2 Temperature Suisun MZ1 Suisun M… 1989 Winter 1989-02-16 4
## 3 Temperature NCRO Paradise Cut Grant Li… 2018 Winter 2018-02-13 4.68
## 4 Temperature NCRO Middle River at … Grant Li… 2014 Winter 2013-12-12 4.7
## 5 Temperature Suisun MZ1 Suisun M… 1989 Winter 1988-12-30 5
## 6 Temperature Suisun SU1 Suisun M… 1986 Winter 1985-12-19 5
## 7 Temperature Suisun SU4 Suisun M… 1988 Fall 1988-10-30 5
## 8 Temperature USGS_SFBS 653 Sacramen… 1991 Winter 1991-01-07 5.14
## 9 Temperature USGS_SFBS 657 Sacramen… 1991 Winter 1991-01-07 5.28
## 10 Temperature Suisun NS2 Suisun M… 2013 Winter 2013-01-16 5.3
## 11 Temperature Suisun SU1 Suisun M… 1989 Winter 1988-12-28 5.3
# The minimum temperature value at S42 in Suisun Marsh looks suspicious.
# Compare it against all the data from that SubRegion in Spring 1983.
df_wq_meas_c3_view %>%
  filter(
    Parameter == "Temperature" &
      SubRegion == "Suisun Marsh" &
      YearAdj == 1983 &
      Season == "Spring"
  ) %>%
  arrange(Date) %>%
  print(n = 25)
## # A tibble: 24 × 7
## Parameter Station SubRegion YearAdj Season Date Result
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl>
## 1 Temperature Suisun SU1 Suisun Marsh 1983 Spring 1983-03-15 13.9
## 2 Temperature Suisun SU2 Suisun Marsh 1983 Spring 1983-03-15 14.5
## 3 Temperature Suisun SU3 Suisun Marsh 1983 Spring 1983-03-15 14.7
## 4 Temperature Suisun SU4 Suisun Marsh 1983 Spring 1983-03-15 14
## 5 Temperature Suisun MZ1 Suisun Marsh 1983 Spring 1983-03-16 10.5
## 6 Temperature Suisun MZ2 Suisun Marsh 1983 Spring 1983-03-16 10.5
## 7 Temperature Suisun MZ7 Suisun Marsh 1983 Spring 1983-03-16 10.5
## 8 Temperature Suisun MZ8 Suisun Marsh 1983 Spring 1983-03-16 10.5
## 9 Temperature Suisun SU1 Suisun Marsh 1983 Spring 1983-03-29 17
## 10 Temperature Suisun MZ1 Suisun Marsh 1983 Spring 1983-04-14 13
## 11 Temperature Suisun MZ7 Suisun Marsh 1983 Spring 1983-04-14 13
## 12 Temperature Suisun SU3 Suisun Marsh 1983 Spring 1983-04-14 13
## 13 Temperature Suisun SU4 Suisun Marsh 1983 Spring 1983-04-14 11
## 14 Temperature Suisun SU1 Suisun Marsh 1983 Spring 1983-04-15 13.2
## 15 Temperature Suisun SU2 Suisun Marsh 1983 Spring 1983-04-15 14.8
## 16 Temperature EMP S42 Suisun Marsh 1983 Spring 1983-05-03 2
## 17 Temperature Suisun MZ1 Suisun Marsh 1983 Spring 1983-05-18 18
## 18 Temperature Suisun MZ2 Suisun Marsh 1983 Spring 1983-05-18 17.5
## 19 Temperature Suisun MZ7 Suisun Marsh 1983 Spring 1983-05-18 17.2
## 20 Temperature Suisun MZ8 Suisun Marsh 1983 Spring 1983-05-18 20.8
## 21 Temperature Suisun SU3 Suisun Marsh 1983 Spring 1983-05-18 19.5
## 22 Temperature Suisun SU4 Suisun Marsh 1983 Spring 1983-05-18 22
## 23 Temperature Suisun SU1 Suisun Marsh 1983 Spring 1983-05-19 19
## 24 Temperature Suisun SU2 Suisun Marsh 1983 Spring 1983-05-19 19
# Ten highest Temperature values (ties included)
df_wq_meas_c3_view %>%
  filter(Parameter == "Temperature") %>%
  slice_max(order_by = Result, n = 10)
## # A tibble: 10 × 7
## Parameter Station SubRegion YearAdj Season Date Result
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl>
## 1 Temperature USGS_CAWSC USGS-11447… Middle S… 2017 Spring 2017-03-23 116
## 2 Temperature SDO lt41 San Joaq… 2006 Summer 2006-07-24 29.7
## 3 Temperature STN 912 San Joaq… 1984 Summer 1984-07-17 29.4
## 4 Temperature SDO lt43 San Joaq… 2006 Summer 2006-07-24 29.3
## 5 Temperature SDO tb San Joaq… 2006 Summer 2006-07-24 29.2
## 6 Temperature FMWT 501 Mid Suis… 2002 Fall 2002-09-11 29
## 7 Temperature STN 912 San Joaq… 2014 Summer 2014-06-30 28.9
## 8 Temperature DJFMP SJ054M Upper Sa… 2013 Summer 2013-07-03 28.9
## 9 Temperature STN 910 San Joaq… 1984 Summer 1984-07-17 28.9
## 10 Temperature NCRO Holland Cut at H… Old River 2021 Summer 2021-07-14 28.8
# Ten lowest Salinity values (ties included)
df_wq_meas_c3_view %>%
  filter(Parameter == "Salinity") %>%
  slice_min(order_by = Result, n = 10)
## # A tibble: 10 × 7
## Parameter Station SubRegion YearAdj Season Date Result
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl>
## 1 Salinity USGS_SFBS 651 Lower Sacramento R… 1980 Summer 1980-08-05 0
## 2 Salinity USGS_SFBS 652 Lower Sacramento R… 1978 Fall 1978-11-09 0
## 3 Salinity USGS_SFBS 653 Sacramento River n… 1980 Summer 1980-08-05 0
## 4 Salinity USGS_SFBS 655 Sacramento River n… 1980 Summer 1980-08-05 0
## 5 Salinity USGS_SFBS 657 Sacramento River n… 1979 Summer 1979-08-14 0
## 6 Salinity USGS_SFBS 657 Sacramento River n… 1980 Summer 1980-06-18 0
## 7 Salinity USGS_SFBS 657 Sacramento River n… 1980 Summer 1980-07-17 0
## 8 Salinity 20mm 520 Confluence 2020 Spring 2020-03-16 0.00114
## 9 Salinity Suisun SU4 Suisun Marsh 1982 Spring 1982-04-13 0.00452
## 10 Salinity Suisun MZ1 Suisun Marsh 1982 Spring 1982-04-13 0.00510
# Ten highest Salinity values (ties included)
df_wq_meas_c3_view %>%
  filter(Parameter == "Salinity") %>%
  slice_max(order_by = Result, n = 10)
## # A tibble: 10 × 7
## Parameter Station SubRegion YearAdj Season Date Result
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl>
## 1 Salinity STN 520 Confluence 1991 Summer 1991-07-05 44.1
## 2 Salinity EMP D12 Lower San Joaquin Riv… 1999 Summer 1999-07-12 25.5
## 3 Salinity EMP D6 West Suisun Bay 1978 Winter 1977-12-07 24.1
## 4 Salinity EMP D6 West Suisun Bay 1977 Fall 1977-10-21 24.0
## 5 Salinity FMWT 410 West Suisun Bay 1977 Fall 1977-11-11 23.9
## 6 Salinity FMWT 409 West Suisun Bay 1977 Fall 1977-11-10 23.2
## 7 Salinity EMP D6 West Suisun Bay 1977 Fall 1977-11-17 22.8
## 8 Salinity USGS_SFBS 7 West Suisun Bay 2014 Winter 2013-12-03 22.7
## 9 Salinity EMP D6 West Suisun Bay 1977 Summer 1977-06-29 22.6
## 10 Salinity EMP D6 West Suisun Bay 2014 Fall 2014-10-07 22.4
# The maximum salinity value at STN 520 in the Confluence SubRegion looks
# suspicious. Compare it against all the data from that SubRegion in Summer
# 1991.
df_wq_meas_c3_view %>%
  filter(
    Parameter == "Salinity" &
      SubRegion == "Confluence" &
      YearAdj == 1991 &
      Season == "Summer"
  ) %>%
  arrange(Date) %>%
  print(n = 30)
## # A tibble: 28 × 7
## Parameter Station SubRegion YearAdj Season Date Result
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl>
## 1 Salinity USGS_SFBS 2 Confluence 1991 Summer 1991-06-05 2.16
## 2 Salinity USGS_SFBS 3 Confluence 1991 Summer 1991-06-05 2.6
## 3 Salinity EMP D10 Confluence 1991 Summer 1991-06-07 4.72
## 4 Salinity Baystudy 535 Confluence 1991 Summer 1991-06-11 4.67
## 5 Salinity STN 513 Confluence 1991 Summer 1991-06-19 3.74
## 6 Salinity STN 801 Confluence 1991 Summer 1991-06-19 1.98
## 7 Salinity EMP D10 Confluence 1991 Summer 1991-06-21 6.60
## 8 Salinity STN 508 Confluence 1991 Summer 1991-06-21 4.20
## 9 Salinity STN 520 Confluence 1991 Summer 1991-06-21 2.86
## 10 Salinity STN 513 Confluence 1991 Summer 1991-07-03 5.66
## 11 Salinity STN 801 Confluence 1991 Summer 1991-07-03 3.00
## 12 Salinity STN 508 Confluence 1991 Summer 1991-07-05 6.21
## 13 Salinity STN 520 Confluence 1991 Summer 1991-07-05 44.1
## 14 Salinity Baystudy 535 Confluence 1991 Summer 1991-07-09 3.43
## 15 Salinity EMP D10 Confluence 1991 Summer 1991-07-09 7.10
## 16 Salinity STN 513 Confluence 1991 Summer 1991-07-17 4.48
## 17 Salinity STN 801 Confluence 1991 Summer 1991-07-17 1.63
## 18 Salinity STN 508 Confluence 1991 Summer 1991-07-19 5.01
## 19 Salinity STN 520 Confluence 1991 Summer 1991-07-19 3.80
## 20 Salinity STN 801 Confluence 1991 Summer 1991-07-31 4.34
## 21 Salinity USGS_SFBS 2 Confluence 1991 Summer 1991-08-01 2.18
## 22 Salinity USGS_SFBS 3 Confluence 1991 Summer 1991-08-01 3.21
## 23 Salinity STN 508 Confluence 1991 Summer 1991-08-02 5.44
## 24 Salinity STN 513 Confluence 1991 Summer 1991-08-02 4.75
## 25 Salinity STN 520 Confluence 1991 Summer 1991-08-02 4.81
## 26 Salinity Baystudy 535 Confluence 1991 Summer 1991-08-06 4.33
## 27 Salinity EMP D10 Confluence 1991 Summer 1991-08-06 6.85
## 28 Salinity EMP D10 Confluence 1991 Summer 1991-08-23 6.05
# Twenty lowest Secchi depth values (ties included)
df_wq_meas_c3_view %>%
  filter(Parameter == "Secchi") %>%
  slice_min(order_by = Result, n = 20)
## # A tibble: 20 × 7
## Parameter Station SubRegion YearAdj Season Date Result
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl>
## 1 Secchi FMWT 073 Middle Sacramento Ri… 1995 Winter 1995-01-12 0
## 2 Secchi FMWT 601 Mid Suisun Bay 1999 Fall 1999-09-09 0
## 3 Secchi FMWT 602 Grizzly Bay 1999 Fall 1999-09-09 0
## 4 Secchi FMWT 604 Grizzly Bay 1999 Fall 1999-09-09 0
## 5 Secchi FMWT 704 Lower Sacramento Riv… 1995 Spring 1995-03-13 0
## 6 Secchi FMWT 705 Lower Sacramento Riv… 1995 Spring 1995-03-13 0
## 7 Secchi FMWT 706 Sacramento River nea… 1995 Spring 1995-03-13 0
## 8 Secchi FMWT 707 Sacramento River nea… 1995 Spring 1995-03-13 0
## 9 Secchi FMWT 708 Sacramento River nea… 1995 Spring 1995-03-13 0
## 10 Secchi FMWT 709 Sacramento River nea… 1995 Spring 1995-03-13 0
## 11 Secchi FMWT 710 Sacramento River nea… 1995 Spring 1995-03-13 0
## 12 Secchi FMWT 711 Sacramento River nea… 1995 Spring 1995-03-13 0
## 13 Secchi FMWT 735 Middle Sacramento Ri… 1995 Winter 1995-01-12 0
## 14 Secchi FMWT 736 Middle Sacramento Ri… 1995 Winter 1995-01-12 0
## 15 Secchi FMWT 919 Lower Mokelumne River 1995 Winter 1995-01-13 0
## 16 Secchi FMWT 920 Lower Mokelumne River 1995 Winter 1995-01-13 0
## 17 Secchi SKT 801 Confluence 2005 Spring 2005-04-20 0
## 18 Secchi Suisun MZ1 Suisun Marsh 1995 Winter 1995-01-21 3
## 19 Secchi DJFMP SB018M Confluence 1995 Winter 1995-01-14 4.00
## 20 Secchi DJFMP SB018N Confluence 1995 Winter 1995-01-14 4.00
The following values are obviously out of range of reasonable limits for the parameter and will be excluded from the data set:
# Remove values outside of the accepted range for each parameter:
# Temperature must be > 2 and <= 30, and Salinity must be > 0 and <= 30
df_wq_meas_c4 <- df_wq_meas_c3 %>%
  filter(
    !(Parameter == "Temperature" & (Result <= 2 | Result > 30)),
    !(Parameter == "Salinity" & (Result <= 0 | Result > 30))
  )
Next, we’ll look for outliers by using a Z-score test flagging data points that are more than 15 SDs away from the mean of each subregion.
# Flag data points that are more than 15 SDs away from the mean of their
# Parameter-SubRegion group
df_wq_meas_flag <- df_wq_meas_c4 %>%
  group_by(Parameter, SubRegion) %>%
  flag_zscore(threshold = 15) %>%
  ungroup()

# View flagged data points, without the columns that clutter the display
df_wq_meas_flag_view <- select(df_wq_meas_flag, !any_of(vars_rm_view))
filter(df_wq_meas_flag_view, Zscore_flag)
## # A tibble: 11 × 9
## Parameter Station SubRegion YearAdj Season Date Result Zscore
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl> <dbl>
## 1 Salinity EMP D12 Lower San Joaqui… 1999 Summer 1999-07-12 25.5 21.3
## 2 Salinity EMP D22 Sacramento River… 2001 Winter 2000-12-06 22.4 35.8
## 3 Salinity FMWT 710 Sacramento River… 2001 Winter 2000-12-07 11.0 17.4
## 4 Salinity FMWT 711 Sacramento River… 2001 Winter 2000-12-07 10.7 16.9
## 5 Salinity FMWT 735 Middle Sacrament… 2006 Fall 2006-11-16 0.609 18.2
## 6 Salinity FMWT 908 San Joaquin Rive… 1995 Spring 1995-03-14 1.78 19.0
## 7 Salinity FMWT 919 Lower Mokelumne … 2001 Fall 2001-11-15 1.06 16.8
## 8 Salinity FMWT 920 Lower Mokelumne … 2001 Fall 2001-11-15 0.973 15.3
## 9 Salinity FMWT 923 Lower Mokelumne … 2001 Fall 2001-11-15 1.17 18.7
## 10 Salinity STN 918 Victoria Canal 2008 Summer 2008-07-28 2.51 21.1
## 11 Secchi FMWT 518 Honker Bay 2020 Winter 2019-12-02 360 16.4
## # ℹ 1 more variable: Zscore_flag <lgl>
# All Salinity records for the Lower San Joaquin River subregion in Summer of
# adjusted water year 1999, in date order, to give context for the flagged
# value
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity" &
      SubRegion == "Lower San Joaquin River" &
      YearAdj == 1999 &
      Season == "Summer"
  ) %>%
  arrange(Date)
## # A tibble: 20 × 9
## Parameter Station SubRegion YearAdj Season Date Result Zscore
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl> <dbl>
## 1 Salinity 20mm 804 Lower San Jo… 1999 Summer 1999-06-02 0.145 0.668
## 2 Salinity Baystudy 837 Lower San Jo… 1999 Summer 1999-06-07 0.135 0.677
## 3 Salinity Baystudy 853 Lower San Jo… 1999 Summer 1999-06-07 0.135 0.677
## 4 Salinity 20mm 804 Lower San Jo… 1999 Summer 1999-06-09 0.0951 0.711
## 5 Salinity EMP D12 Lower San Jo… 1999 Summer 1999-06-10 0.0821 0.722
## 6 Salinity 20mm 804 Lower San Jo… 1999 Summer 1999-06-14 0.0865 0.719
## 7 Salinity 20mm 804 Lower San Jo… 1999 Summer 1999-06-26 0.102 0.705
## 8 Salinity Baystudy 837 Lower San Jo… 1999 Summer 1999-07-06 0.190 0.629
## 9 Salinity Baystudy 853 Lower San Jo… 1999 Summer 1999-07-06 0.180 0.637
## 10 Salinity 20mm 804 Lower San Jo… 1999 Summer 1999-07-08 0.227 0.597
## 11 Salinity STN 804 Lower San Jo… 1999 Summer 1999-07-08 0.153 0.661
## 12 Salinity EMP D12 Lower San Jo… 1999 Summer 1999-07-12 25.5 21.3
## 13 Salinity STN 804 Lower San Jo… 1999 Summer 1999-07-20 0.348 0.492
## 14 Salinity 20mm 804 Lower San Jo… 1999 Summer 1999-07-24 0.253 0.575
## 15 Salinity STN 804 Lower San Jo… 1999 Summer 1999-08-03 0.230 0.594
## 16 Salinity EMP D12 Lower San Jo… 1999 Summer 1999-08-09 0.269 0.561
## 17 Salinity Baystudy 837 Lower San Jo… 1999 Summer 1999-08-12 0.171 0.645
## 18 Salinity Baystudy 853 Lower San Jo… 1999 Summer 1999-08-12 0.162 0.653
## 19 Salinity Baystudy 837 Lower San Jo… 1999 Summer 1999-08-30 0.781 0.117
## 20 Salinity Baystudy 853 Lower San Jo… 1999 Summer 1999-08-30 0.358 0.483
## # ℹ 1 more variable: Zscore_flag <lgl>
# All Salinity records for the Sacramento River near Rio Vista subregion
# collected from 2000-11-01 through 2001-02-28 (inclusive), in date order.
# Print up to 50 rows so the whole period is visible.
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity",
    SubRegion == "Sacramento River near Rio Vista",
    Date >= as.Date("2000-11-01"),
    Date <= as.Date("2001-02-28")
  ) %>%
  arrange(Date) %>%
  print(n = 50)
## # A tibble: 48 × 9
## Parameter Station SubRegion YearAdj Season Date Result Zscore
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl> <dbl>
## 1 Salinity EMP D22 Sacramento… 2000 Fall 2000-11-06 1.11 1.35e+0
## 2 Salinity USGS_SFBS 657 Sacramento… 2000 Fall 2000-11-07 0.09 3.06e-1
## 3 Salinity Baystudy 751 Sacramento… 2000 Fall 2000-11-13 0.443 2.66e-1
## 4 Salinity Baystudy 752 Sacramento… 2000 Fall 2000-11-13 0.501 3.58e-1
## 5 Salinity Baystudy 760 Sacramento… 2000 Fall 2000-11-13 0.153 2.04e-1
## 6 Salinity Baystudy 761 Sacramento… 2000 Fall 2000-11-13 0.144 2.19e-1
## 7 Salinity FMWT 706 Sacramento… 2000 Fall 2000-11-14 0.802 8.45e-1
## 8 Salinity FMWT 707 Sacramento… 2000 Fall 2000-11-14 0.589 5.02e-1
## 9 Salinity FMWT 708 Sacramento… 2000 Fall 2000-11-14 0.259 3.31e-2
## 10 Salinity FMWT 709 Sacramento… 2000 Fall 2000-11-14 0.104 2.84e-1
## 11 Salinity FMWT 710 Sacramento… 2000 Fall 2000-11-14 0.0860 3.13e-1
## 12 Salinity FMWT 711 Sacramento… 2000 Fall 2000-11-14 0.0749 3.30e-1
## 13 Salinity EMP D22 Sacramento… 2001 Winter 2000-12-06 22.4 3.58e+1
## 14 Salinity Baystudy 751 Sacramento… 2001 Winter 2000-12-07 1.19 1.48e+0
## 15 Salinity Baystudy 752 Sacramento… 2001 Winter 2000-12-07 0.945 1.08e+0
## 16 Salinity Baystudy 760 Sacramento… 2001 Winter 2000-12-07 0.245 5.47e-2
## 17 Salinity Baystudy 761 Sacramento… 2001 Winter 2000-12-07 0.158 1.96e-1
## 18 Salinity FMWT 706 Sacramento… 2001 Winter 2000-12-07 0.505 3.66e-1
## 19 Salinity FMWT 707 Sacramento… 2001 Winter 2000-12-07 0.460 2.93e-1
## 20 Salinity FMWT 708 Sacramento… 2001 Winter 2000-12-07 0.265 2.26e-2
## 21 Salinity FMWT 709 Sacramento… 2001 Winter 2000-12-07 0.135 2.33e-1
## 22 Salinity FMWT 710 Sacramento… 2001 Winter 2000-12-07 11.0 1.74e+1
## 23 Salinity FMWT 711 Sacramento… 2001 Winter 2000-12-07 10.7 1.69e+1
## 24 Salinity USGS_SFBS 657 Sacramento… 2001 Winter 2000-12-12 0.34 9.84e-2
## 25 Salinity Baystudy 760 Sacramento… 2001 Winter 2001-01-04 0.236 6.91e-2
## 26 Salinity Baystudy 761 Sacramento… 2001 Winter 2001-01-04 0.153 2.04e-1
## 27 Salinity EMP D22 Sacramento… 2001 Winter 2001-01-05 2.83 4.13e+0
## 28 Salinity FMWT 706 Sacramento… 2001 Winter 2001-01-05 1.54 2.04e+0
## 29 Salinity FMWT 707 Sacramento… 2001 Winter 2001-01-05 0.928 1.05e+0
## 30 Salinity FMWT 708 Sacramento… 2001 Winter 2001-01-05 1.03 1.21e+0
## 31 Salinity FMWT 709 Sacramento… 2001 Winter 2001-01-05 0.454 2.82e-1
## 32 Salinity FMWT 710 Sacramento… 2001 Winter 2001-01-05 0.205 1.19e-1
## 33 Salinity FMWT 711 Sacramento… 2001 Winter 2001-01-05 0.0941 2.99e-1
## 34 Salinity Baystudy 751 Sacramento… 2001 Winter 2001-01-08 0.771 7.95e-1
## 35 Salinity Baystudy 752 Sacramento… 2001 Winter 2001-01-08 1.20 1.49e+0
## 36 Salinity Baystudy 760 Sacramento… 2001 Winter 2001-01-31 0.135 2.34e-1
## 37 Salinity Baystudy 761 Sacramento… 2001 Winter 2001-01-31 0.125 2.49e-1
## 38 Salinity Baystudy 751 Sacramento… 2001 Winter 2001-02-01 0.144 2.19e-1
## 39 Salinity Baystudy 752 Sacramento… 2001 Winter 2001-02-01 0.153 2.04e-1
## 40 Salinity EMP D22 Sacramento… 2001 Winter 2001-02-05 0.281 3.12e-3
## 41 Salinity USGS_SFBS 657 Sacramento… 2001 Winter 2001-02-06 0.13 2.41e-1
## 42 Salinity FMWT 706 Sacramento… 2001 Winter 2001-02-20 0.184 1.53e-1
## 43 Salinity FMWT 707 Sacramento… 2001 Winter 2001-02-20 0.121 2.56e-1
## 44 Salinity FMWT 708 Sacramento… 2001 Winter 2001-02-20 0.114 2.67e-1
## 45 Salinity FMWT 709 Sacramento… 2001 Winter 2001-02-20 0.112 2.70e-1
## 46 Salinity FMWT 710 Sacramento… 2001 Winter 2001-02-20 0.116 2.63e-1
## 47 Salinity FMWT 711 Sacramento… 2001 Winter 2001-02-20 0.114 2.67e-1
## 48 Salinity USGS_SFBS 657 Sacramento… 2001 Winter 2001-02-26 0.11 2.74e-1
## # ℹ 1 more variable: Zscore_flag <lgl>
# All Salinity records for the Middle Sacramento River subregion collected
# from 2006-08-01 through 2006-12-31 (inclusive), in date order
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity",
    SubRegion == "Middle Sacramento River",
    Date >= as.Date("2006-08-01"),
    Date <= as.Date("2006-12-31")
  ) %>%
  arrange(Date)
## # A tibble: 19 × 9
## Parameter Station SubRegion YearAdj Season Date Result Zscore
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl> <dbl>
## 1 Salinity USGS_CAWSC USGS… Middle S… 2006 Summer 2006-08-01 0.0663 0.313
## 2 Salinity EMP C3A Middle S… 2006 Summer 2006-08-14 0.0677 0.267
## 3 Salinity EMP C3A Middle S… 2006 Fall 2006-09-13 0.0807 0.178
## 4 Salinity USGS_CAWSC USGS… Middle S… 2006 Fall 2006-09-19 0.0740 0.0511
## 5 Salinity EMP C3A Middle S… 2006 Fall 2006-10-06 0.0639 0.395
## 6 Salinity USGS_CAWSC USGS… Middle S… 2006 Fall 2006-10-12 0.0577 0.607
## 7 Salinity FMWT 073 Middle S… 2006 Fall 2006-10-19 0.0682 0.247
## 8 Salinity FMWT 735 Middle S… 2006 Fall 2006-10-19 0.0716 0.133
## 9 Salinity EMP C3A Middle S… 2006 Fall 2006-11-07 0.0778 0.0799
## 10 Salinity FMWT 073 Middle S… 2006 Fall 2006-11-16 0.0869 0.391
## 11 Salinity FMWT 735 Middle S… 2006 Fall 2006-11-16 0.609 18.2
## 12 Salinity FMWT 736 Middle S… 2006 Fall 2006-11-16 0.0821 0.227
## 13 Salinity USGS_CAWSC USGS… Middle S… 2006 Fall 2006-11-20 0.0802 0.162
## 14 Salinity EMP C3A Middle S… 2007 Winter 2006-12-06 0.0817 0.212
## 15 Salinity FMWT 073 Middle S… 2007 Winter 2006-12-15 0.0826 0.244
## 16 Salinity FMWT 735 Middle S… 2007 Winter 2006-12-15 0.0884 0.440
## 17 Salinity FMWT 736 Middle S… 2007 Winter 2006-12-15 0.0869 0.391
## 18 Salinity USGS_CAWSC USGS… Middle S… 2007 Winter 2006-12-18 0.0697 0.198
## 19 Salinity USGS_CAWSC USGS… Middle S… 2007 Winter 2006-12-27 0.0817 0.211
## # ℹ 1 more variable: Zscore_flag <lgl>
# All Salinity records for the San Joaquin River at Prisoners Pt subregion
# collected from 1995-02-01 through 1995-05-31 (inclusive), in date order.
# Print up to 40 rows so the whole period is visible.
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity",
    SubRegion == "San Joaquin River at Prisoners Pt",
    Date >= as.Date("1995-02-01"),
    Date <= as.Date("1995-05-31")
  ) %>%
  arrange(Date) %>%
  print(n = 40)
## # A tibble: 36 × 9
## Parameter Station SubRegion YearAdj Season Date Result Zscore
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl> <dbl>
## 1 Salinity EMP D26 San Joaquin … 1995 Winter 1995-02-07 0.0726 0.858
## 2 Salinity Baystudy 864 San Joaquin … 1995 Winter 1995-02-08 0.135 0.137
## 3 Salinity Baystudy 865 San Joaquin … 1995 Winter 1995-02-08 0.135 0.137
## 4 Salinity FMWT 814 San Joaquin … 1995 Winter 1995-02-08 0.558 4.78
## 5 Salinity FMWT 815 San Joaquin … 1995 Winter 1995-02-08 0.568 4.90
## 6 Salinity FMWT 904 San Joaquin … 1995 Winter 1995-02-08 0.554 4.73
## 7 Salinity FMWT 905 San Joaquin … 1995 Winter 1995-02-08 0.621 5.52
## 8 Salinity FMWT 906 San Joaquin … 1995 Winter 1995-02-16 1.14 11.6
## 9 Salinity FMWT 908 San Joaquin … 1995 Winter 1995-02-16 0.973 9.61
## 10 Salinity Baystudy 864 San Joaquin … 1995 Spring 1995-03-06 0.121 0.294
## 11 Salinity Baystudy 865 San Joaquin … 1995 Spring 1995-03-06 0.153 0.0773
## 12 Salinity FMWT 814 San Joaquin … 1995 Spring 1995-03-08 0.689 6.30
## 13 Salinity FMWT 815 San Joaquin … 1995 Spring 1995-03-08 0.671 6.09
## 14 Salinity FMWT 904 San Joaquin … 1995 Spring 1995-03-08 0.680 6.19
## 15 Salinity FMWT 905 San Joaquin … 1995 Spring 1995-03-08 0.762 7.15
## 16 Salinity FMWT 906 San Joaquin … 1995 Spring 1995-03-14 1.32 13.7
## 17 Salinity FMWT 908 San Joaquin … 1995 Spring 1995-03-14 1.78 19.0
## 18 Salinity EMP D26 San Joaquin … 1995 Spring 1995-03-23 0.137 0.114
## 19 Salinity Baystudy 864 San Joaquin … 1995 Spring 1995-04-03 0.162 0.185
## 20 Salinity Baystudy 865 San Joaquin … 1995 Spring 1995-04-03 0.162 0.185
## 21 Salinity FMWT 814 San Joaquin … 1995 Spring 1995-04-05 0.225 0.914
## 22 Salinity FMWT 815 San Joaquin … 1995 Spring 1995-04-05 0.227 0.931
## 23 Salinity FMWT 904 San Joaquin … 1995 Spring 1995-04-05 0.245 1.15
## 24 Salinity FMWT 905 San Joaquin … 1995 Spring 1995-04-05 0.277 1.52
## 25 Salinity FMWT 906 San Joaquin … 1995 Spring 1995-04-05 0.253 1.24
## 26 Salinity FMWT 908 San Joaquin … 1995 Spring 1995-04-05 0.278 1.53
## 27 Salinity EMP D26 San Joaquin … 1995 Spring 1995-04-20 0.0754 0.825
## 28 Salinity 20mm 815 San Joaquin … 1995 Spring 1995-04-25 0.0582 1.02
## 29 Salinity 20mm 906 San Joaquin … 1995 Spring 1995-04-25 0.0965 0.579
## 30 Salinity Baystudy 864 San Joaquin … 1995 Spring 1995-05-01 0.135 0.137
## 31 Salinity Baystudy 865 San Joaquin … 1995 Spring 1995-05-01 0.135 0.137
## 32 Salinity EMP D26 San Joaquin … 1995 Spring 1995-05-08 0.0793 0.780
## 33 Salinity 20mm 815 San Joaquin … 1995 Spring 1995-05-09 0.0606 0.997
## 34 Salinity 20mm 906 San Joaquin … 1995 Spring 1995-05-09 0.0606 0.997
## 35 Salinity 20mm 815 San Joaquin … 1995 Spring 1995-05-23 0.0687 0.902
## 36 Salinity 20mm 906 San Joaquin … 1995 Spring 1995-05-23 0.0702 0.886
## # ℹ 1 more variable: Zscore_flag <lgl>
# All Salinity records for the Lower Mokelumne River subregion collected from
# 2001-08-01 through 2001-12-31 (inclusive), in date order
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity",
    SubRegion == "Lower Mokelumne River",
    Date >= as.Date("2001-08-01"),
    Date <= as.Date("2001-12-31")
  ) %>%
  arrange(Date)
## # A tibble: 11 × 9
## Parameter Station SubRegion YearAdj Season Date Result Zscore
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl> <dbl>
## 1 Salinity FMWT 903 Lower Mokelumne R… 2001 Fall 2001-09-11 0.109 0.375
## 2 Salinity FMWT 919 Lower Mokelumne R… 2001 Fall 2001-09-11 0.115 0.492
## 3 Salinity FMWT 920 Lower Mokelumne R… 2001 Fall 2001-09-11 0.110 0.392
## 4 Salinity FMWT 903 Lower Mokelumne R… 2001 Fall 2001-11-14 0.107 0.342
## 5 Salinity FMWT 919 Lower Mokelumne R… 2001 Fall 2001-11-15 1.06 16.8
## 6 Salinity FMWT 920 Lower Mokelumne R… 2001 Fall 2001-11-15 0.973 15.3
## 7 Salinity FMWT 923 Lower Mokelumne R… 2001 Fall 2001-11-15 1.17 18.7
## 8 Salinity FMWT 903 Lower Mokelumne R… 2002 Winter 2001-12-12 0.0965 0.167
## 9 Salinity FMWT 919 Lower Mokelumne R… 2002 Winter 2001-12-12 0.105 0.308
## 10 Salinity FMWT 920 Lower Mokelumne R… 2002 Winter 2001-12-12 0.146 1.02
## 11 Salinity FMWT 923 Lower Mokelumne R… 2002 Winter 2001-12-12 0.0970 0.175
## # ℹ 1 more variable: Zscore_flag <lgl>
# All Salinity records for the Victoria Canal subregion in Summer of adjusted
# water year 2008, in date order
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Salinity" &
      SubRegion == "Victoria Canal" &
      YearAdj == 2008 &
      Season == "Summer"
  ) %>%
  arrange(Date)
## # A tibble: 16 × 9
## Parameter Station SubRegion YearAdj Season Date Result Zscore
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl> <dbl>
## 1 Salinity STN 918 Victoria… 2008 Summer 2008-06-02 0.221 0.295
## 2 Salinity NCRO Victoria C… Victoria… 2008 Summer 2008-06-06 0.202 0.125
## 3 Salinity 20mm 918 Victoria… 2008 Summer 2008-06-09 0.250 0.560
## 4 Salinity STN 918 Victoria… 2008 Summer 2008-06-16 0.268 0.723
## 5 Salinity NCRO Middle Riv… Victoria… 2008 Summer 2008-06-20 0.178 0.0920
## 6 Salinity 20mm 918 Victoria… 2008 Summer 2008-06-23 0.280 0.836
## 7 Salinity NCRO Victoria C… Victoria… 2008 Summer 2008-06-27 0.169 0.174
## 8 Salinity STN 918 Victoria… 2008 Summer 2008-06-30 0.309 1.10
## 9 Salinity NCRO Middle Riv… Victoria… 2008 Summer 2008-07-03 0.150 0.352
## 10 Salinity 20mm 918 Victoria… 2008 Summer 2008-07-07 0.163 0.232
## 11 Salinity NCRO Victoria C… Victoria… 2008 Summer 2008-07-18 0.123 0.594
## 12 Salinity NCRO Middle Riv… Victoria… 2008 Summer 2008-07-23 0.124 0.582
## 13 Salinity STN 918 Victoria… 2008 Summer 2008-07-28 2.51 21.1
## 14 Salinity NCRO Victoria C… Victoria… 2008 Summer 2008-08-08 0.146 0.382
## 15 Salinity STN 918 Victoria… 2008 Summer 2008-08-11 0.244 0.506
## 16 Salinity NCRO Middle Riv… Victoria… 2008 Summer 2008-08-15 0.146 0.384
## # ℹ 1 more variable: Zscore_flag <lgl>
# All Secchi depth records for the Honker Bay subregion collected from
# 2019-10-01 through 2020-02-28 (inclusive), in date order
df_wq_meas_flag_view %>%
  filter(
    Parameter == "Secchi",
    SubRegion == "Honker Bay",
    Date >= as.Date("2019-10-01"),
    Date <= as.Date("2020-02-28")
  ) %>%
  arrange(Date)
## # A tibble: 17 × 9
## Parameter Station SubRegion YearAdj Season Date Result Zscore
## <chr> <chr> <chr> <dbl> <chr> <date> <dbl> <dbl>
## 1 Secchi FMWT 505 Honker Bay 2019 Fall 2019-10-07 70 1.49
## 2 Secchi Baystudy 534 Honker Bay 2019 Fall 2019-10-08 40 0.0530
## 3 Secchi Baystudy 534 Honker Bay 2019 Fall 2019-11-05 50 0.461
## 4 Secchi FMWT 507 Honker Bay 2019 Fall 2019-11-08 23 0.927
## 5 Secchi FMWT 518 Honker Bay 2019 Fall 2019-11-08 30 0.567
## 6 Secchi FMWT 519 Honker Bay 2019 Fall 2019-11-08 34 0.361
## 7 Secchi FMWT 505 Honker Bay 2019 Fall 2019-11-21 67 1.33
## 8 Secchi FMWT 507 Honker Bay 2020 Winter 2019-12-02 57 0.821
## 9 Secchi FMWT 518 Honker Bay 2020 Winter 2019-12-02 360 16.4
## 10 Secchi FMWT 519 Honker Bay 2020 Winter 2019-12-02 46 0.255
## 11 Secchi Baystudy 534 Honker Bay 2020 Winter 2019-12-03 50 0.461
## 12 Secchi FMWT 505 Honker Bay 2020 Winter 2019-12-10 40 0.0530
## 13 Secchi SKT 519 Honker Bay 2020 Winter 2019-12-17 42 0.0498
## 14 Secchi Baystudy 534 Honker Bay 2020 Winter 2020-01-13 30 0.567
## 15 Secchi SKT 519 Honker Bay 2020 Winter 2020-01-14 44 0.153
## 16 Secchi SKT 519 Honker Bay 2020 Winter 2020-02-14 34 0.361
## 17 Secchi Baystudy 534 Honker Bay 2020 Winter 2020-02-24 30 0.567
## # ℹ 1 more variable: Zscore_flag <lgl>
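After inspecting the data flagged by the Z-score test, a few of the values appear to be valid based on best professional judgment:

- Salinity at station FMWT 908 collected on 1995-03-14
- Salinity at stations FMWT 919, FMWT 920, and FMWT 923 collected on 2001-11-15
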
We will exclude the remaining values flagged by the Z-score test.
# Clear the Z-score flag for the Salinity values judged to be valid (FMWT 908
# on 1995-03-14 and FMWT 919, 920, and 923 on 2001-11-15), drop every data
# point that is still flagged, and then remove the Z-score helper columns
df_wq_meas_c5 <- df_wq_meas_flag %>%
  mutate(
    Zscore_flag = case_when(
      Parameter == "Salinity" & (
        (Station == "FMWT 908" & Date == as.Date("1995-03-14")) |
          (
            Station %in% c("FMWT 919", "FMWT 920", "FMWT 923") &
              Date == as.Date("2001-11-15")
          )
      ) ~ FALSE,
      # All other rows keep the flag assigned by the Z-score test
      .default = Zscore_flag
    )
  ) %>%
  filter(!Zscore_flag) %>%
  select(!starts_with("Zscore"))
Finally, we’ll calculate seasonal-regional averages for each adjusted water year for each water quality measurement parameter.
# Attach the Region for each SubRegion from df_regions. This object holds the
# raw WQ measurement data and is kept for export. Region is moved directly in
# front of SubRegion, and Parameter directly in front of Result.
raw_wq_meas <- df_wq_meas_c5 %>%
  left_join(df_regions, by = "SubRegion") %>%
  relocate(Region, .before = SubRegion) %>%
  relocate(Parameter, .before = Result)
# Seasonal-regional averages: run calc_seas_reg_avg() on the data for each
# Parameter separately, stack the results, and spread them so that each
# Parameter becomes its own column of Result values
df_wq_meas_avg <- raw_wq_meas %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(df_data = map(df_data, \(df_param) calc_seas_reg_avg(df_param))) %>%
  unnest(cols = df_data) %>%
  pivot_wider(names_from = Parameter, values_from = Result)
# Start from the full set of Year-Season-Region combinations so that each one
# is represented, then add the Year Type info and the seasonal-regional
# averages. Both joins use the columns the tables have in common.
lt_avg_wq_meas <- df_yr_seas_reg %>%
  left_join(df_yr_type) %>%
  left_join(df_wq_meas_avg)
Now we’ll process the nutrient data: Dissolved Ammonia, Dissolved Nitrate + Nitrite, and Dissolved Ortho-phosphate. First, we’ll clean up a few issues with the Reporting Limits.
# A few non-detect values in the EMP data set ("<" sign with a missing value)
# have no reporting limit. As suggested by Sarah Perry, use 0.01 as the
# reporting limit for these values for now.
df_nutr_c1 <- df_dwq_lt_filt %>%
  # Keep only the nutrient parameters
  select(-c(Temperature, Salinity, Secchi, starts_with("Chlorophyll"))) %>%
  mutate(
    DissAmmonia = if_else(
      DissAmmonia_Sign == "<" & is.na(DissAmmonia),
      0.01,
      DissAmmonia
    ),
    DissNitrateNitrite = if_else(
      DissNitrateNitrite_Sign == "<" & is.na(DissNitrateNitrite),
      0.01,
      DissNitrateNitrite
    ),
    DissOrthophos = if_else(
      DissOrthophos_Sign == "<" & is.na(DissOrthophos),
      0.01,
      DissOrthophos
    )
  ) %>%
  # Keep records that have a value for at least one nutrient parameter
  filter(
    if_any(c(DissAmmonia, DissNitrateNitrite, DissOrthophos), ~ !is.na(.x))
  ) %>%
  # Replace missing _Sign values with "=" for now; the _Sign variables are all
  # NA for the USGS_SFBS data
  mutate(across(ends_with("_Sign"), ~ replace_na(.x, "=")))
# USGS_SFBS records that report a value for at least one nutrient parameter
# but are missing one or more of the others: we assume the missing parameters
# were below the reporting limit (RL) for that station and day. USGS provided
# RL values for 2006-present; we assume they were constant over the entire
# monitoring program, including the years before 2006. The RL is 0.05 umol/L
# for dissolved ammonia, dissolved nitrate + nitrite, and dissolved
# ortho-phosphate. Converted to mg/L with the molar mass of N or P:
#   dissolved ammonia and dissolved nitrate + nitrite:
#     0.05 * 14.0067 / 1000 = 0.0007 mg/L
#   dissolved ortho-phosphate:
#     0.05 * 30.973761 / 1000 = 0.0015 mg/L
df_nutr_sfbs_blw_rl <- df_nutr_c1 %>%
  filter(
    Source == "USGS_SFBS",
    if_any(c(DissAmmonia, DissNitrateNitrite, DissOrthophos), is.na)
  ) %>%
  # For each parameter, mark missing values as "<" first, then set the value
  # of every "<" record to the RL. The order matters because the second step
  # reads the updated _Sign column.
  mutate(
    DissAmmonia_Sign = if_else(is.na(DissAmmonia), "<", DissAmmonia_Sign),
    DissAmmonia = if_else(DissAmmonia_Sign == "<", 0.0007, DissAmmonia),
    DissNitrateNitrite_Sign = if_else(
      is.na(DissNitrateNitrite),
      "<",
      DissNitrateNitrite_Sign
    ),
    DissNitrateNitrite = if_else(
      DissNitrateNitrite_Sign == "<",
      0.0007,
      DissNitrateNitrite
    ),
    DissOrthophos_Sign = if_else(is.na(DissOrthophos), "<", DissOrthophos_Sign),
    DissOrthophos = if_else(DissOrthophos_Sign == "<", 0.0015, DissOrthophos)
  )
# Swap in the updated USGS_SFBS records: remove their original versions
# (matched on Source, Station, and Datetime) and append the updated rows
df_nutr_c2 <- bind_rows(
  anti_join(
    df_nutr_c1,
    df_nutr_sfbs_blw_rl,
    by = join_by(Source, Station, Datetime)
  ),
  df_nutr_sfbs_blw_rl
)
Next, we’ll filter the data for each nutrient parameter so there is only one sample collected per day at a station, and we’ll restructure the data for continued processing.
# Build a nested data frame with one row per nutrient parameter. Each row's
# df_data holds the records from df_nutr_c2 that have a value for that
# parameter, reduced to the station/date variables plus the parameter's own
# columns (its value and its _Sign).
ndf_nutr <-
  tibble(
    Parameter = c("DissAmmonia", "DissNitrateNitrite", "DissOrthophos")
  ) %>%
  mutate(
    df_data = map(
      Parameter,
      \(param_name) df_nutr_c2 %>%
        # Remove records without a value for this parameter
        drop_na(all_of(param_name)) %>%
        select(
          Source,
          Station,
          Latitude,
          Longitude,
          SubRegion,
          YearAdj,
          Month,
          Season,
          Date,
          Datetime,
          contains(param_name)
        ) %>%
        # Keep only one sample per station-day
        filt_daily_dups()
    )
  )
# Check: for each parameter, list any Source-Station-Date combination that
# still has more than one sample. Every element should have zero rows.
map(
  ndf_nutr$df_data,
  \(df_param) df_param %>%
    count(Source, Station, Date) %>%
    filter(n > 1)
)
## [[1]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
##
## [[2]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
##
## [[3]]
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
# Give the parameter-specific columns common names (the value becomes Result,
# the "_Sign" column becomes Sign) so the per-parameter data frames can be
# stacked into one long-format data frame
df_nutr_c3 <- ndf_nutr %>%
  mutate(
    df_data = map2(
      df_data,
      Parameter,
      \(df_param, param_name) rename(
        df_param,
        Result = all_of(param_name),
        Sign = ends_with("_Sign")
      )
    )
  ) %>%
  unnest(cols = df_data)
Now let’s take a closer look at the temporal data coverage for each Station and parameter.
# Sampling effort by station: build one plot with plot_samp_effort_sta() for
# every Parameter-Source combination, then nest the results so there is one
# row per Parameter
ndf_nutr_se_sta_plt <- df_nutr_c3 %>%
  nest(.by = c(Parameter, Source), .key = "df_data") %>%
  mutate(plt = map(df_data, \(df_src) plot_samp_effort_sta(df_src))) %>%
  nest(.by = Parameter, .key = "ndf_data_source")
For the USGS-CAWSC survey, only station 11447650 (Sacramento River at Freeport) was sampled on a long-term basis for nutrients, so we’ll only include this station from the USGS-CAWSC survey.
# From the USGS_CAWSC survey keep only the station whose name ends with
# "USGS-11447650"; records from all other surveys are kept as they are
df_nutr_c4 <- df_nutr_c3 %>%
  filter(Source != "USGS_CAWSC" | str_detect(Station, "USGS-11447650$"))
Not all of the subregions were sampled consistently from 1975-2021. To make sure that we only include the subregions that were sampled adequately, we will require that a subregion has data for at least 75% of the 47 years from 1975 to 2021 (35 years) in each season.
# For each parameter, keep only the subregions that were sampled adequately:
# all four seasons must be present, and each season must have data in at
# least num_yrs_threshold adjusted water years
df_nutr_c5 <- df_nutr_c4 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(
    df_data_filt = map(
      df_data,
      \(df_param) {
        df_subreg_keep <- df_param %>%
          # Number of years with data for each SubRegion-Season combination
          distinct(SubRegion, YearAdj, Season) %>%
          count(SubRegion, Season, name = "NumYears") %>%
          # The least-sampled season of a subregion must meet the threshold
          filter(min(NumYears) >= num_yrs_threshold, .by = SubRegion) %>%
          # All four seasons need to be represented for the subregion
          count(SubRegion) %>%
          filter(n == 4)

        filter(df_param, SubRegion %in% df_subreg_keep$SubRegion)
      }
    )
  ) %>%
  select(Parameter, df_data_filt) %>%
  unnest(cols = df_data_filt)
Let’s take a look at the sampling effort for the remaining subregions for each season after filtering for each nutrient parameter.
# Sampling effort by SubRegion: build one plot per Parameter with
# plot_samp_effort_subreg()
ndf_nutr_se_subreg_plt <- df_nutr_c5 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(plt = map(df_data, \(df_param) plot_samp_effort_subreg(df_param)))
First, we’ll look at the min-max ranges of each of the nutrient parameters, to see if there are any obvious outliers to exclude from the data set.
# Smallest and largest Result for each nutrient parameter
summarize(
  df_nutr_c5,
  min_val = min(Result),
  max_val = max(Result),
  .by = Parameter
)
## # A tibble: 3 × 3
## Parameter min_val max_val
## <chr> <dbl> <dbl>
## 1 DissAmmonia 0 2.94
## 2 DissNitrateNitrite 0.0007 15.2
## 3 DissOrthophos 0.0015 2
Let’s take a closer look at the minimum DissAmmonia values and the maximum values of all nutrient parameters to see if we should omit them from the data set.
# Drop the columns named in vars_rm_view (when present) so the tables print
# more compactly
df_nutr_c5_view <- select(df_nutr_c5, !any_of(vars_rm_view))

# The 10 lowest DissAmmonia values
filter(df_nutr_c5_view, Parameter == "DissAmmonia") %>%
  slice_min(order_by = Result, n = 10)
## # A tibble: 10 × 8
## Parameter Station SubRegion YearAdj Season Date Sign Result
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl>
## 1 DissAmmonia USGS_CAWSC USG… Middle S… 1980 Winter 1980-01-17 = 0
## 2 DissAmmonia USGS_SFBS 6 Mid Suis… 1988 Fall 1988-10-06 = 1.40e-4
## 3 DissAmmonia USGS_SFBS 6 Mid Suis… 1979 Summer 1979-07-10 = 5.60e-4
## 4 DissAmmonia USGS_SFBS 2 Confluen… 1977 Spring 1977-05-11 < 7 e-4
## 5 DissAmmonia USGS_SFBS 3 Confluen… 1980 Spring 1980-05-22 < 7 e-4
## 6 DissAmmonia USGS_SFBS 6 Mid Suis… 1980 Spring 1980-05-22 < 7 e-4
## 7 DissAmmonia USGS_SFBS 657 Sacramen… 2009 Winter 2009-01-13 < 7 e-4
## 8 DissAmmonia USGS_SFBS 7 West Sui… 1977 Summer 1977-07-11 < 7 e-4
## 9 DissAmmonia USGS_SFBS 6 Mid Suis… 1979 Fall 1979-09-18 = 1.54e-3
## 10 DissAmmonia USGS_SFBS 6 Mid Suis… 1980 Summer 1980-08-19 = 1.68e-3
# The 10 highest DissAmmonia values
filter(df_nutr_c5_view, Parameter == "DissAmmonia") %>%
  slice_max(order_by = Result, n = 10)
## # A tibble: 10 × 8
## Parameter Station SubRegion YearAdj Season Date Sign Result
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl>
## 1 DissAmmonia EMP P8 San Joaquin River… 2004 Winter 2004-02-26 = 2.94
## 2 DissAmmonia EMP P8 San Joaquin River… 2000 Winter 1999-12-16 = 2.4
## 3 DissAmmonia EMP P8 San Joaquin River… 2001 Winter 2001-01-03 = 2.4
## 4 DissAmmonia EMP P8 San Joaquin River… 2006 Winter 2005-12-20 = 2.3
## 5 DissAmmonia EMP P8 San Joaquin River… 1991 Spring 1991-03-06 = 2
## 6 DissAmmonia EMP P8 San Joaquin River… 1994 Winter 1994-02-03 = 2
## 7 DissAmmonia EMP P8 San Joaquin River… 2001 Winter 2001-02-01 = 2
## 8 DissAmmonia EMP P8 San Joaquin River… 1991 Spring 1991-03-22 = 1.7
## 9 DissAmmonia EMP P8 San Joaquin River… 2003 Winter 2002-12-10 = 1.7
## 10 DissAmmonia EMP P8 San Joaquin River… 2004 Winter 2004-01-15 = 1.61
# The 10 highest DissNitrateNitrite values
filter(df_nutr_c5_view, Parameter == "DissNitrateNitrite") %>%
  slice_max(order_by = Result, n = 10)
## # A tibble: 10 × 8
## Parameter Station SubRegion YearAdj Season Date Sign Result
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl>
## 1 DissNitrateNitrite EMP D28A Old River 2017 Summer 2017-07-14 = 15.2
## 2 DissNitrateNitrite EMP D4 Lower Sac… 2019 Winter 2019-01-10 = 6.07
## 3 DissNitrateNitrite EMP P8 San Joaqu… 2015 Winter 2015-02-12 = 4.57
## 4 DissNitrateNitrite EMP P8 San Joaqu… 2014 Winter 2014-02-06 = 3.94
## 5 DissNitrateNitrite EMP P8 San Joaqu… 2009 Winter 2009-02-18 = 3.7
## 6 DissNitrateNitrite EMP D4 Lower Sac… 2017 Summer 2017-06-14 = 3.68
## 7 DissNitrateNitrite EMP P8 San Joaqu… 2014 Spring 2014-03-07 = 3.68
## 8 DissNitrateNitrite EMP P8 San Joaqu… 2009 Winter 2008-12-04 = 3.6
## 9 DissNitrateNitrite EMP P8 San Joaqu… 2012 Spring 2012-03-15 = 3.6
## 10 DissNitrateNitrite EMP P8 San Joaqu… 2019 Winter 2019-01-15 = 3.6
# The 10 highest DissOrthophos values
filter(df_nutr_c5_view, Parameter == "DissOrthophos") %>%
  slice_max(order_by = Result, n = 10)
## # A tibble: 10 × 8
## Parameter Station SubRegion YearAdj Season Date Sign Result
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl>
## 1 DissOrthophos EMP D7 Grizzly Bay 2019 Fall 2019-11-07 < 2
## 2 DissOrthophos EMP P8 San Joaquin Ri… 1989 Spring 1989-03-28 = 0.56
## 3 DissOrthophos EMP P8 San Joaquin Ri… 1986 Winter 1986-02-12 = 0.51
## 4 DissOrthophos EMP P8 San Joaquin Ri… 2013 Fall 2013-09-24 = 0.5
## 5 DissOrthophos EMP P8 San Joaquin Ri… 1991 Spring 1991-04-08 = 0.49
## 6 DissOrthophos EMP P8 San Joaquin Ri… 2004 Winter 2004-02-26 = 0.48
## 7 DissOrthophos EMP P8 San Joaquin Ri… 2015 Winter 2015-02-12 = 0.48
## 8 DissOrthophos EMP P8 San Joaquin Ri… 2020 Fall 2020-09-09 = 0.474
## 9 DissOrthophos EMP MD10 Disappointment… 1979 Winter 1979-01-16 = 0.46
## 10 DissOrthophos EMP P8 San Joaquin Ri… 2020 Fall 2020-10-08 = 0.456
A few of these values look questionable. For now, we will exclude the one DissAmmonia value equal to zero from the data set, and see if the other values are flagged by the modified Z-score test.
# Keep only results greater than zero; this removes the DissAmmonia value
# reported as zero
df_nutr_c6 <- filter(df_nutr_c5, Result > 0)
There are a few values that are less than the reporting limit with reporting limits that are very high compared to the range of the values for the parameter (> 75th percentile). This includes the highest DissOrthophos value in the data set. We will flag and take a closer look at these values for possible removal from the data set.
# Run flag_high_rl() on the data for each parameter separately with a
# percentile threshold of 0.75, then stack the results again. The printed
# results below show that it adds the HighRL_flag column.
df_nutr_high_rl_flag <- df_nutr_c6 %>%
  nest(.by = Parameter, .key = "df_data") %>%
  mutate(
    df_data = map(
      df_data,
      \(df_param) flag_high_rl(df_param, perc_thresh = 0.75)
    )
  ) %>%
  unnest(cols = df_data)
# Show the data points flagged for a high reporting limit, without the
# columns named in vars_rm_view; print up to 40 rows so all of them are
# visible
filter(df_nutr_high_rl_flag, HighRL_flag) %>%
  select(!any_of(vars_rm_view)) %>%
  print(n = 40)
## # A tibble: 34 × 9
## Parameter Station SubRegion YearAdj Season Date Sign Result
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl>
## 1 DissAmmonia EMP D10 Confluen… 2019 Fall 2019-10-07 < 0.2
## 2 DissAmmonia EMP D19 Franks T… 2019 Fall 2019-10-03 < 0.2
## 3 DissAmmonia EMP D19 Franks T… 2019 Fall 2019-11-04 < 0.2
## 4 DissAmmonia EMP D19 Franks T… 2020 Winter 2020-01-15 < 0.25
## 5 DissAmmonia EMP D19 Franks T… 2020 Spring 2020-03-02 < 0.25
## 6 DissAmmonia EMP D26 San Joaq… 2019 Fall 2019-10-04 < 0.2
## 7 DissAmmonia EMP D26 San Joaq… 2019 Fall 2019-11-05 < 0.2
## 8 DissAmmonia EMP D28A Old River 2019 Fall 2019-10-03 < 0.2
## 9 DissAmmonia EMP D28A Old River 2019 Fall 2019-11-04 < 0.2
## 10 DissAmmonia EMP D28A Old River 2020 Winter 2020-01-15 < 0.25
## 11 DissAmmonia EMP D28A Old River 2020 Spring 2020-03-02 < 0.25
## 12 DissAmmonia EMP D6 West Sui… 2019 Fall 2019-10-07 < 0.2
## 13 DissAmmonia EMP D7 Grizzly … 2019 Fall 2019-11-07 < 0.2
## 14 DissAmmonia EMP D8 Mid Suis… 2019 Fall 2019-10-07 < 0.2
## 15 DissAmmonia EMP D8 Mid Suis… 2019 Fall 2019-11-06 < 0.2
## 16 DissAmmonia EMP EZ2 Confluen… 2019 Fall 2019-10-07 < 0.2
## 17 DissAmmonia EMP EZ2 Lower Sa… 2019 Fall 2019-11-06 < 0.2
## 18 DissAmmonia EMP EZ6 Confluen… 2019 Fall 2019-10-07 < 0.2
## 19 DissAmmonia EMP MD10A Disappoi… 2019 Fall 2019-10-04 < 0.2
## 20 DissAmmonia EMP MD10A Disappoi… 2019 Fall 2019-11-05 < 0.2
## 21 DissAmmonia EMP MD10A Disappoi… 2020 Winter 2020-01-16 < 0.25
## 22 DissAmmonia EMP MD10A Disappoi… 2020 Winter 2020-02-14 < 0.25
## 23 DissAmmonia EMP MD10A Disappoi… 2020 Spring 2020-03-03 < 0.25
## 24 DissAmmonia EMP P8 San Joaq… 2019 Fall 2019-10-04 < 0.2
## 25 DissAmmonia EMP P8 San Joaq… 2019 Fall 2019-11-05 < 0.2
## 26 DissAmmonia EMP P8 San Joaq… 2020 Winter 2020-02-14 < 0.25
## 27 DissNitrateNitrite EMP D10 Confluen… 2019 Fall 2019-10-07 < 0.55
## 28 DissNitrateNitrite EMP D6 West Sui… 2019 Fall 2019-10-07 < 2.8
## 29 DissNitrateNitrite EMP D7 Grizzly … 2019 Fall 2019-10-08 < 2.8
## 30 DissNitrateNitrite EMP D7 Grizzly … 2019 Fall 2019-11-07 < 2.8
## 31 DissNitrateNitrite EMP D8 Mid Suis… 2019 Fall 2019-10-07 < 1.1
## 32 DissNitrateNitrite EMP EZ6 Confluen… 2019 Fall 2019-10-07 < 0.55
## 33 DissOrthophos EMP D4 Lower Sa… 2019 Fall 2019-11-06 < 0.4
## 34 DissOrthophos EMP D7 Grizzly … 2019 Fall 2019-11-07 < 2
## # ℹ 1 more variable: HighRL_flag <lgl>
# Distribution of Result for each parameter: minimum, 25th percentile,
# median, 75th percentile, and maximum
summarize(
  df_nutr_c6,
  min_val = min(Result),
  first_quantile = quantile(Result, probs = 0.25),
  median = median(Result),
  third_quantile = quantile(Result, probs = 0.75),
  max_val = max(Result),
  .by = Parameter
)
## # A tibble: 3 × 6
## Parameter min_val first_quantile median third_quantile max_val
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 DissAmmonia 0.000140 0.03 0.0633 0.12 2.94
## 2 DissNitrateNitrite 0.0007 0.19 0.31 0.46 15.2
## 3 DissOrthophos 0.0015 0.05 0.07 0.09 2
Upon closer inspection, all values that are less than the reporting limit with reporting limits that are greater than the 75th percentile of the values for the parameter should be removed from the data set.
# Remove the data points flagged for a high reporting limit, then drop the
# flag column since it is no longer needed
df_nutr_c7 <- select(
  filter(df_nutr_high_rl_flag, !HighRL_flag),
  !HighRL_flag
)
Next, we’ll look for outliers by using a modified Z-score test, flagging data points with scores greater than 15 for each parameter within each subregion.
# Run the modified Z-score test (threshold of 15) separately for each
# Parameter-SubRegion group
df_nutr_modzscore_flag <- df_nutr_c7 |>
  group_by(Parameter, SubRegion) |>
  flag_modzscore(threshold = 15) |>
  ungroup()

# Build a narrower copy for viewing by dropping the variables listed in
# vars_rm_view, then show the flagged data points
df_nutr_modzscore_flag_view <-
  select(df_nutr_modzscore_flag, -any_of(vars_rm_view))
filter(df_nutr_modzscore_flag_view, ModZscore_flag)
## # A tibble: 5 × 10
## Parameter Station SubRegion YearAdj Season Date Sign Result ModZscore
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl>
## 1 DissAmmonia EMP D2… Old River 1996 Winter 1996-01-25 = 1 22.1
## 2 DissAmmonia EMP P8 San Joaq… 2004 Winter 2004-02-26 = 2.94 18.4
## 3 DissNitrat… EMP D2… Old River 2017 Summer 2017-07-14 = 15.2 45.2
## 4 DissNitrat… EMP D4 Lower Sa… 2017 Summer 2017-06-14 = 3.68 15.3
## 5 DissNitrat… EMP D4 Lower Sa… 2019 Winter 2019-01-10 = 6.07 26.2
## # ℹ 1 more variable: ModZscore_flag <lgl>
# Context for the flagged value: DissAmmonia in Old River, Nov 1995 - Mar 1996
df_nutr_modzscore_flag_view |>
  filter(
    Parameter == "DissAmmonia",
    SubRegion == "Old River",
    Date >= as.Date("1995-11-01"),
    Date <= as.Date("1996-03-31")
  ) |>
  arrange(Date)
## # A tibble: 5 × 10
## Parameter Station SubRegion YearAdj Season Date Sign Result ModZscore
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl>
## 1 DissAmmonia EMP D2… Old River 1995 Fall 1995-11-14 = 0.02 0.227
## 2 DissAmmonia EMP D2… Old River 1996 Winter 1995-12-14 = 0.07 0.910
## 3 DissAmmonia EMP D2… Old River 1996 Winter 1996-01-25 = 1 22.1
## 4 DissAmmonia EMP D2… Old River 1996 Winter 1996-02-08 = 0.15 2.73
## 5 DissAmmonia EMP D2… Old River 1996 Spring 1996-03-12 = 0.04 0.227
## # ℹ 1 more variable: ModZscore_flag <lgl>
# Context for the flagged value: DissAmmonia in San Joaquin River near
# Stockton, Nov 2003 - Apr 2004
df_nutr_modzscore_flag_view |>
  filter(
    Parameter == "DissAmmonia",
    SubRegion == "San Joaquin River near Stockton",
    Date >= as.Date("2003-11-01"),
    Date <= as.Date("2004-04-30")
  ) |>
  arrange(Date)
## # A tibble: 6 × 10
## Parameter Station SubRegion YearAdj Season Date Sign Result ModZscore
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl>
## 1 DissAmmonia EMP P8 San Joaq… 2003 Fall 2003-11-17 = 0.2 0.585
## 2 DissAmmonia EMP P8 San Joaq… 2004 Winter 2003-12-16 = 1.22 7.21
## 3 DissAmmonia EMP P8 San Joaq… 2004 Winter 2004-01-15 = 1.61 9.75
## 4 DissAmmonia EMP P8 San Joaq… 2004 Winter 2004-02-26 = 2.94 18.4
## 5 DissAmmonia EMP P8 San Joaq… 2004 Spring 2004-03-15 = 0.89 5.07
## 6 DissAmmonia EMP P8 San Joaq… 2004 Spring 2004-04-12 = 0.09 0.130
## # ℹ 1 more variable: ModZscore_flag <lgl>
# Context for the flagged value: DissNitrateNitrite in Old River,
# May 2017 - Sept 2017
df_nutr_modzscore_flag_view |>
  filter(
    Parameter == "DissNitrateNitrite",
    SubRegion == "Old River",
    Date >= as.Date("2017-05-01"),
    Date <= as.Date("2017-09-30")
  ) |>
  arrange(Date)
## # A tibble: 5 × 10
## Parameter Station SubRegion YearAdj Season Date Sign Result ModZscore
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl>
## 1 DissNitrat… EMP D2… Old River 2017 Spring 2017-05-17 = 0.28 0.0910
## 2 DissNitrat… EMP D2… Old River 2017 Summer 2017-06-15 = 0.11 0.607
## 3 DissNitrat… EMP D2… Old River 2017 Summer 2017-07-14 = 15.2 45.2
## 4 DissNitrat… EMP D2… Old River 2017 Summer 2017-08-14 = 0.12 0.576
## 5 DissNitrat… EMP D2… Old River 2017 Fall 2017-09-12 = 0.14 0.516
## # ℹ 1 more variable: ModZscore_flag <lgl>
# Context for the flagged value: DissNitrateNitrite in Lower Sacramento River,
# Apr 2017 - Aug 2017
df_nutr_modzscore_flag_view |>
  filter(
    Parameter == "DissNitrateNitrite",
    SubRegion == "Lower Sacramento River",
    Date >= as.Date("2017-04-01"),
    Date <= as.Date("2017-08-31")
  ) |>
  arrange(Date)
## # A tibble: 10 × 10
## Parameter Station SubRegion YearAdj Season Date Sign Result ModZscore
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl>
## 1 DissNitra… EMP D4 Lower Sa… 2017 Spring 2017-04-05 = 0.23 0.409
## 2 DissNitra… USGS_S… Lower Sa… 2017 Spring 2017-04-18 = 0.0901 1.05
## 3 DissNitra… EMP D4 Lower Sa… 2017 Spring 2017-05-19 = 0.12 0.910
## 4 DissNitra… EMP D4 Lower Sa… 2017 Summer 2017-06-14 = 3.68 15.3
## 5 DissNitra… USGS_S… Lower Sa… 2017 Summer 2017-06-22 = 0.194 0.573
## 6 DissNitra… EMP D4 Lower Sa… 2017 Summer 2017-07-13 = 0.07 1.14
## 7 DissNitra… USGS_S… Lower Sa… 2017 Summer 2017-07-25 = 0.223 0.441
## 8 DissNitra… EMP D4 Lower Sa… 2017 Summer 2017-08-10 = 0.26 0.273
## 9 DissNitra… EMP EZ2 Lower Sa… 2017 Summer 2017-08-16 = 0.26 0.273
## 10 DissNitra… USGS_S… Lower Sa… 2017 Summer 2017-08-22 = 0.187 0.605
## # ℹ 1 more variable: ModZscore_flag <lgl>
# Context for the flagged value: DissNitrateNitrite in Lower Sacramento River,
# Nov 2018 - Mar 2019
df_nutr_modzscore_flag_view |>
  filter(
    Parameter == "DissNitrateNitrite",
    SubRegion == "Lower Sacramento River",
    Date >= as.Date("2018-11-01"),
    Date <= as.Date("2019-03-31")
  ) |>
  arrange(Date)
## # A tibble: 10 × 10
## Parameter Station SubRegion YearAdj Season Date Sign Result ModZscore
## <chr> <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl>
## 1 DissNitra… EMP D4 Lower Sa… 2018 Fall 2018-11-13 = 0.429 0.496
## 2 DissNitra… USGS_S… Lower Sa… 2019 Winter 2018-12-05 = 0.477 0.715
## 3 DissNitra… EMP D4 Lower Sa… 2019 Winter 2018-12-11 = 0.53 0.955
## 4 DissNitra… EMP EZ6 Lower Sa… 2019 Winter 2018-12-11 = 0.532 0.964
## 5 DissNitra… EMP D4 Lower Sa… 2019 Winter 2019-01-10 = 6.07 26.2
## 6 DissNitra… EMP EZ2 Lower Sa… 2019 Winter 2019-01-10 = 0.53 0.955
## 7 DissNitra… EMP D4 Lower Sa… 2019 Winter 2019-02-12 = 0.44 0.546
## 8 DissNitra… USGS_S… Lower Sa… 2019 Winter 2019-02-20 = 0.277 0.195
## 9 DissNitra… EMP D4 Lower Sa… 2019 Spring 2019-03-12 = 0.27 0.227
## 10 DissNitra… USGS_S… Lower Sa… 2019 Spring 2019-03-20 = 0.188 0.601
## # ℹ 1 more variable: ModZscore_flag <lgl>
After inspecting the data flagged by the modified Z-score test, the DissAmmonia values appear to be valid based on best professional judgment, so we will only exclude the DissNitrateNitrite values flagged by the modified Z-score test.
# The flagged DissAmmonia values are kept, so reset their flag to FALSE; every
# other record that is still flagged (the DissNitrateNitrite values) is
# removed, and the ModZscore helper columns are dropped afterwards
df_nutr_c8 <- df_nutr_modzscore_flag |>
  mutate(
    ModZscore_flag = if_else(Parameter != "DissAmmonia", ModZscore_flag, FALSE)
  ) |>
  filter(!ModZscore_flag) |>
  select(-starts_with("ModZscore"))
Finally, we’ll calculate seasonal-regional averages for each adjusted water year for each nutrient parameter. Before calculating the averages, we will need to replace values measured below the analytical reporting limit with a random number drawn from a uniform distribution between zero and the reporting limit.
# Attach the Region for each SubRegion and tidy the column order; this object
# is kept as the raw nutrient data set to export
raw_nutr <-
  left_join(df_nutr_c8, df_regions, by = "SubRegion") |>
  relocate(Region, .before = SubRegion) |>
  relocate(Parameter, .before = Sign)
# For each Parameter separately: substitute the <RL values (random numbers from
# a uniform distribution) and calculate the seasonal-regional averages. The
# per-Parameter results are then spread into one column per Parameter.
df_nutr_avg <- raw_nutr |>
  nest(.by = Parameter, .key = "df_data") |>
  mutate(
    df_data = map(df_data, \(df_param) calc_seas_reg_avg(replace_blw_rl(df_param)))
  ) |>
  unnest(cols = df_data) |>
  pivot_wider(names_from = Parameter, values_from = Result)
# Start from the full set of Year-Season-Region combinations so that each one
# is represented, then add the Year Type info and the nutrient averages
lt_avg_nutr <- df_yr_seas_reg |>
  left_join(df_yr_type) |>
  left_join(df_nutr_avg) |>
  # Remove Suisun Marsh Region since all values are NA
  filter(Region != "Suisun Marsh")
Lastly, we’ll process the discrete Chlorophyll data.
# Pull the station/date identifiers and the Chlorophyll columns out of the
# filtered discrete WQ data and prepare them for continued processing
df_chla_c1 <- df_dwq_lt_filt |>
  select(
    Source, Station, Latitude, Longitude, SubRegion,
    YearAdj, Month, Season, Date, Datetime,
    contains("Chlorophyll")
  ) |>
  # Records without a Chlorophyll value are of no use here
  drop_na(Chlorophyll) |>
  # A missing Chlorophyll_Sign is filled in as "="
  mutate(Chlorophyll_Sign = coalesce(Chlorophyll_Sign, "=")) |>
  # Keep only one sample per station-day
  filt_daily_dups() |>
  # Later functions expect the generic Sign and Result variable names
  rename(Result = Chlorophyll, Sign = Chlorophyll_Sign)
# Sanity check: list any station-days that still have more than one sample
# (expected to return zero rows)
count(df_chla_c1, Source, Station, Date) |>
  filter(n > 1)
## # A tibble: 0 × 4
## # ℹ 4 variables: Source <chr>, Station <chr>, Date <date>, n <int>
Now let’s take a closer look at the temporal data coverage for each Station.
# Create one sampling effort by station plot for each Source; the plots are
# stored in the `plt` list-column next to the nested data
ndf_chla_se_sta_plt <- df_chla_c1 |>
  nest(.by = Source, .key = "df_data") |>
  mutate(plt = map(df_data, \(df_src) plot_samp_effort_sta(df_src)))
For the USGS-CAWSC survey, chlorophyll data is only available from 2015-2021 for most of the stations, so we’ll exclude this survey from the chlorophyll analyses.
# Exclude the USGS_CAWSC survey from the chlorophyll data
df_chla_c2 <- filter(df_chla_c1, Source != "USGS_CAWSC")
Not all of the subregions were sampled consistently from 1975-2021. To make sure that we only include the subregions that were sampled adequately, we will require that a subregion has data in each season for at least 75% of the 47 years from 1975 to 2021 (35 years).
# Count the number of years with data for each SubRegion-Season combination,
# keep the SubRegions where every season present has at least 35 years of data,
# and then require that all four seasons are represented
df_chla_subreg_seas <- df_chla_c2 |>
  distinct(SubRegion, YearAdj, Season) |>
  count(SubRegion, Season, name = "NumYears") |>
  filter(all(NumYears >= 35), .by = SubRegion) |>
  # make sure each season meets the threshold for each SubRegion
  count(SubRegion) |>
  filter(n == 4)

# Restrict the chlorophyll data to the SubRegions retained above
df_chla_c3 <- semi_join(df_chla_c2, df_chla_subreg_seas, by = "SubRegion")
Let’s take a look at the sampling effort for the remaining subregions for each season after filtering.
# Plot the sampling effort of the remaining subregions
df_chla_c3 |> plot_samp_effort_subreg()
First, we’ll look at the min-max ranges for Chlorophyll, to see if there are any obvious outliers to exclude from the data set.
# Six-number summary of the Chlorophyll values
summary(df_chla_c3[["Result"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.020 1.540 2.690 7.946 5.980 364.330
Let’s take a closer look at the maximum Chlorophyll values to see if we should omit them from the data set.
# Show the 20 largest Chlorophyll values, without the variables listed in
# vars_rm_view
df_chla_c3 |>
  select(-any_of(vars_rm_view)) |>
  slice_max(order_by = Result, n = 20)
## # A tibble: 20 × 7
## Station SubRegion YearAdj Season Date Sign Result
## <chr> <chr> <dbl> <chr> <date> <chr> <dbl>
## 1 EMP P12 Grant Li… 1976 Spring 1976-05-07 = 364.
## 2 EMP P12 Grant Li… 1977 Spring 1977-05-09 = 352.
## 3 NCRO Middle River at Undine… Grant Li… 2008 Summer 2008-06-04 = 340.
## 4 NCRO Old River below Headwa… Grant Li… 2009 Summer 2009-07-14 = 325
## 5 EMP P12 Grant Li… 1976 Summer 1976-06-04 = 297.
## 6 NCRO Old River near Head Grant Li… 2003 Summer 2003-08-20 = 288
## 7 EMP P12 Grant Li… 1976 Spring 1976-05-21 = 278.
## 8 NCRO Middle River at Undine… Grant Li… 2008 Summer 2008-06-11 = 276.
## 9 EMP P12 Grant Li… 1977 Spring 1977-04-26 = 275.
## 10 NCRO Doughty Cut near Grant… Grant Li… 2007 Summer 2007-07-03 = 269
## 11 NCRO Old River below Headwa… Grant Li… 2007 Summer 2007-07-03 = 268
## 12 NCRO Old River below Headwa… Grant Li… 2007 Summer 2007-08-01 = 267
## 13 EMP P12 Grant Li… 1976 Summer 1976-06-18 = 266.
## 14 EMP P12 Grant Li… 1977 Summer 1977-06-23 = 266.
## 15 NCRO Middle River at Undine… Grant Li… 2007 Summer 2007-07-03 = 265
## 16 NCRO Middle River at Undine… Grant Li… 2009 Summer 2009-06-24 = 264
## 17 EMP P12 Grant Li… 1977 Spring 1977-04-11 = 262.
## 18 NCRO Doughty Cut near Grant… Grant Li… 2009 Summer 2009-06-25 = 256
## 19 NCRO Grantline Canal above … Grant Li… 2009 Summer 2009-06-25 = 244
## 20 NCRO Doughty Cut near Grant… Grant Li… 2010 Summer 2010-07-15 = 241
The maximum Chlorophyll values appear to be valid. Next, we’ll look for values that are less than the reporting limit with reporting limits that are very high compared to the range of the values for the parameter (> 75th percentile).
# Flag the <RL values whose reporting limit is high relative to the data,
# using the 75th percentile as the threshold
df_chla_high_rl_flag <- df_chla_c3 |> flag_high_rl(perc_thresh = 0.75)
# Show the flagged data points
filter(df_chla_high_rl_flag, HighRL_flag)
## # A tibble: 0 × 13
## # ℹ 13 variables: Source <chr>, Station <chr>, Latitude <dbl>, Longitude <dbl>,
## # SubRegion <chr>, YearAdj <dbl>, Month <dbl>, Season <chr>, Date <date>,
## # Datetime <dttm>, Sign <chr>, Result <dbl>, HighRL_flag <lgl>
None of the Chlorophyll values less than the reporting limit have RL values that are greater than the 75th percentile of the data. Next, we’ll look for outliers by using a modified Z-score test flagging data points with scores greater than 15 grouped by subregion.
# Run the modified Z-score test (threshold of 15) within each SubRegion
df_chla_modzscore_flag <- df_chla_c3 |>
  group_by(SubRegion) |>
  flag_modzscore(threshold = 15) |>
  ungroup()

# Build a narrower copy for viewing by dropping the variables listed in
# vars_rm_view
df_chla_modzscore_flag_view <-
  select(df_chla_modzscore_flag, -any_of(vars_rm_view))

# Print the flagged data points, largest values first within each SubRegion;
# n is raised so that rows are not truncated
df_chla_modzscore_flag_view |>
  filter(ModZscore_flag) |>
  arrange(SubRegion, desc(Result)) |>
  print(n = 180)
## # A tibble: 176 × 9
## Station SubRegion YearAdj Season Date Sign Result ModZscore
## <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl>
## 1 USGS_SFBS 3 Confluen… 2016 Spring 2016-05-18 = 81.3 35.9
## 2 USGS_SFBS 2 Confluen… 2016 Spring 2016-05-18 = 64.5 28.3
## 3 EMP D10 Confluen… 1979 Summer 1979-07-06 = 49.4 21.4
## 4 EMP D10 Confluen… 1982 Summer 1982-07-15 = 46.5 20.1
## 5 EMP D10 Confluen… 1979 Summer 1979-08-17 = 46.3 20.0
## 6 EMP D10 Confluen… 1978 Summer 1978-08-24 = 43.8 18.9
## 7 EMP D10 Confluen… 1982 Summer 1982-06-30 = 41.8 18.0
## 8 EMP D10 Confluen… 1979 Fall 1979-09-13 = 40.9 17.6
## 9 EMP EZ6 Confluen… 2013 Spring 2013-03-22 = 38.8 16.6
## 10 EMP D10 Confluen… 1979 Summer 1979-07-19 = 37.9 16.2
## 11 NCRO Fisherman's… Franks T… 2015 Spring 2015-04-15 = 83.3 39.8
## 12 EMP D19 Franks T… 1988 Summer 1988-06-08 = 78.7 37.5
## 13 NCRO Fisherman's… Franks T… 2014 Fall 2014-10-01 = 74.2 35.3
## 14 EMP D19 Franks T… 1982 Summer 1982-06-29 = 71.9 34.2
## 15 EMP D19 Franks T… 2016 Spring 2016-05-11 = 62.4 29.5
## 16 EMP D19 Franks T… 1986 Summer 1986-06-18 = 54.6 25.8
## 17 EMP D19 Franks T… 1982 Summer 1982-07-14 = 52.3 24.6
## 18 EMP D19 Franks T… 1993 Spring 1993-05-17 = 48.8 22.9
## 19 EMP D19 Franks T… 1984 Summer 1984-06-06 = 42.0 19.6
## 20 NCRO False River… Franks T… 2015 Spring 2015-04-15 = 42.0 19.5
## 21 NCRO False River… Franks T… 2015 Spring 2015-04-01 = 40.4 18.8
## 22 EMP D19 Franks T… 1985 Spring 1985-05-14 = 40.3 18.7
## 23 EMP D19 Franks T… 1976 Spring 1976-04-07 = 39.4 18.3
## 24 EMP D19 Franks T… 1981 Spring 1981-05-13 = 38.4 17.8
## 25 EMP D19 Franks T… 1976 Spring 1976-03-25 = 37.0 17.1
## 26 EMP D19 Franks T… 1976 Spring 1976-04-21 = 37.0 17.1
## 27 EMP P12 Grant Li… 1976 Spring 1976-05-07 = 364. 33.0
## 28 EMP P12 Grant Li… 1977 Spring 1977-05-09 = 352. 31.9
## 29 NCRO Middle Rive… Grant Li… 2008 Summer 2008-06-04 = 340. 30.7
## 30 NCRO Old River b… Grant Li… 2009 Summer 2009-07-14 = 325 29.4
## 31 EMP P12 Grant Li… 1976 Summer 1976-06-04 = 297. 26.8
## 32 NCRO Old River n… Grant Li… 2003 Summer 2003-08-20 = 288 26.0
## 33 EMP P12 Grant Li… 1976 Spring 1976-05-21 = 278. 25.0
## 34 NCRO Middle Rive… Grant Li… 2008 Summer 2008-06-11 = 276. 24.8
## 35 EMP P12 Grant Li… 1977 Spring 1977-04-26 = 275. 24.8
## 36 NCRO Doughty Cut… Grant Li… 2007 Summer 2007-07-03 = 269 24.2
## 37 NCRO Old River b… Grant Li… 2007 Summer 2007-07-03 = 268 24.1
## 38 NCRO Old River b… Grant Li… 2007 Summer 2007-08-01 = 267 24.0
## 39 EMP P12 Grant Li… 1976 Summer 1976-06-18 = 266. 23.9
## 40 EMP P12 Grant Li… 1977 Summer 1977-06-23 = 266. 23.9
## 41 NCRO Middle Rive… Grant Li… 2007 Summer 2007-07-03 = 265 23.8
## 42 NCRO Middle Rive… Grant Li… 2009 Summer 2009-06-24 = 264 23.8
## 43 EMP P12 Grant Li… 1977 Spring 1977-04-11 = 262. 23.6
## 44 NCRO Doughty Cut… Grant Li… 2009 Summer 2009-06-25 = 256 23.0
## 45 NCRO Grantline C… Grant Li… 2009 Summer 2009-06-25 = 244 21.9
## 46 NCRO Doughty Cut… Grant Li… 2010 Summer 2010-07-15 = 241 21.6
## 47 NCRO Middle Rive… Grant Li… 2007 Summer 2007-07-19 = 239 21.4
## 48 NCRO Old River b… Grant Li… 2004 Summer 2004-07-13 = 239 21.4
## 49 NCRO Old River b… Grant Li… 2008 Summer 2008-06-11 = 239. 21.4
## 50 NCRO Doughty Cut… Grant Li… 2016 Summer 2016-07-28 = 237. 21.2
## 51 NCRO Middle Rive… Grant Li… 2008 Summer 2008-06-24 = 236. 21.2
## 52 NCRO Doughty Cut… Grant Li… 2009 Summer 2009-07-14 = 233 20.9
## 53 NCRO Old River b… Grant Li… 2009 Summer 2009-06-25 = 232 20.8
## 54 NCRO Grant Ln Ca… Grant Li… 2009 Summer 2009-06-24 = 224 20.1
## 55 NCRO Old River b… Grant Li… 2010 Summer 2010-07-15 = 223 20.0
## 56 NCRO Old River @… Grant Li… 2004 Summer 2004-07-13 = 222 19.9
## 57 NCRO Old River @… Grant Li… 2007 Spring 2007-04-10 = 221 19.8
## 58 EMP P12 Grant Li… 1977 Summer 1977-07-07 = 219. 19.6
## 59 NCRO Middle Rive… Grant Li… 2003 Summer 2003-08-19 = 219 19.6
## 60 NCRO Old River b… Grant Li… 2003 Summer 2003-08-19 = 218 19.5
## 61 NCRO Doughty Cut… Grant Li… 2003 Summer 2003-08-19 = 217 19.4
## 62 NCRO Old River b… Grant Li… 2012 Summer 2012-07-06 = 216 19.3
## 63 EMP P12 Grant Li… 1976 Summer 1976-08-02 = 213. 19.1
## 64 NCRO Old River b… Grant Li… 2004 Summer 2004-06-29 = 212 19.0
## 65 NCRO Old River a… Grant Li… 2013 Summer 2013-07-05 = 210. 18.8
## 66 NCRO Old River a… Grant Li… 2009 Summer 2009-06-23 = 209 18.7
## 67 NCRO Old River @… Grant Li… 2008 Summer 2008-07-23 = 208. 18.6
## 68 EMP P12 Grant Li… 1977 Spring 1977-05-25 = 204. 18.2
## 69 NCRO Doughty Cut… Grant Li… 2016 Summer 2016-06-21 = 193. 17.2
## 70 NCRO Old River a… Grant Li… 2007 Spring 2007-04-24 = 193 17.2
## 71 NCRO Middle Rive… Grant Li… 2004 Summer 2004-07-13 = 192 17.1
## 72 NCRO Grantline C… Grant Li… 2008 Summer 2008-06-11 = 192. 17.1
## 73 NCRO Grantline C… Grant Li… 2007 Summer 2007-07-06 = 191 17.0
## 74 NCRO Middle Rive… Grant Li… 2016 Summer 2016-06-17 = 191. 17.0
## 75 NCRO Old River b… Grant Li… 2008 Summer 2008-06-25 = 190. 16.9
## 76 NCRO Doughty Cut… Grant Li… 2008 Summer 2008-06-11 = 187. 16.7
## 77 NCRO Old River @… Grant Li… 2009 Winter 2008-12-03 = 186. 16.6
## 78 NCRO Doughty Cut… Grant Li… 2004 Summer 2004-07-13 = 186 16.6
## 79 EMP P12 Grant Li… 1977 Summer 1977-06-07 = 185. 16.5
## 80 NCRO Doughty Cut… Grant Li… 2009 Summer 2009-06-24 = 185 16.5
## 81 NCRO Grantline C… Grant Li… 2007 Summer 2007-07-03 = 185 16.5
## 82 NCRO Old River b… Grant Li… 2008 Summer 2008-07-23 = 184. 16.4
## 83 NCRO Middle Rive… Grant Li… 2007 Summer 2007-06-29 = 184 16.4
## 84 NCRO Old River a… Grant Li… 2008 Summer 2008-06-04 = 184. 16.3
## 85 NCRO Grant Ln Ca… Grant Li… 2008 Summer 2008-06-11 = 178 15.8
## 86 NCRO Old River a… Grant Li… 2014 Fall 2014-10-02 = 176. 15.7
## 87 NCRO Grantline C… Grant Li… 2010 Summer 2010-07-15 = 176 15.6
## 88 NCRO Grant Line … Grant Li… 2016 Summer 2016-06-21 = 174. 15.4
## 89 NCRO Middle Rive… Grant Li… 2016 Summer 2016-07-12 = 171. 15.2
## 90 NCRO Middle Rive… Grant Li… 2003 Summer 2003-08-29 = 170 15.1
## 91 EMP D7 Grizzly … 1980 Summer 1980-07-17 = 68.8 29.5
## 92 EMP D7 Grizzly … 1978 Fall 1978-09-27 = 60.5 25.8
## 93 EMP D7 Grizzly … 1978 Fall 1978-10-12 = 58.7 25.0
## 94 EMP D7 Grizzly … 1980 Summer 1980-08-06 = 45.8 19.4
## 95 EMP D7 Grizzly … 1979 Summer 1979-06-19 = 44.5 18.8
## 96 EMP D7 Grizzly … 1978 Fall 1978-09-13 = 43.2 18.2
## 97 EMP D7 Grizzly … 1979 Summer 1979-06-07 = 42.6 17.9
## 98 EMP D7 Grizzly … 1984 Fall 1984-09-05 = 41.4 17.4
## 99 EMP D7 Grizzly … 1980 Fall 1980-09-04 = 40.1 16.8
## 100 EMP D7 Grizzly … 1979 Summer 1979-07-06 = 38.9 16.3
## 101 EMP D9 Honker B… 1979 Summer 1979-07-06 = 64.2 21.6
## 102 EMP D9 Honker B… 1982 Summer 1982-07-14 = 55.9 18.7
## 103 EMP D9 Honker B… 1978 Summer 1978-08-23 = 53.1 17.7
## 104 USGS_SFBS 4 Honker B… 1979 Summer 1979-08-14 = 52.5 17.5
## 105 EMP D9 Honker B… 1982 Summer 1982-06-29 = 47.1 15.6
## 106 USGS_SFBS 4 Honker B… 1979 Fall 1979-09-18 = 46.8 15.5
## 107 USGS_SFBS 649 Lower Sa… 2016 Spring 2016-05-18 = 66.9 25.1
## 108 EMP D4 Lower Sa… 2016 Spring 2016-05-13 = 57.3 21.4
## 109 EMP EZ2 Lower Sa… 2016 Spring 2016-05-13 = 54.2 20.1
## 110 USGS_SFBS 649 Lower Sa… 2013 Spring 2013-03-26 = 51.6 19.1
## 111 EMP D4 Lower Sa… 1982 Fall 1982-10-21 = 45.7 16.8
## 112 EMP D4 Lower Sa… 1982 Summer 1982-07-14 = 44.3 16.3
## 113 EMP D11 Lower Sa… 1993 Spring 1993-05-18 = 42.4 15.6
## 114 EMP D14A Lower Sa… 1986 Summer 1986-06-18 = 62.4 23.1
## 115 EMP D12 Lower Sa… 2016 Spring 2016-05-11 = 59.2 21.9
## 116 NCRO San Joaquin… Lower Sa… 2010 Spring 2010-05-19 = 55.5 20.5
## 117 EMP D14A Lower Sa… 1981 Spring 1981-05-14 = 54.8 20.2
## 118 EMP D12 Lower Sa… 1986 Summer 1986-06-18 = 54.8 20.2
## 119 EMP D14A Lower Sa… 1985 Spring 1985-05-15 = 53.4 19.7
## 120 EMP D12 Lower Sa… 1993 Spring 1993-05-18 = 51.6 19.0
## 121 EMP D14A Lower Sa… 1993 Spring 1993-05-18 = 45.9 16.8
## 122 EMP D12 Lower Sa… 1985 Spring 1985-05-15 = 44.3 16.2
## 123 EMP D12 Lower Sa… 1982 Summer 1982-07-15 = 42.1 15.3
## 124 EMP D8 Mid Suis… 1985 Summer 1985-07-12 = 49.4 22.4
## 125 USGS_SFBS 5 Mid Suis… 1978 Fall 1978-10-12 = 42 18.9
## 126 USGS_SFBS 5 Mid Suis… 1979 Fall 1979-09-18 = 41.4 18.6
## 127 EMP D8 Mid Suis… 1979 Summer 1979-06-19 = 40.1 18.0
## 128 EMP D8 Mid Suis… 1979 Summer 1979-07-06 = 40.1 18.0
## 129 USGS_SFBS 6 Mid Suis… 1980 Summer 1980-08-19 = 39.1 17.5
## 130 EMP D8 Mid Suis… 1980 Summer 1980-08-21 = 38.3 17.1
## 131 USGS_SFBS 6 Mid Suis… 1978 Fall 1978-10-12 = 38.1 17.0
## 132 EMP D8 Mid Suis… 1978 Summer 1978-08-11 = 36.4 16.2
## 133 USGS_SFBS 6 Mid Suis… 1979 Fall 1979-09-18 = 36.3 16.2
## 134 USGS_SFBS 6 Mid Suis… 1978 Fall 1978-09-20 = 35.9 16.0
## 135 EMP D8 Mid Suis… 1978 Fall 1978-09-28 = 35.2 15.6
## 136 EMP D8 Mid Suis… 1978 Fall 1978-10-13 = 35.2 15.6
## 137 EMP C3 Middle S… 1977 Spring 1977-03-28 = 38.6 20.7
## 138 EMP C3 Middle S… 1977 Spring 1977-04-12 = 30.9 16.3
## 139 EMP D28A Old River 1986 Summer 1986-06-17 = 65.6 22.7
## 140 EMP D28A Old River 1985 Spring 1985-05-13 = 50.3 17.2
## 141 EMP D28A Old River 1982 Summer 1982-06-28 = 50.0 17.1
## 142 EMP D28A Old River 2016 Spring 2016-05-11 = 49.7 17.0
## 143 EMP D28A Old River 1976 Spring 1976-03-22 = 47.1 16.1
## 144 EMP D28A Old River 1984 Fall 1984-10-16 = 47.1 16.1
## 145 EMP D22 Sacramen… 2012 Fall 2012-10-08 = 61.3 27.7
## 146 EMP D22 Sacramen… 2016 Spring 2016-05-13 = 58.8 26.5
## 147 EMP EZ2 Sacramen… 2012 Fall 2012-10-08 = 46.9 20.9
## 148 NCRO Three Mile … Sacramen… 2016 Spring 2016-05-17 = 40.7 18.0
## 149 EMP D26 San Joaq… 1983 Fall 1983-09-28 = 155. 74.3
## 150 EMP D26 San Joaq… 1992 Spring 1992-05-11 = 67.4 31.7
## 151 EMP D26 San Joaq… 2016 Spring 2016-05-12 = 66.9 31.5
## 152 EMP D26 San Joaq… 1993 Spring 1993-05-17 = 47.4 22.0
## 153 EMP D26 San Joaq… 1986 Summer 1986-06-17 = 38.6 17.8
## 154 EMP D26 San Joaq… 1988 Spring 1988-05-25 = 36.1 16.6
## 155 EMP D26 San Joaq… 2015 Spring 2015-04-10 = 34.5 15.8
## 156 EMP D26 San Joaq… 1988 Summer 1988-06-08 = 34.4 15.8
## 157 EMP D26 San Joaq… 1981 Spring 1981-05-13 = 33.0 15.1
## 158 EMP D16 San Joaq… 2016 Spring 2016-05-12 = 72.7 26.2
## 159 EMP D15 San Joaq… 1986 Summer 1986-06-18 = 71.9 25.9
## 160 EMP D16 San Joaq… 1993 Spring 1993-05-17 = 53.2 19.0
## 161 EMP D15 San Joaq… 1981 Spring 1981-05-14 = 48.6 17.3
## 162 EMP D15 San Joaq… 1985 Spring 1985-05-15 = 47.1 16.7
## 163 EMP D16 San Joaq… 1986 Summer 1986-06-17 = 46.8 16.6
## 164 EMP D16 San Joaq… 1988 Summer 1988-06-08 = 46 16.3
## 165 EMP D16 San Joaq… 1985 Spring 1985-05-14 = 45.7 16.2
## 166 EMP D15 San Joaq… 1993 Spring 1993-05-18 = 44.0 15.5
## 167 EMP P8 San Joaq… 1976 Spring 1976-05-06 = 130. 24.5
## 168 EMP C9 Victoria… 1985 Spring 1985-05-09 = 90.5 36.8
## 169 NCRO Middle Rive… Victoria… 2007 Fall 2007-11-27 = 58 23.3
## 170 NCRO Middle Rive… Victoria… 2014 Fall 2014-10-02 = 50.9 20.3
## 171 EMP C9 Victoria… 1981 Spring 1981-05-12 = 45.4 18.0
## 172 EMP C9 Victoria… 1984 Summer 1984-06-05 = 41.2 16.3
## 173 NCRO Middle Rive… Victoria… 2013 Spring 2013-05-01 = 38.7 15.2
## 174 EMP C9 Victoria… 1976 Spring 1976-05-07 = 38.6 15.2
## 175 USGS_SFBS 7 West Sui… 1979 Summer 1979-07-10 = 32.3 17.7
## 176 USGS_SFBS 7 West Sui… 1978 Fall 1978-09-20 = 31.4 17.1
## # ℹ 1 more variable: ModZscore_flag <lgl>
# Context for the flagged values: San Joaquin River at Prisoners Pt,
# adjusted water years 1982-1984
df_chla_modzscore_flag_view |>
  filter(
    SubRegion == "San Joaquin River at Prisoners Pt",
    YearAdj %in% seq(1982, 1984)
  ) |>
  arrange(Date) |>
  print(n = 50)
## # A tibble: 43 × 9
## Station SubRegion YearAdj Season Date Sign Result ModZscore
## <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl>
## 1 EMP D26 San Joaquin River a… 1982 Winter 1981-12-09 = 0.93 0.445
## 2 EMP D26 San Joaquin River a… 1982 Winter 1982-02-18 = 1.85 0
## 3 EMP D26 San Joaquin River a… 1982 Spring 1982-03-03 = 1.08 0.373
## 4 EMP D26 San Joaquin River a… 1982 Spring 1982-03-17 = 1.7 0.0726
## 5 EMP D26 San Joaquin River a… 1982 Spring 1982-04-15 = 2.01 0.0774
## 6 EMP D26 San Joaquin River a… 1982 Spring 1982-05-18 = 3.09 0.600
## 7 EMP D26 San Joaquin River a… 1982 Summer 1982-06-15 = 6.48 2.24
## 8 EMP D26 San Joaquin River a… 1982 Summer 1982-06-29 = 20.7 9.12
## 9 EMP D26 San Joaquin River a… 1982 Summer 1982-07-14 = 10.3 4.11
## 10 EMP D26 San Joaquin River a… 1982 Summer 1982-07-28 = 3.09 0.600
## 11 EMP D26 San Joaquin River a… 1982 Summer 1982-08-11 = 9.11 3.51
## 12 EMP D26 San Joaquin River a… 1982 Summer 1982-08-25 = 20.8 9.19
## 13 EMP D26 San Joaquin River a… 1982 Fall 1982-09-13 = 6.18 2.10
## 14 EMP D26 San Joaquin River a… 1982 Fall 1982-09-22 = 13.1 5.45
## 15 EMP D26 San Joaquin River a… 1982 Fall 1982-10-07 = 4.48 1.27
## 16 EMP D26 San Joaquin River a… 1982 Fall 1982-10-21 = 5.56 1.80
## 17 EMP D26 San Joaquin River a… 1982 Fall 1982-11-09 = 1.09 0.368
## 18 EMP D26 San Joaquin River a… 1983 Winter 1982-12-08 = 1.08 0.373
## 19 EMP D26 San Joaquin River a… 1983 Winter 1983-01-26 = 2.32 0.227
## 20 EMP D26 San Joaquin River a… 1983 Winter 1983-02-23 = 1.08 0.373
## 21 EMP D26 San Joaquin River a… 1983 Spring 1983-03-23 = 2.32 0.227
## 22 EMP D26 San Joaquin River a… 1983 Spring 1983-04-06 = 3.86 0.973
## 23 EMP D26 San Joaquin River a… 1983 Spring 1983-05-04 = 4.01 1.05
## 24 EMP D26 San Joaquin River a… 1983 Summer 1983-06-16 = 1.39 0.223
## 25 EMP D26 San Joaquin River a… 1983 Summer 1983-07-19 = 2.16 0.150
## 26 EMP D26 San Joaquin River a… 1983 Summer 1983-08-17 = 2.78 0.450
## 27 EMP D26 San Joaquin River a… 1983 Fall 1983-09-28 = 155. 74.3
## 28 EMP D26 San Joaquin River a… 1983 Fall 1983-10-12 = 0.77 0.523
## 29 EMP D26 San Joaquin River a… 1983 Fall 1983-11-09 = 0.31 0.745
## 30 EMP D26 San Joaquin River a… 1984 Winter 1983-12-14 = 0.15 0.823
## 31 EMP D26 San Joaquin River a… 1984 Winter 1984-01-25 = 2.47 0.300
## 32 EMP D26 San Joaquin River a… 1984 Winter 1984-02-09 = 0.15 0.823
## 33 EMP D26 San Joaquin River a… 1984 Spring 1984-04-11 = 0.15 0.823
## 34 EMP D26 San Joaquin River a… 1984 Spring 1984-05-09 = 4.48 1.27
## 35 EMP D26 San Joaquin River a… 1984 Summer 1984-06-06 = 24.6 11.0
## 36 EMP D26 San Joaquin River a… 1984 Summer 1984-07-09 = 2.32 0.227
## 37 EMP D26 San Joaquin River a… 1984 Summer 1984-07-19 = 2.79 0.455
## 38 EMP D26 San Joaquin River a… 1984 Summer 1984-08-06 = 6.95 2.47
## 39 EMP D26 San Joaquin River a… 1984 Summer 1984-08-20 = 17.3 7.47
## 40 EMP D26 San Joaquin River a… 1984 Fall 1984-09-05 = 11.9 4.86
## 41 EMP D26 San Joaquin River a… 1984 Fall 1984-09-19 = 7.26 2.62
## 42 EMP D26 San Joaquin River a… 1984 Fall 1984-10-03 = 5.56 1.80
## 43 EMP D26 San Joaquin River a… 1984 Fall 1984-11-01 = 5.87 1.95
## # ℹ 1 more variable: ModZscore_flag <lgl>
# Context for the flagged values: San Joaquin River near Stockton,
# adjusted water years 1975-1977
df_chla_modzscore_flag_view |>
  filter(
    SubRegion == "San Joaquin River near Stockton",
    YearAdj %in% seq(1975, 1977)
  ) |>
  arrange(Date) |>
  print(n = 60)
## # A tibble: 57 × 9
## Station SubRegion YearAdj Season Date Sign Result ModZscore
## <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl>
## 1 EMP P8 San Joaquin River n… 1975 Winter 1975-02-03 = 20.4 3.09
## 2 EMP P8 San Joaquin River n… 1975 Spring 1975-03-18 = 7.41 0.543
## 3 EMP P8 San Joaquin River n… 1975 Spring 1975-04-01 = 12.4 1.51
## 4 EMP P8 San Joaquin River n… 1975 Spring 1975-04-16 = 20.1 3.03
## 5 EMP P8 San Joaquin River n… 1975 Spring 1975-05-01 = 40.1 6.96
## 6 EMP P8 San Joaquin River n… 1975 Spring 1975-05-15 = 40.9 7.11
## 7 EMP P8 San Joaquin River n… 1975 Summer 1975-06-03 = 34.0 5.75
## 8 EMP P8 San Joaquin River n… 1975 Summer 1975-06-17 = 15.4 2.12
## 9 EMP P8 San Joaquin River n… 1975 Summer 1975-07-01 = 13.1 1.66
## 10 EMP P8 San Joaquin River n… 1975 Summer 1975-07-15 = 40.9 7.11
## 11 EMP P8 San Joaquin River n… 1975 Summer 1975-08-12 = 67.9 12.4
## 12 EMP P8 San Joaquin River n… 1975 Summer 1975-08-25 = 37.0 6.36
## 13 EMP P8 San Joaquin River n… 1975 Fall 1975-09-11 = 34.0 5.75
## 14 EMP P8 San Joaquin River n… 1975 Fall 1975-09-26 = 17.8 2.57
## 15 EMP P8 San Joaquin River n… 1975 Fall 1975-10-09 = 42.4 7.41
## 16 EMP P8 San Joaquin River n… 1975 Fall 1975-10-23 = 17.6 2.54
## 17 EMP P8 San Joaquin River n… 1975 Fall 1975-11-26 = 20.7 3.15
## 18 EMP P8 San Joaquin River n… 1976 Winter 1975-12-23 = 2.28 0.463
## 19 EMP P8 San Joaquin River n… 1976 Winter 1976-01-22 = 15.8 2.18
## 20 EMP P8 San Joaquin River n… 1976 Winter 1976-02-19 = 3.83 0.159
## 21 EMP P8 San Joaquin River n… 1976 Spring 1976-03-09 = 12.4 1.51
## 22 EMP P8 San Joaquin River n… 1976 Spring 1976-03-22 = 20.8 3.18
## 23 EMP P8 San Joaquin River n… 1976 Spring 1976-04-06 = 56.4 10.1
## 24 EMP P8 San Joaquin River n… 1976 Spring 1976-04-20 = 74.1 13.6
## 25 EMP P8 San Joaquin River n… 1976 Spring 1976-05-06 = 130. 24.5
## 26 EMP P8 San Joaquin River n… 1976 Spring 1976-05-20 = 40.9 7.11
## 27 EMP P8 San Joaquin River n… 1976 Summer 1976-06-03 = 44 7.72
## 28 EMP P8 San Joaquin River n… 1976 Summer 1976-06-22 = 13.1 1.66
## 29 EMP P8 San Joaquin River n… 1976 Summer 1976-07-07 = 21.6 3.33
## 30 EMP P8 San Joaquin River n… 1976 Summer 1976-07-20 = 20.1 3.03
## 31 EMP P8 San Joaquin River n… 1976 Summer 1976-08-03 = 7.72 0.604
## 32 EMP P8 San Joaquin River n… 1976 Summer 1976-08-27 = 9.11 0.877
## 33 EMP P8 San Joaquin River n… 1976 Fall 1976-09-14 = 11.7 1.39
## 34 EMP P8 San Joaquin River n… 1976 Fall 1976-09-27 = 11.0 1.24
## 35 EMP P8 San Joaquin River n… 1976 Fall 1976-10-13 = 32.4 5.45
## 36 EMP P8 San Joaquin River n… 1976 Fall 1976-10-28 = 21.6 3.33
## 37 EMP P8 San Joaquin River n… 1976 Fall 1976-11-10 = 20.1 3.03
## 38 EMP P8 San Joaquin River n… 1977 Winter 1976-12-14 = 3.89 0.147
## 39 EMP P8 San Joaquin River n… 1977 Winter 1977-01-25 = 4.2 0.0863
## 40 EMP P8 San Joaquin River n… 1977 Winter 1977-02-23 = 5.87 0.241
## 41 EMP P8 San Joaquin River n… 1977 Spring 1977-03-10 = 5.56 0.180
## 42 EMP P8 San Joaquin River n… 1977 Spring 1977-03-28 = 8.03 0.665
## 43 EMP P8 San Joaquin River n… 1977 Spring 1977-04-12 = 40.9 7.11
## 44 EMP P8 San Joaquin River n… 1977 Spring 1977-04-26 = 27.8 4.54
## 45 EMP P8 San Joaquin River n… 1977 Spring 1977-05-10 = 27.0 4.39
## 46 EMP P8 San Joaquin River n… 1977 Spring 1977-05-25 = 12.8 1.60
## 47 EMP P8 San Joaquin River n… 1977 Summer 1977-06-06 = 8.65 0.786
## 48 EMP P8 San Joaquin River n… 1977 Summer 1977-06-23 = 7.26 0.514
## 49 EMP P8 San Joaquin River n… 1977 Summer 1977-07-08 = 10.3 1.12
## 50 EMP P8 San Joaquin River n… 1977 Summer 1977-07-21 = 12.8 1.60
## 51 EMP P8 San Joaquin River n… 1977 Summer 1977-08-09 = 14.7 1.97
## 52 EMP P8 San Joaquin River n… 1977 Summer 1977-08-22 = 7.72 0.604
## 53 EMP P8 San Joaquin River n… 1977 Fall 1977-09-02 = 3.24 0.275
## 54 EMP P8 San Joaquin River n… 1977 Fall 1977-09-20 = 10.8 1.21
## 55 EMP P8 San Joaquin River n… 1977 Fall 1977-10-04 = 12.8 1.60
## 56 EMP P8 San Joaquin River n… 1977 Fall 1977-10-18 = 8.18 0.694
## 57 EMP P8 San Joaquin River n… 1977 Fall 1977-11-15 = 15.8 2.18
## # ℹ 1 more variable: ModZscore_flag <lgl>
After inspecting the data flagged by the modified Z-score test, the Chlorophyll values appear to be valid based on best professional judgment, so we won’t exclude any of the flagged values.
Finally, we’ll calculate seasonal-regional averages for each adjusted water year. Before calculating the averages, we will need to replace values measured below the analytical reporting limit with a random number drawn from a uniform distribution between zero and the reporting limit.
# Attach the Region for each SubRegion and label the records with their
# Parameter name; this object is kept as the raw chlorophyll data set to export
raw_chla <-
  left_join(df_chla_c3, df_regions, by = "SubRegion") |>
  relocate(Region, .before = SubRegion) |>
  mutate(Parameter = "Chlorophyll", .before = Sign)
# Seasonal-regional chlorophyll averages. The <RL values are first swapped for
# random numbers from a uniform distribution (replace_blw_rl), then averaged
# (calc_seas_reg_avg), and the averaged Result column is renamed to Chlorophyll.
df_chla_avg <- raw_chla |>
  replace_blw_rl() |>
  calc_seas_reg_avg() |>
  rename(Chlorophyll = Result)
# Start from the complete set of Year-Season-Region combinations so that each
# one is represented, then add the Year Type info and the chlorophyll averages.
# Both joins are natural joins (no `by`), so they match on all shared columns.
lt_avg_chla <- df_yr_seas_reg |>
  left_join(df_yr_type) |>
  left_join(df_chla_avg) |>
  # Remove Suisun Marsh Region since all values are NA
  filter(Region != "Suisun Marsh")
Create a summary table of the reporting limits in the final QC’ed nutrient and chlorophyll raw data for the Supplemental Information.
# Summary table of the reporting limits (RL) found in the final QC'ed nutrient
# and chlorophyll raw data: one row per survey, parameter, and RL value with
# the number of records reported below that RL.
df_rl_vals_nutr_chla <-
  bind_rows(raw_nutr, raw_chla) %>%
  # Only keep the records flagged as below the reporting limit; for these
  # records the Result column is used as the RL value
  filter(Sign == "<") %>%
  count(Source, Parameter, Result, name = "Num_blw_RL") %>%
  # Join crosswalk tables for parameter and survey names
  left_join(df_param_cw, by = join_by(Parameter)) %>%
  left_join(df_survey_cw, by = join_by(Source)) %>%
  # Clean up
  transmute(
    Survey = Survey_name,
    Parameter = Parameter_publ,
    # Format RL values as fixed-notation text to prevent Excel from converting
    # to scientific notation. `scientific = FALSE` is needed because format()
    # by default switches the whole vector to scientific notation whenever
    # that representation is narrower (e.g., RL values of 0.0001 and 0.01
    # would be returned as "1e-04" and "1e-02").
    RL = format(Result, scientific = FALSE, drop0trailing = TRUE),
    Num_blw_RL
  ) %>%
  arrange(Survey, Parameter, RL)
Export raw data for all parameters as .qs files, and export the long-term average data as both .csv and .rds files for the analyses. Also, export the summary table of the reporting limits as a .csv file.
# Collect the raw data frames in a list; lst() names each element after the
# object passed in, and those names become the output file names
ls_data_raw <- lst(raw_wq_meas, raw_nutr, raw_chla)

# Write each raw data frame to data/interim as <object name>.qs
iwalk(
  ls_data_raw,
  \(df_raw, df_name) {
    qsave(df_raw, file = here("data/interim", paste0(df_name, ".qs")))
  }
)
# Collect the long-term average data frames in a list; lst() names each element
# after the object passed in, and those names become the output file names
ls_data_lt_avg <- lst(lt_avg_wq_meas, lt_avg_nutr, lt_avg_chla)

# Write each long-term average data frame to data/processed/wq as
# <object name>.csv
iwalk(
  ls_data_lt_avg,
  \(df_avg, df_name) {
    write_csv(df_avg, file = here("data/processed/wq", paste0(df_name, ".csv")))
  }
)

# Write the same data frames to data/processed/wq as <object name>.rds
iwalk(
  ls_data_lt_avg,
  \(df_avg, df_name) {
    saveRDS(df_avg, file = here("data/processed/wq", paste0(df_name, ".rds")))
  }
)
# Write the reporting limit summary table to results/tables as a csv file
write_csv(
  df_rl_vals_nutr_chla,
  file = here("results/tables/rl_summary_table.csv")
)