library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Simple Methods

read_csv() with URL

library(readr)

# Import data from a URL using read_csv()
url_dayflow_2023 <- "https://data.cnra.ca.gov/dataset/06ee2016-b138-47d7-9e85-f46fae674536/resource/f7c1ba7f-bd64-4762-88e3-6db9b2501b38/download/dayflowcalculations2023.csv"
df_dayflow_2023 <- read_csv(url_dayflow_2023)
df_dayflow_2023
## # A tibble: 365 × 29
##     Year    Mo Date    SAC  YOLO  CSMR  MOKE  MISC   SJR  EAST   TOT   CCC   SWP
##    <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  2022    10 10/1…  7210    35    25    88   150   212   476  7721   132   494
##  2  2022    10 10/2…  7470    36    26     0   145   218   389  7895   132   499
##  3  2022    10 10/3…  7550    36    26     0   147   245   418  8004   136   499
##  4  2022    10 10/4…  7640    36    26     0   129   234   390  8065   128   498
##  5  2022    10 10/5…  7590    36    26     0    93   222   341  7967   170   490
##  6  2022    10 10/6…  7440    35    26     0    63   213   301  7776   159   486
##  7  2022    10 10/7…  7460    33    25     0    50   216   291  7784   136   494
##  8  2022    10 10/8…  7200    33    24     0    59   248   331  7564   111   493
##  9  2022    10 10/9…  7010    32    24     0    66   308   398  7440   113   491
## 10  2022    10 10/1…  6870    32    24     0    71   305   400  7302   116   490
## # ℹ 355 more rows
## # ℹ 16 more variables: CVP <dbl>, NBAQ <dbl>, EXPORTS <dbl>, GCD <dbl>,
## #   PREC <dbl>, MISDV <dbl>, CD <dbl>, XGEO <dbl>, WEST <dbl>, RIO <dbl>,
## #   OUT <dbl>, EXPIN <dbl>, DIVER <dbl>, EFFEC <dbl>, EFFDIV <dbl>, X2 <dbl>

download.file() then import

# Import data from EDI to temporary directory
url_edi_emp_2022 <- "https://portal.edirepository.org/nis/dataviewer?packageid=edi.458.10&entityid=cf231071093ac2861893793517db26f3"
download.file(url_edi_emp_2022, file.path(tempdir(), "EMP_DWQ_1975_2022.csv"), mode = "wb")

# Then import data using read_csv()
df_emp_2022 <- read_csv(file.path(tempdir(), "EMP_DWQ_1975_2022.csv"))
df_emp_2022
## # A tibble: 17,366 × 73
##    Station Date       Time   SampleDescription Flag  FlagDescription FieldNotes
##    <chr>   <date>     <time> <chr>             <chr> <chr>           <chr>     
##  1 D11     1975-01-07 13:00  <NA>              <NA>  <NA>            <NA>      
##  2 D15     1975-01-07 14:00  <NA>              <NA>  <NA>            <NA>      
##  3 D16     1975-01-07 16:00  <NA>              <NA>  <NA>            <NA>      
##  4 D19     1975-01-07 15:00  <NA>              <NA>  <NA>            <NA>      
##  5 D22     1975-01-07 13:00  <NA>              <NA>  <NA>            <NA>      
##  6 D24     1975-01-07 14:00  <NA>              <NA>  <NA>            <NA>      
##  7 D26     1975-01-07 15:00  <NA>              <NA>  <NA>            <NA>      
##  8 D4      1975-01-07 12:00  <NA>              <NA>  <NA>            <NA>      
##  9 D10     1975-01-08 13:00  <NA>              <NA>  <NA>            <NA>      
## 10 D12     1975-01-08 14:00  <NA>              <NA>  <NA>            <NA>      
## # ℹ 17,356 more rows
## # ℹ 66 more variables: Weather <chr>, AirTemp <dbl>, WindVelocity <dbl>,
## #   WindDirection <dbl>, NorthLat <dbl>, WestLong <dbl>, Chla_Sign <chr>,
## #   Chla <dbl>, Pheophytin_Sign <chr>, Pheophytin <dbl>,
## #   TotAlkalinity_Sign <chr>, TotAlkalinity <dbl>, TotAmmonia_Sign <chr>,
## #   TotAmmonia <dbl>, DissAmmonia_Sign <chr>, DissAmmonia <dbl>,
## #   DissBromide_Sign <chr>, DissBromide <dbl>, DissCalcium_Sign <chr>, …

Dedicated R Packages

EDIutils

Package documentation: https://docs.ropensci.org/EDIutils/
Available on CRAN: https://cloud.r-project.org/web/packages/EDIutils/index.html

List data package revisions

library(EDIutils)
edi_scope <- "edi"
edi_emp_id <- 458

list_data_package_revisions(scope = edi_scope, identifier = edi_emp_id)
##  [1]  1  2  3  4  5  6  7  8  9 10
edi_emp_rev <- list_data_package_revisions(
  scope = edi_scope, 
  identifier = edi_emp_id, 
  filter = "newest"
)
edi_emp_pid <- paste(edi_scope, edi_emp_id, edi_emp_rev, sep = ".")
edi_emp_pid
## [1] "edi.458.10"

List data entities

df_edi_emp_ent <- read_data_entity_names(packageId = edi_emp_pid)
df_edi_emp_ent
##                           entityId                 entityName
## 1 cf231071093ac2861893793517db26f3          EMP_DWQ_1975_2022
## 2 86dd696bc3f8407ff52954094e1e9dcf EMP_DWQ_Stations_1975-2022
## 3 afc5b55a61e9a16d29fcaef4d802f5be          EMP_DWQ_FlagCodes
## 4 b399c042c893809547dc196a762b929f   EMP_DWQ_metadata_methods

Import data entity

edi_emp_ent_id <- df_edi_emp_ent %>% 
  filter(entityName == "EMP_DWQ_1975_2022") %>% 
  pull(entityId)

raw_emp_2022_edi <- read_data_entity(packageId = edi_emp_pid, entityId = edi_emp_ent_id)

df_emp_2022_edi <- read_csv(raw_emp_2022_edi)
df_emp_2022_edi
## # A tibble: 17,366 × 73
##    Station Date       Time   SampleDescription Flag  FlagDescription FieldNotes
##    <chr>   <date>     <time> <chr>             <chr> <chr>           <chr>     
##  1 D11     1975-01-07 13:00  <NA>              <NA>  <NA>            <NA>      
##  2 D15     1975-01-07 14:00  <NA>              <NA>  <NA>            <NA>      
##  3 D16     1975-01-07 16:00  <NA>              <NA>  <NA>            <NA>      
##  4 D19     1975-01-07 15:00  <NA>              <NA>  <NA>            <NA>      
##  5 D22     1975-01-07 13:00  <NA>              <NA>  <NA>            <NA>      
##  6 D24     1975-01-07 14:00  <NA>              <NA>  <NA>            <NA>      
##  7 D26     1975-01-07 15:00  <NA>              <NA>  <NA>            <NA>      
##  8 D4      1975-01-07 12:00  <NA>              <NA>  <NA>            <NA>      
##  9 D10     1975-01-08 13:00  <NA>              <NA>  <NA>            <NA>      
## 10 D12     1975-01-08 14:00  <NA>              <NA>  <NA>            <NA>      
## # ℹ 17,356 more rows
## # ℹ 66 more variables: Weather <chr>, AirTemp <dbl>, WindVelocity <dbl>,
## #   WindDirection <dbl>, NorthLat <dbl>, WestLong <dbl>, Chla_Sign <chr>,
## #   Chla <dbl>, Pheophytin_Sign <chr>, Pheophytin <dbl>,
## #   TotAlkalinity_Sign <chr>, TotAlkalinity <dbl>, TotAmmonia_Sign <chr>,
## #   TotAmmonia <dbl>, DissAmmonia_Sign <chr>, DissAmmonia <dbl>,
## #   DissBromide_Sign <chr>, DissBromide <dbl>, DissCalcium_Sign <chr>, …

dataRetrieval

Package documentation: https://doi-usgs.github.io/dataRetrieval/
Available on CRAN: https://cloud.r-project.org/web/packages/dataRetrieval/index.html

Sacramento River at Freeport CA (USGS station 11447650): https://waterdata.usgs.gov/monitoring-location/11447650/

NWIS Web Services

Station Information

library(dataRetrieval)

df_srf_sta_info <- whatNWISsites(sites = "11447650")
glimpse(df_srf_sta_info)
## Rows: 1
## Columns: 8
## $ agency_cd   <chr> "USGS"
## $ site_no     <chr> "11447650"
## $ station_nm  <chr> "SACRAMENTO R A FREEPORT CA"
## $ site_tp_cd  <chr> "ST"
## $ dec_lat_va  <dbl> 38.45566
## $ dec_long_va <dbl> -121.5016
## $ colocated   <lgl> FALSE
## $ queryTime   <dttm> 2024-04-17 16:12:33

Data availability

df_srf_uv_data_avail <- whatNWISdata(siteNumber = "11447650", service = "uv")
glimpse(df_srf_uv_data_avail)
## Rows: 15
## Columns: 24
## $ agency_cd          <chr> "USGS", "USGS", "USGS", "USGS", "USGS", "USGS", "US…
## $ site_no            <chr> "11447650", "11447650", "11447650", "11447650", "11…
## $ station_nm         <chr> "SACRAMENTO R A FREEPORT CA", "SACRAMENTO R A FREEP…
## $ site_tp_cd         <chr> "ST", "ST", "ST", "ST", "ST", "ST", "ST", "ST", "ST…
## $ dec_lat_va         <dbl> 38.45566, 38.45566, 38.45566, 38.45566, 38.45566, 3…
## $ dec_long_va        <dbl> -121.5016, -121.5016, -121.5016, -121.5016, -121.50…
## $ coord_acy_cd       <chr> "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "…
## $ dec_coord_datum_cd <chr> "NAD83", "NAD83", "NAD83", "NAD83", "NAD83", "NAD83…
## $ alt_va             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ alt_acy_va         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ alt_datum_cd       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ huc_cd             <chr> "18020163", "18020163", "18020163", "18020163", "18…
## $ data_type_cd       <chr> "uv", "uv", "uv", "uv", "uv", "uv", "uv", "uv", "uv…
## $ parm_cd            <chr> "00010", "00010", "00060", "00065", "00095", "00300…
## $ stat_cd            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ts_id              <chr> "15731", "15760", "236032", "15738", "236034", "157…
## $ loc_web_ds         <chr> "[Right Bank Pump Stand]", "BGC PROJECT, [East Fend…
## $ medium_grp_cd      <chr> "wat", "wat", "wat", "wat", "wat", "wat", "wat", "w…
## $ parm_grp_cd        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ srs_id             <chr> "1645597", "1645597", "1645423", "17164583", "16466…
## $ access_cd          <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "…
## $ begin_date         <date> 2007-10-01, 2013-08-30, 2007-10-01, 2007-10-01, 20…
## $ end_date           <date> 2024-04-17, 2024-04-17, 2024-04-17, 2024-04-17, 20…
## $ count_nu           <dbl> 6043, 3883, 6043, 6043, 3883, 3883, 3883, 3883, 388…

USGS parameter codes

as_tibble(parameterCdFile)
## # A tibble: 24,200 × 6
##    parameter_cd parameter_group_nm parameter_nm    casrn srsname parameter_units
##    <chr>        <chr>              <chr>           <chr> <chr>   <chr>          
##  1 00001        Information        Location in cr… " "   " "     ft             
##  2 00002        Information        Location in cr… " "   " "     %              
##  3 00003        Information        Sampling depth… " "   " "     ft             
##  4 00004        Physical           Stream width, … " "   "Instr… ft             
##  5 00005        Information        Location in cr… " "   " "     %              
##  6 00008        Information        Sample account… " "   " "     nu             
##  7 00009        Information        Location in cr… " "   " "     ft             
##  8 00010        Physical           Temperature, w… " "   "Tempe… deg C          
##  9 00011        Physical           Temperature, w… " "   "Tempe… deg F          
## 10 00012        Physical           Evaporation te… " "   " "     deg C          
## # ℹ 24,190 more rows
srf_uv_parm_cd <- unique(df_srf_uv_data_avail$parm_cd)
srf_uv_parm_cd
##  [1] "00010" "00060" "00065" "00095" "00300" "00301" "00400" "00480" "32295"
## [10] "32316" "63680" "72137" "72255"
df_srf_uv_parm_cd <- as_tibble(parameterCdFile) %>% 
  filter(parameter_cd %in% srf_uv_parm_cd) %>% 
  select(parameter_cd, parameter_nm, parameter_units)
df_srf_uv_parm_cd
## # A tibble: 13 × 3
##    parameter_cd parameter_nm                                     parameter_units
##    <chr>        <chr>                                            <chr>          
##  1 00010        Temperature, water, degrees Celsius              deg C          
##  2 00060        Discharge, cubic feet per second                 ft3/s          
##  3 00065        Gage height, feet                                ft             
##  4 00095        Specific conductance, water, unfiltered, micros… uS/cm @25C     
##  5 00300        Dissolved oxygen, water, unfiltered, milligrams… mg/l           
##  6 00301        Dissolved oxygen, water, unfiltered, percent of… % saturatn     
##  7 00400        pH, water, unfiltered, field, standard units     std units      
##  8 00480        Salinity, water, unfiltered, parts per thousand  ppth           
##  9 32295        Dissolved organic matter fluorescence (fDOM), w… ug/l QSE       
## 10 32316        Chlorophyll fluorescence (fChl), water, in situ… ug/l           
## 11 63680        Turbidity, water, unfiltered, monochrome near i… FNU            
## 12 72137        Discharge, tidally filtered, cubic feet per sec… ft3/s          
## 13 72255        Mean water velocity for discharge computation, … ft/sec
as_tibble(df_srf_uv_data_avail) %>% 
  select(parm_cd, loc_web_ds, begin_date, end_date, count_nu) %>% 
  left_join(df_srf_uv_parm_cd, by = join_by(parm_cd == parameter_cd))
## # A tibble: 15 × 7
##    parm_cd loc_web_ds                begin_date end_date   count_nu parameter_nm
##    <chr>   <chr>                     <date>     <date>        <dbl> <chr>       
##  1 00010   [Right Bank Pump Stand]   2007-10-01 2024-04-17     6043 Temperature…
##  2 00010   BGC PROJECT, [East Fende… 2013-08-30 2024-04-17     3883 Temperature…
##  3 00060   <NA>                      2007-10-01 2024-04-17     6043 Discharge, …
##  4 00065   [PRIMARY]                 2007-10-01 2024-04-17     6043 Gage height…
##  5 00095   BGC PROJECT, [East Fende… 2013-08-30 2024-04-17     3883 Specific co…
##  6 00300   BGC PROJECT, [East Fende… 2013-08-30 2024-04-17     3883 Dissolved o…
##  7 00301   BGC PROJECT, [Computed]   2013-08-30 2024-04-17     3883 Dissolved o…
##  8 00400   BGC PROJECT, [East Fende… 2013-08-30 2024-04-17     3883 pH, water, …
##  9 00480   BGC PROJECT, [East Fende… 2013-08-30 2024-04-17     3883 Salinity, w…
## 10 32295   BGC PROJECT, [East Fende… 2021-04-18 2024-04-17     1095 Dissolved o…
## 11 32316   BGC PROJECT               2013-08-30 2024-04-17     3883 Chlorophyll…
## 12 63680   MEDIAN TS087: YSI model … 2009-12-03 2013-10-01     1398 Turbidity, …
## 13 63680   BGC PROJECT, [East Fende… 2013-08-30 2024-04-17     3883 Turbidity, …
## 14 72137   <NA>                      2015-10-01 2024-04-16     3120 Discharge, …
## 15 72255   <NA>                      2015-10-01 2024-04-17     3121 Mean water …
## # ℹ 1 more variable: parameter_units <chr>

Continuous data - instantaneous

df_srf_spc_inst <- readNWISuv(
  siteNumbers = "11447650",
  parameterCd = "00095", 
  startDate = "2023-01-01", endDate = "2023-12-31", 
  tz = "America/Los_Angeles"
)
as_tibble(df_srf_spc_inst)
## # A tibble: 34,216 × 6
##    agency_cd site_no  dateTime            X_00095_00000 X_00095_00000_cd tz_cd  
##    <chr>     <chr>    <dttm>                      <dbl> <chr>            <chr>  
##  1 USGS      11447650 2023-01-01 00:00:00           122 P                Americ…
##  2 USGS      11447650 2023-01-01 00:15:00           121 P                Americ…
##  3 USGS      11447650 2023-01-01 00:30:00           121 P                Americ…
##  4 USGS      11447650 2023-01-01 00:45:00           120 P                Americ…
##  5 USGS      11447650 2023-01-01 01:00:00           120 P                Americ…
##  6 USGS      11447650 2023-01-01 01:15:00           120 P                Americ…
##  7 USGS      11447650 2023-01-01 01:30:00           119 P                Americ…
##  8 USGS      11447650 2023-01-01 01:45:00           119 P                Americ…
##  9 USGS      11447650 2023-01-01 02:00:00           118 P                Americ…
## 10 USGS      11447650 2023-01-01 02:15:00           117 P                Americ…
## # ℹ 34,206 more rows

Continuous data - daily averages

df_srf_tfq_davg <- readNWISdv(
  siteNumbers = "11447650",
  parameterCd = "72137", 
  startDate = "2023-01-01", endDate = "2023-12-31"
)
as_tibble(df_srf_tfq_davg)
## # A tibble: 365 × 5
##    agency_cd site_no  Date       X_72137_00003 X_72137_00003_cd
##    <chr>     <chr>    <date>             <dbl> <chr>           
##  1 USGS      11447650 2023-01-01         56300 A               
##  2 USGS      11447650 2023-01-02         67700 A               
##  3 USGS      11447650 2023-01-03         70500 A               
##  4 USGS      11447650 2023-01-04         67400 A               
##  5 USGS      11447650 2023-01-05         65000 A               
##  6 USGS      11447650 2023-01-06         67900 A               
##  7 USGS      11447650 2023-01-07         73800 A               
##  8 USGS      11447650 2023-01-08         78700 A               
##  9 USGS      11447650 2023-01-09         79800 A               
## 10 USGS      11447650 2023-01-10         79900 A               
## # ℹ 355 more rows

WQP Web Services

Station Information

whatWQPsites(siteid = "USGS-11447650")
## # A tibble: 1 × 37
##   OrganizationIdentifier OrganizationFormalName           MonitoringLocationId…¹
##   <chr>                  <chr>                            <chr>                 
## 1 USGS-CA                USGS California Water Science C… USGS-11447650         
## # ℹ abbreviated name: ¹​MonitoringLocationIdentifier
## # ℹ 34 more variables: MonitoringLocationName <chr>,
## #   MonitoringLocationTypeName <chr>, MonitoringLocationDescriptionText <chr>,
## #   HUCEightDigitCode <chr>, DrainageAreaMeasure.MeasureValue <dbl>,
## #   DrainageAreaMeasure.MeasureUnitCode <chr>,
## #   ContributingDrainageAreaMeasure.MeasureValue <dbl>,
## #   ContributingDrainageAreaMeasure.MeasureUnitCode <chr>, …

Data availability

df_srf_nutr_data_avail <- readWQPsummary(
  siteid = "USGS-11447650", 
  characteristicType = "Nutrient"
)
## NEWS: USGS data availability and format are changing. 
## Beginning in mid-March 2024 the data obtained from legacy profiles
## will not include new USGS data or recent updates to existing data. 
## To view the status of changes in data availability and code functionality, visit:
## https://doi-usgs.github.io/dataRetrieval/articles/Status.html
## If you have additional questions about these changes, 
## email CompTools@usgs.gov.
df_srf_nutr_data_avail
## # A tibble: 480 × 19
##    Provider MonitoringLocationIdentifier YearSummarized CharacteristicType
##    <chr>    <chr>                                 <dbl> <chr>             
##  1 NWIS     USGS-11447650                          1958 Nutrient          
##  2 NWIS     USGS-11447650                          1959 Nutrient          
##  3 NWIS     USGS-11447650                          1960 Nutrient          
##  4 NWIS     USGS-11447650                          1961 Nutrient          
##  5 NWIS     USGS-11447650                          1962 Nutrient          
##  6 NWIS     USGS-11447650                          1963 Nutrient          
##  7 NWIS     USGS-11447650                          1964 Nutrient          
##  8 NWIS     USGS-11447650                          1965 Nutrient          
##  9 NWIS     USGS-11447650                          1966 Nutrient          
## 10 NWIS     USGS-11447650                          1966 Nutrient          
## # ℹ 470 more rows
## # ℹ 15 more variables: CharacteristicName <chr>, ActivityCount <dbl>,
## #   ResultCount <dbl>, LastResultSubmittedDate <chr>,
## #   OrganizationIdentifier <chr>, OrganizationFormalName <chr>,
## #   MonitoringLocationName <chr>, MonitoringLocationTypeName <chr>,
## #   ResolvedMonitoringLocationTypeName <chr>, HUCEightDigitCode <chr>,
## #   MonitoringLocationUrl <chr>, CountyName <chr>, StateName <chr>, …
df_srf_nutr_data_avail %>% summarize(
  min_yr = min(YearSummarized),
  max_yr = max(YearSummarized),
  num_samp = sum(ResultCount),
  .by = CharacteristicName
)
## # A tibble: 12 × 4
##    CharacteristicName                                     min_yr max_yr num_samp
##    <chr>                                                   <dbl>  <dbl>    <dbl>
##  1 Nitrate                                                  1958   2023     1470
##  2 Phosphate-phosphorus                                     1966   1992       54
##  3 Phosphorus                                               1970   2023     1053
##  4 Inorganic nitrogen (nitrate and nitrite)                 1971   2023      714
##  5 Kjeldahl nitrogen                                        1973   2023      851
##  6 Nitrite                                                  1973   2023     1110
##  7 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) an…   1973   2023     1298
##  8 Ammonia and ammonium                                     1977   2023     1349
##  9 Organic Nitrogen                                         1977   2023      919
## 10 Orthophosphate                                           1981   2023     1171
## 11 Nitrogen                                                 2001   2023      222
## 12 Particulate nitrogen/particulate organic carbon          2001   2018      115

Discrete WQ data

Import discrete nitrate data from the Water Quality Portal (WQP)

df_srf_nitrate <- readWQPqw(
  siteNumbers = "USGS-11447650",
  parameterCd = "Nitrate", 
  startDate = "2023-01-01", endDate = "2023-12-31", 
  tz = "America/Los_Angeles"
)
## NEWS: USGS data availability and format are changing. 
## Beginning in mid-March 2024 the data obtained from legacy profiles
## will not include new USGS data or recent updates to existing data. 
## To view the status of changes in data availability and code functionality, visit:
## https://doi-usgs.github.io/dataRetrieval/articles/Status.html
## If you have additional questions about these changes, 
## email CompTools@usgs.gov.
as_tibble(df_srf_nitrate) %>% 
  select(MonitoringLocationIdentifier, ActivityStartDateTime,
    CharacteristicName, USGSPCode, ResultMeasureValue)
## # A tibble: 42 × 5
##    MonitoringLocationIdenti…¹ ActivityStartDateTime CharacteristicName USGSPCode
##    <chr>                      <dttm>                <chr>              <chr>    
##  1 USGS-11447650              2023-01-04 10:15:00   Nitrate            00618    
##  2 USGS-11447650              2023-01-04 10:15:00   Nitrate            71851    
##  3 USGS-11447650              2023-01-11 08:45:00   Nitrate            71851    
##  4 USGS-11447650              2023-01-11 08:45:00   Nitrate            00618    
##  5 USGS-11447650              2023-01-12 09:30:00   Nitrate            00618    
##  6 USGS-11447650              2023-01-12 09:30:00   Nitrate            71851    
##  7 USGS-11447650              2023-01-31 09:30:00   Nitrate            00618    
##  8 USGS-11447650              2023-01-31 09:30:00   Nitrate            71851    
##  9 USGS-11447650              2023-02-07 13:15:00   Nitrate            00618    
## 10 USGS-11447650              2023-02-07 13:15:00   Nitrate            71851    
## # ℹ 32 more rows
## # ℹ abbreviated name: ¹​MonitoringLocationIdentifier
## # ℹ 1 more variable: ResultMeasureValue <dbl>

Other Information

More useful links for accessing USGS data through dataRetrieval:

USGS County and State codes

as_tibble(countyCd)
## # A tibble: 3,253 × 5
##    STATE COUNTY COUNTY_NAME     STUSAB COUNTY_ID
##    <chr> <chr>  <chr>           <chr>  <chr>    
##  1 00    000    Unspecified     <NA>   <NA>     
##  2 01    001    Autauga County  AL     H1       
##  3 01    003    Baldwin County  AL     H1       
##  4 01    005    Barbour County  AL     H1       
##  5 01    007    Bibb County     AL     H1       
##  6 01    009    Blount County   AL     H1       
##  7 01    011    Bullock County  AL     H1       
##  8 01    013    Butler County   AL     H1       
##  9 01    015    Calhoun County  AL     H1       
## 10 01    017    Chambers County AL     H1       
## # ℹ 3,243 more rows
as_tibble(stateCd)
## # A tibble: 58 × 4
##    STATE STATE_NAME           STUSAB STATENS 
##    <chr> <chr>                <chr>  <chr>   
##  1 00    Unspecified          <NA>   <NA>    
##  2 01    Alabama              AL     01779775
##  3 02    Alaska               AK     01785533
##  4 04    Arizona              AZ     01779777
##  5 05    Arkansas             AR     00068085
##  6 06    California           CA     01779778
##  7 08    Colorado             CO     01779779
##  8 09    Connecticut          CT     01779780
##  9 10    Delaware             DE     01779781
## 10 11    District of Columbia DC     01702382
## # ℹ 48 more rows

cder

Package documentation: https://hydroecology.net/cder/index.html
Available on CRAN: https://cran.r-project.org/web/packages/cder/index.html

Station Information

library(cder)
cdec_meta(station = "FPT")

Import data

df_fpt_turb <- cdec_query(
  stations = "FPT", sensors = 221, durations = "E", 
  start.date = "2024-03-01", end.date = "2024-03-31"
)
df_fpt_turb
## # A tibble: 2,877 × 9
##    StationID Duration SensorNumber SensorType DateTime           
##    <chr>     <chr>           <int> <chr>      <dttm>             
##  1 FPT       E                 221 TURB WF    2024-03-01 00:00:00
##  2 FPT       E                 221 TURB WF    2024-03-01 00:15:00
##  3 FPT       E                 221 TURB WF    2024-03-01 00:30:00
##  4 FPT       E                 221 TURB WF    2024-03-01 00:45:00
##  5 FPT       E                 221 TURB WF    2024-03-01 01:00:00
##  6 FPT       E                 221 TURB WF    2024-03-01 01:15:00
##  7 FPT       E                 221 TURB WF    2024-03-01 01:30:00
##  8 FPT       E                 221 TURB WF    2024-03-01 01:45:00
##  9 FPT       E                 221 TURB WF    2024-03-01 02:00:00
## 10 FPT       E                 221 TURB WF    2024-03-01 02:15:00
## # ℹ 2,867 more rows
## # ℹ 4 more variables: ObsDate <dttm>, Value <dbl>, DataFlag <chr>,
## #   SensorUnits <chr>

IEP Integrated Datasets

deltafish

Available on GitHub: https://github.com/Delta-Stewardship-Council/deltafish
EDI data repository: https://portal.edirepository.org/nis/mapbrowse?scope=edi&identifier=1075

# install.packages("devtools")
# devtools::install_github("Delta-Stewardship-Council/deltafish")
library(deltafish)
## Loading required package: arrow
## 
## Attaching package: 'arrow'
## The following object is masked from 'package:lubridate':
## 
##     duration
## The following object is masked from 'package:utils':
## 
##     timestamp
# Build the database - this takes a while, use update = TRUE to 
  # re-build cached database
create_fish_db()
## Reading data from cache directory, revision edi.1075.1
## [1] "C:\\Users\\dboswort\\AppData\\Local/deltafish/deltafish/Cache"
# Open two data files
surv <- open_survey()
fish <- open_fish()
## Note: fish length units are not all compatible across surveys. The Suisun study uses standard length while the other surveys use fork/total length. Use deltafish::convert_lengths to convert the length units of the 20 taxa for which we have conversion equations from standard length to fork/total length.
# Filter for sources and taxa of interest and join them together
surv_FMWT <- surv %>% filter(Source == "FMWT") %>% select(SampleID, Date)

fish_smelt <- fish %>% 
  filter(Taxa %in% c("Dorosoma petenense", "Morone saxatilis", "Spirinchus thaleichthys"))

df_fish <- left_join(surv_FMWT, fish_smelt)

# Collect the resulting data frame - collect executes
  # the SQL query and gives you a table
df_fish_c <- collect(df_fish)
df_fish_c
## # A tibble: 175,009 × 6
##    SampleID Date       Length Count Notes_catch Taxa                   
##    <chr>    <date>      <dbl> <dbl> <chr>       <chr>                  
##  1 FMWT 1   1992-01-10     NA     0 <NA>        Morone saxatilis       
##  2 FMWT 1   1992-01-10     NA     0 <NA>        Dorosoma petenense     
##  3 FMWT 1   1992-01-10     NA     0 <NA>        Spirinchus thaleichthys
##  4 FMWT 2   1992-02-07     NA     0 <NA>        Morone saxatilis       
##  5 FMWT 2   1992-02-07     NA     0 <NA>        Dorosoma petenense     
##  6 FMWT 2   1992-02-07     NA     0 <NA>        Spirinchus thaleichthys
##  7 FMWT 3   1992-03-18     NA     0 <NA>        Morone saxatilis       
##  8 FMWT 3   1992-03-18     NA     0 <NA>        Dorosoma petenense     
##  9 FMWT 3   1992-03-18     NA     0 <NA>        Spirinchus thaleichthys
## 10 FMWT 4   1992-09-16     NA     0 <NA>        Morone saxatilis       
## # ℹ 174,999 more rows

zooper

Available on GitHub: https://github.com/InteragencyEcologicalProgram/zooper
EDI data repository: https://portal.edirepository.org/nis/mapbrowse?scope=edi&identifier=539

# install.packages("devtools")
# devtools::install_github("InteragencyEcologicalProgram/zooper")
library(zooper)

df_zoop <- Zoopsynther(
  Data_type = "Community", Response = c("CPUE", "BPUE"),
  Sources = c("EMP", "FRP", "FMWT"), Size_class = "Meso",
  Date_range = c("1990-10-01", "2000-09-30")
)
## [1] "No disclaimers here! Enjoy the clean data!"
df_zoop
## # A tibble: 151,478 × 35
##    Source SizeClass Volume Lifestage Taxname     Phylum Class Order Family Genus
##    <chr>  <chr>      <dbl> <chr>     <chr>       <chr>  <chr> <chr> <chr>  <chr>
##  1 EMP    Meso        10.6 Adult     Acanthocyc… Arthr… Cope… Cycl… Cyclo… Acan…
##  2 EMP    Meso        10.6 Adult     Acartia_Un… Arthr… Cope… Cala… Acart… Acar…
##  3 EMP    Meso        10.6 Adult     Acartiella… Arthr… Cope… Cala… Acart… Acar…
##  4 EMP    Meso        10.6 Adult     Asplanchna… Rotif… Euro… Ploi… Aspla… Aspl…
##  5 EMP    Meso        10.6 Adult     Bosmina lo… Arthr… Bran… Clad… Bosmi… Bosm…
##  6 EMP    Meso        10.6 Adult     Calanoida_… Arthr… Cope… Cala… <NA>   <NA> 
##  7 EMP    Meso        10.6 Adult     Cladocera_… Arthr… Bran… Clad… <NA>   <NA> 
##  8 EMP    Meso        10.6 Adult     Cyclopoida… Arthr… Cope… Cycl… <NA>   <NA> 
##  9 EMP    Meso        10.6 Adult     Daphnia_Un… Arthr… Bran… Clad… Daphn… Daph…
## 10 EMP    Meso        10.6 Adult     Diaphanoso… Arthr… Bran… Clad… Sidid… Diap…
## # ℹ 151,468 more rows
## # ℹ 25 more variables: Species <chr>, Taxlifestage <chr>, SampleID <chr>,
## #   CPUE <dbl>, BPUE <dbl>, Undersampled <lgl>, Date <dttm>, Station <chr>,
## #   Chl <dbl>, Secchi <dbl>, Temperature <dbl>, BottomDepth <dbl>, Tide <chr>,
## #   TowType <chr>, Datetime <dttm>, Turbidity <dbl>, pH <dbl>, DO <dbl>,
## #   Microcystis <chr>, Year <dbl>, AmphipodCode <chr>, SalSurf <dbl>,
## #   SalBott <dbl>, Latitude <dbl>, Longitude <dbl>

discretewq

Available on GitHub: https://github.com/InteragencyEcologicalProgram/discretewq
EDI data repository: https://portal.edirepository.org/nis/mapbrowse?scope=edi&identifier=731

# install.packages("devtools")
# devtools::install_github("InteragencyEcologicalProgram/discretewq")
library(discretewq)

df_dwq <- wq(
  Sources = c("EMP", "NCRO", "USGS_CAWSC", "USGS_SFBS"), 
  Start_year = 2020, End_year = 2022
)
df_dwq
## # A tibble: 5,176 × 78
##    Source Station Latitude Longitude Field_coords Date               
##    <chr>  <chr>      <dbl>     <dbl> <lgl>        <dttm>             
##  1 EMP    C10A        37.7     -121. FALSE        2020-01-14 00:00:00
##  2 EMP    C3A         38.4     -122. FALSE        2020-01-14 00:00:00
##  3 EMP    C9          37.8     -122. FALSE        2020-01-14 00:00:00
##  4 EMP    D12         38.0     -122. FALSE        2020-01-15 00:00:00
##  5 EMP    D19         38.0     -122. FALSE        2020-01-15 00:00:00
##  6 EMP    D28A        38.0     -122. FALSE        2020-01-15 00:00:00
##  7 EMP    D16         38.1     -122. FALSE        2020-01-16 00:00:00
##  8 EMP    D26         38.1     -122. FALSE        2020-01-16 00:00:00
##  9 EMP    MD10A       38.0     -121. FALSE        2020-01-16 00:00:00
## 10 EMP    P8          38.0     -121. FALSE        2020-01-16 00:00:00
## # ℹ 5,166 more rows
## # ℹ 72 more variables: Datetime <dttm>, Notes <chr>, Depth <dbl>, Tide <chr>,
## #   Microcystis <dbl>, Chlorophyll_Sign <chr>, Chlorophyll <dbl>, Secchi <dbl>,
## #   Temperature <dbl>, Temperature_bottom <dbl>, Conductivity <dbl>,
## #   Conductivity_bottom <dbl>, DissolvedOxygen <dbl>,
## #   DissolvedOxygen_bottom <dbl>, DissolvedOxygenPercent <dbl>,
## #   DissolvedOxygenPercent_bottom <dbl>, pH <dbl>, pH_bottom <dbl>, …

deltamapr

Available on GitHub: https://github.com/InteragencyEcologicalProgram/deltamapr

# install.packages("devtools")
# devtools::install_github("InteragencyEcologicalProgram/deltamapr")
library(deltamapr)
library(sf)
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
WW_Delta
## Simple feature collection with 282 features and 9 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -122.6408 ymin: 37.41522 xmax: -120.9357 ymax: 38.67781
## Geodetic CRS:  NAD83
## # A tibble: 282 × 10
##         AREA PERIMETER HYDRO_POLY HYDRO_PO_1 HYDRO_24K_ TYPE  HNAME   Shape_Leng
##        <dbl>     <dbl>      <int>      <int>      <int> <chr> <chr>        <dbl>
##  1 73544304   1033340         791        797        798 MR    SACRAM…    2.45   
##  2    87637.     3319.       1965       1963       1964 S     W          0.0357 
##  3  7915130     87428.       1967       1965       1966 C     SACTO.…    0.829  
##  4   103906      2719.       1970       1969       1970 L     GREENS…    0.0264 
##  5   106371      2798.       1977       1974       1975 L     LAKE W…    0.0283 
##  6   159485      3392.       1982       1978       1979 S     W          0.0314 
##  7    42597.     1003.       1992       1989       1990 S     W          0.00952
##  8     5648.      498.       2001       2008       2009 MR    SOUTH …    0.00548
##  9     4139.      502.       2006       2012       2013 MR    SOUTH …    0.00536
## 10    97840.     6630.       2008       2011       2012 MR    SOUTH …    0.0746 
## # ℹ 272 more rows
## # ℹ 2 more variables: Shape_Area <dbl>, geometry <POLYGON [°]>
ggplot(WW_Delta) + geom_sf() + theme_bw()

The following is additional info that’s useful but not included in tutorial.

Web Scraping

Package documentation: https://rvest.tidyverse.org/index.html
Available on CRAN: https://cloud.r-project.org/web/packages/rvest/index.html

library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
url_pforb <- "https://invasions.si.edu/nemesis/species_summary/-218"
html_pforb <- read_html(url_pforb)
html_elements(html_pforb, "table")
## {xml_nodeset (6)}
## [1] <table>\n<tr>\n<td><strong>Kingdom:</strong></td>\n                    <t ...
## [2] <table class="table table-striped">\n<tr>\n<td>General Habitat</td>\n<td> ...
## [3] <table class="table table-striped">\n<tr>\n<td>Minimum Temperature (ºC)</ ...
## [4] <table class="table table-striped">\n<tr>\n<td>NEP-V</td>\n<td colspan="2 ...
## [5] <table class="table table-striped" id="dist_table">\n<thead><tr>\n<th>Bio ...
## [6] <table class="table table-striped" id="occ_table">\n<thead><tr>\n<th>OCC_ ...
df_pforb_traits <- html_elements(html_pforb, "table")[[3]] %>% html_table()
df_pforb_traits
## # A tibble: 8 × 3
##   X1                       X2    X3                                             
##   <chr>                    <chr> <chr>                                          
## 1 Minimum Temperature (ºC) 3     Field, Columbia River estuary (Bollens et al. …
## 2 Maximum Temperature (ºC) 27    Maximum in San Joaquin river, near Stockton (O…
## 3 Minimum Salinity (‰)     0     Field, San Francisco Bay (Orsi and Walter 1991)
## 4 Maximum Salinity (‰)     16    Field, San Francisco Bay (Orsi and Walter 1991…
## 5 Minimum Length (mm)      1     Minimum adult length, male (Orsi and Walter 19…
## 6 Maximum Length (mm)      1.2   Maximum adult length, female (Orsi and Walter …
## 7 Broad Temperature Range  None  Warm temperate                                 
## 8 Broad Salinity Range     None  Nontidal Limnetic-Mesohaline

Use the SelectorGadget tool to find CSS selectors on a webpage: https://selectorgadget.com/

Extract data from .pdf

Package documentation: https://docs.ropensci.org/pdftools/
Available on CRAN: https://cloud.r-project.org/web/packages/pdftools/index.html

library(pdftools)
## Using poppler version 22.04.0
# Download Jan 2024 Delta Outflow Computation report from the USBR website to
# the temporary R directory
url_usbr_dout <- "https://www.usbr.gov/mp/cvo/vungvari/dout0124.pdf"
download.file(url_usbr_dout, file.path(tempdir(), "Delta_Outflow_0124.pdf"), mode = "wb")
usbr_pdf_txt <- pdf_text(file.path(tempdir(), "Delta_Outflow_0124.pdf")) %>% read_lines()
usbr_pdf_txt
##  [1] "02/01/24"                                                                                                                                                                                                                          
##  [2] "                                                                                       U.S. Bureau of Reclamation - Central Valley Operations Office"                                                                              
##  [3] "                                                                                                Delta Outflow Computation (values in c.f.s.)"                                                                                      
##  [4] ""                                                                                                                                                                                                                                  
##  [5] "Preliminary Data - Subject to Change                                                                                 January 2024"                                                                                                 
##  [6] "Estimated numbers are in bold Italic print"                                                                                                                                                                                        
##  [7] "                                                          DELTA INFLOW                                                                   DELTA EXPORTS                                   OUTFLOW INDEX          EXPORT/INFLOW"     
##  [8] "              Sacto R                        Yolo +    East Side S. Joaquin River @ Vernalis    Total               Clifton            Contra Byron          Total     3-day"                                                      
##  [9] "   Date                                                                                                  NDCU"                                                                                                                     
## [10] "             @Freeport SRTP                   Misc     Streams              7-day   Monthly     Delta               Court      Tracy   Costa Bethany NBA     Delta      Avg      NDOI        7-day    Monthly   Daily 3 Day 14 Day"
## [11] "                prev dy     prev wk          prev dy    prev dy   prev dy    Avg      Avg       Inflow              (CLT)      (TRA)   (CCC) (BBID)         Exports TRA & CLT    daily        Avg        Avg    (%)   (%)    (%)"  
## [12] " 01/01/24        19,294        210             159        618      1,429     1,356    1,377     21,710     -4,818    2,290     3,547    104    5      32     5,968     7,978     20,561      11,399    20,561   27%   41%    43%"  
## [13] " 01/02/24        23,070        227              89        610      1,377     1,361    1,371     25,373     -3,777    2,298     3,540    108    3      30     5,974     6,907     23,177      13,381    21,869   23%   31%    35%"  
## [14] " 01/03/24        23,756        244              63        608      1,365     1,375    1,384     26,036     -5,881    2,097     3,546    108    3      33     5,782     5,773     26,136      15,980    23,291   22%   24%    28%"  
## [15] " 01/04/24        22,446        261              90        691      1,409     1,389    1,393     24,897     -2,280    2,093     3,532    104    5      29     5,753     5,702     21,424      17,927    22,824   23%   22%    27%"  
## [16] " 01/05/24        20,925        279             109        696      1,421     1,397    1,391     23,430      -960     2,191     3,507    109    4      35     5,838     5,655     18,552      19,423    21,970   24%   23%    27%"  
## [17] " 01/06/24        21,079        296              70        604      1,385     1,395    1,389     23,434     -1,010    2,395     3,476    110    3      32     6,012     5,731     18,432      20,479    21,380   25%   24%    27%"  
## [18] " 01/07/24        21,235        313              64        584      1,379     1,422    1,422     23,575     -1,745    2,393     3,484    111    5      35     6,018     5,815     19,302      21,083    21,083   25%   25%    27%"  
## [19] " 01/08/24        20,019        330              73        583      1,615     1,484    1,470     22,620       31      2,389     3,480    110    4      28     6,004     5,872     16,586      20,515    20,521   26%   25%    27%"  
## [20] " 01/09/24        18,320        330              64        562      1,812     1,553    1,513     21,088       487     2,697     3,466    92     4      36     6,288     5,970     14,312      19,249    19,831   29%   27%    28%"  
## [21] " 01/10/24        17,364        330              57        541      1,852     1,621    1,550     20,144       437     2,696     3,471    80     4      34     6,277     6,067     13,430      17,434    19,191   31%   28%    28%"  
## [22] " 01/11/24        16,624        330              58        555      1,885     1,687    1,580     19,452       159     2,599     3,468    82     4      29     6,175     6,133     13,118      16,247    18,639   31%   30%    28%"  
## [23] " 01/12/24        16,920        330              59        703      1,881     1,756    1,604     19,893       844     2,593     3,488    98     5      31     6,206     6,105     12,844      15,432    18,156   31%   31%    28%"  
## [24] " 01/13/24        16,540        330              59        670      1,868     1,824    1,624     19,467      1,022    2,592     3,553    90     5      38     6,268     6,098     12,177      14,538    17,696   32%   31%    27%"  
## [25] " 01/14/24        16,299        330              59        631      1,857     1,861    1,641     19,176     -1,489    1,791     3,554    62     5      43     5,445     5,857     15,219      13,955    17,519   28%   30%    26%"  
## [26] " 01/15/24        16,793        330             350        882      1,870     1,873    1,658     20,225     -1,539    1,793     3,548    55     4      30     5,422     5,610     16,342      13,920    17,441   26%   29%    25%"  
## [27] " 01/16/24        18,048        330             202        921      1,897     1,878    1,672     21,398     -1,310    1,795     3,541    54     5      35     5,420     5,341     17,288      14,345    17,431   25%   26%    25%"  
## [28] " 01/17/24        24,096        330             140        773      1,885     1,883    1,687     27,224     -3,643    1,796     3,534    67     6      31     5,423     5,336     25,443      16,062    17,902   20%   23%    24%"  
## [29] " 01/18/24        25,440        330             164        852      1,921     1,893    1,702     28,707     -3,871    1,799     3,547    84     4      33     5,458     5,337     27,120      18,062    18,415   19%   21%    24%"  
## [30] " 01/19/24        23,501        330             157        969      1,950     1,894    1,711     26,907     -1,410    1,894     3,556    81     4      20     5,547     5,375     22,771      19,480    18,644   20%   19%    24%"  
## [31] " 01/20/24        24,646        330             133        872      1,879     1,895    1,719     27,860     -1,639    1,796     3,582    85     4      35     5,494     5,391     24,004      21,170    18,912   19%   19%    24%"  
## [32] " 01/21/24        25,386        330             307        910      1,866     1,910    1,731     28,799     -3,236    1,794     3,589    84     3      31     5,496     5,404     26,539      22,787    19,275   19%   19%    23%"  
## [33] " 01/22/24        25,500        330             444       1,337     1,973     1,981    1,761     29,584     -3,971    1,893     3,597    88     4      31     5,606     5,417     27,949      24,445    19,669   19%   19%    23%"  
## [34] " 01/23/24        29,219        330            3,004      1,757     2,391     2,198    1,832     36,701     -9,904     293      3,586    103    5      31     4,009     4,917     42,597      28,060    20,666   11%   16%    20%"  
## [35] " 01/24/24        38,607        330             794       1,760     3,408     2,517    1,929     44,899    -10,183     470      2,665    104    3      23     3,260     4,168     51,822      31,829    21,964   7%    11%    16%"  
## [36] " 01/25/24        42,466        330             480       1,424     4,154     2,784    2,005     48,854    -11,780    1,089     2,666    102    4      33     3,886     3,590     56,748      36,061    23,356   8%    8%     13%"  
## [37] " 01/26/24        44,134        330             417       1,356     3,817     3,014    2,062     50,054    -10,183    1,287     2,658    100    5      33     4,073     3,612     56,163      40,832    24,617   8%    8%     12%"  
## [38] " 01/27/24        45,171        330             272       1,241     3,488     3,234    2,112     50,502     -7,216    1,087     2,651    103    3      31     3,870     3,813     53,847      45,095    25,700   7%    8%     12%"  
## [39] " 01/28/24        44,661        330             214       1,106     3,409     3,453    2,161     49,720     -1,104    1,095     2,651    105    4      31     3,878     3,810     46,946      48,010    26,459   8%    8%     11%"  
## [40] " 01/29/24        42,797        330             184       1,069     3,502     3,595    2,203     47,882      -876     1,992     1,826    118    3      32     3,965     3,767     44,793      50,417    27,091   8%    8%     10%"  
## [41] " 01/30/24        38,357        330             166       1,050     3,385     3,577    2,240     43,288       950     1,995     1,827    93     0      35     3,951     3,795     38,387      49,815    27,468   9%    8%     10%"  
## [42] " 01/31/24        34,283        330             152        990      3,286     3,448    2,272     39,041       950     1,096     2,763    103    4      32     3,990     3,833     34,101      47,284    27,682   10%   9%     10%"  
## [43] ""                                                                                                                                                                                                                                  
## [44] "    Total      816,996       9,750           8,653     27,925     68,616    64,508   53,166    931,940   (88,945)   58,081    100,898 2,899   116     994   162,756   164,179   858,128     764,716   657,222    -     -     -"    
## [45] " Average        26,355        315             279       901        2,213     2,081    1,715     30,063    (2,869)    1,874     3,255    94     4      32     5,250     5,296     27,682      24,668    21,201   20%   21%   23%"   
## [46] ""                                                                                                                                                                                                                                  
## [47] ""                                                                                                                                                                                                                                  
## [48] ""                                                                                                                                                                                                                                  
## [49] ""                                                                                                                                                                                                                                  
## [50] "                                                                                PRELIMINARY"                                                                                                                                       
## [51] "                                                                                                                                                                                                         2/1/2024 7:24 AM"
# Keep "rows" 12-42 in the data and convert to a matrix with 23 columns
usbr_pdf_mat <- usbr_pdf_txt[12:42] %>%
  str_squish() %>%
  str_split_fixed(pattern = " ", n = 23)
usbr_pdf_mat
##       [,1]       [,2]     [,3]  [,4]    [,5]    [,6]    [,7]    [,8]   
##  [1,] "01/01/24" "19,294" "210" "159"   "618"   "1,429" "1,356" "1,377"
##  [2,] "01/02/24" "23,070" "227" "89"    "610"   "1,377" "1,361" "1,371"
##  [3,] "01/03/24" "23,756" "244" "63"    "608"   "1,365" "1,375" "1,384"
##  [4,] "01/04/24" "22,446" "261" "90"    "691"   "1,409" "1,389" "1,393"
##  [5,] "01/05/24" "20,925" "279" "109"   "696"   "1,421" "1,397" "1,391"
##  [6,] "01/06/24" "21,079" "296" "70"    "604"   "1,385" "1,395" "1,389"
##  [7,] "01/07/24" "21,235" "313" "64"    "584"   "1,379" "1,422" "1,422"
##  [8,] "01/08/24" "20,019" "330" "73"    "583"   "1,615" "1,484" "1,470"
##  [9,] "01/09/24" "18,320" "330" "64"    "562"   "1,812" "1,553" "1,513"
## [10,] "01/10/24" "17,364" "330" "57"    "541"   "1,852" "1,621" "1,550"
## [11,] "01/11/24" "16,624" "330" "58"    "555"   "1,885" "1,687" "1,580"
## [12,] "01/12/24" "16,920" "330" "59"    "703"   "1,881" "1,756" "1,604"
## [13,] "01/13/24" "16,540" "330" "59"    "670"   "1,868" "1,824" "1,624"
## [14,] "01/14/24" "16,299" "330" "59"    "631"   "1,857" "1,861" "1,641"
## [15,] "01/15/24" "16,793" "330" "350"   "882"   "1,870" "1,873" "1,658"
## [16,] "01/16/24" "18,048" "330" "202"   "921"   "1,897" "1,878" "1,672"
## [17,] "01/17/24" "24,096" "330" "140"   "773"   "1,885" "1,883" "1,687"
## [18,] "01/18/24" "25,440" "330" "164"   "852"   "1,921" "1,893" "1,702"
## [19,] "01/19/24" "23,501" "330" "157"   "969"   "1,950" "1,894" "1,711"
## [20,] "01/20/24" "24,646" "330" "133"   "872"   "1,879" "1,895" "1,719"
## [21,] "01/21/24" "25,386" "330" "307"   "910"   "1,866" "1,910" "1,731"
## [22,] "01/22/24" "25,500" "330" "444"   "1,337" "1,973" "1,981" "1,761"
## [23,] "01/23/24" "29,219" "330" "3,004" "1,757" "2,391" "2,198" "1,832"
## [24,] "01/24/24" "38,607" "330" "794"   "1,760" "3,408" "2,517" "1,929"
## [25,] "01/25/24" "42,466" "330" "480"   "1,424" "4,154" "2,784" "2,005"
## [26,] "01/26/24" "44,134" "330" "417"   "1,356" "3,817" "3,014" "2,062"
## [27,] "01/27/24" "45,171" "330" "272"   "1,241" "3,488" "3,234" "2,112"
## [28,] "01/28/24" "44,661" "330" "214"   "1,106" "3,409" "3,453" "2,161"
## [29,] "01/29/24" "42,797" "330" "184"   "1,069" "3,502" "3,595" "2,203"
## [30,] "01/30/24" "38,357" "330" "166"   "1,050" "3,385" "3,577" "2,240"
## [31,] "01/31/24" "34,283" "330" "152"   "990"   "3,286" "3,448" "2,272"
##       [,9]     [,10]     [,11]   [,12]   [,13] [,14] [,15] [,16]   [,17]  
##  [1,] "21,710" "-4,818"  "2,290" "3,547" "104" "5"   "32"  "5,968" "7,978"
##  [2,] "25,373" "-3,777"  "2,298" "3,540" "108" "3"   "30"  "5,974" "6,907"
##  [3,] "26,036" "-5,881"  "2,097" "3,546" "108" "3"   "33"  "5,782" "5,773"
##  [4,] "24,897" "-2,280"  "2,093" "3,532" "104" "5"   "29"  "5,753" "5,702"
##  [5,] "23,430" "-960"    "2,191" "3,507" "109" "4"   "35"  "5,838" "5,655"
##  [6,] "23,434" "-1,010"  "2,395" "3,476" "110" "3"   "32"  "6,012" "5,731"
##  [7,] "23,575" "-1,745"  "2,393" "3,484" "111" "5"   "35"  "6,018" "5,815"
##  [8,] "22,620" "31"      "2,389" "3,480" "110" "4"   "28"  "6,004" "5,872"
##  [9,] "21,088" "487"     "2,697" "3,466" "92"  "4"   "36"  "6,288" "5,970"
## [10,] "20,144" "437"     "2,696" "3,471" "80"  "4"   "34"  "6,277" "6,067"
## [11,] "19,452" "159"     "2,599" "3,468" "82"  "4"   "29"  "6,175" "6,133"
## [12,] "19,893" "844"     "2,593" "3,488" "98"  "5"   "31"  "6,206" "6,105"
## [13,] "19,467" "1,022"   "2,592" "3,553" "90"  "5"   "38"  "6,268" "6,098"
## [14,] "19,176" "-1,489"  "1,791" "3,554" "62"  "5"   "43"  "5,445" "5,857"
## [15,] "20,225" "-1,539"  "1,793" "3,548" "55"  "4"   "30"  "5,422" "5,610"
## [16,] "21,398" "-1,310"  "1,795" "3,541" "54"  "5"   "35"  "5,420" "5,341"
## [17,] "27,224" "-3,643"  "1,796" "3,534" "67"  "6"   "31"  "5,423" "5,336"
## [18,] "28,707" "-3,871"  "1,799" "3,547" "84"  "4"   "33"  "5,458" "5,337"
## [19,] "26,907" "-1,410"  "1,894" "3,556" "81"  "4"   "20"  "5,547" "5,375"
## [20,] "27,860" "-1,639"  "1,796" "3,582" "85"  "4"   "35"  "5,494" "5,391"
## [21,] "28,799" "-3,236"  "1,794" "3,589" "84"  "3"   "31"  "5,496" "5,404"
## [22,] "29,584" "-3,971"  "1,893" "3,597" "88"  "4"   "31"  "5,606" "5,417"
## [23,] "36,701" "-9,904"  "293"   "3,586" "103" "5"   "31"  "4,009" "4,917"
## [24,] "44,899" "-10,183" "470"   "2,665" "104" "3"   "23"  "3,260" "4,168"
## [25,] "48,854" "-11,780" "1,089" "2,666" "102" "4"   "33"  "3,886" "3,590"
## [26,] "50,054" "-10,183" "1,287" "2,658" "100" "5"   "33"  "4,073" "3,612"
## [27,] "50,502" "-7,216"  "1,087" "2,651" "103" "3"   "31"  "3,870" "3,813"
## [28,] "49,720" "-1,104"  "1,095" "2,651" "105" "4"   "31"  "3,878" "3,810"
## [29,] "47,882" "-876"    "1,992" "1,826" "118" "3"   "32"  "3,965" "3,767"
## [30,] "43,288" "950"     "1,995" "1,827" "93"  "0"   "35"  "3,951" "3,795"
## [31,] "39,041" "950"     "1,096" "2,763" "103" "4"   "32"  "3,990" "3,833"
##       [,18]    [,19]    [,20]    [,21] [,22] [,23]
##  [1,] "20,561" "11,399" "20,561" "27%" "41%" "43%"
##  [2,] "23,177" "13,381" "21,869" "23%" "31%" "35%"
##  [3,] "26,136" "15,980" "23,291" "22%" "24%" "28%"
##  [4,] "21,424" "17,927" "22,824" "23%" "22%" "27%"
##  [5,] "18,552" "19,423" "21,970" "24%" "23%" "27%"
##  [6,] "18,432" "20,479" "21,380" "25%" "24%" "27%"
##  [7,] "19,302" "21,083" "21,083" "25%" "25%" "27%"
##  [8,] "16,586" "20,515" "20,521" "26%" "25%" "27%"
##  [9,] "14,312" "19,249" "19,831" "29%" "27%" "28%"
## [10,] "13,430" "17,434" "19,191" "31%" "28%" "28%"
## [11,] "13,118" "16,247" "18,639" "31%" "30%" "28%"
## [12,] "12,844" "15,432" "18,156" "31%" "31%" "28%"
## [13,] "12,177" "14,538" "17,696" "32%" "31%" "27%"
## [14,] "15,219" "13,955" "17,519" "28%" "30%" "26%"
## [15,] "16,342" "13,920" "17,441" "26%" "29%" "25%"
## [16,] "17,288" "14,345" "17,431" "25%" "26%" "25%"
## [17,] "25,443" "16,062" "17,902" "20%" "23%" "24%"
## [18,] "27,120" "18,062" "18,415" "19%" "21%" "24%"
## [19,] "22,771" "19,480" "18,644" "20%" "19%" "24%"
## [20,] "24,004" "21,170" "18,912" "19%" "19%" "24%"
## [21,] "26,539" "22,787" "19,275" "19%" "19%" "23%"
## [22,] "27,949" "24,445" "19,669" "19%" "19%" "23%"
## [23,] "42,597" "28,060" "20,666" "11%" "16%" "20%"
## [24,] "51,822" "31,829" "21,964" "7%"  "11%" "16%"
## [25,] "56,748" "36,061" "23,356" "8%"  "8%"  "13%"
## [26,] "56,163" "40,832" "24,617" "8%"  "8%"  "12%"
## [27,] "53,847" "45,095" "25,700" "7%"  "8%"  "12%"
## [28,] "46,946" "48,010" "26,459" "8%"  "8%"  "11%"
## [29,] "44,793" "50,417" "27,091" "8%"  "8%"  "10%"
## [30,] "38,387" "49,815" "27,468" "9%"  "8%"  "10%"
## [31,] "34,101" "47,284" "27,682" "10%" "9%"  "10%"
# Keep the columns for Date, Export, and Outflow; rename them; convert to tibble
usbr_pdf_mat2 <- usbr_pdf_mat[,c(1, 16, 18)]
colnames(usbr_pdf_mat2) <- c("Date", "Export", "Outflow")
df_usbr_dout <- as_tibble(usbr_pdf_mat2)
df_usbr_dout
## # A tibble: 31 × 3
##    Date     Export Outflow
##    <chr>    <chr>  <chr>  
##  1 01/01/24 5,968  20,561 
##  2 01/02/24 5,974  23,177 
##  3 01/03/24 5,782  26,136 
##  4 01/04/24 5,753  21,424 
##  5 01/05/24 5,838  18,552 
##  6 01/06/24 6,012  18,432 
##  7 01/07/24 6,018  19,302 
##  8 01/08/24 6,004  16,586 
##  9 01/09/24 6,288  14,312 
## 10 01/10/24 6,277  13,430 
## # ℹ 21 more rows