This document provides examples and code for commonly-plotted data types using data collected in the San Francisco Estuary.
Load in packages
knitr::opts_chunk$set(warning=FALSE, message=F)
library(ggplot2) # Plotting data
library(dplyr) # Manipulating data
library(tidyr) # Pivot, tidy data
library(lubridate) # Easy ways to define time variables
library(viridis) # Color-blind friendly palettes
library(readr) # For reading and writing data faster
library(here) # For helping read in data - starts with your project directory filepath
library(sf) # For working with spatial data
library(deltamapr) # See instructions for downloading this basemap package https://github.com/InteragencyEcologicalProgram/deltamapr
library(ggspatial) # for north arrow and scale bar
library(mapview) # forinteractive plots
library(ggridges) # Add ridge plot support for ggplot
# Setting ggplot theme, useful to help with consistency and allows you to get away with not specifying everytime
theme_set(theme_classic(base_size = 15))
Some preliminary data reading and manipulation has been done in
prepare_data.R
.
df_wytype <- read_csv(here("figure_gallery/data/processed/wytype_1906_cur.csv"))
#plot water year index by year, color-coding by water year type.
ggplot(df_wytype, aes(x = Year, y = Sac_Index, fill = Sac_WYtype)) + geom_col()+
scale_fill_manual(values = c("firebrick", "orange", "yellow","skyblue", "blue"), name = "Water Year Type")+
ylab("Sacramento Valley Index")+ theme_bw()
ggplot(df_wytype, aes(x = Year, y = SJ_Index, fill = SJ_WYtype)) + geom_col()+
scale_fill_manual(values = c("firebrick", "orange", "yellow","skyblue", "blue"), name = "Water Year Type")+
ylab("San Joaquin Valley Index")+ theme_bw()
DTO <- readRDS(here("figure_gallery/data/raw/CDEC_DTO_2015_2023.rds"))
ggplot(DTO, aes(x = datetime, y = parameter_value))+ geom_line()+
ylab("Net Delta Outflow Index (cfs, cdec)")
Dayflow <- read_csv(here("figure_gallery/data/processed/dayflow_1929_2023.csv"))%>%
filter(Year > 1989) %>%
mutate(Exports = CVP + SWP) %>%
rename(Outflow = OUT)
dayflow_long <- Dayflow %>%
select(Date, Year, Outflow, Exports) %>%
pivot_longer(cols = c("Outflow", "Exports"), names_to = "Variable",
values_to = "Flow (cfs)")
ggplot(dayflow_long) +
geom_line(aes(x = Date, y = `Flow (cfs)`, color = Variable)) +
facet_wrap(~Variable, nrow = 2, scales = "free_y") +
theme_bw() +
theme(legend.position = "none")
Use patchwork to combine two plots
library(patchwork)
outflow <- ggplot() + geom_line(data = Dayflow, aes(x = Date, y = Outflow), color = "navy") +
theme_bw()
export <- ggplot() + geom_line(data = Dayflow, aes(x = Date, y = Exports), color = "goldenrod4") +
theme_bw()
export + outflow + plot_layout(nrow = 2)
Next we have some examples of plots using environmental data from CDEC.
Rio Vista data - looking at a few different parameters. Also see an example of adding a caption to the document.
# Read in data for Rio Vista
RVB_df <- readRDS(here("figure_gallery/data/raw/CDEC_parameters_RVB_2015_2019.rds")) %>%
filter(!(parameter == "EC" & parameter_value <100)) # removing an outlier
# Filter to 2017
RVB_2017 <- RVB_df %>%
filter(year == 2017) %>%
mutate(parameter = case_when(parameter == "AirTempF" ~ "Air Temp (°F)",
parameter == "DO" ~ "DO (mg/L)",
parameter == "EC" ~ "EC (uS/cm)",
parameter == "TurbidityNTU" ~ "Turbidity (NTU)"))
# Plot data
(rvb_plot <- ggplot(data = RVB_2017) +
geom_point(aes(x = datetime, y = parameter_value)) +
facet_wrap(~parameter, scale = "free_y", nrow = 4, strip.position = "left") +
scale_x_datetime(date_breaks = "1 month", date_labels = "%b") +
theme_bw() +
theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
axis.title = element_blank(),
strip.background = element_rect(fill="White", color = "White"),
strip.placement = "outside"))
png(here("figure_gallery/figures/plot_RVB_2017.png"), units = "in", width = 6, height = 8, res = 300)
rvb_plot
dev.off()
Here is an interactive version of the plot, which can be helpful for identifying outliers, max, mins, etc.
library(plotly)
RVB_daily <- RVB_2017 %>%
group_by(date, parameter) %>%
summarize(mean_value = mean(parameter_value, na.rm = TRUE))
ggplotly(ggplot(RVB_daily) +
geom_point(aes(x = date, y = mean_value, color = parameter), size = 0.8) +
facet_wrap(~parameter, scale = "free_y", nrow = 4, strip.position = "left") +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
scale_color_viridis(option = "turbo", discrete = TRUE) +
theme_bw() +
theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
axis.title = element_blank(),
strip.background = element_rect(fill="White", color = "White"),
strip.placement = "outside"))
Here we group data by stations and water year type and add in a temperature threshold line.
# Read in data
watertemp_df <- readRDS(here("figure_gallery/data/raw/CDEC_watertemp_JER_VER_RVB_2015_2019.rds"))
wytype <- read.csv(here("figure_gallery/data/raw/WYType.csv")) %>%
filter(Basin == "SacramentoValley") %>%
select(WY, Yr.type)
# Filter to 2017
watertemp <- watertemp_df %>%
rename(station = location_id,
watertemp = parameter_value) %>%
mutate(year = year(datetime),
yearF = factor(year),
month = month(datetime),
WY = if_else(month>10, year + 1, year))%>%
filter(WY <2019 & WY > 2014) %>%
left_join(wytype, by = "WY")
# Plot data
ggplot(data = watertemp) +
geom_point(aes(x = datetime, y = watertemp, color = Yr.type), size = 1.5) +
geom_hline(yintercept = 70, linetype = "dashed") +
facet_wrap(~station, nrow = 3) +
scale_x_datetime(date_breaks = "3 months", date_labels = "%b %Y") +
labs(y = "Water Temperature (°F)", color = "WY Type") +
scale_color_viridis(discrete = TRUE)+
theme_bw() +
theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5))
This plot works best when you want to highlight a particular trend against other trends across different instances, such as year or station.
malWaterTemp <- readRDS(here("figure_gallery/data/processed/CDEC_MAL_2024.rds"))
ggplot(malWaterTemp, aes(dummyDate, dailyTemperature, group = year, color = color, size = color)) +
geom_line() +
scale_color_manual(values = c("2024" = "firebrick", "Others" = "grey70")) +
scale_size_manual(values = c("2024" = 1.5, "Others" = 0.7)) +
scale_x_date(date_labels = "%b-%d") +
labs(x = "Date", y = "Water Temperature (\u00B0C)",
title = "MAL, Daily Mean Water Temperature Across a Season",
subtitle = paste0("Year range: ", paste(range(malWaterTemp$year), collapse = " to ")),
color = "Year",
size = "Year")
This type of plot allows you to very quickly place the trend of interest in respect with all other historical trends.
Heat plots are very useful when you have three axes of interest (so are facets). Here, the three axes are date on the x, year on the y, and temperature on the z.
ggplot(malWaterTemp, aes(dummyDate, year, fill = dailyTemperature)) +
geom_tile() +
# The turbo color palette is the easiest to see for heatmaps, in my opinion
scale_fill_viridis(option = "turbo") +
# expand = c(0, 0) is used to remove the white spaces before and after the first axis values
scale_x_date(date_labels = "%b", expand = c(0, 0), date_breaks = "1 month") +
scale_y_continuous(expand = c(0, 0)) +
labs(x = "Date", y = "Year",
title = "MAL, Daily Mean Water Temeprature Across a Season",
subtitle = paste0("Year range: ", paste(range(malWaterTemp$year), collapse = " to ")),
fill = "Water Temperature (\u00B0C)") +
theme_classic(base_size = 13) +
# Can move the legend position, size, and height via theme() and guide()
theme(legend.position = "bottom",
legend.key.width = grid::unit(3, "cm"),
legend.key.height = grid::unit(0.75, "cm")) +
guides(fill = guide_colorbar(title.position = "bottom",
title.hjust = 0.5))
The heatplot allows us to see quickly that the middle of June through October is consistently the hottest period of the year.
We can also use a density ridge plot to visualize this as well. This approach also highlights the distribution for each month throughout the years as well.
library(ggridges)
# Note that this is a density plot across all years
malWaterTemp %>%
mutate(Month = month(dummyDate, label = T, abbr = F)) %>%
ggplot(aes(dailyTemperature, Month, fill = after_stat(x))) +
geom_density_ridges_gradient(rel_min_height = 0.01) +
scale_fill_viridis(option = "H") +
theme_classic(base_size = 12) +
theme(panel.grid.major.y = element_line()) +
labs(x = "Water Temperature (\u00B0C)", y = "Month",
title = "MAL, General Daily Mean Water Temeprature Trend Across a Season",
subtitle = paste0("Year range: ", paste(range(malWaterTemp$year), collapse = " to ")),
fill = "Water Temperature (\u00B0C)")
This is a commonly made type of plot to look at abundance index, or some other annual metric, by year.
FMWTindices <- read_csv("https://filelib.wildlife.ca.gov/Public/TownetFallMidwaterTrawl/FMWT%20Data/FMWTindices.csv") %>%
janitor::clean_names() # this function cleans column names
ggplot(FMWTindices) +
geom_col(aes(x = factor(year), y = delta_smelt), fill = "steelblue4") +
geom_text(aes(x = factor(year), y = delta_smelt +100, label = delta_smelt, angle = 90)) +
labs(y = "Delta Smelt Index", x = "Year", title = "Annual Delta Smelt Index, 1967-2023") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90))
Looking at sample sizes for studies is often an important first step when analyzing a dataset. We can use the tile plot again to look at this.
# Read in data
fishdata <- readRDS(here("figure_gallery/data/processed/djfmp_data_2015-2019.rds")) %>%
mutate(fYear = factor(Year),
fMonth = factor(Month),
RegionCode = factor(RegionCode))
# Get down to unique samples
unique_samples <- fishdata %>%
select(-ForkLength, -Count, -IEPFishCode) %>%
distinct() %>%
group_by(fYear, fMonth, RegionCode) %>%
summarize(n = n()) %>%
ungroup()
# Plot with tile plot
ggplot(unique_samples) +
geom_tile(aes(fYear, fMonth, fill = n), color = "black") +
facet_wrap(~RegionCode) +
labs(title = "DJFMP Sample Size by Month, Year, Region", x = "Year",y = "Month", fill = "Sample Size") +
scale_fill_viridis(option = "plasma") +
theme_classic()
Here we select some native species and look at the mean CPUE by month and year.
native_spp <- c("SACSUC", "HITCH", "SPLITT", "SACPIK")
common_spp <- c("MISSIL", "BLUEGI", "WESMOS")
daily_data <- fishdata %>%
filter(IEPFishCode %in% native_spp) %>%
mutate(CPUE = Count/Volume) %>%
group_by(SampleDate, StationCode) %>%
mutate(dailyCPUE = sum(CPUE)) %>%
ungroup() %>%
select(-Datetime, -ForkLength, -Count, -CPUE) %>%
distinct()
monthly_data <- daily_data %>%
group_by(fYear, fMonth, IEPFishCode) %>%
summarize(meanCPUE = mean(dailyCPUE),
meanTemp = mean(WaterTemp))
ggplot(monthly_data) +
geom_col(aes(factor(fMonth), meanCPUE, fill = factor(fYear))) +
facet_grid(factor(fYear)~IEPFishCode, scales = "free_y") +
scale_fill_viridis(discrete = TRUE) +
labs(y = "Mean CPUE (Count m^-3)", x = "Month", fill = "Year", title = "Monthly Native Species CPUE, 2015-2019") +
theme_bw()
daily_data_all <- fishdata %>%
mutate(CPUE = Count/Volume) %>%
select(-Datetime, -ForkLength, -IEPFishCode, -Count) %>%
distinct() %>%
group_by(SampleDate, StationCode) %>%
mutate(dailyCPUE = sum(CPUE, na.rm = TRUE)) %>%
ungroup()%>%
select(-CPUE) %>%
distinct()
monthly_data_all <- daily_data_all %>%
group_by(fYear, fMonth, Month, Year, RegionCode) %>%
summarize(meanCPUE = mean(dailyCPUE)) %>%
mutate(WY=if_else(Month>=10, Year + 1,Year))
annual_data_all <- daily_data_all %>%
group_by(fYear,RegionCode) %>%
summarize(meanCPUE = mean(dailyCPUE))
ggplot(monthly_data_all) +
geom_col(aes(x = RegionCode, y = meanCPUE, fill = fMonth)) +
facet_wrap(~fYear) +
labs(title = "Mean Monthly DJFMP CPUE across regions, 2015-2019", y = "Mean CPUE (Count m^-3)", fill = "Month") +
theme_bw() +
scale_fill_viridis(discrete = TRUE)
wytype <- read_csv(here::here("figure_gallery/data/processed/wytype_1906_cur.csv")) %>%
rename(WY = Year)
cpue_wytype <- left_join(monthly_data_all, wytype, by = "WY")
ggplot(cpue_wytype) +
geom_col(aes(x = WY, y = meanCPUE, fill = Sac_WYtype)) +
scale_fill_viridis(discrete = TRUE) +
labs(x = "Water Year" , y = "Mean CPUE (Count m^-3)", fill = "Water Year Type", title = "Mean DJFMP CPUE by Year and Water Year Type") +
theme_bw()
chl <- read_csv(here("figure_gallery/data/processed/yolo_chl_2016_2019.csv"))
max_chl <- max(chl$chlorophyll, na.rm = TRUE)
# Set bar at maximum value
inun <- read_csv(here("figure_gallery/data/processed/yolo_inun_2016_2019.csv")) %>%
mutate(inun_val = if_else(Inundation == TRUE, max_chl, 0))%>%
rename(sample_date = Dates)
(plot_inunChl <- ggplot() +
geom_col(data = inun, aes(x = DOY, y = inun_val), fill = "gray90", color = "gray90") +
geom_line(data = chl, aes(x = DOY, y = chlorophyll, linetype = station_code, color = station_code),size = 1) +
facet_wrap(~Year, nrow = 4) +
scale_colour_viridis(discrete = TRUE) +
scale_linetype_manual(values = c("dotted", "solid", "longdash"))+
labs(title = "Yolo Bypass Chlorophyll a Concentrations, \n2016-2019, with Inundation Periods", y = expression(Chlorophyll~a~(mg~L^-1)), fill = "Inundation") +
theme_bw() +
theme(axis.title.x = element_blank(),
legend.key=element_rect(colour="black"),
axis.text.x = element_blank()))
Here are some examples for making maps
# Read in EDSM stations
stations <- readRDS(here("figure_gallery/data/processed/djfmp_stations.rds"))
# Convert stations to sf object, to be read as spatial data
stations_sf <- st_as_sf(stations, coords = c("Longitude", "Latitude"), crs = 4326)
# Read in EDSM Phase 3 Strata (naming them regions) - this object is in the deltamapr package
regions <- R_EDSM_Strata_17P3
# Transform stations to the same coordinate projection as EDSM regions
stationsT <- st_transform(stations_sf, crs = st_crs(regions))
# Join regions and stations
station_regions <- st_join(stationsT, regions)
# Only include stations within regions
stations_in_regions <- st_intersection(stationsT, regions)
This is a quick way to look at/share stations for a study. You can modify the variable being color-coded by modifying the “zcol” parameter.
mapView(station_regions, zcol = "Stratum")
# WW_Delta is a basemap layer you can use from the deltamapr package.
(stratumMap <- ggplot() +
geom_sf(data = WW_Delta, fill = "gray80", color = "gray80") + #basemap layer
geom_sf(data = R_EDSM_Strata_17P3, aes(fill = Stratum), alpha = 0.3) + # region layer
geom_sf(data = stations_in_regions, size = 1.5, shape = 22)+ # station layer
scale_fill_brewer(palette = "Dark2") +
annotation_north_arrow(location = "tr", which_north = "true",
pad_x = unit(0.1, "in"), pad_y = unit(0.1, "in"),
width = unit(0.5, "in"), height = unit(0.5, "in"),
style = north_arrow_fancy_orienteering) +
annotation_scale(location = "bl", bar_cols = c("magenta4", "white", "magenta4", "white")) +
labs(title = "DJFMP Stations in EDSM Strata")+
theme(axis.title = element_blank(),
panel.grid.major = element_line(color = "grey80", linetype = "dashed", size = 0.5))+
theme_bw()+
theme(axis.title = element_blank()))