moco-water/workflow/scripts/summarize_wqa.R

121 lines
2.4 KiB
R
Raw Normal View History

2023-04-05 21:00:46 -04:00
library(tidyverse)
split_df <- function(df, flt) {
.in <- df %>%
filter({{ flt }})
.out <- df %>%
filter(! {{ flt }})
list(i = .in,
o = .out)
}
df <- readr::read_tsv(
"../../results/wqa/process/all.tsv.gz",
col_types = cols(
start = "D",
species = "f",
std_value = "d",
lat = "d",
long = "d",
location_name = "c",
.default = "-"
)
) %>%
# get rid of the deuterium distinction on some pharma species
mutate(species = str_replace(species, "-(d|D)\\d", ""))
not_detected <- df %>%
group_by(species) %>%
summarize(total = sum(std_value)) %>%
filter(total == 0) %>%
pull(species)
harmless <- c(
"Sodium",
"Bicarbonate",
"Calcium",
"Magnesium",
"Potassium",
"Carbonate",
"Oxygen",
"Silica"
)
df_detected <- df %>%
filter(! species %in% not_detected) %>%
filter(! species %in% harmless)
df %>%
filter(lubridate::year(start) > 1990) %>%
group_by(species) %>%
summarize(fraction = mean(std_value > 0),
n = n()) %>%
mutate(stderr = sqrt(fraction * (1 - fraction) / n)) %>%
filter(n > 3) %>%
ggplot(aes(fraction, fct_reorder(species, fraction))) +
geom_col() +
geom_errorbarh(aes(xmin = fraction - stderr, xmax = fraction + stderr))
metals <- c(
"Lithium",
"Beryllium",
"Boron",
"Aluminum",
"Vanadium",
"Chromium",
"Manganese",
"Iron",
"Cobalt",
"Nickel",
"Copper",
"Zinc",
"Arsenic",
"Selenium",
"Strontium",
"Molybdenum",
"Silver",
"Cadmium",
"Antimony",
"Barium",
"Mercury",
"Thallium",
"Lead",
"Uranium"
)
halides <- c("Chloride", "Fluoride", "Bromide")
.nitro <- split_df(df_detected, str_detect(species, "(n|N)itr")
| str_detect(species, "Ammon"))
.phospho <- split_df(.nitro$o, str_detect(species, "(P|p)hosph"))
.metal <- split_df(.phospho$o, species %in% metals)
.halides <- split_df(.metal$o, species %in% halides)
.nitro$i %>%
ggplot(aes(start, std_value, color = species, group = species)) +
geom_line()
.halides$i %>%
ggplot(aes(start, std_value)) +
geom_point() +
facet_wrap(scales = "free", c("species"))
.metal$i %>%
ggplot(aes(start, std_value)) +
geom_point() +
facet_wrap(scales = "free", c("species"))
.phospho$i %>%
ggplot(aes(start, std_value)) +
geom_point() +
facet_wrap(scales = "free", c("species"))
.halides$o %>%
filter(std_value > 1) %>%
ggplot(aes(std_value, species)) +
geom_jitter()