121 lines
2.4 KiB
R
121 lines
2.4 KiB
R
|
library(tidyverse)
|
||
|
|
||
|
# Split a data frame into the rows that satisfy a predicate and the rest.
#
# df:  a data frame / tibble.
# flt: a data-masked filter expression, evaluated against the columns of `df`
#      (forwarded to `filter()` via `{{ }}`).
#
# Returns a list with two elements:
#   i - rows where `flt` is TRUE
#   o - every other row, including rows where `flt` evaluates to NA, so that
#       `i` and `o` together always contain every row of `df` exactly once.
split_df <- function(df, flt) {
  .in <- df %>%
    filter({{ flt }})
  # `filter()` drops rows whose condition is NA, and `!NA` is still NA, so a
  # plain negation would lose those rows from both halves. Treat NA as
  # "did not match" before negating.
  .out <- df %>%
    filter(!coalesce({{ flt }}, FALSE))
  list(i = .in,
       o = .out)
}
|
||
|
|
||
|
# Read the gzipped TSV of results, keeping only the columns named below.
df <- readr::read_tsv(
  "../../results/wqa/process/all.tsv.gz",
  col_types = cols(
    start = "D",
    species = "f",
    std_value = "d",
    lat = "d",
    long = "d",
    location_name = "c",
    # skip every column that is not listed above
    .default = "-"
  )
) %>%
  # get rid of the deuterium distinction on some pharma species.
  # `\\d+` removes the whole label: with a single `\\d`, a multi-digit suffix
  # such as "-d10" would leave a stray trailing digit in the species name.
  mutate(species = str_replace(species, "-(d|D)\\d+", ""))
|
||
|
|
||
|
# Species whose recorded values sum to exactly zero.
# Missing measurements are ignored: without `na.rm = TRUE` a single NA makes
# the total NA, and `filter(total == 0)` would then silently leave that species
# out of the list even if every recorded value was zero.
not_detected <- df %>%
  group_by(species) %>%
  summarize(total = sum(std_value, na.rm = TRUE)) %>%
  filter(total == 0) %>%
  pull(species)
|
||
|
|
||
|
|
||
|
# Species names that are filtered out when building `df_detected`.
harmless <- c(
  "Sodium", "Bicarbonate", "Calcium", "Magnesium",
  "Potassium", "Carbonate", "Oxygen", "Silica"
)
|
||
|
|
||
|
# Keep only rows whose species is neither in `not_detected` nor in `harmless`.
df_detected <- filter(
  df,
  !(species %in% not_detected | species %in% harmless)
)
|
||
|
|
||
|
# Bar chart of the fraction of measurements above zero for each species,
# using samples taken after 1990, with +/- one binomial standard error.
df %>%
  filter(lubridate::year(start) > 1990) %>%
  group_by(species) %>%
  # Ignore missing measurements: otherwise one NA turns the whole fraction
  # into NA, and NA rows would inflate the sample size used for the error.
  summarize(fraction = mean(std_value > 0, na.rm = TRUE),
            n = sum(!is.na(std_value))) %>%
  # require more than three usable measurements per species
  filter(n > 3) %>%
  mutate(stderr = sqrt(fraction * (1 - fraction) / n)) %>%
  ggplot(aes(fraction, fct_reorder(species, fraction))) +
  geom_col() +
  geom_errorbarh(aes(xmin = fraction - stderr, xmax = fraction + stderr))
|
||
|
|
||
|
# Species names assigned to the metals group by the `.metal` split.
metals <- c(
  "Lithium", "Beryllium", "Boron", "Aluminum",
  "Vanadium", "Chromium", "Manganese", "Iron",
  "Cobalt", "Nickel", "Copper", "Zinc",
  "Arsenic", "Selenium", "Strontium", "Molybdenum",
  "Silver", "Cadmium", "Antimony", "Barium",
  "Mercury", "Thallium", "Lead", "Uranium"
)
|
||
|
|
||
|
# Species names assigned to the halides group by the `.halides` split.
halides <- c(
  "Chloride",
  "Fluoride",
  "Bromide"
)
|
||
|
|
||
|
# Peel one category at a time off the detected data. Each split works on the
# leftover (`$o`) of the previous one, so a row lands in at most one group.

# nitrogen species: names containing "nitr"/"Nitr" or "Ammon"
.nitro <- split_df(
  df = df_detected,
  flt = str_detect(species, "(n|N)itr") | str_detect(species, "Ammon")
)

# phosphorus species: names containing "Phosph"/"phosph"
.phospho <- split_df(
  df = .nitro$o,
  flt = str_detect(species, "(P|p)hosph")
)

# species listed in `metals`
.metal <- split_df(
  df = .phospho$o,
  flt = species %in% metals
)

# species listed in `halides`; `.halides$o` is whatever remains after all splits
.halides <- split_df(
  df = .metal$o,
  flt = species %in% halides
)
|
||
|
|
||
|
# Time series of the nitrogen group, one coloured line per species.
ggplot(
  .nitro$i,
  aes(x = start, y = std_value, group = species, color = species)
) +
  geom_line()
|
||
|
|
||
|
# Scatter plots of value against date, one free-scaled panel per species,
# drawn in turn for the halide, metal and phosphorus groups.

# halides
ggplot(.halides$i, aes(x = start, y = std_value)) +
  geom_point() +
  facet_wrap("species", scales = "free")

# metals
ggplot(.metal$i, aes(x = start, y = std_value)) +
  geom_point() +
  facet_wrap("species", scales = "free")

# phosphorus species
ggplot(.phospho$i, aes(x = start, y = std_value)) +
  geom_point() +
  facet_wrap("species", scales = "free")
|
||
|
|
||
|
# Species left over after every split: jittered points of all values above 1.
ggplot(
  filter(.halides$o, std_value > 1),
  aes(x = std_value, y = species)
) +
  geom_jitter()
|