library(tidyverse) split_df <- function(df, flt) { .in <- df %>% filter({{ flt }}) .out <- df %>% filter(! {{ flt }}) list(i = .in, o = .out) } df <- readr::read_tsv( "../../results/wqa/process/all.tsv.gz", col_types = cols( start = "D", species = "f", std_value = "d", lat = "d", long = "d", location_name = "c", .default = "-" ) ) %>% # get rid of the deuterium distinction on some pharma species mutate(species = str_replace(species, "-(d|D)\\d", "")) not_detected <- df %>% group_by(species) %>% summarize(total = sum(std_value)) %>% filter(total == 0) %>% pull(species) harmless <- c( "Sodium", "Bicarbonate", "Calcium", "Magnesium", "Potassium", "Carbonate", "Oxygen", "Silica" ) df_detected <- df %>% filter(! species %in% not_detected) %>% filter(! species %in% harmless) df %>% filter(lubridate::year(start) > 1990) %>% group_by(species) %>% summarize(fraction = mean(std_value > 0), n = n()) %>% mutate(stderr = sqrt(fraction * (1 - fraction) / n)) %>% filter(n > 3) %>% ggplot(aes(fraction, fct_reorder(species, fraction))) + geom_col() + geom_errorbarh(aes(xmin = fraction - stderr, xmax = fraction + stderr)) metals <- c( "Lithium", "Beryllium", "Boron", "Aluminum", "Vanadium", "Chromium", "Manganese", "Iron", "Cobalt", "Nickel", "Copper", "Zinc", "Arsenic", "Selenium", "Strontium", "Molybdenum", "Silver", "Cadmium", "Antimony", "Barium", "Mercury", "Thallium", "Lead", "Uranium" ) halides <- c("Chloride", "Fluoride", "Bromide") .nitro <- split_df(df_detected, str_detect(species, "(n|N)itr") | str_detect(species, "Ammon")) .phospho <- split_df(.nitro$o, str_detect(species, "(P|p)hosph")) .metal <- split_df(.phospho$o, species %in% metals) .halides <- split_df(.metal$o, species %in% halides) .nitro$i %>% ggplot(aes(start, std_value, color = species, group = species)) + geom_line() .halides$i %>% ggplot(aes(start, std_value)) + geom_point() + facet_wrap(scales = "free", c("species")) .metal$i %>% ggplot(aes(start, std_value)) + geom_point() + facet_wrap(scales = "free", c("species")) .phospho$i %>% ggplot(aes(start, std_value)) + geom_point() + facet_wrap(scales = "free", c("species")) .halides$o %>% filter(std_value > 1) %>% ggplot(aes(std_value, species)) + geom_jitter()