# moco-water/workflow/scripts/analyze_wssc.R

library(tidyverse)

# Split a data frame in two according to a row-wise test.
#
# Arguments:
#   df   - the data frame to split.
#   test - an unquoted expression, evaluated against the columns of `df`
#          by `filter()`.
#
# Returns a list with two elements:
#   i - the rows for which `test` is TRUE
#   o - the rows for which `!test` is TRUE
# `filter()` drops rows whose condition is NA, so a row with an NA test
# result appears in neither element.
split_df <- function(df, test) {
  passing <- filter(df, {{ test }})
  failing <- filter(df, !{{ test }})
  list(i = passing, o = failing)
}

# The table to analyze is the first (unnamed) Snakemake input.
path <- snakemake@input[[1]]

# Column names are supplied here instead of being taken from the file.  The
# type string declares, in the same order as the names, one integer column,
# two character columns and seven double columns.
df <- readr::read_tsv(
  path,
  col_names = c(
    "year", "species", "unit",
    "ave_lower", "ave_upper",
    "min_lower", "min_upper",
    "max_lower", "max_upper",
    "limit"
  ),
  col_types = "iccddddddd"
) %>%
  # Some TTHM/HAA5 entries appear twice in the input; of those, keep only the
  # rows that carry a positive limit.  Every other species passes unchanged.
  filter(limit > 0 | !str_detect(species, "(TTHM|HAA5)"))

# Species whose largest recorded limit is positive.  The maximum is taken
# without `na.rm`, so a species with a missing limit in any of its rows gets
# an NA maximum and is dropped by the filter.
has_limit <- df %>%
  group_by(species) %>%
  summarize(peak_limit = max(limit)) %>%
  filter(peak_limit > 0) %>%
  pull(species)

# `limited$i`: rows of species that have a limit; `limited$o`: all others.
limited <- split_df(df, species %in% has_limit)

# For every species with a limit, take the per-species maxima of the upper
# average, the upper maximum and the limit, then label the species by how its
# maximum compares with the limit.  The `case_when` branches are tried top to
# bottom, so their order matters.  Species whose maximum is exactly zero are
# left out of the written table.
binned_limit <- limited[["i"]] %>%
  group_by(species) %>%
  summarize(
    av = max(ave_upper),
    mx = max(max_upper),
    limit = max(limit),
    .groups = "drop"
  ) %>%
  mutate(
    bin = case_when(
      mx == 0 ~ "undetected",
      mx > limit ~ "over",
      mx > limit / 10 ~ "over10",
      mx > limit / 100 ~ "over100",
      TRUE ~ "safeIGuess"
    )
  ) %>%
  filter(bin != "undetected") %>%
  arrange(bin, species) %>%
  # write_tsv() passes its input through, so the table is also kept in
  # `binned_limit`.
  readr::write_tsv(snakemake@output[["limit"]])

# For species without a limit, take the per-species maxima of the upper
# average and the upper maximum, and keep only the species whose maximum is
# above zero.  The `detected` flag column is part of the written table.
detected_nolimit <- limited[["o"]] %>%
  group_by(species) %>%
  summarize(
    av = max(ave_upper),
    mx = max(max_upper),
    .groups = "drop"
  ) %>%
  mutate(detected = mx > 0) %>%
  filter(detected) %>%
  arrange(species) %>%
  readr::write_tsv(snakemake@output[["nolimit"]])