moco-water/workflow/scripts/analyze_wssc.R

55 lines
1.5 KiB
R
Raw Normal View History

2023-04-05 21:00:46 -04:00
library(tidyverse)
# Split a data frame into the rows that pass a test and all remaining rows.
#
# Arguments:
#   df   - a data frame or tibble.
#   test - a data-masked logical expression, written as it would be inside
#          dplyr::filter() (it is forwarded with {{ }}).
#
# Returns a list with two elements:
#   i - rows of `df` for which `test` is TRUE.
#   o - every other row of `df`, including rows for which `test` is NA.
split_df <- function(df, test) {
  list(
    i = filter(df, {{ test }}),
    # filter() discards rows whose condition is NA, so negating the raw test
    # would lose those rows from both halves. Treat NA as "did not pass" so
    # that `i` and `o` together always cover every row of `df`.
    o = filter(df, !coalesce({{ test }}, FALSE))
  )
}
# First input file supplied by the Snakemake rule that runs this script.
path <- snakemake@input[[1]]

# Load the table. Column names are supplied here, and the columns are typed
# as one integer, two character, then seven double columns.
df <- readr::read_tsv(
  path,
  col_names = c(
    "year", "species", "unit",
    "ave_lower", "ave_upper",
    "min_lower", "min_upper",
    "max_lower", "max_upper",
    "limit"
  ),
  col_types = "iccddddddd"
) %>%
  # Some TTHM/HAA5 entries appear twice in the input; for those species keep
  # only the rows that carry a positive limit. Every other species is kept
  # regardless of its limit.
  filter(limit > 0 | !str_detect(species, "(TTHM|HAA5)"))
# Names of the species that have a limit: a species qualifies when the
# largest limit recorded for it across all of its rows is positive.
has_limit <- df %>%
  group_by(species) %>%
  summarise(top_limit = max(limit)) %>%
  filter(top_limit > 0) %>%
  pull(species)

# Partition the measurements: `limited$i` holds the species with a limit,
# `limited$o` holds the rest.
limited <- split_df(df = df, test = species %in% has_limit)
# For each species that has a limit, take the largest upper average, the
# largest upper maximum and the largest limit, then bin the species by how
# its largest maximum compares with that limit. The first matching rule wins:
#   undetected - largest maximum is exactly zero (removed from the output)
#   over       - above the limit
#   over10     - above one tenth of the limit
#   over100    - above one hundredth of the limit
#   safeIGuess - anything else
binned_limit <- limited[["i"]] %>%
  group_by(species) %>%
  summarize(
    av = max(ave_upper),
    mx = max(max_upper),
    limit = max(limit),
    .groups = "drop"
  ) %>%
  mutate(
    bin = case_when(
      mx == 0 ~ "undetected",
      mx > limit ~ "over",
      mx > limit / 10 ~ "over10",
      mx > limit / 100 ~ "over100",
      TRUE ~ "safeIGuess"
    )
  ) %>%
  filter(bin != "undetected") %>%
  arrange(bin, species)

# Write the binned table to the Snakemake output named "limit".
readr::write_tsv(binned_limit, snakemake@output[["limit"]])
# For each species without a limit, take the largest upper average and the
# largest upper maximum, and keep only the species whose largest maximum is
# above zero. The `detected` flag column is kept in the result.
detected_nolimit <- limited[["o"]] %>%
  group_by(species) %>%
  summarize(
    av = max(ave_upper),
    mx = max(max_upper)
  ) %>%
  mutate(detected = mx > 0) %>%
  filter(detected) %>%
  arrange(species)

# Write the table to the Snakemake output named "nolimit".
readr::write_tsv(detected_nolimit, snakemake@output[["nolimit"]])