55 lines
1.5 KiB
R
55 lines
1.5 KiB
R
|
library(tidyverse)
|
||
|
|
||
|
# Partition a data frame by a row-wise condition.
#
# Arguments:
#   df   - data frame (or tibble) to partition
#   test - unquoted expression evaluated against the columns of `df`; it is
#          forwarded to filter() through the `{{ }}` embrace operator
#
# Returns a list with two elements:
#   i - rows of `df` for which `test` is TRUE
#   o - rows of `df` for which `!test` is TRUE
#
# filter() only keeps rows whose condition is TRUE, so rows where `test`
# evaluates to NA end up in neither element.
split_df <- function(df, test) {
  rows_in <- filter(df, {{ test }})
  rows_out <- filter(df, !{{ test }})
  list(i = rows_in, o = rows_out)
}
|
||
|
|
||
|
# First input file declared for this rule by Snakemake.
path <- snakemake@input[[1]]

# Read the tab-separated table. Column names are supplied explicitly; the
# type string declares one integer, two character and seven double columns,
# in the same order as the names.
df <- readr::read_tsv(
  path,
  col_names = c(
    "year", "species", "unit",
    "ave_lower", "ave_upper",
    "min_lower", "min_upper",
    "max_lower", "max_upper",
    "limit"
  ),
  col_types = "iccddddddd"
) %>%
  # some TTHM/HAA5 entries appear twice in the data; keep only the copies
  # that carry a positive limit (every other species is kept regardless of
  # its limit)
  filter(limit > 0 | !str_detect(species, "(TTHM|HAA5)"))
|
||
|
|
||
|
# Names of the species whose largest `limit` value is positive.
# NOTE(review): max() yields NA as soon as one `limit` in a group is NA, and
# such a species is then dropped by the filter -- this assumes `limit` has no
# missing values; confirm against the input data.
has_limit <- df %>%
  group_by(species) %>%
  summarise(max_limit = max(limit)) %>%
  filter(max_limit > 0) %>%
  pull(species)

# limited$i: rows of species listed in `has_limit`
# limited$o: all remaining rows (`%in%` never returns NA, so nothing is lost)
limited <- split_df(df, species %in% has_limit)
|
||
|
|
||
|
# Per-species summary for species that have a limit: the largest `ave_upper`
# (av), the largest `max_upper` (mx) and the largest `limit`, plus a `bin`
# label describing how `mx` compares with the limit.
# NOTE(review): max() propagates NA, and a species whose `mx` or `limit` is NA
# falls through every comparison to the final "safeIGuess" branch -- assumes
# these columns have no missing values; confirm.
binned_limit <- limited$i %>%
  group_by(species) %>%
  summarise(
    av = max(ave_upper),
    mx = max(max_upper),
    limit = max(limit),
    .groups = "drop"
  ) %>%
  mutate(
    # branches are tried top to bottom; the first one that is TRUE wins
    bin = case_when(
      mx == 0 ~ "undetected",
      mx > limit ~ "over",
      mx > limit / 10 ~ "over10",
      mx > limit / 100 ~ "over100",
      TRUE ~ "safeIGuess"
    )
  ) %>%
  # species that were never detected are left out of the report
  filter(bin != "undetected") %>%
  # the bin labels sort as over < over10 < over100 < safeIGuess
  arrange(bin, species)

# Write the table to the output file that Snakemake names "limit".
readr::write_tsv(binned_limit, snakemake@output[["limit"]])
|
||
|
|
||
|
# Per-species summary for species without a limit: the largest `ave_upper`
# (av) and the largest `max_upper` (mx). Only species with a positive `mx` are
# kept, so the `detected` column is TRUE in every row that is written.
# NOTE(review): max() propagates NA, and a species whose `mx` is NA is dropped
# by the filter -- assumes `max_upper` has no missing values; confirm.
detected_nolimit <- limited$o %>%
  group_by(species) %>%
  summarise(av = max(ave_upper), mx = max(max_upper)) %>%
  mutate(detected = mx > 0) %>%
  filter(detected) %>%
  arrange(species)

# Write the table to the output file that Snakemake names "nolimit".
readr::write_tsv(detected_nolimit, snakemake@output[["nolimit"]])
|