library(tidyverse)

# Partition `df` by the predicate `test` (evaluated against the columns of
# `df`). Returns a list with two elements: `i` holds the rows for which
# `test` is TRUE, `o` holds the rows for which its negation is TRUE.
split_df <- function(df, test) {
  inside <- filter(df, {{ test }})
  outside <- filter(df, !{{ test }})
  list(i = inside, o = outside)
}

# Load the input table ----

path <- snakemake@input[[1]]

# Column names are supplied explicitly, so no row of the file is consumed
# as a header.
df <- readr::read_tsv(
  path,
  col_names = c(
    "year", "species", "unit",
    "ave_lower", "ave_upper",
    "min_lower", "min_upper",
    "max_lower", "max_upper",
    "limit"
  ),
  col_types = "iccddddddd"
) %>%
  # Some TTHM/HAA5 entries appear twice; keep only the copies that carry a
  # positive limit. Rows whose species does not match the pattern are kept
  # regardless of their limit.
  filter(limit > 0 | !str_detect(species, "(TTHM|HAA5)"))

# Species with / without a limit ----

# Names of the species whose largest recorded limit is positive.
has_limit <- df %>%
  group_by(species) %>%
  summarise(limit = max(limit)) %>%
  filter(limit > 0) %>%
  pull(species)

limited <- split_df(df, species %in% has_limit)

# Species that have a limit ----

# One row per species: the largest upper average (`av`), the largest upper
# maximum (`mx`) and the largest limit. Each species is then binned by how
# `mx` compares with the limit:
#   "undetected"  mx is exactly 0 (dropped before writing)
#   "over"        mx exceeds the limit
#   "over10"      mx exceeds a tenth of the limit
#   "over100"     mx exceeds a hundredth of the limit
#   "safeIGuess"  anything else
# `write_tsv()` sits at the end of the pipe, so the table written to disk is
# also what ends up assigned to `binned_limit`.
binned_limit <- limited[["i"]] %>%
  group_by(species) %>%
  summarise(
    av = max(ave_upper),
    mx = max(max_upper),
    limit = max(limit),
    .groups = "drop"
  ) %>%
  mutate(
    bin = case_when(
      mx == 0 ~ "undetected",
      mx > limit ~ "over",
      mx > limit / 10 ~ "over10",
      mx > limit / 100 ~ "over100",
      TRUE ~ "safeIGuess"
    )
  ) %>%
  filter(bin != "undetected") %>%
  arrange(bin, species) %>%
  readr::write_tsv(snakemake@output[["limit"]])

# Species without a limit ----

# One row per species with the same `av` / `mx` summaries; only species whose
# `mx` is above zero are kept. The `detected` flag stays in the written table.
detected_nolimit <- limited[["o"]] %>%
  group_by(species) %>%
  summarise(
    av = max(ave_upper),
    mx = max(max_upper)
  ) %>%
  mutate(detected = mx > 0) %>%
  filter(detected) %>%
  arrange(species) %>%
  readr::write_tsv(snakemake@output[["nolimit"]])