108 lines
4.3 KiB
R
108 lines
4.3 KiB
R
|
library(tidyverse)
|
||
|
|
||
|
|
||
|
# Station metadata. Only the identifier, type, name, free-text description and
# coordinates of each monitoring location are parsed; every other column in
# the station file is skipped at read time.
station_cols <- cols(
  MonitoringLocationIdentifier = col_factor(),
  MonitoringLocationTypeName = col_factor(),
  MonitoringLocationName = col_factor(),
  MonitoringLocationDescriptionText = col_character(),
  LatitudeMeasure = col_double(),
  LongitudeMeasure = col_double(),
  .default = col_skip()
)

# Shorten the column names; `location` is the key used later to join the
# measurement results to their station.
site_df <- rename(
  readr::read_tsv(snakemake@input[["station"]], col_types = station_cols),
  location = MonitoringLocationIdentifier,
  location_type = MonitoringLocationTypeName,
  location_name = MonitoringLocationName,
  location_desc = MonitoringLocationDescriptionText,
  lat = LatitudeMeasure,
  long = LongitudeMeasure
)
|
||
|
|
||
|
# Measurement results. The raw export mixes water-chemistry measurements with
# records that have nothing to do with chemicals in water (other media,
# physical observations, descriptive text values) and reports concentrations
# in many different units. This pipeline keeps numeric water measurements with
# mass-concentration-like units, drops characteristics that are not individual
# chemicals, and converts every remaining value to ug/L in `std_value`.
result_df <- readr::read_tsv(
  snakemake@input[["results"]],
  col_types = cols(
    ActivityTypeCode = "f",
    ActivityStartDate = "D",
    ActivityEndDate = "D",
    ActivityMediaName = "f",
    MonitoringLocationIdentifier = "f",
    CharacteristicName = "f",
    # values and limits are read as text because some entries are not numbers
    ResultMeasureValue = "c",
    "ResultMeasure/MeasureUnitCode" = "c",
    "DetectionQuantitationLimitMeasure/MeasureValue" = "c",
    "DetectionQuantitationLimitMeasure/MeasureUnitCode" = "f",
    .default = "-"
  )
) %>%
  rename(
    activity = ActivityTypeCode,
    unit = "ResultMeasure/MeasureUnitCode",
    media = ActivityMediaName,
    start = ActivityStartDate,
    end = ActivityEndDate,
    location = MonitoringLocationIdentifier,
    value = ResultMeasureValue,
    limit = "DetectionQuantitationLimitMeasure/MeasureValue",
    limit_unit = "DetectionQuantitationLimitMeasure/MeasureUnitCode",
    species = CharacteristicName
  ) %>%
  arrange(start) %>%
  filter(media == "Water") %>%
  select(-media) %>%
  # keep values that are plain decimal numbers (some are just descriptive
  # strings), assuming blanks are actually zeros (to be filtered out later)
  replace_na(list(value = "0")) %>%
  filter(str_detect(value, "^-?\\d+\\.?\\d*$")) %>%
  mutate(value = as.numeric(value)) %>%
  # keep units that are mass concentrations or "counts per something"; rows
  # with a missing unit are dropped here as well
  filter(
    str_detect(unit, "^.*g/.*(l|L|g)$") |
      unit %in% c("%", "ppb", "ppm")
  ) %>%
  # remove characteristics (mostly reported in "%") that are not chemicals
  filter(!str_detect(species, "SSC|Cloud cover|Sediment|solids|demand")) %>%
  filter(!str_detect(species, "Pha?eophytin|Chlorophyll|Alkalinity")) %>%
  filter(!species %in% c(
    "Sodium, percent total cations", # not sure what this means
    "Dissolved oxygen saturation",
    "Water",
    "Barometric pressure", # not a chemical
    "Relative humidity", # not a chemical either
    "Extract volume", # ''
    "Volume, total", # ''
    "Acidity, (H+)", # will change later
    "Carbon dioxide", # ditto
    "Dissolved oxygen (DO)", # ditto
    "Total hardness" # not specific
  )) %>%
  # fix names that look like typos and map retired names to their successors;
  # `species` was read as a factor, so convert it explicitly to keep every
  # branch of the case_when a character vector
  mutate(species = case_when(
    species == "Diazinon0" ~ "Diazinon",
    species == "Phosphate-phosphorus***retired***use Total Phosphorus, mixed forms" ~ "Total Phosphorus, mixed forms",
    species == "Inorganic nitrogen (nitrate and nitrite) ***retired***use Nitrate + Nitrite" ~ "Nitrate + Nitrite",
    TRUE ~ as.character(species)
  )) %>%
  # collapse units to (f/p/n/u/m)g/L
  mutate(
    unit = str_replace(unit, "/l", "/L"),
    # per-kilogram units are treated as per-litre (1 kg of water ~ 1 L)
    unit = str_replace(unit, "kg", "L"),
    unit = case_when(
      unit == "ppb" ~ "ug/L",
      unit == "ppm" ~ "mg/L",
      TRUE ~ unit
    ),
    # percent: 1 % = 1 g per 100 mL = 10 g/L, so the scaled value is in g/L.
    # `value` must be rescaled before `unit` is overwritten, because the
    # second if_else would otherwise no longer see "%".
    value = if_else(unit == "%", value * 10, value),
    unit = if_else(unit == "%", "g/L", unit)
  ) %>%
  # standardize all values to ug/L
  # NOTE(review): any other unit that passed the unit filter above (e.g.
  # per-gram or per-mL units) falls through unchanged, i.e. is treated as if
  # it were already ug/L -- confirm whether such units occur in the data
  mutate(std_value = case_when(
    unit == "g/L" ~ value * 1e6,
    unit == "mg/L" ~ value * 1e3,
    unit == "ng/L" ~ value / 1e3,
    unit == "pg/L" ~ value / 1e6,
    unit == "fg/L" ~ value / 1e9,
    TRUE ~ value
  )) %>%
  select(-value, -unit)
|
||
|
|
||
|
# Attach the station metadata to every result row, drop the join key, and
# write the combined table as TSV to the first declared output.
joined_df <- left_join(result_df, site_df, by = "location")
readr::write_tsv(select(joined_df, -location), snakemake@output[[1]])
|