init commit
This commit is contained in:
commit
3f68fbbbe6
|
@ -0,0 +1 @@
|
||||||
|
# Public water system ID; the standardize R script keeps only rows whose
# PWSID column equals this value.
# NOTE(review): the key is spelled "pswid" (looks like a transposition of
# "pwsid"). The R script reads it under this exact spelling, so rename both
# places together or leave both as they are.
my_pswid: MD0150005
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Conda environment for running the workflow itself (Snakemake plus tooling).
name: moco-water
channels: [conda-forge]
dependencies:
  - snakemake-minimal=7.25.0
  - mamba
  - snakefmt
|
|
@ -0,0 +1,109 @@
|
||||||
|
from pathlib import Path
|
||||||
|
from os.path import dirname
|
||||||
|
from snakemake.utils import min_version
|
||||||
|
|
||||||
|
min_version("7.20")
|
||||||
|
|
||||||
|
configfile: "config/config.yml"
|
||||||
|
|
||||||
|
|
||||||
|
# Per-round UCMR download table, keyed by integer round number.
# Each value is a pair: (URL of the zip archive, data file name). The file
# names match the outputs declared by the unzip_ucmr_* rules; only the URL
# (index 0) is read by download_ucmr.
ucmr_data = {}
ucmr_data[1] = (
    "https://www.epa.gov/sites/default/files/2015-09/ucmr1_list1and2chem.zip",
    "ucmr1_list1and2chem_final.xls",
)
ucmr_data[2] = (
    "https://www.epa.gov/sites/default/files/2015-09/ucmr2_occurrencedata_jan12.zip",
    "UCMR2_All_OccurrenceData_Jan12.txt",
)
ucmr_data[3] = (
    "https://www.epa.gov/sites/default/files/2017-02/ucmr-3-occurrence-data.zip",
    "UCMR3_All.txt",
)
ucmr_data[4] = (
    "https://www.epa.gov/sites/default/files/2020-04/ucmr_4_occurrence_data.zip",
    "UCMR4_All.txt",
)
|
||||||
|
|
||||||
|
|
||||||
|
# Download the zip archive for one UCMR round.
#
# The {ucmr} wildcard is the integer key into `ucmr_data`; it is constrained
# to the keys that actually exist so an unknown round is reported as a missing
# rule instead of failing inside the params function.
# curl runs with --fail so that an HTTP error aborts the job instead of saving
# the server's error page as the .zip; the output path and URL are shell-quoted
# via the `:q` format flag.
rule download_ucmr:
    output:
        "resources/ucmr/{ucmr}.zip",
    wildcard_constraints:
        ucmr="|".join(str(k) for k in ucmr_data),
    params:
        url=lambda wildcards: ucmr_data[int(wildcards.ucmr)][0],
    shell:
        "curl -sS -L --fail -o {output:q} {params.url:q}"
|
||||||
|
|
||||||
|
|
||||||
|
# Unpack the UCMR 1 archive.
#
# The directory holding the declared output file is removed, recreated empty,
# and then used as the extraction target. The other unzip_ucmr_* rules are
# declared with `use rule unzip_ucmr_1 ... with:` and override only input and
# output, so params and shell here apply to them as well.
rule unzip_ucmr_1:
    input:
        expand(rules.download_ucmr.output, ucmr=[1]),
    output:
        "results/ucmr/ucmr_unzipped_1/ucmr1_list1and2chem_final.xls",
    params:
        zipdest=lambda wildcards, output: dirname(output[0]),
    shell:
        """
        rm -rf {params.zipdest} && \
        mkdir {params.zipdest} && \
        unzip {input} -d {params.zipdest}
        """
|
||||||
|
|
||||||
|
|
||||||
|
# UCMR 2: reuse unzip_ucmr_1, overriding only the archive and the expected
# data file.
use rule unzip_ucmr_1 as unzip_ucmr_2 with:
    input:
        expand(rules.download_ucmr.output, ucmr=[2]),
    output:
        "results/ucmr/ucmr_unzipped_2/UCMR2_All_OccurrenceData_Jan12.txt",
|
||||||
|
|
||||||
|
|
||||||
|
# UCMR 3: reuse unzip_ucmr_1, overriding only the archive and the expected
# data file.
use rule unzip_ucmr_1 as unzip_ucmr_3 with:
    input:
        expand(rules.download_ucmr.output, ucmr=[3]),
    output:
        "results/ucmr/ucmr_unzipped_3/UCMR3_All.txt",
|
||||||
|
|
||||||
|
|
||||||
|
# UCMR 4: reuse unzip_ucmr_1, overriding only the archive and the expected
# data file.
use rule unzip_ucmr_1 as unzip_ucmr_4 with:
    input:
        expand(rules.download_ucmr.output, ucmr=[4]),
    output:
        "results/ucmr/ucmr_unzipped_4/UCMR4_All.txt",
|
||||||
|
|
||||||
|
|
||||||
|
# Write a copy of the UCMR 4 table with the first "(<0xB5>g/L)" on each line
# removed, where <0xB5> is the single byte 0xB5.
# NOTE(review): 0xB5 is the micro sign in Latin-1, so this presumably strips a
# unit suffix from a column header so that it matches the column names read by
# scripts/standardize_ucmr_34.R -- confirm against the data file.
#
# sed runs under LC_ALL=C so the pattern is matched byte-by-byte; in a UTF-8
# locale a lone 0xB5 is an invalid multibyte sequence and matching it is not
# reliable. The file is passed to sed directly instead of through `cat`.
rule fix_ucmr4_data_tbl:
    input:
        rules.unzip_ucmr_4.output,
    output:
        "results/ucmr/ucmr_unzipped_4/UCMR4_All_fixed.txt",
    shell:
        """
        LC_ALL=C sed 's/(\\xB5g\\/L)//' {input:q} > {output:q}
        """
|
||||||
|
|
||||||
|
|
||||||
|
# Run scripts/standardize_ucmr_34.R on the unpacked UCMR 3 table inside the
# tidyverse conda environment. standardize_ucmr_4 is declared from this rule
# and overrides only input and output.
# NOTE(review): the output extension ".txv.gz" looks like a typo for ".tsv.gz"
# and the directory is "results/ucmr_data" while the other rules write under
# "results/ucmr" -- left unchanged because renaming alters the target path.
rule standardize_ucmr_3:
    conda:
        "envs/tidyverse.yml"
    input:
        rules.unzip_ucmr_3.output,
    output:
        "results/ucmr_data/all_std_3.txv.gz",
    script:
        "scripts/standardize_ucmr_34.R"
|
||||||
|
|
||||||
|
# UCMR 4: same script and environment as standardize_ucmr_3, fed with the
# table produced by fix_ucmr4_data_tbl.
use rule standardize_ucmr_3 as standardize_ucmr_4 with:
    input:
        rules.fix_ucmr4_data_tbl.output,
    output:
        "results/ucmr_data/all_std_4.txv.gz",
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Aggregate target: the standardized UCMR 3 and UCMR 4 tables.
#
# This rule is declared last, and Snakemake otherwise uses the first rule in
# the file as the default target. That first rule (download_ucmr) has a
# wildcard and cannot be a target, so a bare `snakemake` invocation would
# fail. `default_target: True` makes this rule the default regardless of its
# position in the file.
rule all:
    default_target: True
    input:
        rules.standardize_ucmr_3.output,
        rules.standardize_ucmr_4.output,
        # expand(rules.unzip_ucmr.output, ucmr=[1, 2, 3]),
        # rules.fix_ucmr4_data_tbl.output,
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Conda environment used by the standardize_ucmr_* rules to run the R script.
name: moco-water-tidyverse
channels: [conda-forge]
dependencies:
  - r-tidyverse
  - r-styler
  - r-lintr
|
|
@ -0,0 +1,21 @@
|
||||||
|
# Standardize one UCMR occurrence table (run through Snakemake's `script:`).
#
# Reads the tab-separated file at snakemake@input[[1]], keeping only the ten
# columns named in `ucmr_cols`; keeps the rows whose PWSID equals the
# `my_pswid` config value; parses CollectionDate into a Date; renames
# AnalyticalResultValue to `value`; writes the result as TSV to
# snakemake@output[[1]].
library(tidyverse)

# Fail early with a clear message when the config key is absent; otherwise
# filter() would stop later with an opaque size error.
pwsid <- snakemake@config[["my_pswid"]]
if (length(pwsid) != 1L || is.na(pwsid)) {
  stop("config value 'my_pswid' must be a single non-missing value",
    call. = FALSE
  )
}

# Columns to read; every column not listed here is skipped.
ucmr_cols <- cols(
  PWSID = col_character(),
  PWSName = col_character(),
  FacilityName = col_character(),
  SamplePointID = col_character(),
  SamplePointName = col_character(),
  SamplePointType = col_character(),
  CollectionDate = col_character(),
  Contaminant = col_character(),
  AnalyticalResultsSign = col_character(),
  AnalyticalResultValue = col_double(),
  .default = col_skip()
)

snakemake@input[[1]] %>%
  readr::read_tsv(col_types = ucmr_cols) %>%
  # Filter first so dates are parsed only for the rows that are kept.
  filter(PWSID == !!pwsid) %>%
  mutate(
    # Without an explicit format, as.Date() tries only "%Y-%m-%d" and
    # "%Y/%m/%d", so a month/day/year string such as "3/12/2013" would be
    # read as a date in year 3 without any warning. Trying "%m/%d/%Y" first
    # handles that layout; ISO-style input still parses as before.
    # NOTE(review): the EPA tables appear to use month/day/year -- confirm.
    CollectionDate = as.Date(
      CollectionDate,
      tryFormats = c("%m/%d/%Y", "%Y-%m-%d", "%Y/%m/%d")
    )
  ) %>%
  rename(value = "AnalyticalResultValue") %>%
  readr::write_tsv(snakemake@output[[1]])
|
Loading…
Reference in New Issue