|
|
@@ -0,0 +1,79 @@ |
|
|
|
# Runs the GATK PathSeq pipeline on one paired-end sample:
#   1. FastqToSam         - converts the two FASTQ files into an unaligned BAM.
#   2. PathSeqFilterSpark - filters the reads using the host k-mer file and
#                           host BWA image, writing paired/unpaired BAMs.
#   3. PathSeqBwaSpark    - aligns the filtered reads against the microbe
#                           BWA image / sequence dictionary.
#   4. PathSeqScoreSpark  - scores the alignments against the taxonomy file
#                           and writes the score table plus annotated BAM.
#
# Inputs:
#   sample_id         - used as the sample name (-SM) and as the prefix of
#                       every intermediate and output file.
#   fastq1, fastq2    - paired FASTQ files passed to FastqToSam as -F1 / -F2.
#   host_image        - passed to PathSeqFilterSpark --filter-bwa-image.
#   host_kmer         - passed to PathSeqFilterSpark --kmer-file.
#   microbe_dict      - passed to PathSeqBwaSpark --microbe-dict.
#   microbe_bwa_image - passed to PathSeqBwaSpark --microbe-bwa-image.
#   microbe_taxonomy  - passed to PathSeqScoreSpark --taxonomy-file.
#   disk_size         - size component of the dataDisk runtime attribute.
#   docker            - container image for the task.
#   cluster           - value of the `cluster` runtime attribute.
#
# Outputs:
#   pathseq_txt - pathseq_result/<sample_id>.pathseq.txt
#   pathseq_bam - pathseq_result/<sample_id>.pathseq_reads.bam
#   pathseq_sbi - pathseq_result/<sample_id>.pathseq_reads.bam.sbi
task pathseq {
    String sample_id
    File fastq1
    File fastq2
    File host_image
    File host_kmer
    File microbe_dict
    File microbe_bwa_image
    File microbe_taxonomy

    String disk_size
    String docker
    String cluster

    command <<<
        # Abort on the first failing command, including failures inside pipes.
        set -eo pipefail

        # Working directories for every stage. `bwa_unpair_bam` is the
        # directory PathSeqBwaSpark writes its unpaired output to and
        # PathSeqScoreSpark reads from; the previous revision created an
        # unused `bwa_pair_unpair_bam` directory instead.
        mkdir -p \
            ubam \
            filter_log \
            clean_pair_bam \
            clean_unpaired_bam \
            pathseq_result \
            bwa_pair_bam \
            bwa_unpair_bam

        # Step 1: FASTQ pair -> unaligned BAM.
        gatk FastqToSam \
            -F1 ${fastq1} \
            -F2 ${fastq2} \
            -O ./ubam/${sample_id}.bam \
            -SM ${sample_id}

        # Step 2: filter reads with the host k-mer file and host BWA image.
        # NOTE(review): the JVM heap is fixed at 185g in all three Spark
        # steps; confirm the selected `cluster` instance provides that much.
        time gatk --java-options "-Xmx185g" PathSeqFilterSpark \
            --input ./ubam/${sample_id}.bam \
            --paired-output ./clean_pair_bam/${sample_id}_paired.bam \
            --unpaired-output ./clean_unpaired_bam/${sample_id}_unpaired.bam \
            --min-clipped-read-length 70 \
            --kmer-file ${host_kmer} \
            --filter-bwa-image ${host_image} \
            --filter-metrics filter_log/${sample_id}.log

        # Step 3: align the filtered reads against the microbe reference.
        time gatk --java-options "-Xmx185g" PathSeqBwaSpark \
            --paired-input ./clean_pair_bam/${sample_id}_paired.bam \
            --unpaired-input ./clean_unpaired_bam/${sample_id}_unpaired.bam \
            --paired-output bwa_pair_bam/${sample_id}_bwa_paired.bam \
            --unpaired-output bwa_unpair_bam/${sample_id}_bwa_unpaired.bam \
            --microbe-bwa-image ${microbe_bwa_image} \
            --microbe-dict ${microbe_dict}

        # Step 4: score the alignments and write the final results.
        time gatk --java-options "-Xmx185g" PathSeqScoreSpark \
            --paired-input bwa_pair_bam/${sample_id}_bwa_paired.bam \
            --unpaired-input bwa_unpair_bam/${sample_id}_bwa_unpaired.bam \
            --taxonomy-file ${microbe_taxonomy} \
            --scores-output pathseq_result/${sample_id}.pathseq.txt \
            --output pathseq_result/${sample_id}.pathseq_reads.bam \
            --min-score-identity 0.90 \
            --identity-margin 0.02
    >>>

    # NOTE(review): `cluster`, `systemDisk` and `dataDisk` are backend-specific
    # runtime attributes (they look like the Cromwell Alibaba Cloud BCS
    # backend's) - confirm against the deployment's backend configuration.
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File pathseq_txt = "pathseq_result/${sample_id}.pathseq.txt"
        File pathseq_bam = "pathseq_result/${sample_id}.pathseq_reads.bam"
        File pathseq_sbi = "pathseq_result/${sample_id}.pathseq_reads.bam.sbi"
    }
}
|
|
|
|