|
- # Merge the family VCFs with RTG Tools, split the merged calls into an
- # SNV-only and a non-SNV subset, and write one per-sample count table for
- # each subset.
- #
- # Inputs:
- #   family_vcf_gz   VCF files handed to `rtg vcfmerge`.
- #   family_vcf_idx  Index files for family_vcf_gz. They are not referenced in
- #                   the command; presumably declared so they are localized
- #                   next to the VCFs (confirm against the caller).
- #   test_name       Prefix of the two summary tables.
- #   sample          Prefix of the merged / filtered VCF files.
- #   docker, cluster_config, disk_size
- #                   Runtime settings forwarded to the backend.
- #
- # Outputs: the merged, SNV-only and non-SNV VCFs with their .tbi files, plus
- # <test_name>.snv.txt and <test_name>.indel.txt. The .tbi files are not
- # created explicitly here; they are expected to be written by rtg alongside
- # each VCF.
- task merge {
-   Array[File] family_vcf_gz
-   Array[File] family_vcf_idx
-   String test_name
-   String sample
-   String docker
-   String cluster_config
-   String disk_size
-
-   command <<<
-     # Stop at the first failing step so later steps never run on missing or
-     # partial files.
-     set -e
-
-     rtg vcfmerge --force-merge-all -o ${sample}.merged.vcf.gz ${sep=" " family_vcf_gz}
-     rtg vcffilter -i ${sample}.merged.vcf.gz -o ${sample}.snv.merged.vcf.gz --snps-only --all-samples
-     rtg vcffilter -i ${sample}.merged.vcf.gz -o ${sample}.indel.merged.vcf.gz --non-snps-only --all-samples
-
-     # Header row of the tables: the names found in VCF columns 10-12.
-     zcat ${sample}.indel.merged.vcf.gz | grep '#CHROM' | cut -f10-12 > name
-
-     # For each of columns 10-12: keep sub-fields 2-4 of the ':'-separated
-     # entry, drop entries containing '.', and count every distinct value.
-     # Only the count is kept, one per line, in sorted order of the values.
-     for i in {10..12}; do
-       zcat ${sample}.snv.merged.vcf.gz | grep -v '#' | cut -f$i | cut -d ':' -f2-4 | grep -v '\.' | sort | uniq -c | awk '{print $1}' > $i.snv.txt
-     done
-
-     # The per-column files are listed explicitly: a *.snv.txt glob could also
-     # match snv.txt, which this same pipeline creates.
-     paste 10.snv.txt 11.snv.txt 12.snv.txt | cat name - > snv.txt
-
-     for i in {10..12}; do
-       zcat ${sample}.indel.merged.vcf.gz | grep -v '#' | cut -f$i | cut -d ':' -f2-4 | grep -v '\.' | sort | uniq -c | awk '{print $1}' > $i.indel.txt
-     done
-
-     # Written to indel.txt, the name the final paste below reads (this table
-     # used to be written to index.txt, so that paste failed).
-     paste 10.indel.txt 11.indel.txt 12.indel.txt | cat name - > indel.txt
-
-     # Row labels for the tables.
-     # NOTE(review): the labels are fixed, so the rows only line up when every
-     # column yields exactly these eight values in this sorted order; confirm
-     # against the FORMAT layout of the input VCFs.
-     printf '%s\n' 'type' '0,0,0' '0,0,1' '0,1,0' '0,1,1' '1,0,0' '1,0,1' '1,1,0' '1,1,1' > column
-
-     paste column snv.txt > ${test_name}.snv.txt
-
-     paste column indel.txt > ${test_name}.indel.txt
-   >>>
-
-   runtime {
-     docker: docker
-     cluster: cluster_config
-     systemDisk: "cloud_ssd 40"
-     dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
-   }
-
-   output {
-     File merged_vcf = "${sample}.merged.vcf.gz"
-     File merged_vcf_idx = "${sample}.merged.vcf.gz.tbi"
-     File merged_snv = "${sample}.snv.merged.vcf.gz"
-     File merged_snv_idx = "${sample}.snv.merged.vcf.gz.tbi"
-     File merged_indel = "${sample}.indel.merged.vcf.gz"
-     File merged_indel_idx = "${sample}.indel.merged.vcf.gz.tbi"
-     File snv = "${test_name}.snv.txt"
-     File indel = "${test_name}.indel.txt"
-   }
- }
|