|
- # Task `region`: split one VCF into per-region subsets and summarise each subset.
- #
- # For each of the ten region BED inputs the task:
- #   1. runs `rtg vcffilter --include-bed` to write <sample_name>.<tag>.<region>.vcf.gz
- #   2. runs `rtg vcfstat` on that subset to write <sample_name>.<tag>.<region>.vcfstat.txt
- # It then appends, one line per subset VCF (in shell glob order), several record
- # counts and two values cut from the vcfstat reports to summary text files, and
- # writes the list of subset VCFs to file.list.txt so the rows can be matched
- # back to regions.
- task region {
-   # Input VCF and its index. The index is not referenced in the command;
-   # NOTE(review): presumably declared so it is localized next to the VCF - confirm.
-   File sub_vcf
-   File sub_vcf_idx
-
-   # One BED file per region category.
-   File satellite
-   File simple_repeat
-   File SINE
-   File LINE
-   File LTR
-   File low_complexity
-   File SD
-   File ins_breakpoint
-   File del_breakpoint
-   File mhc
-
-   # sample_name and tag form the prefix of every output file name.
-   String sample_name
-   String tag
-   String docker
-   String cluster_config
-   String disk_size
-
-   command <<<
-     # Stop at the first failing command so a broken rtg step cannot silently
-     # produce summary files built from partial data. `pipefail` is deliberately
-     # NOT enabled: grep exits 1 when nothing matches, and a count of 0 is valid.
-     set -e
-
-     # Filter the input VCF to one BED file, then write the vcfstat report for
-     # the resulting subset.  $1 = region label used in file names, $2 = BED path.
-     filter_region() {
-       rtg vcffilter -i "${sub_vcf}" -o "${sample_name}.${tag}.$1.vcf.gz" --include-bed="$2"
-       rtg vcfstat "${sample_name}.${tag}.$1.vcf.gz" > "${sample_name}.${tag}.$1.vcfstat.txt"
-     }
-
-     filter_region satellite      "${satellite}"
-     filter_region simple_repeat  "${simple_repeat}"
-     filter_region SINE           "${SINE}"
-     filter_region LINE           "${LINE}"
-     filter_region LTR            "${LTR}"
-     filter_region low_complexity "${low_complexity}"
-     filter_region SD             "${SD}"
-     filter_region ins_breakpoint "${ins_breakpoint}"
-     filter_region del_breakpoint "${del_breakpoint}"
-     filter_region mhc            "${mhc}"
-
-     # Per-subset record counts, appended one line per VCF in glob order.
-     # Header lines are dropped with an anchored '^#' so that data records which
-     # merely contain a '#' somewhere are still counted.
-     for vcf in *vcf.gz
-     do
-       zcat "$vcf" | grep -v '^#' | grep 'SNV' | wc -l >> "${sample_name}.${tag}.all.snv.count.txt"
-       zcat "$vcf" | grep -v '^#' | grep 'INDEL' | wc -l >> "${sample_name}.${tag}.all.indel.count.txt"
-       zcat "$vcf" | grep -v '^#' | grep 'SNV;MC' | wc -l >> "${sample_name}.${tag}.snv.mc.count.txt"
-       # Keep only records whose columns 10-13 are identical as whole strings.
-       # NOTE(review): presumably these are four sample columns - confirm.
-       zcat "$vcf" | grep -v '^#' | grep 'SNV;MC' | awk '{ if (($10 == $11) && ($11 == $12) && ($12 == $13)) { print } }' | wc -l >> "${sample_name}.${tag}.snv.mc.same.genotype.count.txt"
-       zcat "$vcf" | grep -v '^#' | grep 'INDEL;MC' | wc -l >> "${sample_name}.${tag}.indel.mc.count.txt"
-       zcat "$vcf" | grep -v '^#' | grep 'INDEL;MC' | awk '{ if (($10 == $11) && ($11 == $12) && ($12 == $13)) { print } }' | wc -l >> "${sample_name}.${tag}.indel.mc.same.genotype.count.txt"
-     done
-
-     # Pull two values out of every vcfstat report by fixed position:
-     # field 3 of line 13 and field 5 of line 14 after collapsing whitespace runs
-     # to tabs. NOTE(review): this depends on the exact vcfstat report layout;
-     # confirm the line numbers against the rtg version in the docker image.
-     for stat in *vcfstat.txt
-     do
-       sed -n '13p' "$stat" | sed 's/\s\+/\t/g' | cut -f3 >> "${sample_name}.${tag}.ti_tv.txt"
-       sed -n '14p' "$stat" | sed 's/\s\+/\t/g' | cut -f5 >> "${sample_name}.${tag}.het_hom.txt"
-     done
-
-     # Record the subset VCF names so rows of the count files can be mapped
-     # back to regions.
-     ls *vcf.gz > file.list.txt
-   >>>
-
-   runtime {
-     docker:docker
-     cluster: cluster_config
-     systemDisk: "cloud_ssd 40"
-     dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
-   }
-   output {
-     # Region-restricted VCFs.
-     File satellite_vcf = "${sample_name}.${tag}.satellite.vcf.gz"
-     File simple_repeat_vcf = "${sample_name}.${tag}.simple_repeat.vcf.gz"
-     File SINE_vcf = "${sample_name}.${tag}.SINE.vcf.gz"
-     File LINE_vcf = "${sample_name}.${tag}.LINE.vcf.gz"
-     File LTR_vcf = "${sample_name}.${tag}.LTR.vcf.gz"
-     File low_complexity_vcf = "${sample_name}.${tag}.low_complexity.vcf.gz"
-     File SD_vcf = "${sample_name}.${tag}.SD.vcf.gz"
-     File ins_breakpoint_vcf = "${sample_name}.${tag}.ins_breakpoint.vcf.gz"
-     File del_breakpoint_vcf = "${sample_name}.${tag}.del_breakpoint.vcf.gz"
-     File mhc_vcf = "${sample_name}.${tag}.mhc.vcf.gz"
-     # rtg vcfstat reports, one per region.
-     File satellite_stat = "${sample_name}.${tag}.satellite.vcfstat.txt"
-     File simple_repeat_stat = "${sample_name}.${tag}.simple_repeat.vcfstat.txt"
-     File SINE_stat = "${sample_name}.${tag}.SINE.vcfstat.txt"
-     File LINE_stat = "${sample_name}.${tag}.LINE.vcfstat.txt"
-     File LTR_stat = "${sample_name}.${tag}.LTR.vcfstat.txt"
-     File low_complexity_stat = "${sample_name}.${tag}.low_complexity.vcfstat.txt"
-     File SD_stat = "${sample_name}.${tag}.SD.vcfstat.txt"
-     File ins_breakpoint_stat = "${sample_name}.${tag}.ins_breakpoint.vcfstat.txt"
-     File del_breakpoint_stat = "${sample_name}.${tag}.del_breakpoint.vcfstat.txt"
-     File mhc_stat = "${sample_name}.${tag}.mhc.vcfstat.txt"
-     # Summary files, one line per subset VCF. Note the naming: `snv` / `indel`
-     # hold the ';MC' counts, while `snv_mc` / `indel_mc` hold the counts of
-     # ';MC' records whose columns 10-13 are identical.
-     File snv = "${sample_name}.${tag}.snv.mc.count.txt"
-     File snv_mc = "${sample_name}.${tag}.snv.mc.same.genotype.count.txt"
-     File indel = "${sample_name}.${tag}.indel.mc.count.txt"
-     File indel_mc = "${sample_name}.${tag}.indel.mc.same.genotype.count.txt"
-     File file_list = "file.list.txt"
-     File all_snv = "${sample_name}.${tag}.all.snv.count.txt"
-     File all_indel = "${sample_name}.${tag}.all.indel.count.txt"
-     File ti_tv = "${sample_name}.${tag}.ti_tv.txt"
-     File het_homo = "${sample_name}.${tag}.het_hom.txt"
-   }
- }
|