|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
# SepSnvIndel
#
# Splits a normalized VCF into SNV and indel call sets and, for each of them,
# into a "train" subset (records also present in `keeped_vcf`) and a "test"
# subset (records absent from `keeped_vcf`).
#
# Records whose ALT column contains a comma (more than one ALT allele) are
# set aside first and written to <sampleName>.MNP.vcf; every other output is
# derived from the remaining records.
#
# Inputs
#   vcf            VCF to split; its basename minus ".normed.vcf" becomes the
#                  prefix of every output file.
#   keeped_vcf     Uncompressed VCF used as the include/exclude set for the
#                  train/test split (it is bgzip-compressed and indexed here).
#   docker         Container image; must provide `rtg`, `awk`, `grep`, `gzip`.
#   cluster_config Passed through to the backend `cluster` runtime attribute.
#   disk_size      Data disk size, interpolated into the `dataDisk` attribute.
task SepSnvIndel {
  File vcf
  String sampleName = basename(vcf, ".normed.vcf")
  File keeped_vcf
  String docker
  String cluster_config
  String disk_size

  command <<<
    # Stop at the first failing step instead of letting later commands mask it.
    set -e
    set -o pipefail

    # Header = lines that START with '#'. An unanchored pattern would also
    # pick up variant records that merely contain '#' somewhere in a field.
    grep '^#' "${vcf}" > header

    # Split the records on the ALT column (field 5 of the tab-delimited body):
    # no comma -> single ALT allele, comma -> several ALT alleles.
    grep -v '^#' "${vcf}" | awk -F'\t' '$5 !~ /,/' > removed.body
    grep -v '^#' "${vcf}" | awk -F'\t' '$5 ~ /,/' > MNP.body

    cat header removed.body > "${sampleName}.MNPremoved.vcf"
    cat header MNP.body > "${sampleName}.MNP.vcf"

    # Compress and index the single-ALT records; this is the input of every
    # vcffilter call below.
    rtg bgzip "${sampleName}.MNPremoved.vcf"
    rtg index -f vcf "${sampleName}.MNPremoved.vcf.gz"

    # Compress and index the selection VCF so it can be used with
    # --include-vcf / --exclude-vcf.
    rtg bgzip -c "${keeped_vcf}" > all.selected.vcf.gz
    rtg index -f vcf all.selected.vcf.gz

    # SNVs: train = overlapping the selection, test = not overlapping it.
    rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.snv.train.vcf.gz" --include-vcf=all.selected.vcf.gz --snps-only
    rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.snv.test.vcf.gz" --exclude-vcf=all.selected.vcf.gz --snps-only

    # Non-SNVs (indels): same train/test split.
    rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.indel.train.vcf.gz" --include-vcf=all.selected.vcf.gz --non-snps-only
    rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.indel.test.vcf.gz" --exclude-vcf=all.selected.vcf.gz --non-snps-only

    # Unsplit SNV and indel sets.
    rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.snv.vcf.gz" --snps-only
    rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.indel.vcf.gz" --non-snps-only

    # Plain-text copies of the four train/test sets; the .gz files are kept.
    gzip -dc "${sampleName}.normed.snv.train.vcf.gz" > "${sampleName}.normed.snv.train.vcf"
    gzip -dc "${sampleName}.normed.snv.test.vcf.gz" > "${sampleName}.normed.snv.test.vcf"
    gzip -dc "${sampleName}.normed.indel.train.vcf.gz" > "${sampleName}.normed.indel.train.vcf"
    gzip -dc "${sampleName}.normed.indel.test.vcf.gz" > "${sampleName}.normed.indel.test.vcf"
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    # Records with more than one ALT allele, removed before filtering.
    File MNP = "${sampleName}.MNP.vcf"

    # Unsplit SNV / indel sets. The .tbi files are not created by an explicit
    # index command above; they are expected to be written by rtg vcffilter
    # alongside each .vcf.gz output.
    File snv_gz = "${sampleName}.normed.snv.vcf.gz"
    File snv_idx = "${sampleName}.normed.snv.vcf.gz.tbi"
    File indel_gz = "${sampleName}.normed.indel.vcf.gz"
    File indel_idx = "${sampleName}.normed.indel.vcf.gz.tbi"

    # Train/test subsets, uncompressed.
    File snv_train = "${sampleName}.normed.snv.train.vcf"
    File snv_test = "${sampleName}.normed.snv.test.vcf"
    File indel_train = "${sampleName}.normed.indel.train.vcf"
    File indel_test = "${sampleName}.normed.indel.test.vcf"

    # Train/test subsets, bgzip-compressed.
    File snv_train_gz = "${sampleName}.normed.snv.train.vcf.gz"
    File snv_test_gz = "${sampleName}.normed.snv.test.vcf.gz"
    File indel_train_gz = "${sampleName}.normed.indel.train.vcf.gz"
    File indel_test_gz = "${sampleName}.normed.indel.test.vcf.gz"

    # Indexes of the compressed train/test subsets.
    File snv_train_idx = "${sampleName}.normed.snv.train.vcf.gz.tbi"
    File snv_test_idx = "${sampleName}.normed.snv.test.vcf.gz.tbi"
    File indel_train_idx = "${sampleName}.normed.indel.train.vcf.gz.tbi"
    File indel_test_idx = "${sampleName}.normed.indel.test.vcf.gz.tbi"
  }
}
|