|
- # Split a multi-sample gVCF into one VCF per sample, then rebuild one
- # four-member family VCF (LCL5, LCL6, LCL7, LCL8) per replicate (1, 2, 3).
- #
- # Inputs:
- #   gvcf           - multi-sample VCF/gVCF; it is read as plain text
- #   project        - prefix used for the renamed per-sample and family VCFs
- #   docker         - image name handed to the runtime section
- #   cluster_config - value handed to the runtime `cluster` attribute
- #   disk_size      - size spliced into the runtime `dataDisk` string
- #
- # Outputs:
- #   splited_vcf    - every *.vcf left in the working directory
- #   family_vcf     - the three <project>.<replicate>.family.vcf files
- #   LCLx_y         - <project>.LCLx_y.vcf for x in 5..8 and y in 1..3
- #
- # The task expects all twelve LCLx_y samples to be present in the header;
- # a missing sample leaves its declared output file absent.
- task split_gvcf_files {
-   File gvcf
-   String project
-   String docker
-   String cluster_config
-   String disk_size
-
-   command <<<
-
-     # NOTE: bash variables below are deliberately written without braces,
-     # because the dollar-brace form is consumed as a WDL placeholder here.
-
-     # Sample names are the #CHROM header fields after the 9 fixed columns
-     # (CHROM POS ID REF ALT QUAL FILTER INFO FORMAT), one name per line.
-     grep -m 1 '^#CHROM' "${gvcf}" | tr '\t' '\n' | tail -n +10 > name
-
-     # Write one single-sample VCF per name. The k-th name belongs to column
-     # 9 + k, so the column counter starts at 10; pairing names with columns
-     # counted from 1 would attach every sample to the wrong column.
-     # "##" meta lines contain no tab, so cut passes them through unchanged.
-     col=10
-     while IFS= read -r sample; do
-       cut -f1-9,"$col" "${gvcf}" > "$sample.vcf"
-
-       # Copy the file to its canonical project name. The first ID found as a
-       # substring of the sample name wins, and at most one copy is made.
-       for id in LCL5_1 LCL5_2 LCL5_3 LCL6_1 LCL6_2 LCL6_3 \
-                 LCL7_1 LCL7_2 LCL7_3 LCL8_1 LCL8_2 LCL8_3; do
-         if [[ "$sample" == *"$id"* ]]; then
-           cp -- "$sample.vcf" "${project}.$id.vcf"
-           break
-         fi
-       done
-
-       col=$((col + 1))
-     done < name
-
-     # Shared family header: the "##" meta lines of the LCL5_1 file followed
-     # by a column line naming the four family members.
-     {
-       grep '^##' "${project}.LCL5_1.vcf"
-       printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL5\tLCL6\tLCL7\tLCL8\n'
-     } > header
-
-     # Per replicate: LCL5 contributes the 9 fixed columns plus its genotype
-     # column; LCL6/7/8 contribute only their genotype column (field 10).
-     # Header lines are removed with an anchored pattern so that records that
-     # merely contain a '#' are kept.
-     for rep in 1 2 3; do
-       grep -v '^#' "${project}.LCL5_$rep.vcf" > "LCL5_$rep.body"
-       grep -v '^#' "${project}.LCL6_$rep.vcf" | cut -f 10 > "LCL6_$rep.body"
-       grep -v '^#' "${project}.LCL7_$rep.vcf" | cut -f 10 > "LCL7_$rep.body"
-       grep -v '^#' "${project}.LCL8_$rep.vcf" | cut -f 10 > "LCL8_$rep.body"
-
-       paste "LCL5_$rep.body" "LCL6_$rep.body" "LCL7_$rep.body" "LCL8_$rep.body" \
-         > "family_$rep.body"
-
-       cat header "family_$rep.body" > "${project}.$rep.family.vcf"
-     done
-
-   >>>
-
-   runtime {
-     docker:docker
-     cluster: cluster_config
-     systemDisk: "cloud_ssd 40"
-     dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
-   }
-   output {
-     Array[File] splited_vcf = glob("*.vcf")
-     Array[File] family_vcf = glob("*.family.vcf")
-     File LCL5_1 = "${project}.LCL5_1.vcf"
-     File LCL5_2 = "${project}.LCL5_2.vcf"
-     File LCL5_3 = "${project}.LCL5_3.vcf"
-     File LCL6_1 = "${project}.LCL6_1.vcf"
-     File LCL6_2 = "${project}.LCL6_2.vcf"
-     File LCL6_3 = "${project}.LCL6_3.vcf"
-     File LCL7_1 = "${project}.LCL7_1.vcf"
-     File LCL7_2 = "${project}.LCL7_2.vcf"
-     File LCL7_3 = "${project}.LCL7_3.vcf"
-     File LCL8_1 = "${project}.LCL8_1.vcf"
-     File LCL8_2 = "${project}.LCL8_2.vcf"
-     File LCL8_3 = "${project}.LCL8_3.vcf"
-   }
- }
|