|
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
# Task quartet_mendelian
#
# Merges three Mendelian summary tables and twelve per-sample comparison
# tables (LCL5..LCL8, three each), then derives mean / standard-deviation
# summaries from them.
#
# Inputs
#   summary_1..3     Mendelian summary tables; tab-separated, the 4th column
#                    is read as the concordance rate.
#   LCLx_hap_1..3    Comma-separated comparison tables; only lines containing
#                    "ALL" are kept.
#   docker           Container image used to run the command.
#   project          Value passed to /opt/hap_summary.py as -name.
#   cluster_config   Value forwarded to the backend "cluster" runtime attribute.
#   disk_size        Size inserted into the "dataDisk" runtime attribute.
#
# Outputs
#   mendelian_summary          merged Mendelian table with a header row
#   snv_aver_std               "mean std" of column 4 over rows matching SNV
#   indel_aver_std             "mean std" of column 4 over rows matching INDEL
#   pr                         merged comparison table with a header row
#   hap_summary                variants.calling.qc.txt (not created by any shell
#                              command here; presumably written by
#                              /opt/hap_summary.py -- TODO confirm)
#   precision_recall_aver_std  "mean std" for columns 12+ of hap_summary
task quartet_mendelian {
    File summary_1
    File summary_2
    File summary_3
    File LCL5_hap_1
    File LCL5_hap_2
    File LCL5_hap_3
    File LCL6_hap_1
    File LCL6_hap_2
    File LCL6_hap_3
    File LCL7_hap_1
    File LCL7_hap_2
    File LCL7_hap_3
    File LCL8_hap_1
    File LCL8_hap_2
    File LCL8_hap_3
    String docker
    String project
    String cluster_config
    String disk_size

    command <<<
        # Read whitespace-separated numeric columns on stdin and print one
        # "mean std" line per column (population standard deviation).
        # The variance is clamped at 0: with identical or nearly identical
        # values, floating-point cancellation can make sumsq - sum^2/NR
        # slightly negative, and sqrt() of that would print nan.
        aver_std() {
            awk '{for (i = 1; i <= NF; i++) {sum[i] += $i; sumsq[i] += ($i)^2}}
            END {for (i = 1; i <= NF; i++) {
                variance = (sumsq[i] - sum[i]^2 / NR) / NR
                if (variance < 0) variance = 0
                printf "%f %f \n", sum[i] / NR, sqrt(variance)}
            }'
        }

        # Merge the three Mendelian summaries, dropping every line that
        # contains "Family" (the per-file headers), then add a single header.
        cat "${summary_1}" "${summary_2}" "${summary_3}" | grep -v 'Family'> mendelian.summary
        sed '1iFamily\tTotal_Variants\tMendelian_Concordant_Variants\tMendelian_Concordance_Rate' mendelian.summary > mendelian.txt

        # Mean / std of the concordance rate (column 4), INDEL rows then SNV rows.
        grep 'INDEL' mendelian.txt | cut -f4 | grep -v 'Mendelian_Concordance_Rate' | aver_std >> quartet_indel_aver-std.txt

        grep 'SNV' mendelian.txt | cut -f4 | grep -v 'Mendelian_Concordance_Rate' | aver_std >> quartet_snv_aver-std.txt

        # Merge the twelve comparison tables: keep lines containing "ALL",
        # turn commas into tabs, and prepend a header row.
        cat "${LCL5_hap_1}" "${LCL5_hap_2}" "${LCL5_hap_3}" "${LCL6_hap_1}" "${LCL6_hap_2}" "${LCL6_hap_3}" "${LCL7_hap_1}" "${LCL7_hap_2}" "${LCL7_hap_3}" "${LCL8_hap_1}" "${LCL8_hap_2}" "${LCL8_hap_3}" | grep ALL | sed s'/,/\t/g' > hap.summary
        sed '1i\Type\tFilter\tTRUTH.TOTAL\tTRUTH.TP\tTRUTH.FN\tQUERY.TOTAL\tQUERY.FP\tQUERY.UNK\tFP.gt\tMETRIC.Recall\tMETRIC.Precision\tMETRIC.Frac_NA\tMETRIC.F1_Score\tTRUTH.TOTAL.TiTv_ratio\tQUERY.TOTAL.TiTv_ratio\tTRUTH.TOTAL.het_hom_ratio\tQUERY.TOTAL.het_hom_ratio' hap.summary > precision_recall

        python /opt/hap_summary.py -hap precision_recall -name "${project}"

        # Mean / std of columns 12 onward of variants.calling.qc.txt, skipping
        # lines that contain "SNV" (presumably the header row -- TODO confirm
        # against the output format of hap_summary.py).
        cut -f12- variants.calling.qc.txt | grep -v 'SNV' | aver_std >> reference_datasets_aver-std.txt

    >>>

    runtime {
        docker:docker
        cluster:cluster_config
        systemDisk:"cloud_ssd 40"
        dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
        File mendelian_summary = "mendelian.txt"
        File snv_aver_std = "quartet_snv_aver-std.txt"
        File indel_aver_std = "quartet_indel_aver-std.txt"
        File pr = "precision_recall"
        File hap_summary = "variants.calling.qc.txt"
        File precision_recall_aver_std = "reference_datasets_aver-std.txt"
    }
}
|