Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

62 lines
2.4KB

  # Aggregate Mendelian-concordance summaries and hap.py result tables into
  # merged tables plus per-column mean / standard-deviation files.
  #
  # Inputs:
  #   summary_1..3        - tables that are concatenated; lines containing
  #                         'Family' (the per-file header) are dropped.
  #                         Presumably one file per replicate - confirm with caller.
  #   LCLx_hap_1..3       - comma-separated tables; only lines containing 'ALL'
  #                         are kept and commas are converted to tabs.
  #   project             - value passed to /opt/hap_summary.py via -name.
  #   docker, cluster_config, disk_size - runtime settings.
  #
  # Outputs: see the output section; file names are unchanged.
  task quartet_mendelian {
    File summary_1
    File summary_2
    File summary_3
    File LCL5_hap_1
    File LCL5_hap_2
    File LCL5_hap_3
    File LCL6_hap_1
    File LCL6_hap_2
    File LCL6_hap_3
    File LCL7_hap_1
    File LCL7_hap_2
    File LCL7_hap_3
    File LCL8_hap_1
    File LCL8_hap_2
    File LCL8_hap_3
    String docker
    String project
    String cluster_config
    String disk_size

    command <<<
      # mean_std: read whitespace-separated numeric columns on stdin and print
      # one "<mean> <population std> " line per column.
      # The variance is clamped at zero: with (nearly) identical values the
      # sumsq - sum^2/N form can go slightly negative through floating-point
      # cancellation, and sqrt would then print nan.
      # The column count is taken from NF in END, i.e. from the last record.
      mean_std() {
        awk '
          { for (i = 1; i <= NF; i++) { sum[i] += $i; sumsq[i] += ($i)^2 } }
          END {
            for (i = 1; i <= NF; i++) {
              variance = (sumsq[i] - sum[i]^2 / NR) / NR
              if (variance < 0) variance = 0
              printf "%f %f \n", sum[i] / NR, sqrt(variance)
            }
          }'
      }

      # Merge the Mendelian summaries, dropping each file's own header line.
      cat "${summary_1}" "${summary_2}" "${summary_3}" | grep -v 'Family' > mendelian.summary

      # Write a single header followed by the merged rows. printf is used
      # instead of sed 1i so the header is emitted even when no rows exist.
      printf 'Family\tTotal_Variants\tMendelian_Concordant_Variants\tMendelian_Concordance_Rate\n' > mendelian.txt
      cat mendelian.summary >> mendelian.txt

      # Mean/std of column 4 (concordance rate) for INDEL rows and SNV rows.
      grep 'INDEL' mendelian.txt | cut -f4 | grep -v 'Mendelian_Concordance_Rate' | mean_std > quartet_indel_aver-std.txt
      grep 'SNV' mendelian.txt | cut -f4 | grep -v 'Mendelian_Concordance_Rate' | mean_std > quartet_snv_aver-std.txt

      # Merge the hap tables: keep the 'ALL' rows and turn commas into tabs.
      cat "${LCL5_hap_1}" "${LCL5_hap_2}" "${LCL5_hap_3}" \
          "${LCL6_hap_1}" "${LCL6_hap_2}" "${LCL6_hap_3}" \
          "${LCL7_hap_1}" "${LCL7_hap_2}" "${LCL7_hap_3}" \
          "${LCL8_hap_1}" "${LCL8_hap_2}" "${LCL8_hap_3}" \
        | grep ALL | sed 's/,/\t/g' > hap.summary

      printf 'Type\tFilter\tTRUTH.TOTAL\tTRUTH.TP\tTRUTH.FN\tQUERY.TOTAL\tQUERY.FP\tQUERY.UNK\tFP.gt\tMETRIC.Recall\tMETRIC.Precision\tMETRIC.Frac_NA\tMETRIC.F1_Score\tTRUTH.TOTAL.TiTv_ratio\tQUERY.TOTAL.TiTv_ratio\tTRUTH.TOTAL.het_hom_ratio\tQUERY.TOTAL.het_hom_ratio\n' > precision_recall
      cat hap.summary >> precision_recall

      # NOTE(review): variants.calling.qc.txt is not created by any command
      # above; it is presumably written by hap_summary.py - confirm.
      python /opt/hap_summary.py -hap precision_recall -name "${project}"

      # Mean/std of columns 12 onward, skipping lines that contain 'SNV'
      # (presumably the header row of that file - confirm).
      cut -f12- variants.calling.qc.txt | grep -v 'SNV' | mean_std > reference_datasets_aver-std.txt
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File mendelian_summary = "mendelian.txt"
      File snv_aver_std = "quartet_snv_aver-std.txt"
      File indel_aver_std = "quartet_indel_aver-std.txt"
      File pr = "precision_recall"
      File hap_summary = "variants.calling.qc.txt"
      File precision_recall_aver_std = "reference_datasets_aver-std.txt"
    }
  }