You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

mendelian.wdl 2.6KB

5 年之前
5 年之前
5 年之前
5 年之前
5 年之前
5 年之前
5 年之前
5 年之前
5 年之前
5 年之前
5 年之前
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  # Trio Mendelian-consistency check using RTG Tools.
  #
  # Steps performed by the command block:
  #   1. Merge the child, mother and father VCFs into family.vcf.gz (rtg vcfmerge).
  #   2. Add ##SAMPLE and ##PEDIGREE header lines describing the trio (rtg vcfannotate).
  #   3. Run `rtg mendelian --lenient` against the SDF reference.
  #   4. Split the annotated family VCF into records that mention MCU or MCV
  #      ("violation") and records that mention neither ("consistent").
  #   5. From each family-level file, keep the first 10 columns (fixed VCF columns
  #      plus the first sample) to produce per-child VCFs.
  #   6. bgzip and index every derived VCF.
  #
  # Inputs:
  #   mother_vcf_gz / father_vcf_gz / child_vcf_gz   bgzipped VCFs of the trio members
  #   mother_vcf_idx / father_vcf_idx / child_vcf_idx
  #       index files for the VCFs; not referenced in the command, presumably
  #       declared so they are localized next to the VCFs (confirm)
  #   mother_name / father_name / child_name   sample IDs written into the header lines
  #   family_name     prefix for the family-level output files
  #   sdf             reference passed to `rtg mendelian -t`
  #   docker, cluster_config, disk_size   runtime settings
  #
  # NOTE(review): the child's sex is hard-coded to FEMALE in the ##SAMPLE header;
  # confirm this is acceptable for male probands.
  # NOTE(review): the per-child filter `awk '$10!~/\./'` drops every record whose
  # 10th column contains a '.' anywhere (not only a missing genotype) - confirm
  # that this is the intended behaviour.
  task mendelian {
    File mother_vcf_gz
    File father_vcf_gz
    File child_vcf_gz
    File mother_vcf_idx
    File father_vcf_idx
    File child_vcf_idx
    String mother_name
    String father_name
    String child_name
    String family_name
    File sdf
    String docker
    String cluster_config
    String disk_size

    # The child VCF is listed first in vcfmerge, so its sample is expected to be
    # column 10 of the merged file (relied on by `cut -f1-10` below) - confirm.
    # Header/record greps are anchored with '^' so that a '#' inside a record
    # field cannot cause the record to be treated as a header line.
    # The per-child files are prefixed with child_header (the child's own '##'
    # meta lines); the data stream already supplies the single-sample #CHROM line.
    command <<<
      rtg vcfmerge -o family.vcf.gz ${child_vcf_gz} ${mother_vcf_gz} ${father_vcf_gz}
      rtg vcfannotate -i family.vcf.gz -o family.anno.vcf.gz \
        --add-header "##SAMPLE=<ID=${mother_name},Sex=FEMALE>" \
        --add-header "##SAMPLE=<ID=${father_name},Sex=MALE>" \
        --add-header "##SAMPLE=<ID=${child_name},Sex=FEMALE>" \
        --add-header "##PEDIGREE=<Child=${child_name},Mother=${mother_name},Father=${father_name}>"
      rtg mendelian -i family.anno.vcf.gz -t ${sdf} -o ${family_name}.family.anno.mendelian.vcf.gz --lenient
      zcat ${family_name}.family.anno.mendelian.vcf.gz | grep '^#' > header
      zcat ${family_name}.family.anno.mendelian.vcf.gz | grep -v '^#' | grep 'MCU\|MCV' | cat header - > ${family_name}.violation.vcf
      zcat ${family_name}.family.anno.mendelian.vcf.gz | grep -v '^#' | grep -v 'MCV' | grep -v 'MCU' | cat header - > ${family_name}.consistent.vcf
      rtg bgzip ${family_name}.violation.vcf
      rtg index -f vcf ${family_name}.violation.vcf.gz
      rtg bgzip ${family_name}.consistent.vcf
      rtg index -f vcf ${family_name}.consistent.vcf.gz
      zcat ${child_vcf_gz} | grep '^##' > child_header
      zcat ${family_name}.violation.vcf.gz | grep -v '^##' | cut -f1-10 | awk '$10!~/\./' | cat child_header - > ${child_name}.violation.vcf
      zcat ${family_name}.consistent.vcf.gz | grep -v '^##' | cut -f1-10 | awk '$10!~/\./' | cat child_header - > ${child_name}.consistent.vcf
      rtg bgzip ${child_name}.violation.vcf
      rtg index -f vcf ${child_name}.violation.vcf.gz
      rtg bgzip ${child_name}.consistent.vcf
      rtg index -f vcf ${child_name}.consistent.vcf.gz
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # Family-level results: full annotated VCF, then the violation/consistent split.
      File family_vcf_gz = "${family_name}.family.anno.mendelian.vcf.gz"
      File family_vcf_index = "${family_name}.family.anno.mendelian.vcf.gz.tbi"
      File family_violation_vcf_gz = "${family_name}.violation.vcf.gz"
      File family_violation_vcf_idx = "${family_name}.violation.vcf.gz.tbi"
      File family_consistent_vcf_gz = "${family_name}.consistent.vcf.gz"
      File family_consistent_vcf_idx = "${family_name}.consistent.vcf.gz.tbi"
      # Per-child results derived from the family-level split.
      File violation_vcf_gz = "${child_name}.violation.vcf.gz"
      File violation_vcf_idx = "${child_name}.violation.vcf.gz.tbi"
      File consistent_vcf_gz = "${child_name}.consistent.vcf.gz"
      File consistent_vcf_idx = "${child_name}.consistent.vcf.gz.tbi"
    }
  }