You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

50 satır
1.2KB

  # Split a multi-sample gVCF into one VCF per sample column.
  #
  # Inputs:
  #   gvcf           - multi-sample gVCF (read as plain text).
  #   project        - prefix for the intermediate filtered gVCF file name.
  #   docker         - container image used to run the command.
  #   cluster_config - value passed through to the `cluster` runtime attribute.
  #   disk_size      - size inserted into the `dataDisk` runtime string.
  #
  # Outputs:
  #   splited_vcf - every file matching *.splited.vcf, one per sample column.
  #   sister_tag  - file named "sister_tag" in the working directory.
  #   quartet_tag - file named "quartet_tag" in the working directory.
  #   NOTE(review): nothing in the command below writes sister_tag or
  #   quartet_tag directly; presumably /opt/how_many_samples.py creates them.
  #   Confirm against that script.
  task split_gvcf_files {
    File gvcf
    String project
    String docker
    String cluster_config
    String disk_size

    command <<<
      # Column names from the #CHROM line, one per line. The line count is the
      # total number of columns; the first 9 are the fixed VCF columns, so
      # dropping them leaves only the sample names.
      grep '^#CHROM' "${gvcf}" | sed 's/\t/\n/g' > name
      ncol=$(wc -l < name)
      sed -i '1,9d' name

      # Header lines start with '#'. Anchoring the pattern keeps variant
      # records that merely contain a '#' somewhere out of the header.
      grep '^#' "${gvcf}" > header
      grep -v '^#' "${gvcf}" > body

      # Keep only records on chr1-chr22 and chrX. -w stops chr1 from matching
      # chr10 or chr1_* contigs.
      grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' body > body.filtered
      cat header body.filtered > "${project}.filtered.g.vcf"

      # One file per sample column: the 9 fixed columns plus column i.
      for i in $(seq 10 "$ncol"); do
        cut -f1-9,"$i" "${project}.filtered.g.vcf" > "$i.splited.vcf"
      done

      # Pair the numerically sorted per-column files with the sample names
      # (same order as the columns), then rename each file after its sample.
      ls *splited.vcf | sort -n | paste - name > rename
      while read -r a b
      do
        mv "$a" "$b.splited.vcf"
        # Fields 6 and 7 of the underscore-delimited sample name are recorded
        # as the sample and replicate identifiers.
        sample=$(echo "$b" | cut -f6 -d_)
        rep=$(echo "$b" | cut -f7 -d_)
        echo "$sample" >> quartet_sample
        echo "$rep" >> quartet_rep
      done < rename

      python /opt/how_many_samples.py -sample quartet_sample -rep quartet_rep
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      Array[File] splited_vcf = glob("*.splited.vcf")
      File sister_tag = "sister_tag"
      File quartet_tag = "quartet_tag"
    }
  }