Germline & Somatic short variant discovery (SNVs + Indels) for WGS & WES.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

83 lines
2.1KB

  1. task Pindel {
  2. String sample_id
  3. File bam
  4. File bam_index
  5. File ref_dir
  6. String fasta
  7. String docker
  8. String cluster_config
  9. String disk_size
  10. command <<<
  11. set -o pipefail
  12. set -e
  13. nt=$(nproc)
  14. mkdir ./pindel_result/
  15. mkdir ./input
  16. cp ${bam} ./input
  17. bam_file_name=`echo ${bam}|awk -F "/" '{print $NF}'`
  18. samtools index -@ 4 ./input/$bam_file_name
  19. java "-Xmx16G" -jar /software/picard/picard.jar CollectInsertSizeMetrics \
  20. -H ./pindel_result/${sample_id}_picard.pdf \
  21. -I ./input/$bam_file_name \
  22. -O ./pindel_result/${sample_id}_picard.txt
  23. temp_mean_insert_size=`cat pindel_result/${sample_id}_picard.txt|sed -n '8p'|cut -f 6|cut -d . -f 1`
  24. if [ $temp_mean_insert_size -lt 151 ];then
  25. mean_insert_size=151
  26. else
  27. mean_insert_size=`echo $temp_mean_insert_size`
  28. fi
  29. bam_file_name=`echo ${bam}|awk -F "/" '{print $NF}'`
  30. echo -e "./input/$bam_file_name\t$mean_insert_size\t${sample_id}" > ${sample_id}_config.txt
  31. pindel -i ${sample_id}_config.txt \
  32. -f ${ref_dir}/${fasta} \
  33. -o ./pindel_result/${sample_id} \
  34. -c all \
  35. -T $nt \
  36. -x 4 \
  37. -l \
  38. -B 0 \
  39. -M 3 \
  40. -J /software/picard/hg38_ucsc_centromere.bed
  41. grep "ChrID" pindel_result/${sample_id}_SI > pindel_result/${sample_id}_all_indel
  42. grep "ChrID" pindel_result/${sample_id}_D >> pindel_result/${sample_id}_all_indel
  43. awk -v chrID="chr1" '$8==chrID {print}' pindel_result/${sample_id}_all_indel > pindel_result/${sample_id}_indel
  44. for i in `seq 2 22` X Y M
  45. do
  46. awk -v chrID=chr$i '$8==chrID {print}' pindel_result/${sample_id}_all_indel >> pindel_result/${sample_id}_indel
  47. done
  48. pindel2vcf -r ${ref_dir}/${fasta} \
  49. -R GRCh38.d1.vd1 \
  50. -d GDC \
  51. -p pindel_result/${sample_id}_indel \
  52. -v pindel_result/${sample_id}.pindel.indel.vcf
  53. >>>
  54. runtime {
  55. docker: docker
  56. cluster: cluster_config
  57. systemDisk: "cloud_ssd 40"
  58. dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  59. }
  60. output {
  61. Array[File] pindel_result = glob("./pindel_result/${sample_id}*")
  62. }
  63. }