|
- # CNVkit copy-number task for one tumor sample, with an optional matched normal.
- #
- # Steps run by the command section, in order: access -> target -> autobin ->
- # coverage -> reference -> fix -> segment -> metrics -> call -> plots ->
- # genemetrics -> per-gene scatter plots.
- #
- # Inputs:
- #   sample            prefix used for every output file name
- #   ref_dir, fasta    the reference FASTA is read from ref_dir/fasta
- #   ref_flat          annotation file handed to `cnvkit.py target --annotate`
- #   regions           regions file handed to `cnvkit.py target`
- #   hrd               tab-separated file; column 6 of line 2 is used as --purity
- #   tumor_bam(+index) tumor alignments (the index is declared but not referenced
- #                     by the command; presumably localized for cnvkit - confirm)
- #   normal_bam(+index) optional normal; when absent a flat reference is built
- #   docker, cluster_config, disk_size   runtime settings
- task CNVkit {
-
- String sample
- File ref_dir
- String fasta
- File ref_flat
- File regions
- File hrd
- File tumor_bam
- File tumor_bam_index
- File? normal_bam
- File? normal_bam_index
- String docker
- String cluster_config
- String disk_size
-
- command <<<
- set -o pipefail
- set -e
-
- # Accessible regions of the reference genome
- cnvkit.py access "${ref_dir}/${fasta}" -o access.bed
- # Prepare the target bed
- cnvkit.py target "${regions}" --annotate "${ref_flat}" --split --short-names -o my_baits.bed
- # An unset optional normal_bam is interpolated as an empty string, so test it
- # explicitly with -n on a quoted value instead of a bare, unquoted [ ... ].
- if [ -n "${normal_bam}" ]; then
- cnvkit.py autobin "${tumor_bam}" "${normal_bam}" -t my_baits.bed -g access.bed
- else
- cnvkit.py autobin "${tumor_bam}" -t my_baits.bed -g access.bed
- fi
-
- # For each sample...
- cnvkit.py coverage "${tumor_bam}" my_baits.target.bed -o "${sample}.T.targetcoverage.cnn"
- cnvkit.py coverage "${tumor_bam}" my_baits.antitarget.bed -o "${sample}.T.antitargetcoverage.cnn"
- if [ -n "${normal_bam}" ]; then
- cnvkit.py coverage "${normal_bam}" my_baits.target.bed -o "${sample}.N.targetcoverage.cnn"
- cnvkit.py coverage "${normal_bam}" my_baits.antitarget.bed -o "${sample}.N.antitargetcoverage.cnn"
- # With paired or pooled normals: name the two files written just above
- # explicitly rather than relying on a glob plus bash brace expansion.
- cnvkit.py reference "${sample}.N.targetcoverage.cnn" "${sample}.N.antitargetcoverage.cnn" --fasta "${ref_dir}/${fasta}" -o reference.cnn
- else
- # With no control sample
- cnvkit.py reference -o reference.cnn -f "${ref_dir}/${fasta}" -t my_baits.target.bed -a my_baits.antitarget.bed
- fi
-
- # For each tumor sample...
- cnvkit.py fix "${sample}.T.targetcoverage.cnn" "${sample}.T.antitargetcoverage.cnn" reference.cnn -o "${sample}.cnr"
- cnvkit.py segment "${sample}.cnr" -o "${sample}.cns"
-
- # Check noise
- cnvkit.py metrics "${sample}.cnr" -s "${sample}.cns" > "${sample}.stats"
-
- # Derive each segment's absolute integer copy number.
- # Purity is column 6 of the second line of the HRD file. No --ploidy is passed,
- # so cnvkit's default ploidy applies.
- PURITY=$(awk -F'\t' 'NR == 2 {print $6; exit}' "${hrd}")
- if [ -z "$PURITY" ]; then
- echo "ERROR: could not read purity (line 2, column 6) from ${hrd}" >&2
- exit 1
- fi
- # NOTE(review): -y tells `call` that a male reference was used, but the
- # `reference` commands above are run without -y - confirm this is intended.
- cnvkit.py call "${sample}.cns" -y -m clonal --purity "$PURITY" -o "${sample}.call.cns"
- cnvkit.py call "${sample}.cnr" -y -m clonal --purity "$PURITY" -o "${sample}.call.cnr"
-
- # Plot the results
- cnvkit.py scatter "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.scatter.pdf"
- cnvkit.py diagram "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.diagram.pdf"
- cnvkit.py heatmap "${sample}.cnr" "${sample}.call.cns" -o "${sample}.heatmap.pdf"
-
- # Genemetrics
- mkdir -p gainloss
- cnvkit.py genemetrics "${sample}.call.cnr" -t 0.2 -m 3 -o "${sample}.ratio_cnv.txt"
- cnvkit.py genemetrics "${sample}.call.cnr" -s "${sample}.call.cns" -t 0.2 -m 3 -o "${sample}.segment_cnv.txt"
- # Filter genes: keep only gene names (column 1, header skipped) reported by
- # both the bin-level and the segment-level genemetrics runs.
- tail -n+2 "${sample}.ratio_cnv.txt" | cut -f1 | sort -u > ratio_cnv.txt
- tail -n+2 "${sample}.segment_cnv.txt" | cut -f1 | sort -u > segment_cnv.txt
- comm -12 ratio_cnv.txt segment_cnv.txt > "${sample}.trusted_genes.txt"
- # Read one gene per line on fd 3 so names are never word-split or glob-expanded
- # and the loop body cannot consume the list through stdin.
- while IFS= read -r gene <&3
- do
- [ -n "$gene" ] || continue
- cnvkit.py scatter "${sample}.call.cnr" -s "${sample}.call.cns" -g "$gene" -o "./gainloss/${sample}.$gene.scatter.pdf"
- done 3< "${sample}.trusted_genes.txt"
- >>>
-
- runtime {
- docker: docker
- cluster: cluster_config
- systemDisk: "cloud_ssd 40"
- dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- output {
- File scatter_pdf = "${sample}.scatter.pdf"
- File diagram_pdf = "${sample}.diagram.pdf"
- File heatmap_pdf = "${sample}.heatmap.pdf"
- File cnr = "${sample}.cnr"
- File cns = "${sample}.cns"
- File stats = "${sample}.stats"
- File call_cnr = "${sample}.call.cnr"
- File call_cns = "${sample}.call.cns"
- File ratio_cnv = "${sample}.ratio_cnv.txt"
- File segment_cnv = "${sample}.segment_cnv.txt"
- File gainloss_genes = "${sample}.trusted_genes.txt"
- # One scatter PDF per trusted gene; empty when no gene passed the filter.
- Array[File] gainloss = glob("./gainloss/*")
- }
- }
|