# CNVkit: copy-number calling for one tumor sample, optionally with a matched normal.
#
# Pipeline run by the command section, in order:
#   access -> target -> autobin -> coverage -> reference -> fix -> segment
#   -> metrics -> call -> scatter/diagram/heatmap -> genemetrics -> per-gene scatter plots
#
# Inputs
#   sample             Prefix used for every output file name.
#   tumor_bam(+index)  Tumor alignment; the index is declared so it is localized with the BAM.
#   normal_bam(+index) Optional matched normal. When absent a flat reference is built.
#   regions            Target regions BED given to `cnvkit.py target`.
#   ref_dir / fasta    Directory holding the reference and the FASTA file name inside it.
#   ref_flat           Gene annotation passed to `cnvkit.py target --annotate`.
#   hrd                Optional tab-separated table; the value in column 6 of its 2nd line
#                      is passed to `cnvkit.py call --purity`.
#                      NOTE(review): column 6 is assumed to hold tumor purity as a
#                      fraction - confirm against whatever produces this file.
#   docker / cluster_config / disk_size   Runtime settings.
task CNVkit {
  String sample
  File tumor_bam
  File tumor_bam_index
  File? normal_bam
  File? normal_bam_index
  File regions
  File ref_dir
  String fasta
  File ref_flat
  File? hrd
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e

    # Remember where the command was started: the relative paths in the output
    # section are expected to resolve against this directory, not the scratch one.
    # NOTE(review): standard Cromwell behaviour - confirm on this backend.
    launch_dir=$(pwd)

    mkdir -p /cromwell_root/tmp/cnvkit
    cd /cromwell_root/tmp/cnvkit

    cnvkit.py access ${ref_dir}/${fasta} -o access.bed

    # Prepare the target bed
    cnvkit.py target ${regions} --annotate ${ref_flat} --split --short-names -o my_baits.bed

    # autobin writes my_baits.target.bed and my_baits.antitarget.bed
    if [ -n "${normal_bam}" ]; then
      cnvkit.py autobin ${tumor_bam} ${normal_bam} -t my_baits.bed -g access.bed
    else
      cnvkit.py autobin ${tumor_bam} -t my_baits.bed -g access.bed
    fi

    # For each sample...
    cnvkit.py coverage ${tumor_bam} my_baits.target.bed -o ${sample}.T.targetcoverage.cnn
    cnvkit.py coverage ${tumor_bam} my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn

    if [ -n "${normal_bam}" ]; then
      cnvkit.py coverage ${normal_bam} my_baits.target.bed -o ${sample}.N.targetcoverage.cnn
      cnvkit.py coverage ${normal_bam} my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn
      # With paired or pooled normals
      cnvkit.py reference ${sample}.N.targetcoverage.cnn ${sample}.N.antitargetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn
    else
      # With no control sample
      cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed
    fi

    # For each tumor sample...
    cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
    cnvkit.py segment ${sample}.cnr -o ${sample}.cns

    # Check noise
    cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats

    # Derive each segment's absolute integer copy number.
    # Shell variables are written as $name (no braces) because this WDL version
    # treats a dollar-brace expression as a WDL placeholder, not a shell expansion.
    purity_args=""
    if [ -n "${hrd}" ]; then
      purity=$(awk -F'\t' 'NR == 2 {print $6}' "${hrd}")
      if [ -n "$purity" ]; then
        purity_args="--purity $purity"
      fi
    fi
    cnvkit.py call ${sample}.cns -y -m clonal $purity_args -o ${sample}.call.cns

    # Plot the results
    cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
    cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
    cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf

    # Genemetrics
    mkdir -p gainloss
    cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0 -m 0 -o ${sample}.cnv.txt

    # Filter genes: keep only genes reported both by the bin-level and the
    # segment-level genemetrics runs.
    cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio-genes.txt
    cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment-genes.txt
    tail -n +2 ${sample}.ratio-genes.txt | cut -f1 | sort -u > ratio-genes.txt
    tail -n +2 ${sample}.segment-genes.txt | cut -f1 | sort -u > segment-genes.txt
    comm -12 ratio-genes.txt segment-genes.txt > trusted_cnv_genes.txt

    # One scatter plot per trusted gene
    for gene in $(cat trusted_cnv_genes.txt)
    do
      cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf
    done

    # Make the results visible under the launch directory so the relative
    # output paths below can be found; the absolute ones need no copy.
    if [ "$launch_dir" != "$(pwd)" ]; then
      cp ${sample}.scatter.pdf ${sample}.diagram.pdf ${sample}.heatmap.pdf \
         ${sample}.cnr ${sample}.cns ${sample}.stats ${sample}.call.cns \
         ${sample}.cnv.txt "$launch_dir"/
    fi
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File scatter_pdf = "${sample}.scatter.pdf"
    File diagram_pdf = "${sample}.diagram.pdf"
    File heatmap_pdf = "${sample}.heatmap.pdf"
    File cnr = "${sample}.cnr"
    File cns = "${sample}.cns"
    File stats = "${sample}.stats"
    File call_cns = "${sample}.call.cns"
    File genemetrics = "${sample}.cnv.txt"
    File gainloss_genes = "/cromwell_root/tmp/cnvkit/trusted_cnv_genes.txt"
    Array[File] gainloss = glob("/cromwell_root/tmp/cnvkit/gainloss/*")
  }
}