# CNVkit copy-number task for a single tumor sample, with an optional normal.
#
# Steps performed by the command section:
#   1. Build the accessible-region bed (access.bed) from the reference fasta.
#   2. Build annotated target bins (my_baits.bed) and run autobin on the BAM(s).
#   3. Compute target/antitarget coverage for the tumor (and the normal, if given).
#   4. Build reference.cnn from the normal coverage, or a flat reference when
#      no normal BAM is supplied.
#   5. fix -> segment -> metrics -> call (purity read from the hrd file).
#   6. Plot scatter/diagram/heatmap, run genemetrics, and plot one scatter per
#      gene that is reported by both the ratio-based and segment-based runs.
#
# Inputs:
#   sample            prefix used for every output file name
#   ref_dir, fasta    the reference fasta is addressed as ref_dir/fasta
#   ref_flat          annotation passed to `cnvkit.py target --annotate`
#   regions           target regions passed to `cnvkit.py target`
#   hrd               tab-separated file; purity is taken from line 2, column 6
#   tumor_bam         tumor alignment
#   normal_bam        optional normal alignment; switches on the paired branch
#   *_bam_index       not referenced by the command; presumably declared so the
#                     index is localized next to its BAM (TODO confirm)
#   docker, cluster_config, disk_size   runtime settings
task CNVkit {
  String sample
  File ref_dir
  String fasta
  File ref_flat
  File regions
  File hrd
  File tumor_bam
  File tumor_bam_index
  File? normal_bam
  File? normal_bam_index
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e

    # Number of CPUs visible to the container; used for the parallel steps.
    nt=$(nproc)

    cnvkit.py access "${ref_dir}/${fasta}" -o access.bed

    # Prepare the target bed
    cnvkit.py target "${regions}" --annotate "${ref_flat}" --split --short-names -o my_baits.bed

    # An unset optional input interpolates to an empty string. The test is
    # quoted so that a path containing whitespace cannot make `[` fail and
    # silently select the tumor-only branch.
    if [ -n "${normal_bam}" ]; then
      cnvkit.py autobin "${tumor_bam}" "${normal_bam}" -t my_baits.bed -g access.bed
    else
      cnvkit.py autobin "${tumor_bam}" -t my_baits.bed -g access.bed
    fi

    # For each sample...
    cnvkit.py coverage "${tumor_bam}" my_baits.target.bed -p "$nt" -o "${sample}.T.targetcoverage.cnn"
    cnvkit.py coverage "${tumor_bam}" my_baits.antitarget.bed -p "$nt" -o "${sample}.T.antitargetcoverage.cnn"

    if [ -n "${normal_bam}" ]; then
      cnvkit.py coverage "${normal_bam}" my_baits.target.bed -p "$nt" -o "${sample}.N.targetcoverage.cnn"
      cnvkit.py coverage "${normal_bam}" my_baits.antitarget.bed -p "$nt" -o "${sample}.N.antitargetcoverage.cnn"

      # With paired or pooled normals. The two normal coverage files are named
      # explicitly instead of relying on glob and brace expansion.
      cnvkit.py reference "${sample}.N.targetcoverage.cnn" "${sample}.N.antitargetcoverage.cnn" \
        --fasta "${ref_dir}/${fasta}" -o reference.cnn
    else
      # With no control sample
      cnvkit.py reference -o reference.cnn -f "${ref_dir}/${fasta}" \
        -t my_baits.target.bed -a my_baits.antitarget.bed
    fi

    # For each tumor sample...
    cnvkit.py fix "${sample}.T.targetcoverage.cnn" "${sample}.T.antitargetcoverage.cnn" reference.cnn -o "${sample}.cnr"
    cnvkit.py segment "${sample}.cnr" -p "$nt" -o "${sample}.cns"

    # Check noise
    cnvkit.py metrics "${sample}.cnr" -s "${sample}.cns" > "${sample}.stats"

    # Derive each segment's absolute integer copy number, ploidy must be int value
    # Purity is the 6th tab-separated field of the 2nd line of the hrd file.
    PURITY=$(awk -F'\t' 'NR == 2 { print $6; exit }' "${hrd}")
    if [ -z "$PURITY" ]; then
      # Fail here with a clear message rather than letting `cnvkit.py call`
      # die on a missing --purity argument.
      echo "ERROR: could not read purity (line 2, column 6) from ${hrd}" >&2
      exit 1
    fi
    cnvkit.py call "${sample}.cns" -y -m clonal --purity "$PURITY" -o "${sample}.call.cns"
    cnvkit.py call "${sample}.cnr" -y -m clonal --purity "$PURITY" -o "${sample}.call.cnr"

    # Plot the results
    cnvkit.py scatter "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.scatter.pdf"
    cnvkit.py diagram "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.diagram.pdf"
    cnvkit.py heatmap "${sample}.cnr" "${sample}.call.cns" -o "${sample}.heatmap.pdf"

    # Genemetrics
    mkdir -p gainloss
    cnvkit.py genemetrics "${sample}.call.cnr" -t 0.2 -m 3 -o "${sample}.ratio_cnv.txt"
    cnvkit.py genemetrics "${sample}.call.cnr" -s "${sample}.call.cns" -t 0.2 -m 3 -o "${sample}.segment_cnv.txt"

    # Filter genes: keep only the genes present in both genemetrics tables.
    # The header line is dropped and the first column is de-duplicated and
    # sorted, as required by comm.
    tail -n +2 "${sample}.ratio_cnv.txt" | cut -f1 | sort -u > ratio_cnv.txt
    tail -n +2 "${sample}.segment_cnv.txt" | cut -f1 | sort -u > segment_cnv.txt
    comm -12 ratio_cnv.txt segment_cnv.txt > "${sample}.trusted_genes.txt"

    # One scatter plot per trusted gene. Read line by line so that gene names
    # are never word-split or glob-expanded; empty lines are skipped.
    while IFS= read -r gene; do
      if [ -z "$gene" ]; then
        continue
      fi
      cnvkit.py scatter "${sample}.call.cnr" -s "${sample}.call.cns" -g "$gene" \
        -o "./gainloss/${sample}.$gene.scatter.pdf"
    done < "${sample}.trusted_genes.txt"
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File scatter_pdf = "${sample}.scatter.pdf"
    File diagram_pdf = "${sample}.diagram.pdf"
    File heatmap_pdf = "${sample}.heatmap.pdf"
    File cnr = "${sample}.cnr"
    File cns = "${sample}.cns"
    File stats = "${sample}.stats"
    File call_cnr = "${sample}.call.cnr"
    File call_cns = "${sample}.call.cns"
    File ratio_cnv = "${sample}.ratio_cnv.txt"
    File segment_cnv = "${sample}.segment_cnv.txt"
    File gainloss_genes = "${sample}.trusted_genes.txt"
    # Per-gene scatter plots; empty when no gene passed the filter.
    Array[File] gainloss = glob("./gainloss/*")
  }
}