# Runs a CNVkit copy-number analysis for one tumor sample, optionally using a
# matched normal BAM to build the reference.
#
# Inputs:
#   sample            prefix used for every output file name
#   ref_dir / fasta   directory and file name of the reference FASTA
#   ref_flat          annotation file passed to `cnvkit.py target --annotate`
#   regions           target regions passed to `cnvkit.py target`
#   hrd               tab-separated file; field 6 of its 2nd line is read as the purity
#   tumor_bam(_index) tumor alignment (the index is declared but not referenced in the command)
#   normal_bam(_index) optional normal alignment; when absent a flat reference is built
#   docker / cluster_config / disk_size  runtime settings
task CNVkit {
  String sample
  File ref_dir
  String fasta
  File ref_flat
  File regions
  File hrd
  File tumor_bam
  File tumor_bam_index
  File? normal_bam
  File? normal_bam_index
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    # Number of CPUs available; used to parallelise the coverage steps.
    nt=$(nproc)

    cnvkit.py access ${ref_dir}/${fasta} -o access.bed

    # Prepare the target bed
    cnvkit.py target ${regions} --annotate ${ref_flat} --split --short-names -o my_baits.bed

    # normal_bam is optional: it interpolates to an empty string when not supplied.
    if [ -n "${normal_bam}" ]; then
      cnvkit.py autobin ${tumor_bam} ${normal_bam} -t my_baits.bed -g access.bed
    else
      cnvkit.py autobin ${tumor_bam} -t my_baits.bed -g access.bed
    fi

    # For each sample...
    cnvkit.py coverage ${tumor_bam} my_baits.target.bed -p "$nt" -o ${sample}.T.targetcoverage.cnn
    cnvkit.py coverage ${tumor_bam} my_baits.antitarget.bed -p "$nt" -o ${sample}.T.antitargetcoverage.cnn

    if [ -n "${normal_bam}" ]; then
      cnvkit.py coverage ${normal_bam} my_baits.target.bed -p "$nt" -o ${sample}.N.targetcoverage.cnn
      cnvkit.py coverage ${normal_bam} my_baits.antitarget.bed -p "$nt" -o ${sample}.N.antitargetcoverage.cnn
      # With paired or pooled normals
      cnvkit.py reference *.N.{,anti}targetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn
    else
      # With no control sample
      cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed
    fi

    # For each tumor sample...
    cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
    cnvkit.py segment ${sample}.cnr -o ${sample}.cns

    # Check noise
    cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats

    # Derive each segment's absolute integer copy number, ploidy must be int value
    # Purity is taken from field 6 of the second line of the hrd file.
    PURITY=$(awk -F'\t' 'NR==2 {print $6}' ${hrd})
    if [ -z "$PURITY" ]; then
      # Fail with a clear message instead of an argument-parsing error from --purity.
      echo "ERROR: no purity value found in field 6, line 2 of ${hrd}" >&2
      exit 1
    fi
    cnvkit.py segmetrics ${sample}.cnr -s ${sample}.cns --ci -o ${sample}.segmetrics.cns
    cnvkit.py call ${sample}.segmetrics.cns --drop-low-coverage --filter ci -m threshold --purity "$PURITY" -o ${sample}.call.cns

    # Plot the results
    cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
    cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
    cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf

    # Genemetrics
    cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio_cnv.txt
    cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment_cnv.txt

    # Filter genes: keep gene names (column 1, header skipped) reported by both runs
    tail -n+2 ${sample}.ratio_cnv.txt | cut -f1 | sort -u > ratio_cnv.txt
    tail -n+2 ${sample}.segment_cnv.txt | cut -f1 | sort -u > segment_cnv.txt
    comm -12 ratio_cnv.txt segment_cnv.txt > ${sample}.trusted_genes.txt

    # # Scatter plot for each gene (disabled; the failed_genes and gainloss outputs stay empty)
    # mkdir gainloss
    # touch ${sample}.failed_genes.txt
    # for gene in `cat ${sample}.trusted_genes.txt`
    # do
    #   cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf || echo $gene >> ${sample}.failed_genes.txt
    # done

    # Filter by trusted_genes: keep the header line plus rows whose first field is a trusted gene
    awk 'NR==FNR {a[$1]=$2;next} NR!=FNR {if(FNR == 1 || (FNR != 1 && $1 in a)) print $0}' ${sample}.trusted_genes.txt ${sample}.ratio_cnv.txt > ${sample}.ratio_cnv.trusted.txt

    # Infer absolute CN (not adjust by purity)
    cnvkit.py call ${sample}.ratio_cnv.trusted.txt -m threshold -o ${sample}.ratio_cnv.call.txt
    # Keep only rows whose 6th field is not 2 (the header row also passes this test)
    awk '{if ($6 != 2) print $0}' ${sample}.ratio_cnv.call.txt > ${sample}.ratio_cnv.call.filter.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File scatter_pdf = "${sample}.scatter.pdf"
    File diagram_pdf = "${sample}.diagram.pdf"
    File heatmap_pdf = "${sample}.heatmap.pdf"
    File cnr = "${sample}.cnr"
    File cns = "${sample}.cns"
    File stats = "${sample}.stats"
    File call_cns = "${sample}.call.cns"
    File ratio_cnv = "${sample}.ratio_cnv.txt"
    File segment_cnv = "${sample}.segment_cnv.txt"
    File trusted_genes = "${sample}.trusted_genes.txt"
    # Only produced when the per-gene scatter loop in the command is enabled.
    File? failed_genes = "${sample}.failed_genes.txt"
    Array[File]? gainloss = glob("./gainloss/*")
    File ratio_cnv_trusted = "${sample}.ratio_cnv.trusted.txt"
    File ratio_cnv_trusted_call = "${sample}.ratio_cnv.call.txt"
    File ratio_cnv_trusted_call_filter = "${sample}.ratio_cnv.call.filter.txt"
  }
}