|
|
@@ -1,108 +0,0 @@ |
|
|
|
# Copy-number analysis of one tumor sample with CNVkit, optionally using a
# matched normal to build the reference.
#
# Inputs
#   sample             Prefix used for every output file name.
#   ref_dir / fasta    Directory holding the reference and the FASTA file name
#                      inside it; the command uses "ref_dir/fasta".
#   ref_flat           Annotation file given to `cnvkit.py target --annotate`.
#   regions            Regions file given to `cnvkit.py target`.
#   hrd                Tab-separated table; column 6 of its second line is
#                      passed to `cnvkit.py call --purity`.
#   tumor_bam          Tumor alignments.
#   normal_bam         Optional normal alignments. When absent, a flat
#                      reference is built from the target/antitarget beds.
#   *_bam_index        Not referenced by the command; presumably declared so
#                      the index is localized next to its BAM (confirm).
#   docker, cluster_config, disk_size
#                      Runtime settings forwarded to the `runtime` section.
task CNVkit {
    String sample
    File ref_dir
    String fasta
    File ref_flat
    File regions
    File hrd
    File tumor_bam
    File tumor_bam_index
    File? normal_bam
    File? normal_bam_index
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # Accessible regions of the reference genome
        cnvkit.py access "${ref_dir}/${fasta}" -o access.bed

        # Prepare the target bed
        cnvkit.py target "${regions}" --annotate "${ref_flat}" --split --short-names -o my_baits.bed

        # Bin sizing; the my_baits.target.bed / my_baits.antitarget.bed files
        # used below are expected to come from this step.
        # An undefined optional normal_bam interpolates to an empty string.
        if [ -n "${normal_bam}" ]; then
            cnvkit.py autobin "${tumor_bam}" "${normal_bam}" -t my_baits.bed -g access.bed
        else
            cnvkit.py autobin "${tumor_bam}" -t my_baits.bed -g access.bed
        fi

        # For each sample...
        cnvkit.py coverage "${tumor_bam}" my_baits.target.bed -o "${sample}.T.targetcoverage.cnn"
        cnvkit.py coverage "${tumor_bam}" my_baits.antitarget.bed -o "${sample}.T.antitargetcoverage.cnn"

        if [ -n "${normal_bam}" ]; then
            cnvkit.py coverage "${normal_bam}" my_baits.target.bed -o "${sample}.N.targetcoverage.cnn"
            cnvkit.py coverage "${normal_bam}" my_baits.antitarget.bed -o "${sample}.N.antitargetcoverage.cnn"

            # With paired or pooled normals (files named explicitly rather than
            # through a glob with brace expansion)
            cnvkit.py reference "${sample}.N.targetcoverage.cnn" "${sample}.N.antitargetcoverage.cnn" \
                --fasta "${ref_dir}/${fasta}" -o reference.cnn
        else
            # With no control sample
            cnvkit.py reference -o reference.cnn -f "${ref_dir}/${fasta}" \
                -t my_baits.target.bed -a my_baits.antitarget.bed
        fi

        # For each tumor sample...
        cnvkit.py fix "${sample}.T.targetcoverage.cnn" "${sample}.T.antitargetcoverage.cnn" reference.cnn -o "${sample}.cnr"
        cnvkit.py segment "${sample}.cnr" -o "${sample}.cns"

        # Check noise
        cnvkit.py metrics "${sample}.cnr" -s "${sample}.cns" > "${sample}.stats"

        # Derive each segment's absolute integer copy number, ploidy must be int value
        # Purity is column 6 of the second line of the hrd table.
        PURITY=$(awk -F'\t' 'NR == 2 {print $6; exit}' "${hrd}")
        if [ -z "$PURITY" ]; then
            # Fail here with a clear message instead of letting --purity
            # receive no argument further down.
            echo "ERROR: no purity value found in column 6, line 2 of ${hrd}" >&2
            exit 1
        fi
        cnvkit.py segmetrics "${sample}.cnr" -s "${sample}.cns" --ci -o "${sample}.segmetrics.cns"
        cnvkit.py call "${sample}.segmetrics.cns" --drop-low-coverage --filter ci -m threshold --purity "$PURITY" -o "${sample}.call.cns"

        # Plot the results
        cnvkit.py scatter "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.scatter.pdf"
        cnvkit.py diagram "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.diagram.pdf"
        cnvkit.py heatmap "${sample}.cnr" "${sample}.call.cns" -o "${sample}.heatmap.pdf"

        # Genemetrics
        cnvkit.py genemetrics "${sample}.cnr" -t 0.2 -m 3 -o "${sample}.ratio_cnv.txt"
        cnvkit.py genemetrics "${sample}.cnr" -s "${sample}.call.cns" -t 0.2 -m 3 -o "${sample}.segment_cnv.txt"

        # Filter genes: keep those reported by both the bin-level and the
        # segment-level genemetrics runs (first column, header skipped).
        tail -n +2 "${sample}.ratio_cnv.txt" | cut -f1 | sort -u > ratio_cnv.txt
        tail -n +2 "${sample}.segment_cnv.txt" | cut -f1 | sort -u > segment_cnv.txt
        comm -12 ratio_cnv.txt segment_cnv.txt > "${sample}.trusted_genes.txt"

        # # Scatter plot for each gene
        # mkdir gainloss
        # touch ${sample}.failed_genes.txt
        # for gene in `cat ${sample}.trusted_genes.txt`
        # do
        #   cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf || echo $gene >> ${sample}.failed_genes.txt
        # done

        # Filter by trusted_genes.
        # FILENAME == ARGV[1] is used instead of NR == FNR so that an empty
        # gene list still yields the header line; tab splitting matches the
        # cut -f1 used to build the gene list.
        awk -F'\t' 'FILENAME == ARGV[1] {keep[$1]; next} FNR == 1 || ($1 in keep)' \
            "${sample}.trusted_genes.txt" "${sample}.ratio_cnv.txt" > "${sample}.ratio_cnv.trusted.txt"

        # Infer absolute CN (not adjust by purity)
        cnvkit.py call "${sample}.ratio_cnv.trusted.txt" -m threshold -o "${sample}.ratio_cnv.call.txt"

        # Drop rows whose sixth field equals 2
        awk '{if ($6 != 2) print $0}' "${sample}.ratio_cnv.call.txt" > "${sample}.ratio_cnv.call.filter.txt"
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File scatter_pdf = "${sample}.scatter.pdf"
        File diagram_pdf = "${sample}.diagram.pdf"
        File heatmap_pdf = "${sample}.heatmap.pdf"
        File cnr = "${sample}.cnr"
        File cns = "${sample}.cns"
        File stats = "${sample}.stats"
        File call_cns = "${sample}.call.cns"
        File ratio_cnv = "${sample}.ratio_cnv.txt"
        File segment_cnv = "${sample}.segment_cnv.txt"
        File trusted_genes = "${sample}.trusted_genes.txt"
        # The two optional outputs below are only produced by the per-gene
        # scatter section, which is currently commented out in the command.
        File? failed_genes = "${sample}.failed_genes.txt"
        Array[File]? gainloss = glob("./gainloss/*")
        File ratio_cnv_trusted = "${sample}.ratio_cnv.trusted.txt"
        File ratio_cnv_trusted_call = "${sample}.ratio_cnv.call.txt"
        File ratio_cnv_trusted_call_filter = "${sample}.ratio_cnv.call.filter.txt"
    }
}