@@ -0,0 +1,35 @@ | |||
task ANNOVAR {
  # Annotate a small-variant VCF against hg38 with ANNOVAR's table_annovar.pl
  # (gene, region and filter databases; see -protocol/-operation below).
  File vcf
  # Output prefix: input VCF file name with the ".vcf" suffix stripped.
  String basename = basename(vcf,".vcf")
  File annovar_database
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    # Paths are quoted so inputs containing spaces do not split into
    # multiple arguments. -vcfinput also emits an annotated VCF; -polish
    # left-normalizes indels; -nastring . marks missing annotations.
    /installations/annovar/table_annovar.pl "${vcf}" \
      "${annovar_database}" -buildver hg38 \
      -out "${basename}" -remove \
      -protocol refGene,cytoBand,genomicSuperDups,clinvar_20220320,intervar_20180118,cosmic95_coding,cosmic95_noncoding,gnomad211_exome,dbnsfp42c,avsnp150 \
      -operation g,r,r,f,f,f,f,f,f,f \
      -nastring . -vcfinput -polish -thread "$nt"
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File avinput = "${basename}.avinput"
    File multianno_txt = "${basename}.hg38_multianno.txt"
    File multianno_vcf = "${basename}.hg38_multianno.vcf"
  }
}
@@ -0,0 +1,36 @@ | |||
task AnnotSV {
  # Annotate structural-variant VCFs with AnnotSV (GRCh38).
  # The somatic VCF is required; the germline VCF is optional and its
  # annotation step is skipped when it is not provided.
  String sample
  File somatic_vcf
  File? germline_vcf
  File annotsv_database
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    export ANNOTSV=/opt/AnnotSV
    # Quote the interpolated paths inside the tests: an undefined optional
    # interpolates to "" (test is false, step skipped), while an unquoted
    # path containing spaces would make `[ a b ]` a bash syntax error.
    if [ -n "${somatic_vcf}" ]; then
      $ANNOTSV/bin/AnnotSV -SVinputFile "${somatic_vcf}" -outputFile "${sample}.somatic.SV.annotated.tsv" -genomeBuild GRCh38 -annotationsDir "${annotsv_database}" -outputDir .
    fi
    if [ -n "${germline_vcf}" ]; then
      $ANNOTSV/bin/AnnotSV -SVinputFile "${germline_vcf}" -outputFile "${sample}.germline.SV.annotated.tsv" -genomeBuild GRCh38 -annotationsDir "${annotsv_database}" -outputDir .
    fi
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File AnnotSV_somatic_SV = "${sample}.somatic.SV.annotated.tsv"
    File? AnnotSV_germline_SV = "${sample}.germline.SV.annotated.tsv"
  }
}
@@ -0,0 +1,66 @@ | |||
task BQSR {
  # Base-quality-score recalibration with Sentieon QualCal:
  # build the recal table, apply it (ReadWriter), compute a post table,
  # and plot the before/after comparison.
  File ref_dir
  File dbsnp_dir
  File dbmills_dir
  String sample
  String SENTIEON_LICENSE
  String fasta
  String dbsnp
  String db_mills
  File deduped_bam
  File deduped_bam_index
  String docker
  String cluster_config
  String disk_size
  File? regions
  Int? interval_padding

  command <<<
    set -o pipefail
    set -e
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)
    # Restrict recalibration to the capture regions when provided.
    # Only append --interval_padding when a value was actually supplied;
    # otherwise a bare "--interval_padding" with no argument would break
    # the driver command line.
    if [ -n "${regions}" ]; then
      INTERVAL="--interval ${regions}"
      if [ -n "${interval_padding}" ]; then
        INTERVAL="$INTERVAL --interval_padding ${interval_padding}"
      fi
    else
      INTERVAL=""
    fi
    # $INTERVAL is intentionally unquoted: it expands to zero or more
    # separate driver arguments.
    sentieon driver -t "$nt" \
      -r ${ref_dir}/${fasta} -i ${deduped_bam} \
      $INTERVAL \
      --algo QualCal \
      -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
      ${sample}_recal_data.table
    sentieon driver -t "$nt" \
      -r ${ref_dir}/${fasta} -i ${deduped_bam} -q ${sample}_recal_data.table \
      --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
      ${sample}_recal_data.table.post --algo ReadWriter ${sample}.sorted.deduped.recaled.bam
    sentieon driver -t "$nt" --algo QualCal \
      --plot --before ${sample}_recal_data.table --after ${sample}_recal_data.table.post ${sample}_recal_data.csv
    sentieon plot bqsr -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File recal_table = "${sample}_recal_data.table"
    File recal_post = "${sample}_recal_data.table.post"
    File recaled_bam = "${sample}.sorted.deduped.recaled.bam"
    File recaled_bam_index = "${sample}.sorted.deduped.recaled.bam.bai"
    File recal_csv = "${sample}_recal_data.csv"
    File bqsrreport_pdf = "${sample}_bqsrreport.pdf"
  }
}
@@ -0,0 +1,108 @@ | |||
task CNVkit {
  # Copy-number analysis with CNVkit for a tumor sample, optionally paired
  # with a matched normal. Produces ratio/segment calls, plots, and a
  # trusted-gene CNV table filtered by agreement between ratio- and
  # segment-level genemetrics.
  String sample
  File ref_dir
  String fasta
  File ref_flat
  File regions
  # TSV whose 2nd data row, 6th column holds tumor purity — TODO confirm
  # the producer of this file keeps that layout.
  File hrd
  File tumor_bam
  File tumor_bam_index
  File? normal_bam
  File? normal_bam_index
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    cnvkit.py access ${ref_dir}/${fasta} -o access.bed
    # Prepare the target bed
    cnvkit.py target "${regions}" --annotate "${ref_flat}" --split --short-names -o my_baits.bed
    # Quoted tests: an undefined optional interpolates to "" (false branch),
    # and quoting also protects paths containing spaces.
    if [ -n "${normal_bam}" ]; then
      cnvkit.py autobin "${tumor_bam}" "${normal_bam}" -t my_baits.bed -g access.bed
    else
      cnvkit.py autobin "${tumor_bam}" -t my_baits.bed -g access.bed
    fi
    # For each sample...
    cnvkit.py coverage "${tumor_bam}" my_baits.target.bed -o ${sample}.T.targetcoverage.cnn
    cnvkit.py coverage "${tumor_bam}" my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn
    if [ -n "${normal_bam}" ]; then
      cnvkit.py coverage "${normal_bam}" my_baits.target.bed -o ${sample}.N.targetcoverage.cnn
      cnvkit.py coverage "${normal_bam}" my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn
      # With paired or pooled normals
      cnvkit.py reference *.N.{,anti}targetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn
    else
      # With no control sample
      cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed
    fi
    # For each tumor sample...
    cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
    cnvkit.py segment ${sample}.cnr -o ${sample}.cns
    # Check noise
    cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats
    # Derive each segment's absolute integer copy number.
    # Purity lives in column 6 of the 2nd line of the HRD table; a single
    # awk replaces the earlier awk|sed pipeline, and an empty value is
    # rejected up front instead of producing a malformed cnvkit call line.
    PURITY=$(awk -F'\t' 'NR==2 {print $6}' "${hrd}")
    if [ -z "$PURITY" ]; then
      echo "ERROR: could not read purity (row 2, column 6) from ${hrd}" >&2
      exit 1
    fi
    cnvkit.py segmetrics ${sample}.cnr -s ${sample}.cns --ci -o ${sample}.segmetrics.cns
    cnvkit.py call ${sample}.segmetrics.cns --drop-low-coverage --filter ci -m threshold --purity "$PURITY" -o ${sample}.call.cns
    # Plot the results
    cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
    cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
    cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf
    # Genemetrics: genes covered by >= 3 bins with |log2| >= 0.2
    cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio_cnv.txt
    cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment_cnv.txt
    # Trusted genes = intersection of ratio-level and segment-level hits
    tail -n+2 ${sample}.ratio_cnv.txt | cut -f1 | sort | uniq > ratio_cnv.txt
    tail -n+2 ${sample}.segment_cnv.txt | cut -f1 | sort | uniq > segment_cnv.txt
    comm -12 ratio_cnv.txt segment_cnv.txt > ${sample}.trusted_genes.txt
    # # Scatter plot for each gene
    # mkdir gainloss
    # touch failed_genes.txt
    # for gene in `cat ${sample}.trusted_genes.txt`
    # do
    # cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf || echo $gene >> failed_genes.txt
    # done
    # Keep header plus only rows whose gene is in the trusted list
    awk 'NR==FNR {a[$1]=$2;next} NR!=FNR {if(FNR == 1 || (FNR != 1 && $1 in a)) print $0}' ${sample}.trusted_genes.txt ${sample}.ratio_cnv.txt > ${sample}.ratio_cnv.trusted.txt
    # Infer absolute CN (not adjusted by purity); drop neutral (CN==2) rows
    cnvkit.py call ${sample}.ratio_cnv.trusted.txt -m threshold -o ${sample}.ratio_cnv.call.txt
    awk '{if ($6 != 2) print $0}' ${sample}.ratio_cnv.call.txt > ${sample}.ratio_cnv.call.filter.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File scatter_pdf = "${sample}.scatter.pdf"
    File diagram_pdf = "${sample}.diagram.pdf"
    File heatmap_pdf = "${sample}.heatmap.pdf"
    File cnr = "${sample}.cnr"
    File cns = "${sample}.cns"
    File stats = "${sample}.stats"
    File call_cns = "${sample}.call.cns"
    File ratio_cnv = "${sample}.ratio_cnv.txt"
    File segment_cnv = "${sample}.segment_cnv.txt"
    File trusted_genes = "${sample}.trusted_genes.txt"
    File? failed_genes = "${sample}.failed_genes.txt"
    Array[File]? gainloss = glob("./gainloss/*")
    File ratio_cnv_trusted = "${sample}.ratio_cnv.trusted.txt"
    File ratio_cnv_trusted_call = "${sample}.ratio_cnv.call.txt"
    File ratio_cnv_trusted_call_filter = "${sample}.ratio_cnv.call.filter.txt"
  }
}
@@ -0,0 +1,31 @@ | |||
task bcftools {
  # Normalize a VCF with bcftools: split multiallelic records (SNPs and
  # indels) into one ALT allele per line.
  File ref_dir
  String fasta
  File vcf
  # Output prefix: input VCF file name with the ".vcf" suffix stripped.
  String basename = basename(vcf,".vcf")
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    # Reference-based left-alignment is intentionally disabled; re-enable with:
    # bcftools norm -m -both ${vcf} | bcftools norm -f ${ref_dir}/${fasta} -Ov -o ${basename}.norm.vcf
    # Split multiallelic sites (path quoted to survive spaces).
    bcftools norm -m -both "${vcf}" -o "${basename}.norm.vcf"
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File norm_vcf = "${basename}.norm.vcf"
  }
}