Bladeren bron

上传文件至 'tasks'

master
meng 2 jaren geleden
bovenliggende
commit
212ad73307
5 gewijzigde bestanden met toevoegingen van 276 en 0 verwijderingen
  1. +35
    -0
      tasks/ANNOVAR.wdl
  2. +36
    -0
      tasks/AnnotSV.wdl
  3. +66
    -0
      tasks/BQSR.wdl
  4. +108
    -0
      tasks/CNVkit.wdl
  5. +31
    -0
      tasks/bcftools.wdl

+ 35
- 0
tasks/ANNOVAR.wdl Bestand weergeven

@@ -0,0 +1,35 @@
task ANNOVAR {
    # Runs ANNOVAR's table_annovar.pl on a VCF against the hg38 build.
    #
    # Inputs:
    #   vcf              - VCF to annotate; its file name minus ".vcf" becomes the output prefix.
    #   annovar_database - ANNOVAR database location, passed as the second positional argument.
    #   docker           - container image for the task.
    #   cluster_config   - value forwarded to the `cluster` runtime attribute.
    #   disk_size        - data disk size, spliced into the `dataDisk` runtime attribute.

    File vcf
    String basename = basename(vcf,".vcf")
    File annovar_database
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -e -o pipefail

        threads=$(nproc)

        # One operation code per protocol, in the same order.
        protocols=refGene,cytoBand,genomicSuperDups,clinvar_20220320,intervar_20180118,cosmic95_coding,cosmic95_noncoding,gnomad211_exome,dbnsfp42c,avsnp150
        operations=g,r,r,f,f,f,f,f,f,f

        /installations/annovar/table_annovar.pl ${vcf} ${annovar_database} \
            -buildver hg38 \
            -out ${basename} \
            -remove \
            -protocol "$protocols" \
            -operation "$operations" \
            -nastring . \
            -vcfinput \
            -polish \
            -thread "$threads"
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        # All three files share the prefix derived from the input VCF name.
        File avinput = basename + ".avinput"
        File multianno_txt = basename + ".hg38_multianno.txt"
        File multianno_vcf = basename + ".hg38_multianno.vcf"
    }
}

+ 36
- 0
tasks/AnnotSV.wdl Bestand weergeven

@@ -0,0 +1,36 @@
task AnnotSV {
    # Annotates structural-variant VCFs with AnnotSV (genome build GRCh38).
    #
    # Inputs:
    #   sample           - prefix for the output TSV names.
    #   somatic_vcf      - somatic SV VCF (required).
    #   germline_vcf     - germline SV VCF (optional; its annotation step is skipped when absent).
    #   annotsv_database - directory passed to AnnotSV as -annotationsDir.
    #   docker           - container image for the task.
    #   cluster_config   - value forwarded to the `cluster` runtime attribute.
    #   disk_size        - data disk size, spliced into the `dataDisk` runtime attribute.

    String sample
    File somatic_vcf
    File? germline_vcf
    File annotsv_database
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export ANNOTSV=/opt/AnnotSV

        # An undefined optional input renders as an empty string, so test with -n on a
        # quoted value. The unquoted form errors out on paths containing whitespace,
        # and inside an `if` that error silently skips the annotation.
        if [ -n "${somatic_vcf}" ]; then
            $ANNOTSV/bin/AnnotSV -SVinputFile "${somatic_vcf}" -outputFile "${sample}.somatic.SV.annotated.tsv" -genomeBuild GRCh38 -annotationsDir "${annotsv_database}" -outputDir .
        fi

        if [ -n "${germline_vcf}" ]; then
            $ANNOTSV/bin/AnnotSV -SVinputFile "${germline_vcf}" -outputFile "${sample}.germline.SV.annotated.tsv" -genomeBuild GRCh38 -annotationsDir "${annotsv_database}" -outputDir .
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File AnnotSV_somatic_SV = "${sample}.somatic.SV.annotated.tsv"
        # Only written when germline_vcf was supplied.
        File? AnnotSV_germline_SV = "${sample}.germline.SV.annotated.tsv"
    }
}

+ 66
- 0
tasks/BQSR.wdl Bestand weergeven

@@ -0,0 +1,66 @@
task BQSR {
    # Base quality score recalibration with Sentieon.
    #
    # Inputs:
    #   ref_dir / fasta        - reference directory and the FASTA file name inside it.
    #   dbsnp_dir / dbsnp      - directory and file name of the first known-sites resource (-k).
    #   dbmills_dir / db_mills - directory and file name of the second known-sites resource (-k).
    #   sample                 - prefix for every output file.
    #   SENTIEON_LICENSE       - exported into the environment for the sentieon binary.
    #   deduped_bam            - input BAM.
    #   deduped_bam_index      - index of the input BAM; not referenced in the command
    #                            (presumably declared so it is localized next to the BAM).
    #   regions                - optional interval file; restricts the first QualCal pass.
    #   interval_padding       - optional padding, only used together with `regions`.
    #   docker, cluster_config, disk_size - runtime settings.

    File ref_dir
    File dbsnp_dir
    File dbmills_dir
    String sample
    String SENTIEON_LICENSE
    String fasta
    String dbsnp
    String db_mills
    File deduped_bam
    File deduped_bam_index
    String docker
    String cluster_config
    String disk_size
    File? regions
    Int? interval_padding

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        # Build the interval arguments. The padding flag is emitted only when
        # interval_padding is defined; otherwise the whole placeholder renders empty,
        # instead of leaving a bare --interval_padding with no value.
        if [ -n "${regions}" ]; then
            INTERVAL="--interval ${regions} ${'--interval_padding ' + interval_padding}"
        else
            INTERVAL=""
        fi

        # Pass 1: compute the recalibration table.
        sentieon driver -t $nt \
            -r ${ref_dir}/${fasta} -i ${deduped_bam} \
            $INTERVAL \
            --algo QualCal \
            -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
            ${sample}_recal_data.table

        # Pass 2: apply the table (-q), compute the post-recalibration table and
        # write the recalibrated BAM. This pass is not restricted to the intervals.
        sentieon driver -t $nt \
            -r ${ref_dir}/${fasta} -i ${deduped_bam} -q ${sample}_recal_data.table \
            --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
            ${sample}_recal_data.table.post --algo ReadWriter ${sample}.sorted.deduped.recaled.bam

        # Before/after comparison as CSV, then rendered as a PDF report.
        sentieon driver -t $nt --algo QualCal \
            --plot --before ${sample}_recal_data.table --after ${sample}_recal_data.table.post ${sample}_recal_data.csv
        sentieon plot bqsr -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File recal_table = "${sample}_recal_data.table"
        File recal_post = "${sample}_recal_data.table.post"
        File recaled_bam = "${sample}.sorted.deduped.recaled.bam"
        # No command above names the .bai explicitly; presumably ReadWriter writes it
        # alongside the BAM - confirm against the Sentieon version in the image.
        File recaled_bam_index = "${sample}.sorted.deduped.recaled.bam.bai"
        File recal_csv = "${sample}_recal_data.csv"
        File bqsrreport_pdf = "${sample}_bqsrreport.pdf"
    }
}

+ 108
- 0
tasks/CNVkit.wdl Bestand weergeven

@@ -0,0 +1,108 @@
task CNVkit {
    # Copy-number calling for a tumor sample with CNVkit, optionally using a matched normal.
    #
    # Inputs:
    #   sample            - prefix for every sample-specific output file.
    #   ref_dir / fasta   - reference directory and the FASTA file name inside it.
    #   ref_flat          - annotation file passed to `cnvkit.py target --annotate`.
    #   regions           - target regions passed to `cnvkit.py target`.
    #   hrd               - tab-separated file; the purity is read from column 6 of its second line.
    #   tumor_bam         - tumor BAM (required).
    #   normal_bam        - optional normal BAM; when absent a flat reference is built.
    #   tumor_bam_index / normal_bam_index - not referenced in the command
    #                       (presumably declared so they are localized next to the BAMs).
    #   docker, cluster_config, disk_size - runtime settings.

    String sample
    File ref_dir
    String fasta
    File ref_flat
    File regions
    File hrd
    File tumor_bam
    File tumor_bam_index
    File? normal_bam
    File? normal_bam_index
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        cnvkit.py access ${ref_dir}/${fasta} -o access.bed

        # Prepare the target bed
        cnvkit.py target ${regions} --annotate ${ref_flat} --split --short-names -o my_baits.bed

        # An undefined optional input renders as an empty string, so test with -n on a
        # quoted value; the unquoted form misbehaves on paths containing whitespace.
        if [ -n "${normal_bam}" ]; then
            cnvkit.py autobin ${tumor_bam} ${normal_bam} -t my_baits.bed -g access.bed
        else
            cnvkit.py autobin ${tumor_bam} -t my_baits.bed -g access.bed
        fi

        # For each sample...
        cnvkit.py coverage ${tumor_bam} my_baits.target.bed -o ${sample}.T.targetcoverage.cnn
        cnvkit.py coverage ${tumor_bam} my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn

        if [ -n "${normal_bam}" ]; then
            cnvkit.py coverage ${normal_bam} my_baits.target.bed -o ${sample}.N.targetcoverage.cnn
            cnvkit.py coverage ${normal_bam} my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn
            # With paired or pooled normals
            cnvkit.py reference *.N.{,anti}targetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn
        else
            # With no control sample
            cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed
        fi

        # For each tumor sample...
        cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
        cnvkit.py segment ${sample}.cnr -o ${sample}.cns

        # Check noise
        cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats

        # Derive each segment's absolute integer copy number, ploidy must be int value
        # Purity is column 6 of the second line of the hrd file. Fail early with a clear
        # message when it is missing, instead of rendering "--purity -o ..." below.
        PURITY=$(awk -F'\t' 'NR == 2 {print $6}' "${hrd}")
        if [ -z "$PURITY" ]; then
            echo "ERROR: no purity value found in column 6, line 2 of ${hrd}" >&2
            exit 1
        fi
        cnvkit.py segmetrics ${sample}.cnr -s ${sample}.cns --ci -o ${sample}.segmetrics.cns
        cnvkit.py call ${sample}.segmetrics.cns --drop-low-coverage --filter ci -m threshold --purity "$PURITY" -o ${sample}.call.cns

        # Plot the results
        cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
        cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
        cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf

        # Genemetrics
        cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio_cnv.txt
        cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment_cnv.txt

        # Filter genes: keep those reported by both the bin-level and the segment-level run.
        tail -n+2 ${sample}.ratio_cnv.txt | cut -f1 | sort -u > ratio_cnv.txt
        tail -n+2 ${sample}.segment_cnv.txt | cut -f1 | sort -u > segment_cnv.txt
        comm -12 ratio_cnv.txt segment_cnv.txt > ${sample}.trusted_genes.txt

        # # Scatter plot for each gene
        # mkdir gainloss
        # touch failed_genes.txt
        # for gene in `cat ${sample}.trusted_genes.txt`
        # do
        # cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf || echo $gene >> failed_genes.txt
        # done

        # Filter by trusted_genes
        # Tell the two inputs apart by file name rather than NR==FNR: with an empty gene
        # list NR==FNR also holds for the second file and the header row would be lost.
        awk 'FILENAME == ARGV[1] {trusted[$1]; next} FNR == 1 || ($1 in trusted)' ${sample}.trusted_genes.txt ${sample}.ratio_cnv.txt > ${sample}.ratio_cnv.trusted.txt

        # Infer absolute CN (not adjust by purity)
        cnvkit.py call ${sample}.ratio_cnv.trusted.txt -m threshold -o ${sample}.ratio_cnv.call.txt

        # Keep rows whose 6th column is not 2 (presumably the called copy number -
        # verify the column order of the call output).
        awk '{if ($6 != 2) print $0}' ${sample}.ratio_cnv.call.txt > ${sample}.ratio_cnv.call.filter.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File scatter_pdf = "${sample}.scatter.pdf"
        File diagram_pdf = "${sample}.diagram.pdf"
        File heatmap_pdf = "${sample}.heatmap.pdf"
        File cnr = "${sample}.cnr"
        File cns = "${sample}.cns"
        File stats = "${sample}.stats"
        File call_cns = "${sample}.call.cns"
        File ratio_cnv = "${sample}.ratio_cnv.txt"
        File segment_cnv = "${sample}.segment_cnv.txt"
        File trusted_genes = "${sample}.trusted_genes.txt"
        # The next two are only produced by the per-gene scatter loop, which is currently
        # commented out. NOTE(review): that loop writes "failed_genes.txt" without the
        # sample prefix, so this path would not match even if the loop were re-enabled.
        File? failed_genes = "${sample}.failed_genes.txt"
        Array[File]? gainloss = glob("./gainloss/*")
        File ratio_cnv_trusted = "${sample}.ratio_cnv.trusted.txt"
        File ratio_cnv_trusted_call = "${sample}.ratio_cnv.call.txt"
        File ratio_cnv_trusted_call_filter = "${sample}.ratio_cnv.call.filter.txt"
    }
}

+ 31
- 0
tasks/bcftools.wdl Bestand weergeven

@@ -0,0 +1,31 @@
task bcftools {
    # Splits multiallelic sites of a VCF with `bcftools norm`.
    #
    # Inputs:
    #   ref_dir / fasta - reference directory and FASTA name; only referenced by the
    #                     commented-out left-alignment variant of the command.
    #   vcf             - input VCF; its file name minus ".vcf" becomes the output prefix.
    #   docker, cluster_config, disk_size - runtime settings.

    File ref_dir
    String fasta
    File vcf
    String basename = basename(vcf,".vcf")
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -e -o pipefail

        threads=$(nproc)

        # bcftools norm -m -both ${vcf} | bcftools norm -f ${ref_dir}/${fasta} -Ov -o ${basename}.norm.vcf
        # Split multiallelic sites
        bcftools norm \
            --multiallelics -both \
            --output ${basename}.norm.vcf \
            ${vcf}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        File norm_vcf = basename + ".norm.vcf"
    }
}

Laden…
Annuleren
Opslaan