Selaa lähdekoodia

only wgs

tags/v0.1.1
LUYAO REN 3 vuotta sitten
vanhempi
commit
03a5ee2407
6 muutettua tiedostoa jossa 53 lisäystä ja 132 poistoa
  1. +9
    -23
      tasks/benchmark.wdl
  2. +0
    -44
      tasks/corealigner.wdl
  3. +1
    -6
      tasks/deduped_Metrics.wdl
  4. +25
    -0
      tasks/filter_vcf.wdl
  5. +1
    -7
      tasks/qualimap.wdl
  6. +17
    -52
      workflow.wdl

+ 9
- 23
tasks/benchmark.wdl Näytä tiedosto

@@ -2,7 +2,6 @@ task benchmark {
File filtered_vcf
File benchmarking_dir
File ref_dir
File? qc_bed
String sample = basename(filtered_vcf,".filtered.vcf")
String fasta
String docker
@@ -23,29 +22,16 @@ task benchmark {
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${filtered_vcf} -c > ${sample}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz

if [ ${qc_bed} ];then
if [[ ${sample} =~ "LCL5" ]];then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f ${qc_bed}--threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL6" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f ${qc_bed} --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL7" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f ${qc_bed} --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL8" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f ${qc_bed} --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
else
echo "only for quartet samples"
fi
if [[ ${sample} =~ "LCL5" ]];then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL6" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL7" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL8" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
else
if [[ ${sample} =~ "LCL5" ]];then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL6" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL7" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL8" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
else
echo "only for quartet samples"
echo "only for quartet samples"
fi
>>>


+ 0
- 44
tasks/corealigner.wdl Näytä tiedosto

@@ -1,44 +0,0 @@
# Joint indel realignment of a tumor/normal BAM pair with the Sentieon
# `Realigner` algorithm.
#
# Inputs:
#   ref_dir, fasta                - directory holding the reference and the
#                                   FASTA file name inside it
#   dbsnp_dir, dbsnp              - directory and file name of the dbSNP
#                                   known-sites file
#   dbmills_dir, db_mills         - directory and file name of the Mills
#                                   known-sites file
#   tumor_recaled_bam(+_index)    - tumor BAM and its index
#   normal_recaled_bam(+_index)   - normal BAM and its index
#   sample                        - prefix of the output file names
#   SENTIEON_INSTALL_DIR          - Sentieon installation root inside the image
#   docker, cluster_config,
#   disk_size                     - runtime settings (see `runtime` below)
#
# Outputs:
#   corealigner_bam               - "<sample>_corealigned.bam"
#   corealigner_bam_index         - "<sample>_corealigned.bam.bai"
#                                   NOTE(review): the command never creates the
#                                   index explicitly; presumably the Sentieon
#                                   driver writes it next to the BAM - confirm.
task corealigner {
    File ref_dir
    File dbsnp_dir
    File dbmills_dir

    String sample
    String SENTIEON_INSTALL_DIR
    String docker
    String cluster_config
    String fasta

    String dbsnp
    String db_mills
    File tumor_recaled_bam
    # The index files are not referenced in the command; they are declared as
    # inputs so that they are available alongside their BAMs.
    File tumor_recaled_bam_index
    File normal_recaled_bam
    File normal_recaled_bam_index
    String disk_size

    # The known-sites files are addressed as <dir>/<name>, the same way the
    # reference is addressed as ref_dir/fasta. Passing the bare names would
    # leave dbsnp_dir and dbmills_dir unused and the paths unresolved.
    # NOTE(review): the license server address is hard-coded in the command.
    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=192.168.0.55:8990
        nt=$(nproc)
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${tumor_recaled_bam} -i ${normal_recaled_bam} --algo Realigner -k ${dbmills_dir}/${db_mills} -k ${dbsnp_dir}/${dbsnp} ${sample}_corealigned.bam
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File corealigner_bam = "${sample}_corealigned.bam"
        File corealigner_bam_index = "${sample}_corealigned.bam.bai"
    }
}




+ 1
- 6
tasks/deduped_Metrics.wdl Näytä tiedosto

@@ -1,7 +1,6 @@
task deduped_Metrics {

File ref_dir
File? bed
String SENTIEON_INSTALL_DIR
String sample
String fasta
@@ -17,11 +16,7 @@ task deduped_Metrics {
set -e
export SENTIEON_LICENSE=192.168.0.55:8990
nt=$(nproc)
if [ ${bed} ]; then
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --interval ${bed} --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt --algo QualDistribution ${sample}_deduped_qd_metrics.txt --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt --algo AlignmentStat ${sample}_deduped_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt --algo QualityYield ${sample}_deduped_QualityYield.txt --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
else
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt --algo QualDistribution ${sample}_deduped_qd_metrics.txt --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt --algo AlignmentStat ${sample}_deduped_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt --algo QualityYield ${sample}_deduped_QualityYield.txt --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
fi
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt --algo QualDistribution ${sample}_deduped_qd_metrics.txt --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt --algo AlignmentStat ${sample}_deduped_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt --algo QualityYield ${sample}_deduped_QualityYield.txt --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
>>>

runtime {

+ 25
- 0
tasks/filter_vcf.wdl Näytä tiedosto

@@ -0,0 +1,25 @@
# Restrict a VCF to records on chr1-chr22 and chrX while keeping its header.
#
# Inputs:
#   vcf            - VCF to filter; it is read as plain text
#   project        - prefix of the output file name
#   docker         - container image the command runs in
#   cluster_config - value of the `cluster` runtime attribute
#   disk_size      - data-disk size interpolated into `dataDisk`
#
# Output:
#   filtered_vcf   - "<project>.filtered.vcf" (header + selected records)
task filter_vcf {
    File vcf
    String project
    String docker
    String cluster_config
    String disk_size

    # Header lines are those that START with '#'. The patterns are anchored
    # with '^' so that a variant record which merely contains a '#' somewhere
    # (for example in an INFO value) is treated as a record, not as header.
    # `grep -w` makes each chromosome name match as a whole word, so '^chr1'
    # does not also select chr10-chr19.
    command <<<
        grep '^#' ${vcf} > header
        grep -v '^#' ${vcf} > body
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' body > body.filtered
        cat header body.filtered > ${project}.filtered.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File filtered_vcf = "${project}.filtered.vcf"
    }
}

+ 1
- 7
tasks/qualimap.wdl Näytä tiedosto

@@ -1,7 +1,6 @@
task qualimap {
File bam
File bai
File? bed
String bamname = basename(bam,".bam")
String docker
String cluster_config
@@ -12,12 +11,7 @@ task qualimap {
set -e
nt=$(nproc)

if [ ${bed} ]; then
awk 'BEGIN{OFS="\t"}{sub("\r","",$3);print $1,$2,$3,"",0,"."}' ${bed} > new.bed
/opt/qualimap/qualimap bamqc -bam ${bam} -gff new.bed -outformat PDF:HTML -nt $nt -outdir ${bamname} --java-mem-size=60G
else
/opt/qualimap/qualimap bamqc -bam ${bam} -outformat PDF:HTML -nt $nt -outdir ${bamname} --java-mem-size=60G
fi
/opt/qualimap/qualimap bamqc -bam ${bam} -outformat PDF:HTML -nt $nt -outdir ${bamname} --java-mem-size=60G

tar -zcvf ${bamname}_qualimap.zip ${bamname}
>>>

+ 17
- 52
workflow.wdl Näytä tiedosto

@@ -16,7 +16,7 @@ import "./tasks/quartet_mendelian.wdl" as quartet_mendelian
import "./tasks/fastqc.wdl" as fastqc
import "./tasks/fastqscreen.wdl" as fastqscreen
import "./tasks/merge_family.wdl" as merge_family
import "./tasks/filter_vcf_bed.wdl" as filter_vcf_bed
import "./tasks/filter_vcf.wdl" as filter_vcf


workflow {{ project_name }} {
@@ -36,8 +36,6 @@ workflow {{ project_name }} {
File? vcf_F7
File? vcf_M8

File? bed

String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String SENTIEONdocker
@@ -60,7 +58,6 @@ workflow {{ project_name }} {
File screen_ref_dir
File fastq_screen_conf
File benchmarking_dir
File benchmark_region

String project

@@ -119,7 +116,6 @@ workflow {{ project_name }} {

call qualimap.qualimap as qualimap_D5 {
input:
bed=bed,
bam=Dedup_D5.Dedup_bam,
bai=Dedup_D5.Dedup_bam_index,
docker=QUALIMAPdocker,
@@ -131,7 +127,6 @@ workflow {{ project_name }} {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
fasta=fasta,
bed=bed,
ref_dir=ref_dir,
Dedup_bam=Dedup_D5.Dedup_bam,
Dedup_bam_index=Dedup_D5.Dedup_bam_index,
@@ -198,11 +193,9 @@ workflow {{ project_name }} {
cluster_config=BIGcluster_config
}

call filter_vcf_bed.filter_vcf_bed as filter_vcf_bed_D5 {
call filter_vcf.filter_vcf as filter_vcf_D5 {
input:
vcf=Haplotyper_D5.vcf,
bed=bed,
benchmark_region=benchmark_region,
project=project,
docker=BEDTOOLSdocker,
cluster_config=SMALLcluster_config,
@@ -211,10 +204,9 @@ workflow {{ project_name }} {

call benchmark.benchmark as benchmark_D5 {
input:
filtered_vcf=filter_vcf_bed_D5.filtered_vcf,
filtered_vcf=filter_vcf_D5.filtered_vcf,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
qc_bed=filter_vcf_bed_D5.filtered_bed,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
@@ -270,7 +262,6 @@ workflow {{ project_name }} {

call qualimap.qualimap as qualimap_D6 {
input:
bed=bed,
bam=Dedup_D6.Dedup_bam,
bai=Dedup_D6.Dedup_bam_index,
docker=QUALIMAPdocker,
@@ -282,7 +273,6 @@ workflow {{ project_name }} {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
fasta=fasta,
bed=bed,
ref_dir=ref_dir,
Dedup_bam=Dedup_D6.Dedup_bam,
Dedup_bam_index=Dedup_D6.Dedup_bam_index,
@@ -349,11 +339,9 @@ workflow {{ project_name }} {
cluster_config=BIGcluster_config
}

call filter_vcf_bed.filter_vcf_bed as filter_vcf_bed_D6 {
call filter_vcf.filter_vcf as filter_vcf_D6 {
input:
vcf=Haplotyper_D6.vcf,
bed=bed,
benchmark_region=benchmark_region,
project=project,
docker=BEDTOOLSdocker,
cluster_config=SMALLcluster_config,
@@ -362,10 +350,9 @@ workflow {{ project_name }} {

call benchmark.benchmark as benchmark_D6 {
input:
filtered_vcf=filter_vcf_bed_D6.filtered_vcf,
filtered_vcf=filter_vcf_D6.filtered_vcf,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
qc_bed=filter_vcf_bed_D6.filtered_bed,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
@@ -421,7 +408,6 @@ workflow {{ project_name }} {

call qualimap.qualimap as qualimap_F7 {
input:
bed=bed,
bam=Dedup_F7.Dedup_bam,
bai=Dedup_F7.Dedup_bam_index,
docker=QUALIMAPdocker,
@@ -434,7 +420,6 @@ workflow {{ project_name }} {
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
fasta=fasta,
ref_dir=ref_dir,
bed=bed,
Dedup_bam=Dedup_F7.Dedup_bam,
Dedup_bam_index=Dedup_F7.Dedup_bam_index,
sample="F7",
@@ -500,11 +485,9 @@ workflow {{ project_name }} {
cluster_config=BIGcluster_config
}

call filter_vcf_bed.filter_vcf_bed as filter_vcf_bed_F7 {
call filter_vcf.filter_vcf as filter_vcf_F7 {
input:
vcf=Haplotyper_F7.vcf,
bed=bed,
benchmark_region=benchmark_region,
project=project,
docker=BEDTOOLSdocker,
cluster_config=SMALLcluster_config,
@@ -513,10 +496,9 @@ workflow {{ project_name }} {

call benchmark.benchmark as benchmark_F7 {
input:
filtered_vcf=filter_vcf_bed_F7.filtered_vcf,
filtered_vcf=filter_vcf_F7.filtered_vcf,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
qc_bed=filter_vcf_bed_F7.filtered_bed,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
@@ -572,7 +554,6 @@ workflow {{ project_name }} {

call qualimap.qualimap as qualimap_M8 {
input:
bed=bed,
bam=Dedup_M8.Dedup_bam,
bai=Dedup_M8.Dedup_bam_index,
docker=QUALIMAPdocker,
@@ -585,7 +566,6 @@ workflow {{ project_name }} {
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
fasta=fasta,
ref_dir=ref_dir,
bed=bed,
Dedup_bam=Dedup_M8.Dedup_bam,
Dedup_bam_index=Dedup_M8.Dedup_bam_index,
sample="M8",
@@ -651,11 +631,9 @@ workflow {{ project_name }} {
cluster_config=BIGcluster_config
}

call filter_vcf_bed.filter_vcf_bed as filter_vcf_bed_M8 {
call filter_vcf.filter_vcf as filter_vcf_M8 {
input:
vcf=Haplotyper_M8.vcf,
bed=bed,
benchmark_region=benchmark_region,
project=project,
docker=BEDTOOLSdocker,
cluster_config=SMALLcluster_config,
@@ -664,10 +642,9 @@ workflow {{ project_name }} {

call benchmark.benchmark as benchmark_M8 {
input:
filtered_vcf=filter_vcf_bed_M8.filtered_vcf,
filtered_vcf=filter_vcf_M8.filtered_vcf,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
qc_bed=filter_vcf_bed_M8.filtered_bed,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
@@ -751,11 +728,9 @@ workflow {{ project_name }} {
}

if (vcf_D5!= "") {
call filter_vcf_bed.filter_vcf_bed as filter_vcf_bed_D5_vcf {
call filter_vcf.filter_vcf as filter_vcf_D5_vcf {
input:
vcf=vcf_D5,
bed=bed,
benchmark_region=benchmark_region,
project=project,
docker=BEDTOOLSdocker,
cluster_config=SMALLcluster_config,
@@ -764,21 +739,18 @@ workflow {{ project_name }} {

call benchmark.benchmark as benchmark_D5_vcf {
input:
filtered_vcf=filter_vcf_bed_D5_vcf.filtered_vcf,
filtered_vcf=filter_vcf_D5_vcf.filtered_vcf,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
qc_bed=filter_vcf_bed_D5.filtered_bed,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size
}

call filter_vcf_bed.filter_vcf_bed as filter_vcf_bed_D6_vcf {
call filter_vcf.filter_vcf as filter_vcf_D6_vcf {
input:
vcf=vcf_D6,
bed=bed,
benchmark_region=benchmark_region,
project=project,
docker=BEDTOOLSdocker,
cluster_config=SMALLcluster_config,
@@ -787,21 +759,18 @@ workflow {{ project_name }} {

call benchmark.benchmark as benchmark_D6_vcf {
input:
filtered_vcf=filter_vcf_bed_D6.filtered_vcf,
filtered_vcf=filter_vcf_D6_vcf.filtered_vcf,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
qc_bed=filter_vcf_bed_D6.filtered_bed,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size
}

call filter_vcf_bed.filter_vcf_bed as filter_vcf_bed_F7_vcf {
call filter_vcf.filter_vcf as filter_vcf_F7_vcf {
input:
vcf=vcf_F7,
bed=bed,
benchmark_region=benchmark_region,
project=project,
docker=BEDTOOLSdocker,
cluster_config=SMALLcluster_config,
@@ -810,21 +779,18 @@ workflow {{ project_name }} {

call benchmark.benchmark as benchmark_F7_vcf {
input:
filtered_vcf=filter_vcf_bed_F7_vcf.filtered_vcf,
filtered_vcf=filter_vcf_F7_vcf.filtered_vcf,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
qc_bed=filter_vcf_bed_F7.filtered_bed,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size
}

call filter_vcf_bed.filter_vcf_bed as filter_vcf_bed_M8_vcf {
call filter_vcf.filter_vcf as filter_vcf_M8_vcf {
input:
vcf=vcf_M8,
bed=bed,
benchmark_region=benchmark_region,
project=project,
docker=BEDTOOLSdocker,
cluster_config=SMALLcluster_config,
@@ -833,10 +799,9 @@ workflow {{ project_name }} {

call benchmark.benchmark as benchmark_M8_vcf {
input:
filtered_vcf=filter_vcf_bed_M8_vcf.filtered_vcf,
filtered_vcf=filter_vcf_M8_vcf.filtered_vcf,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
qc_bed=filter_vcf_bed_M8.filtered_bed,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,

Loading…
Peruuta
Tallenna