"FASTQSCREENdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0", | "FASTQSCREENdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0", | ||||
"SENTIEON_LICENSE": "192.168.0.55:8990", | "SENTIEON_LICENSE": "192.168.0.55:8990", | ||||
"SENTIEONdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2019.11.28", | "SENTIEONdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2019.11.28", | ||||
"REPLACE_SENTIEON_DOCKER": "xxx", | |||||
"DEEPVARIANT_DOCKER": "xxx", | |||||
"QUALIMAPdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0", | "QUALIMAPdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0", | ||||
"GATK_DOCKER": "", | |||||
"vcf_D5": "", | "vcf_D5": "", | ||||
"benchmark_region": "oss://pgx-result/renluyao/manuscript_v3.0/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed", | "benchmark_region": "oss://pgx-result/renluyao/manuscript_v3.0/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed", | ||||
"db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf", | "db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf", | ||||
"dbsnp": "dbsnp_146.hg38.vcf", | "dbsnp": "dbsnp_146.hg38.vcf", | ||||
"MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | "MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | ||||
"DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | "DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | ||||
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | |||||
} | |||||
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/", | |||||
"pl": "ILLUMINA" | |||||
} |
"{{ project_name }}.fastq_1_D6": "{{ fastq_1_D6 }}", | "{{ project_name }}.fastq_1_D6": "{{ fastq_1_D6 }}", | ||||
"{{ project_name }}.fastq_1_F7": "{{ fastq_1_F7 }}", | "{{ project_name }}.fastq_1_F7": "{{ fastq_1_F7 }}", | ||||
"{{ project_name }}.SENTIEONdocker": "{{ SENTIEONdocker }}", | "{{ project_name }}.SENTIEONdocker": "{{ SENTIEONdocker }}", | ||||
"{{ project_name }}.REPLACE_SENTIEON_DOCKER": "{{ REPLACE_SENTIEON_DOCKER }}", | |||||
"{{ project_name }}.DEEPVARIANT_DOCKER": "{{ DEEPVARIANT_DOCKER }}", | |||||
"{{ project_name }}.QUALIMAPdocker": "{{ QUALIMAPdocker }}", | "{{ project_name }}.QUALIMAPdocker": "{{ QUALIMAPdocker }}", | ||||
"{{ project_name }}.vcf_D5": "{{ vcf_D5 }}", | "{{ project_name }}.vcf_D5": "{{ vcf_D5 }}", | ||||
"{{ project_name }}.benchmark_region": "{{ benchmark_region }}", | "{{ project_name }}.benchmark_region": "{{ benchmark_region }}", |
File ref_dir
File dbsnp_dir
File dbmills_dir
String SENTIEON_INSTALL_DIR
String fasta
String dbsnp
String db_mills
command <<<
set -o pipefail
set -e
# export SENTIEON_LICENSE=192.168.0.55:8990
# nt=$(nproc)
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} -q ${sample}_recal_data.table --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table.post --algo ReadWriter ${sample}.sorted.deduped.realigned.recaled.bam
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --algo QualCal --plot --before ${sample}_recal_data.table --after ${sample}_recal_data.table.post ${sample}_recal_data.csv
# ${SENTIEON_INSTALL_DIR}/bin/sentieon plot QualCal -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv
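# Open-source replacement: GATK4 BQSR in two steps. BaseRecalibrator builds the
# recalibration table from the known-sites VCFs; ApplyBQSR writes the
# recalibrated BAM.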
gatk BaseRecalibrator \
-R ${ref_dir}/${fasta} \
-I ${realigned_bam} \
--known-sites ${dbsnp_dir}/${dbsnp} \
--known-sites ${dbmills_dir}/${db_mills} \
-O ${sample}_recal_data.table
gatk ApplyBQSR \
-R ${ref_dir}/${fasta} \
-I ${realigned_bam} \
-bqsr ${sample}_recal_data.table \
-O ${sample}.sorted.deduped.realigned.recaled.bam
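# Index the recalibrated BAM; a recent samtools (>= 1.16) is assumed, since the
# index path is passed explicitly with -o.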
samtools index -@ $(nproc) -o ${sample}.sorted.deduped.realigned.recaled.bam.bai ${sample}.sorted.deduped.realigned.recaled.bam
# gatk BaseRecalibrator \
# -R ${ref_dir}/${fasta} \
# -I ${sample}.sorted.deduped.realigned.recaled.bam \
# --known-sites ${dbsnp_dir}/${dbsnp} \
# --known-sites ${dbmills_dir}/${db_mills} \
# -O ${sample}_recal_data.table.post
# gatk AnalyzeCovariates \
# -before ${sample}_recal_data.table \
# -after ${sample}_recal_data.table.post \
# -csv ${sample}_recal_data.csv
>>>
runtime {
docker:docker
}
output {
# File recal_table = "${sample}_recal_data.table"
# File recal_post = "${sample}_recal_data.table.post"
File recaled_bam = "${sample}.sorted.deduped.realigned.recaled.bam"
File recaled_bam_index = "${sample}.sorted.deduped.realigned.recaled.bam.bai"
# File recal_csv = "${sample}_recal_data.csv"
# File bqsrreport_pdf = "${sample}_bqsrreport.pdf"
}
}
task Dedup {
String SENTIEON_INSTALL_DIR
File sorted_bam
File sorted_bam_index
String sample = basename(sorted_bam,".sorted.bam")
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=192.168.0.55:8990
nt=$(nproc)
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo LocusCollector --fun score_info ${sample}_score.txt
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam
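# Picard MarkDuplicates replaces Sentieon LocusCollector/Dedup; with
# REMOVE_DUPLICATES=true duplicate reads are dropped rather than only flagged,
# matching the --rmdup behaviour above.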
java -jar picard.jar MarkDuplicates \
-I ${sorted_bam} \
-O ${sample}.sorted.deduped.bam \
-M ${sample}_dedup_metrics.txt \
--REMOVE_DUPLICATES true
samtools index -@ $(nproc) -o ${sample}.sorted.deduped.bam.bai ${sample}.sorted.deduped.bam
>>>
runtime {
docker:docker
}
output {
# File score = "${sample}_score.txt"
# File dedup_metrics = "${sample}_dedup_metrics.txt"
File Dedup_bam = "${sample}.sorted.deduped.bam"
File Dedup_bam_index = "${sample}.sorted.deduped.bam.bai"
}
task Haplotyper {
File ref_dir
String SENTIEON_INSTALL_DIR
String fasta
File recaled_bam
File recaled_bam_index
command <<<
set -o pipefail
set -e
# export SENTIEON_LICENSE=192.168.0.55:8990
# nt=$(nproc)
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${recaled_bam} --algo Haplotyper ${sample}_hc.vcf
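# DeepVariant (WGS model) is used here in place of Sentieon Haplotyper,
# with one shard per available core.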
/opt/deepvariant/bin/run_deepvariant \
--model_type=WGS \
--ref=${ref_dir}/${fasta} \
--reads=${recaled_bam} \
--output_vcf=${sample}_hc.vcf \
--num_shards=$(nproc)
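# Index the VCF so downstream GATK tools can consume it directly.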
gatk IndexFeatureFile -I ${sample}_hc.vcf -O ${sample}_hc.vcf.idx
>>>
runtime {
command <<<
set -o pipefail
set -e
# export SENTIEON_LICENSE=192.168.0.55:8990
# nt=$(nproc)
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver /
# -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} /
# --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics /
# --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt /
# --algo QualDistribution ${sample}_deduped_qd_metrics.txt /
# --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt /
# --algo AlignmentStat ${sample}_deduped_aln_metrics.txt /
# --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt /
# --algo QualityYield ${sample}_deduped_QualityYield.txt /
# --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
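# Picard equivalents of a subset of the Sentieon metrics above: alignment,
# insert-size, quality-yield and whole-genome metrics are reproduced below.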
# alignment stats
java -jar picard.jar CollectAlignmentSummaryMetrics \
-I ${Dedup_bam} \
-O ${sample}_deduped_aln_metrics.txt \
-R ${ref_dir}/${fasta} \
--VALIDATION_STRINGENCY LENIENT
# insert size stats
java -jar picard.jar CollectInsertSizeMetrics \
-I ${Dedup_bam} \
-O ${sample}_deduped_is_metrics.txt \
-H ${sample}_deduped_is_metrics.pdf
# quality yield stats
java -jar picard.jar CollectQualityYieldMetrics \
-I ${Dedup_bam} \
-O ${sample}_deduped_QualityYield.txt
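# whole-genome coverage stats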
java -jar picard.jar CollectWgsMetrics \
-I ${Dedup_bam} \
-O ${sample}_deduped_WgsMetricsAlgo.txt \
-R ${ref_dir}/${fasta} \
--VALIDATION_STRINGENCY LENIENT
>>>
runtime {
}
output {
# File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
# File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
# File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
# File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
# File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
# File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
# File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
# File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
# File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
File dedeuped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"
File fastq_1
File fastq_2
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String group
String sample
String project
command <<<
set -o pipefail
set -e
# export SENTIEON_LICENSE=${SENTIEON_LICENSE}
# nt=$(nproc)
# ${SENTIEON_INSTALL_DIR}/bin/bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} | ${SENTIEON_INSTALL_DIR}/bin/sentieon util sort -o ${user_define_name}_${project}_${sample}.sorted.bam -t $nt --sam2bam -i -
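# bwa mem piped into samtools for BAM conversion and coordinate sorting,
# replacing the Sentieon util sort step; the @RG read group is required by
# downstream BQSR and variant calling.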
bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $(nproc) -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} \
| samtools view -bS -@ $(nproc) - \
| samtools sort -@ $(nproc) -o ${user_define_name}_${project}_${sample}.sorted.bam -
samtools index -@ $(nproc) \
-o ${user_define_name}_${project}_${sample}.sorted.bam.bai \
${user_define_name}_${project}_${sample}.sorted.bam
>>>
runtime {
File is_metrics
String sample = basename(quality_yield,"_deduped_QualityYield.txt")
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
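# Picard metrics files begin with several comment/header lines, so the column
# header and first data row sit lower than in the Sentieon output (header on
# line 7, data on line 8; for the alignment summary, line 10 is the PAIR row);
# hence the shifted sed line numbers below.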
# cat ${quality_yield} | sed -n '2,2p' > quality_yield.header
# cat ${quality_yield} | sed -n '3,3p' > ${sample}.quality_yield
cat ${quality_yield} | sed -n '7,7p' > quality_yield.header
cat ${quality_yield} | sed -n '8,8p' > ${sample}.quality_yield
# cat ${wgs_metrics_algo} | sed -n '2,2p' > wgs_metrics_algo.header
# cat ${wgs_metrics_algo} | sed -n '3,3p' > ${sample}.wgs_metrics_algo
cat ${wgs_metrics_algo} | sed -n '7,7p' > wgs_metrics_algo.header
cat ${wgs_metrics_algo} | sed -n '8,8p' > ${sample}.wgs_metrics_algo
# cat ${aln_metrics} | sed -n '2,2p' > aln_metrics.header
# cat ${aln_metrics} | sed -n '5,5p' > ${sample}.aln_metrics
cat ${aln_metrics} | sed -n '7,7p' > aln_metrics.header
cat ${aln_metrics} | sed -n '10,10p' > ${sample}.aln_metrics
# cat ${is_metrics} | sed -n '2,2p' > is_metrics.header
# cat ${is_metrics} | sed -n '3,3p' > ${sample}.is_metrics
cat ${is_metrics} | sed -n '7,7p' > is_metrics.header
cat ${is_metrics} | sed -n '8,8p' > ${sample}.is_metrics
>>>
runtime {
docker:docker
cluster:cluster_config
systemDisk:"cloud_ssd 40"
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"