ソースを参照

test modification

tags/v0.3.0
Haonan917 1年前
コミット
55ababb512
9個のファイルの変更710行の追加649行の削除
  1. +6
    -2
      defaults
  2. +2
    -0
      inputs
  3. +36
    -14
      tasks/BQSR.wdl
  4. +12
    -7
      tasks/Dedup.wdl
  5. +12
    -5
      tasks/Haplotyper.wdl
  6. +46
    -12
      tasks/deduped_Metrics.wdl
  7. +10
    -5
      tasks/mapping.wdl
  8. +17
    -11
      tasks/sentieon.wdl
  9. +569
    -593
      workflow.wdl

+ 6
- 2
defaults ファイルの表示

@@ -17,12 +17,16 @@
"FASTQSCREENdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
"SENTIEON_LICENSE": "192.168.0.55:8990",
"SENTIEONdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2019.11.28",
"REPLACE_SENTIEON_DOCKER": "xxx",
"DEEPVARIANT_DOCKER": "xxx",
"QUALIMAPdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
"GATK_DOCKER": "",
"vcf_D5": "",
"benchmark_region": "oss://pgx-result/renluyao/manuscript_v3.0/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed",
"db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf",
"dbsnp": "dbsnp_146.hg38.vcf",
"MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4",
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/"
}
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/",
"pl": "ILLUMINA"
}

+ 2
- 0
inputs ファイルの表示

@@ -28,6 +28,8 @@
"{{ project_name }}.fastq_1_D6": "{{ fastq_1_D6 }}",
"{{ project_name }}.fastq_1_F7": "{{ fastq_1_F7 }}",
"{{ project_name }}.SENTIEONdocker": "{{ SENTIEONdocker }}",
"{{ project_name }}.REPLACE_SENTIEON_DOCKER": "{{ REPLACE_SENTIEON_DOCKER }}",
"{{ project_name }}.DEEPVARIANT_DOCKER": "{{ DEEPVARIANT_DOCKER }}",
"{{ project_name }}.QUALIMAPdocker": "{{ QUALIMAPdocker }}",
"{{ project_name }}.vcf_D5": "{{ vcf_D5 }}",
"{{ project_name }}.benchmark_region": "{{ benchmark_region }}",

+ 36
- 14
tasks/BQSR.wdl ファイルの表示

@@ -3,7 +3,6 @@ task BQSR {
File ref_dir
File dbsnp_dir
File dbmills_dir
String SENTIEON_INSTALL_DIR
String fasta
String dbsnp
String db_mills
@@ -18,17 +17,40 @@ task BQSR {
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=192.168.0.55:8990
nt=$(nproc)

${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table

${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} -q ${sample}_recal_data.table --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table.post --algo ReadWriter ${sample}.sorted.deduped.realigned.recaled.bam

${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --algo QualCal --plot --before ${sample}_recal_data.table --after ${sample}_recal_data.table.post ${sample}_recal_data.csv

${SENTIEON_INSTALL_DIR}/bin/sentieon plot QualCal -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv
# export SENTIEON_LICENSE=192.168.0.55:8990
# nt=$(nproc)
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} -q ${sample}_recal_data.table --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table.post --algo ReadWriter ${sample}.sorted.deduped.realigned.recaled.bam
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --algo QualCal --plot --before ${sample}_recal_data.table --after ${sample}_recal_data.table.post ${sample}_recal_data.csv
# ${SENTIEON_INSTALL_DIR}/bin/sentieon plot QualCal -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv

# Build the base-quality recalibration table from the input BAM, using the
# dbSNP and Mills VCFs as known sites.
gatk BaseRecalibrator \
-R ${ref_dir}/${fasta} \
-I ${realigned_bam} \
--known-sites ${dbsnp_dir}/${dbsnp} \
--known-sites ${dbmills_dir}/${db_mills} \
-O ${sample}_recal_data.table
# Apply the table produced above to the same input BAM and write the
# recalibrated BAM.
gatk ApplyBQSR \
-R ${ref_dir}/${fasta} \
-I ${realigned_bam} \
-bqsr ${sample}_recal_data.table \
-O ${sample}.sorted.deduped.realigned.recaled.bam
# Write the .bai index next to the recalibrated BAM, one thread per CPU reported by nproc.
samtools index -@ $(nproc) -o ${sample}.sorted.deduped.realigned.recaled.bam.bai ${sample}.sorted.deduped.realigned.recaled.bam
# gatk BaseRecalibrator \
# -R ${ref_dir}/${fasta} \
# -I ${sample}.sorted.deduped.realigned.recaled.bam \
# --known-sites ${dbsnp_dir}/${dbsnp} \
# --known-sites ${dbmills_dir}/${db_mills} \
# -O ${sample}_recal_data.table.post
# gatk AnalyzeCovariates \
# -before ${sample}_recal_data.table \
# -after ${sample}_recal_data.table.post \
# -csv ${sample}_recal_data.csv
>>>
runtime {
docker:docker
@@ -38,11 +60,11 @@ task BQSR {
}

output {
File recal_table = "${sample}_recal_data.table"
File recal_post = "${sample}_recal_data.table.post"
# File recal_table = "${sample}_recal_data.table"
# File recal_post = "${sample}_recal_data.table.post"
File recaled_bam = "${sample}.sorted.deduped.realigned.recaled.bam"
File recaled_bam_index = "${sample}.sorted.deduped.realigned.recaled.bam.bai"
File recal_csv = "${sample}_recal_data.csv"
File bqsrreport_pdf = "${sample}_bqsrreport.pdf"
# File recal_csv = "${sample}_recal_data.csv"
# File bqsrreport_pdf = "${sample}_bqsrreport.pdf"
}
}

+ 12
- 7
tasks/Dedup.wdl ファイルの表示

@@ -1,6 +1,4 @@
task Dedup {

String SENTIEON_INSTALL_DIR
File sorted_bam
File sorted_bam_index
String sample = basename(sorted_bam,".sorted.bam")
@@ -12,10 +10,17 @@ task Dedup {
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=192.168.0.55:8990
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo LocusCollector --fun score_info ${sample}_score.txt
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo LocusCollector --fun score_info ${sample}_score.txt
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam
# Mark and remove duplicate reads with Picard MarkDuplicates.
# Every argument line except the last must end with a backslash; otherwise the
# shell ends the command early and runs the remaining options as a separate
# (failing) command, and Picard never receives its metrics-file argument.
java -jar picard.jar MarkDuplicates \
    -I ${sorted_bam} \
    -O ${sample}.sorted.deduped.bam \
    -M ${sample}_dedup_metrics.txt \
    --REMOVE_DUPLICATES
# Write the .bai index for the deduplicated BAM, one thread per CPU reported by nproc.
samtools index -@ $(nproc) -o ${sample}.sorted.deduped.bam.bai ${sample}.sorted.deduped.bam
>>>
runtime {
docker:docker
@@ -25,8 +30,8 @@ task Dedup {
}

output {
File score = "${sample}_score.txt"
File dedup_metrics = "${sample}_dedup_metrics.txt"
# File score = "${sample}_score.txt"
# File dedup_metrics = "${sample}_dedup_metrics.txt"
File Dedup_bam = "${sample}.sorted.deduped.bam"
File Dedup_bam_index = "${sample}.sorted.deduped.bam.bai"
}

+ 12
- 5
tasks/Haplotyper.wdl ファイルの表示

@@ -1,7 +1,5 @@
task Haplotyper {
File ref_dir
String SENTIEON_INSTALL_DIR
String fasta
File recaled_bam
File recaled_bam_index
@@ -13,9 +11,18 @@ task Haplotyper {
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=192.168.0.55:8990
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${recaled_bam} --algo Haplotyper ${sample}_hc.vcf
# export SENTIEON_LICENSE=192.168.0.55:8990
# nt=$(nproc)
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${recaled_bam} --algo Haplotyper ${sample}_hc.vcf

# Call variants on the recalibrated BAM with DeepVariant using the WGS model;
# the shard count is taken from nproc.
/opt/deepvariant/bin/run_deepvariant \
--model_type=WGS \
--ref=${ref_dir}/${fasta} \
--reads=${recaled_bam} \
--output_vcf=${sample}_hc.vcf \
--num_shards=$(nproc)
# Create the .idx feature index for the VCF written above.
gatk IndexFeatureFile -I ${sample}_hc.vcf -O ${sample}_hc.vcf.idx
>>>
runtime {

+ 46
- 12
tasks/deduped_Metrics.wdl ファイルの表示

@@ -14,9 +14,43 @@ task deduped_Metrics {
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=192.168.0.55:8990
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt --algo QualDistribution ${sample}_deduped_qd_metrics.txt --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt --algo AlignmentStat ${sample}_deduped_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt --algo QualityYield ${sample}_deduped_QualityYield.txt --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
# export SENTIEON_LICENSE=192.168.0.55:8990
# nt=$(nproc)
# ${SENTIEON_INSTALL_DIR}/bin/sentieon driver /
# -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} /
# --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics /
# --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt /
# --algo QualDistribution ${sample}_deduped_qd_metrics.txt /
# --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt /
# --algo AlignmentStat ${sample}_deduped_aln_metrics.txt /
# --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt /
# --algo QualityYield ${sample}_deduped_QualityYield.txt /
# --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
# Collect QC metrics from the deduplicated BAM with Picard.
# Line continuations must be backslashes: a trailing "/" is handed to the tool
# as a literal argument and the following option lines are then executed as
# separate shell commands.

# alignment stats
java -jar picard.jar CollectAlignmentSummaryMetrics \
    -I ${Dedup_bam} \
    -O ${sample}_deduped_aln_metrics.txt \
    -R ${ref_dir}/${fasta} \
    --VALIDATION_STRINGENCY LENIENT

# insert size stats (-H names the histogram PDF)
java -jar picard.jar CollectInsertSizeMetrics \
    -I ${Dedup_bam} \
    -O ${sample}_deduped_is_metrics.txt \
    -H ${sample}_deduped_is_metrics.pdf

# quality yield stats
java -jar picard.jar CollectQualityYieldMetrics \
    -I ${Dedup_bam} \
    -O ${sample}_deduped_QualityYield.txt

# whole-genome coverage stats
java -jar picard.jar CollectWgsMetrics \
    -I ${Dedup_bam} \
    -O ${sample}_deduped_WgsMetricsAlgo.txt \
    -R ${ref_dir}/${fasta} \
    --VALIDATION_STRINGENCY LENIENT

>>>

runtime {
@@ -27,15 +61,15 @@ task deduped_Metrics {
}

output {
File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
# File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
# File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
# File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
# File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
# File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
# File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
# File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
# File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
# File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
File dedeuped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"

+ 10
- 5
tasks/mapping.wdl ファイルの表示

@@ -5,8 +5,6 @@ task mapping {
File fastq_1
File fastq_2

String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String group
String sample
String project
@@ -19,9 +17,16 @@ task mapping {
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} | ${SENTIEON_INSTALL_DIR}/bin/sentieon util sort -o ${user_define_name}_${project}_${sample}.sorted.bam -t $nt --sam2bam -i -
# export SENTIEON_LICENSE=${SENTIEON_LICENSE}
# nt=$(nproc)
# ${SENTIEON_INSTALL_DIR}/bin/bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} | ${SENTIEON_INSTALL_DIR}/bin/sentieon util sort -o ${user_define_name}_${project}_${sample}.sorted.bam -t $nt --sam2bam -i -
# Align the paired FASTQ files with bwa mem (read group built from group,
# sample and pl), convert the SAM stream to BAM, and sort it into the
# per-sample .sorted.bam file. Thread counts come from nproc.
bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $(nproc) -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} \
| samtools view -bS -@ $(nproc) - \
| samtools sort -@ $(nproc) -o ${user_define_name}_${project}_${sample}.sorted.bam -
# Write the .bai index next to the sorted BAM.
samtools index -@ $(nproc) \
-o ${user_define_name}_${project}_${sample}.sorted.bam.bai \
${user_define_name}_${project}_${sample}.sorted.bam
>>>

runtime {

+ 17
- 11
tasks/sentieon.wdl ファイルの表示

@@ -5,30 +5,36 @@ task sentieon {
File is_metrics

String sample = basename(quality_yield,"_deduped_QualityYield.txt")
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -o pipefail
set -e

cat ${quality_yield} | sed -n '2,2p' > quality_yield.header
cat ${quality_yield} | sed -n '3,3p' > ${sample}.quality_yield
# cat ${quality_yield} | sed -n '2,2p' > quality_yield.header
# cat ${quality_yield} | sed -n '3,3p' > ${sample}.quality_yield
cat ${quality_yield} | sed -n '7,7p' > quality_yield.header
cat ${quality_yield} | sed -n '8,8p' > ${sample}.quality_yield

cat ${wgs_metrics_algo} | sed -n '2,2p' > wgs_metrics_algo.header
cat ${wgs_metrics_algo} | sed -n '3,3p' > ${sample}.wgs_metrics_algo
# cat ${wgs_metrics_algo} | sed -n '2,2p' > wgs_metrics_algo.header
# cat ${wgs_metrics_algo} | sed -n '3,3p' > ${sample}.wgs_metrics_algo
cat ${wgs_metrics_algo} | sed -n '7,7p' > wgs_metrics_algo.header
cat ${wgs_metrics_algo} | sed -n '8,8p' > ${sample}.wgs_metrics_algo

cat ${aln_metrics} | sed -n '2,2p' > aln_metrics.header
cat ${aln_metrics} | sed -n '5,5p' > ${sample}.aln_metrics
# cat ${aln_metrics} | sed -n '2,2p' > aln_metrics.header
# cat ${aln_metrics} | sed -n '5,5p' > ${sample}.aln_metrics
cat ${aln_metrics} | sed -n '7,7p' > aln_metrics.header
cat ${aln_metrics} | sed -n '10,10p' > ${sample}.aln_metrics

cat ${is_metrics} | sed -n '2,2p' > is_metrics.header
cat ${is_metrics} | sed -n '3,3p' > ${sample}.is_metrics
# cat ${is_metrics} | sed -n '2,2p' > is_metrics.header
# cat ${is_metrics} | sed -n '3,3p' > ${sample}.is_metrics
cat ${is_metrics} | sed -n '7,7p' > is_metrics.header
cat ${is_metrics} | sed -n '8,8p' > ${sample}.is_metrics

>>>

runtime {
docker:docker
cluster:cluster_config
systemDisk:"cloud_ssd 40"
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"

+ 569
- 593
workflow.wdl
ファイル差分が大きすぎるため省略します
ファイルの表示


読み込み中…
キャンセル
保存