Procházet zdrojové kódy

Add: replaceRG

master
YaqingLiu před 3 roky
rodič
revize
7e4d99c1a7
6 změnil soubory, kde provedl 214 přidání a 55 odebrání
  1. +3
    -2
      defaults
  2. +3
    -2
      inputs
  3. +3
    -3
      tasks/Haplotyper.wdl
  4. +76
    -0
      tasks/pindel.wdl
  5. +28
    -0
      tasks/replaceRG.wdl
  6. +101
    -48
      workflow.wdl

+ 3
- 2
defaults Zobrazit soubor

@@ -4,9 +4,8 @@
"normal_fastq_1": "",
"normal_fastq_2": "",
"tumor_deduped_bam": "",
"tumor_deduped_bam_index": "",
"normal_deduped_bam": "",
"normal_deduped_bam_index": "",
"pl": "ILLUMINA",
"fasta": "GRCh38.d1.vd1.fa",
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/",
"dbsnp": "dbsnp_146.hg38.vcf",
@@ -23,6 +22,8 @@
"interval_padding": "0",
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v202010.02",
"varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3",
"pindel_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/pindel:0.2.5b9_3",
"samtools_docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2019.10",
"vep_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/vep:v104.0",
"cache": "oss://pgx-reference-data/ensembl_vep/",

+ 3
- 2
inputs Zobrazit soubor

@@ -5,14 +5,15 @@
"{{ project_name }}.normal_fastq_1": "{{ normal_fastq_1 }}",
"{{ project_name }}.normal_fastq_2": "{{ normal_fastq_2 }}",
"{{ project_name }}.tumor_deduped_bam": "{{ tumor_deduped_bam }}",
"{{ project_name }}.tumor_deduped_bam_index": "{{ tumor_deduped_bam_index }}",
"{{ project_name }}.normal_deduped_bam": "{{ normal_deduped_bam }}",
"{{ project_name }}.normal_deduped_bam_index": "{{ normal_deduped_bam_index }}",
"{{ project_name }}.SENTIEON_INSTALL_DIR": "{{ SENTIEON_INSTALL_DIR }}",
"{{ project_name }}.SENTIEON_LICENSE": "{{ SENTIEON_LICENSE }}",
"{{ project_name }}.sentieon_docker": "{{ sentieon_docker }}",
"{{ project_name }}.varscan_docker": "{{ varscan_docker }}",
"{{ project_name }}.pindel_docker": "{{ pindel_docker }}",
"{{ project_name }}.samtools_docker": "{{ samtools_docker }}",
"{{ project_name }}.annovar_docker": "{{ annovar_docker }}",
"{{ project_name }}.pl": "{{ pl }}",
"{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.ref_dir": "{{ ref_dir }}",
"{{ project_name }}.dbsnp": "{{ dbsnp }}",

+ 3
- 3
tasks/Haplotyper.wdl Zobrazit soubor

@@ -33,7 +33,7 @@ task Haplotyper {
--interval ${regions} -r ${ref_dir}/${fasta} \
-i ${recaled_bam} \
--algo Haplotyper -d ${dbsnp_dir}/${dbsnp} \
${sample}_hc.vcf
${sample}.Haplotyper.vcf
>>>
runtime {
@@ -44,8 +44,8 @@ task Haplotyper {
}
output {
File vcf = "${sample}_hc.vcf"
File vcf_idx = "${sample}_hc.vcf.idx"
File vcf = "${sample}.Haplotyper.vcf"
File vcf_idx = "${sample}.Haplotyper.vcf.idx"
}
}

+ 76
- 0
tasks/pindel.wdl Zobrazit soubor

@@ -0,0 +1,76 @@
# Runs Pindel on a single BAM and converts the chromosome-filtered short
# insertion / deletion calls to VCF.
#
# Inputs:
#   sample_id      - prefix used for every file written under ./pindel_result/
#   bam            - alignment file; it is copied into ./input and re-indexed there
#   bam_index      - index of `bam` (localized alongside it; the command builds
#                    its own index for the copied file)
#   ref_dir, fasta - directory and file name of the reference FASTA
#   docker         - image providing samtools, picard, pindel and pindel2vcf
#   cluster_config - value of the `cluster` runtime attribute (name matches what
#                    the workflow calls and the other tasks use)
#   disk_size      - size used in the `dataDisk` runtime attribute
#
# Output:
#   pindel_result  - every file in ./pindel_result/ whose name starts with sample_id
task pindel {
    String sample_id
    File bam
    File bam_index
    File ref_dir
    String fasta

    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        mkdir -p ./pindel_result/
        mkdir -p ./input

        cp ${bam} ./input

        # File name of the copied BAM; computed once and reused below.
        bam_file_name=$(basename ${bam})
        samtools index -@ 4 ./input/$bam_file_name

        java "-Xmx16G" -jar /software/picard/picard.jar CollectInsertSizeMetrics \
            -H ./pindel_result/${sample_id}_picard.pdf \
            -I ./input/$bam_file_name \
            -O ./pindel_result/${sample_id}_picard.txt

        # Integer part of field 6 on line 8 of the Picard metrics file.
        # NOTE(review): presumably the MEAN_INSERT_SIZE column - confirm against
        # the Picard version shipped in the image.
        mean_insert_size=`cat pindel_result/${sample_id}_picard.txt|sed -n '8p'|cut -f 6|cut -d . -f 1`

        # Pindel config line: <bam path> <insert size> <sample label>
        echo -e "./input/$bam_file_name\t$mean_insert_size\t${sample_id}" > ${sample_id}_config.txt

        # The reference path must be the plain interpolated path (no leading '$'),
        # identical to the one given to pindel2vcf below.
        pindel -i ${sample_id}_config.txt \
            -f ${ref_dir}/${fasta} \
            -o ./pindel_result/${sample_id} \
            -c all \
            -T 4 \
            -x 4 \
            -l \
            -B 0 \
            -M 3 \
            -J /software/picard/hg38_ucsc_centromere.bed

        # Collect the header lines of the _SI and _D result files.
        # NOTE(review): grep exits non-zero when nothing matches, which aborts
        # the task under `set -e`.
        grep "ChrID" pindel_result/${sample_id}_SI > pindel_result/${sample_id}_all_indel
        grep "ChrID" pindel_result/${sample_id}_D >> pindel_result/${sample_id}_all_indel

        # Keep only records whose 8th field is chr1..chr22, chrX, chrY or chrM,
        # written in that chromosome order.
        awk -v chrID="chr1" '$8==chrID {print}' pindel_result/${sample_id}_all_indel > pindel_result/${sample_id}_indel
        for i in `seq 2 22` X Y M
        do
            awk -v chrID=chr$i '$8==chrID {print}' pindel_result/${sample_id}_all_indel >> pindel_result/${sample_id}_indel
        done

        pindel2vcf -r ${ref_dir}/${fasta} \
            -R GRCh38.d1.vd1 \
            -d GDC \
            -p pindel_result/${sample_id}_indel \
            -v pindel_result/${sample_id}.pindel.indel.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        Array[File] pindel_result = glob("./pindel_result/${sample_id}*")
    }
}

+ 28
- 0
tasks/replaceRG.wdl Zobrazit soubor

@@ -0,0 +1,28 @@
# Rewrites the read group of a BAM with `samtools addreplacerg` and indexes
# the result.
#
# Inputs:
#   bam            - input alignment file
#   sample         - value written to the SM field of the @RG line
#   group          - value written to the ID field of the @RG line
#   pl             - value written to the PL field of the @RG line
#   docker         - image providing samtools
#   cluster_config - value of the `cluster` runtime attribute
#   disk_size      - size used in the `dataDisk` runtime attribute
#
# Outputs:
#   bam            - re-tagged BAM, written to the working directory under the
#                    input's base name
#   bam_index      - index of that BAM
task replaceRG {

    File bam
    String sample
    String group
    String pl
    String bam_name = basename(bam, ".bam")
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Stop at the first failing command so a failed re-tag is not masked.
        set -o pipefail
        set -e

        # -O bam is given explicitly so the output is BAM even with samtools
        # releases that do not pick the format from the output file extension;
        # `samtools index` below needs a BAM.
        samtools addreplacerg -r "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -O bam -o ${bam_name}.bam ${bam}
        samtools index ${bam_name}.bam ${bam_name}.bam.bai
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File bam = "${bam_name}.bam"
        File bam_index = "${bam_name}.bam.bai"
    }
}


+ 101
- 48
workflow.wdl Zobrazit soubor

@@ -9,6 +9,8 @@ import "./tasks/TNscope.wdl" as TNscope
import "./tasks/somatic.wdl" as somatic
import "./tasks/processSomatic.wdl" as processSomatic
import "./tasks/somaticFilter.wdl" as somaticFilter
import "./tasks/replaceRG.wdl" as replaceRG
import "./tasks/pindel.wdl" as pindel
import "./tasks/ANNOVAR.wdl" as ANNOVAR
import "./tasks/VEP.wdl" as VEP

@@ -21,9 +23,7 @@ workflow {{ project_name }} {
File? normal_fastq_1
File? normal_fastq_2
File? tumor_deduped_bam
File? tumor_deduped_bam_index
File? normal_deduped_bam
File? normal_deduped_bam_index

Boolean input_fastq
Boolean input_bam
@@ -32,9 +32,12 @@ workflow {{ project_name }} {
String SENTIEON_LICENSE
String sentieon_docker
String varscan_docker
String pindel_docker
String samtools_docker
String annovar_docker
String vep_docker

String pl
File ref_dir
String fasta
File dbmills_dir
@@ -72,13 +75,13 @@ workflow {{ project_name }} {
if (tumor_fastq_1!= "") {
call mapping.mapping as tumor_mapping {
input:
group=sample_id + '_tumor',
sample=sample_id + '_tumor',
group=sample_id + '.T',
sample=sample_id + '.T',
fastq_1=tumor_fastq_1,
fastq_2=tumor_fastq_2,
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
pl="ILLUMINAL",
pl=pl,
fasta=fasta,
ref_dir=ref_dir,
docker=sentieon_docker,
@@ -94,7 +97,7 @@ workflow {{ project_name }} {
ref_dir=ref_dir,
sorted_bam=tumor_mapping.sorted_bam,
sorted_bam_index=tumor_mapping.sorted_bam_index,
sample=sample_id + '_tumor',
sample=sample_id + '.T',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
@@ -106,7 +109,7 @@ workflow {{ project_name }} {
SENTIEON_LICENSE=SENTIEON_LICENSE,
sorted_bam=tumor_mapping.sorted_bam,
sorted_bam_index=tumor_mapping.sorted_bam_index,
sample=sample_id + '_tumor',
sample=sample_id + '.T',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
@@ -120,7 +123,7 @@ workflow {{ project_name }} {
ref_dir=ref_dir,
deduped_bam=tumor_Dedup.deduped_bam,
deduped_bam_index=tumor_Dedup.deduped_bam_index,
sample=sample_id + '_tumor',
sample=sample_id + '.T',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
@@ -138,7 +141,7 @@ workflow {{ project_name }} {
dbmills_dir=dbmills_dir,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
sample=sample_id + '_tumor',
sample=sample_id + '.T',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
@@ -148,13 +151,13 @@ workflow {{ project_name }} {
if (normal_fastq_1!= "") {
call mapping.mapping as normal_mapping {
input:
group=sample_id + '_normal',
sample=sample_id + '_normal',
group=sample_id + '.N',
sample=sample_id + '.N',
fastq_1=normal_fastq_1,
fastq_2=normal_fastq_2,
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
pl="ILLUMINAL",
pl=pl,
fasta=fasta,
ref_dir=ref_dir,
docker=sentieon_docker,
@@ -170,7 +173,7 @@ workflow {{ project_name }} {
ref_dir=ref_dir,
sorted_bam=normal_mapping.sorted_bam,
sorted_bam_index=normal_mapping.sorted_bam_index,
sample=sample_id + '_normal',
sample=sample_id + '.N',
regions=regions,
docker=sentieon_docker,
disk_size=disk_size,
@@ -183,7 +186,7 @@ workflow {{ project_name }} {
SENTIEON_LICENSE=SENTIEON_LICENSE,
sorted_bam=normal_mapping.sorted_bam,
sorted_bam_index=normal_mapping.sorted_bam_index,
sample=sample_id + '_normal',
sample=sample_id + '.N',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
@@ -197,7 +200,7 @@ workflow {{ project_name }} {
ref_dir=ref_dir,
deduped_bam=normal_Dedup.deduped_bam,
deduped_bam_index=normal_Dedup.deduped_bam_index,
sample=sample_id + '_normal',
sample=sample_id + '.N',
regions=regions,
docker=sentieon_docker,
disk_size=disk_size,
@@ -216,19 +219,19 @@ workflow {{ project_name }} {
dbmills_dir=dbmills_dir,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
sample=sample_id + '_normal',
sample=sample_id + '.N',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}
}
if (haplotyper) {
call Haplotyper.Haplotyper as Haplotyper {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
sample=sample_id + '_normal',
sample=sample_id + '.N',
fasta=fasta,
ref_dir=ref_dir,
recaled_bam=normal_BQSR.recaled_bam,
@@ -241,7 +244,21 @@ workflow {{ project_name }} {
cluster_config=cluster_config
}
}
if (pindel) {
call pindel.pindel as pindel {
input:
sample_id=sample_id + '.N',
bam=normal_BQSR.recaled_bam,
bam_index=normal_BQSR.recaled_bam_index,
fasta=fasta,
ref_dir=ref_dir,
docker=pindel_docker,
disk_size=disk_size,
cluster_config=cluster_config
}
}

if (tnseq) {
call TNseq.TNseq as TNseq {
input:
@@ -252,8 +269,8 @@ workflow {{ project_name }} {
normal_recaled_bam_index=normal_BQSR.recaled_bam_index,
tumor_recaled_bam=tumor_BQSR.recaled_bam,
tumor_recaled_bam_index=tumor_BQSR.recaled_bam_index,
normal_name=sample_id + "_normal",
tumor_name=sample_id + "_tumor",
normal_name=sample_id + ".N",
tumor_name=sample_id + ".T",
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
@@ -286,8 +303,8 @@ workflow {{ project_name }} {
hg=hg,
only_pass=only_pass,
sample_id=sample_id,
tumor_id=sample_id + "_tumor",
normal_id=sample_id + "_normal",
tumor_id=sample_id + ".T",
normal_id=sample_id + ".N",
ref_dir=ref_dir,
fasta=fasta,
vep_path=vep_path,
@@ -311,8 +328,8 @@ workflow {{ project_name }} {
normal_recaled_bam_index=normal_BQSR.recaled_bam_index,
tumor_recaled_bam=tumor_BQSR.recaled_bam,
tumor_recaled_bam_index=tumor_BQSR.recaled_bam_index,
normal_name=sample_id + "_normal",
tumor_name=sample_id + "_tumor",
normal_name=sample_id + ".N",
tumor_name=sample_id + ".T",
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
@@ -345,8 +362,8 @@ workflow {{ project_name }} {
hg=hg,
only_pass=only_pass,
sample_id=sample_id,
tumor_id=sample_id + "_tumor",
normal_id=sample_id + "_normal",
tumor_id=sample_id + ".T",
normal_id=sample_id + ".N",
ref_dir=ref_dir,
fasta=fasta,
vep_path=vep_path,
@@ -420,8 +437,8 @@ workflow {{ project_name }} {
hg=hg,
only_pass=only_pass,
sample_id=sample_id,
tumor_id=sample_id + "_tumor",
normal_id=sample_id + "_normal",
tumor_id=sample_id + ".T",
normal_id=sample_id + ".N",
ref_dir=ref_dir,
fasta=fasta,
vep_path=vep_path,
@@ -435,22 +452,33 @@ workflow {{ project_name }} {
}
}
}
if (input_bam) {
if (tumor_deduped_bam != "") {
call replaceRG.replaceRG as tumor_replaceRG {
input:
bam=tumor_deduped_bam,
group=group_id + '.T',
sample=sample_id + '.T',
pl=pl,
docker=samtools_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call BQSR.BQSR as tumor_BQSR_fb {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
deduped_bam=tumor_deduped_bam,
deduped_bam_index=tumor_deduped_bam_index,
deduped_bam=tumor_replaceRG.bam,
deduped_bam_index=tumor_replaceRG.bam_index,
db_mills=db_mills,
dbmills_dir=dbmills_dir,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
sample=sample_id + '_tumor',
sample=sample_id + '.T',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
@@ -458,19 +486,30 @@ workflow {{ project_name }} {
}
if (normal_deduped_bam != "") {
call replaceRG.replaceRG as normal_replaceRG {
input:
bam=normal_deduped_bam,
group=group_id + '.N',
sample=sample_id + '.N',
pl=pl,
docker=samtools_docker,
disk_size=disk_size,
cluster_config=cluster_config
}
call BQSR.BQSR as normal_BQSR_fb {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
deduped_bam=normal_deduped_bam,
deduped_bam_index=normal_deduped_bam_index,
deduped_bam=normal_replaceRG.bam,
deduped_bam_index=normal_replaceRG.bam_index,
db_mills=db_mills,
dbmills_dir=dbmills_dir,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
sample=sample_id + '_normal',
sample=sample_id + '.N',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
@@ -482,7 +521,7 @@ workflow {{ project_name }} {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
sample=sample_id + '_normal',
sample=sample_id + '.N',
fasta=fasta,
ref_dir=ref_dir,
recaled_bam=normal_BQSR_fb.recaled_bam,
@@ -495,7 +534,21 @@ workflow {{ project_name }} {
cluster_config=cluster_config
}
}

if (pindel) {
call pindel.pindel as pindel_fb {
input:
sample_id=sample_id + '.N',
bam=normal_BQSR_fb.recaled_bam,
bam_index=normal_BQSR_fb.recaled_bam_index,
fasta=fasta,
ref_dir=ref_dir,
docker=pindel_docker,
disk_size=disk_size,
cluster_config=cluster_config
}
}

if (tnseq) {
call TNseq.TNseq as TNseq_fb {
input:
@@ -506,8 +559,8 @@ workflow {{ project_name }} {
normal_recaled_bam_index=normal_BQSR_fb.recaled_bam_index,
tumor_recaled_bam=tumor_BQSR_fb.recaled_bam,
tumor_recaled_bam_index=tumor_BQSR_fb.recaled_bam_index,
normal_name=sample_id + "_normal",
tumor_name=sample_id + "_tumor",
normal_name=sample_id + ".N",
tumor_name=sample_id + ".T",
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
@@ -540,8 +593,8 @@ workflow {{ project_name }} {
hg=hg,
only_pass=only_pass,
sample_id=sample_id,
tumor_id=sample_id + "_tumor",
normal_id=sample_id + "_normal",
tumor_id=sample_id + ".T",
normal_id=sample_id + ".N",
ref_dir=ref_dir,
fasta=fasta,
vep_path=vep_path,
@@ -565,8 +618,8 @@ workflow {{ project_name }} {
normal_recaled_bam_index=normal_BQSR_fb.recaled_bam_index,
tumor_recaled_bam=tumor_BQSR_fb.recaled_bam,
tumor_recaled_bam_index=tumor_BQSR_fb.recaled_bam_index,
normal_name=sample_id + "_normal",
tumor_name=sample_id + "_tumor",
normal_name=sample_id + ".N",
tumor_name=sample_id + ".T",
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
@@ -599,8 +652,8 @@ workflow {{ project_name }} {
hg=hg,
only_pass=only_pass,
sample_id=sample_id,
tumor_id=sample_id + "_tumor",
normal_id=sample_id + "_normal",
tumor_id=sample_id + ".T",
normal_id=sample_id + ".N",
ref_dir=ref_dir,
fasta=fasta,
vep_path=vep_path,
@@ -674,8 +727,8 @@ workflow {{ project_name }} {
hg=hg,
only_pass=only_pass,
sample_id=sample_id,
tumor_id=sample_id + "_tumor",
normal_id=sample_id + "_normal",
tumor_id=sample_id + ".T",
normal_id=sample_id + ".N",
ref_dir=ref_dir,
fasta=fasta,
vep_path=vep_path,

Načítá se…
Zrušit
Uložit