Browse Source

jaccard index

master
LUYAO REN 5 years ago
parent
commit
936275205c
4 changed files with 68 additions and 13 deletions
  1. +8
    -0
      inputs
  2. +12
    -8
      tasks/jaccard_index.wdl
  3. +26
    -0
      tasks/mergeJI.wdl
  4. +22
    -5
      workflow.wdl

+ 8
- 0
inputs View File

@@ -4,23 +4,31 @@
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.fastqc.disk_size": "150",
"{{ project_name }}.benchmark.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.mergeJI.disk_size": "100",
"{{ project_name }}.fastqscreen.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.fastqc.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.inputJIpiarsFile": "{{ inputJIpiarsFile }}",
"{{ project_name }}.benchmark.disk_size": "150",
"{{ project_name }}.vcfstat.disk_size": "100",
"{{ project_name }}.fastqc.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5",
"{{ project_name }}.mergeJI.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.benchmark.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.mergeJI.cluster_config": "OnDemand ecs.sn1ne.xlarge img-ubuntu-vpc",
"{{ project_name }}.fastqscreen.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
"{{ project_name }}.mergeNum.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:v2019.01",
"{{ project_name }}.JI.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
"{{ project_name }}.mergeNum.disk_size": "100",
"{{ project_name }}.JI.disk_size": "100",
"{{ project_name }}.fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
"{{ project_name }}.multiqc.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.sdf": "oss://chinese-quartet/quartet-storage-data/reference_data/GRCh38.d1.vd1.sdf/",
"{{ project_name }}.multiqc.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
"{{ project_name }}.mergeNum.cluster_config": "OnDemand ecs.sn1ne.xlarge img-ubuntu-vpc",
"{{ project_name }}.bamqc.cluster_config": "OnDemand ecs.sn1ne.8xlarge img-ubuntu-vpc",
"{{ project_name }}.vcfstat.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.JI.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.fastqscreen.disk_size": "100",
"{{ project_name }}.bamqc.disk_size": "500",
"{{ project_name }}.multiqc.disk_size": "100",

+ 12
- 8
tasks/jaccard_index.wdl View File

@@ -3,18 +3,22 @@ task JI {
File vcf_b
File dir_name
File sdf
String name_a = basename(vcf_a,".vcf")
String name_b = basename(vcf_b,".vcf")
String docker
String cluster_config
String disk_size
command <<<
touch number
cat ${filelist}| while read a b c d
do
rtg vcfeval -b $a -c $b -o $c -t ${sdf}
cat $c/summary.txt | sed -n '4,4p' | sed 's/\s\+/\t/g'| cut -f4-6 >> number
done
paste ${filelist} number > result.txt
# Abort on the first failing command, including failures inside pipelines.
set -o pipefail
set -e

# Block-compress and index each input VCF under its own basename, then
# compare the two prepared files. Each index call must target the .rtg.vcf.gz
# file written on the line above it, and the second bgzip must read vcf_b.
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${vcf_a} -c > ${name_a}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${name_a}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${vcf_b} -c > ${name_b}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${name_b}.rtg.vcf.gz
# NOTE(review): if both inputs share a basename the two .rtg.vcf.gz outputs
# collide - confirm the pairs file never lists identically named VCFs.
# NOTE(review): dir_name is used here as the vcfeval output directory name but
# is declared as File in this task - confirm whether it should be a String.
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b ${name_a}.rtg.vcf.gz -c ${name_b}.rtg.vcf.gz -o ${dir_name} -t ${sdf}

>>>

runtime {
@@ -24,6 +28,6 @@ task JI {
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File summary = "result.txt"
File JI_summary = "${dir_name}/summary.txt"
}
}

+ 26
- 0
tasks/mergeJI.wdl View File

@@ -0,0 +1,26 @@
# Collects one row from every per-pair vcfeval summary and appends those rows,
# column-wise, to the pairs file that drove the comparison.
#
# Inputs:
#   JI_summary        - summary.txt files, one per compared pair
#   inputJIpiarsFile  - the pairs table; its rows are pasted next to the counts
#   docker, cluster_config, disk_size - runtime settings
# Output:
#   JI_all            - result.txt: the pairs table plus three count columns
task mergeJI {
    Array[File] JI_summary
    File inputJIpiarsFile
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Fail fast: without this, an unreadable summary would silently leave
        # a row out of "number" and paste would misalign every following row.
        set -o pipefail
        set -e

        # Start from an existing (possibly empty) file so paste always has input.
        touch number
        for i in ${sep=" " JI_summary}
        do
            # Take line 4 of the summary, turn runs of whitespace into tabs,
            # and keep fields 4-6.
            sed -n '4p' "$i" | sed 's/\s\+/\t/g' | cut -f4-6 >> number
        done
        # Rows of "number" follow the order of JI_summary; they line up with the
        # pairs file only if that array is in the same order as its rows.
        paste ${inputJIpiarsFile} number > result.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File JI_all = "result.txt"
    }
}

+ 22
- 5
workflow.wdl View File

@@ -5,20 +5,22 @@ import "./tasks/benchmark.wdl" as benchmark
import "./tasks/multiqc.wdl" as multiqc
import "./tasks/vcfstat.wdl" as vcfstat
import "./tasks/mergeNum.wdl" as mergeNum
#import "./tasks/jaccard_index.wdl" as JI
import "./tasks/jaccard_index.wdl" as JI
import "./tasks/mergeJI.wdl" as mergeJI


workflow {{ project_name }} {

File inputSamplesFile
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
# File inputJIpiarsFile
# Array[Array[File]] inputJIpairs = read_tsv(inputJIpiarsFile)
File inputJIpiarsFile
Array[Array[File]] inputJIpairs = read_tsv(inputJIpiarsFile)
File screen_ref_dir
File fastq_screen_conf
File benchmarking_dir
File ref_dir
String fasta
File sdf

scatter (sample in inputSamples){
call fastqc.fastqc as fastqc {
@@ -54,8 +56,7 @@ workflow {{ project_name }} {
input:
rtg_vcf=benchmark.rtg_vcf,
rtg_vcf_index=benchmark.rtg_vcf_index
}

}

}
call multiqc.multiqc as multiqc {
@@ -73,5 +74,21 @@ workflow {{ project_name }} {
vcfnumber=vcfstat.vcfnumber
}

# Run one JI comparison per row of the pairs table: column 0 and column 1 are
# the two VCFs, column 2 is passed as the task's dir_name.
scatter (pair in inputJIpairs) {
call JI.JI as JI {
input:
vcf_a=pair[0],
vcf_b=pair[1],
dir_name=pair[2],
sdf=sdf
}
}

# Outside the scatter, JI.JI_summary is the array of every pair's summary file;
# mergeJI combines them with the original pairs table.
call mergeJI.mergeJI as mergeJI {
input:
JI_summary=JI.JI_summary,
inputJIpiarsFile=inputJIpiarsFile
}

}


Loading…
Cancel
Save