Browse Source

jaccard index

master
LUYAO REN 5 years ago
parent
commit
936275205c
4 changed files with 68 additions and 13 deletions
  1. +8
    -0
      inputs
  2. +12
    -8
      tasks/jaccard_index.wdl
  3. +26
    -0
      tasks/mergeJI.wdl
  4. +22
    -5
      workflow.wdl

+ 8
- 0
inputs View File

"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.fastqc.disk_size": "150", "{{ project_name }}.fastqc.disk_size": "150",
"{{ project_name }}.benchmark.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc", "{{ project_name }}.benchmark.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.mergeJI.disk_size": "100",
"{{ project_name }}.fastqscreen.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc", "{{ project_name }}.fastqscreen.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.fastqc.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc", "{{ project_name }}.fastqc.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.inputJIpiarsFile": "{{ inputJIpiarsFile }}",
"{{ project_name }}.benchmark.disk_size": "150", "{{ project_name }}.benchmark.disk_size": "150",
"{{ project_name }}.vcfstat.disk_size": "100", "{{ project_name }}.vcfstat.disk_size": "100",
"{{ project_name }}.fastqc.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5", "{{ project_name }}.fastqc.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5",
"{{ project_name }}.mergeJI.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.benchmark.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", "{{ project_name }}.benchmark.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.mergeJI.cluster_config": "OnDemand ecs.sn1ne.xlarge img-ubuntu-vpc",
"{{ project_name }}.fastqscreen.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0", "{{ project_name }}.fastqscreen.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
"{{ project_name }}.mergeNum.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:v2019.01", "{{ project_name }}.mergeNum.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:v2019.01",
"{{ project_name }}.JI.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/", "{{ project_name }}.screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
"{{ project_name }}.mergeNum.disk_size": "100", "{{ project_name }}.mergeNum.disk_size": "100",
"{{ project_name }}.JI.disk_size": "100",
"{{ project_name }}.fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf", "{{ project_name }}.fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
"{{ project_name }}.multiqc.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc", "{{ project_name }}.multiqc.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.sdf": "oss://chinese-quartet/quartet-storage-data/reference_data/GRCh38.d1.vd1.sdf/",
"{{ project_name }}.multiqc.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8", "{{ project_name }}.multiqc.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
"{{ project_name }}.mergeNum.cluster_config": "OnDemand ecs.sn1ne.xlarge img-ubuntu-vpc", "{{ project_name }}.mergeNum.cluster_config": "OnDemand ecs.sn1ne.xlarge img-ubuntu-vpc",
"{{ project_name }}.bamqc.cluster_config": "OnDemand ecs.sn1ne.8xlarge img-ubuntu-vpc", "{{ project_name }}.bamqc.cluster_config": "OnDemand ecs.sn1ne.8xlarge img-ubuntu-vpc",
"{{ project_name }}.vcfstat.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc", "{{ project_name }}.vcfstat.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.JI.cluster_config": "OnDemand ecs.sn1ne.4xlarge img-ubuntu-vpc",
"{{ project_name }}.fastqscreen.disk_size": "100", "{{ project_name }}.fastqscreen.disk_size": "100",
"{{ project_name }}.bamqc.disk_size": "500", "{{ project_name }}.bamqc.disk_size": "500",
"{{ project_name }}.multiqc.disk_size": "100", "{{ project_name }}.multiqc.disk_size": "100",

+ 12
- 8
tasks/jaccard_index.wdl View File

File vcf_b File vcf_b
File dir_name File dir_name
File sdf File sdf
String name_a = basename(vcf_a,".vcf")
String name_b = basename(vcf_b,".vcf")
String docker String docker
String cluster_config String cluster_config
String disk_size String disk_size
command <<< command <<<
touch number
cat ${filelist}| while read a b c d
do
rtg vcfeval -b $a -c $b -o $c -t ${sdf}
cat $c/summary.txt | sed -n '4,4p' | sed 's/\s\+/\t/g'| cut -f4-6 >> number
done
paste ${filelist} number > result.txt
set -o pipefail
set -e

# Block-compress and index each input VCF under its own basename, then run
# vcfeval on the compressed, indexed copies.
# Fixes: the index step previously pointed at a path that was never written,
# and the second compression step re-used the first input instead of the second.
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${vcf_a} -c > ${name_a}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${name_a}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${vcf_b} -c > ${name_b}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${name_b}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b ${name_a}.rtg.vcf.gz -c ${name_b}.rtg.vcf.gz -o ${dir_name} -t ${sdf}

>>> >>>


runtime { runtime {
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
} }
output { output {
File summary = "result.txt"
File JI_summary = "${dir_name}/summary.txt"
} }
} }

+ 26
- 0
tasks/mergeJI.wdl View File

# mergeJI: gathers one row of values from every per-pair summary file and
# appends those rows, as extra columns, to the pair list.
#
# Inputs:
#   JI_summary        summary files, processed in array order
#   inputJIpiarsFile  pair list; each of its lines is joined with the
#                     corresponding extracted row
#   docker            container image used for the task
#   cluster_config    value passed to the "cluster" runtime key
#   disk_size         size inserted into the dataDisk specification
#
# Output:
#   JI_all            result.txt (pair list columns followed by the
#                     extracted columns)
#
# NOTE(review): line 4, fields 4-6 of each summary are presumably the counts
# needed for the Jaccard index -- confirm against the vcfeval summary layout.
task mergeJI {
  Array[File] JI_summary
  File inputJIpiarsFile
  String docker
  String cluster_config
  String disk_size

  command <<<
    # Abort on the first failing step so an unreadable summary cannot
    # silently produce a misaligned result.txt.
    set -o pipefail
    set -e

    # Start from an empty accumulator file.
    : > number

    for summary in ${sep=" " JI_summary}
    do
      # Take line 4, collapse whitespace runs into tabs, keep fields 4-6.
      sed -n '4p' "$summary" | sed 's/\s\+/\t/g' | cut -f4-6 >> number
    done

    # Join pair list and extracted rows line by line.
    paste ${inputJIpiarsFile} number > result.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File JI_all = "result.txt"
  }
}

+ 22
- 5
workflow.wdl View File

import "./tasks/multiqc.wdl" as multiqc import "./tasks/multiqc.wdl" as multiqc
import "./tasks/vcfstat.wdl" as vcfstat import "./tasks/vcfstat.wdl" as vcfstat
import "./tasks/mergeNum.wdl" as mergeNum import "./tasks/mergeNum.wdl" as mergeNum
#import "./tasks/jaccard_index.wdl" as JI
import "./tasks/jaccard_index.wdl" as JI
import "./tasks/mergeJI.wdl" as mergeJI




workflow {{ project_name }} { workflow {{ project_name }} {


File inputSamplesFile File inputSamplesFile
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
# File inputJIpiarsFile
# Array[Array[File]] inputJIpairs = read_tsv(inputJIpiarsFile)
File inputJIpiarsFile
Array[Array[File]] inputJIpairs = read_tsv(inputJIpiarsFile)
File screen_ref_dir File screen_ref_dir
File fastq_screen_conf File fastq_screen_conf
File benchmarking_dir File benchmarking_dir
File ref_dir File ref_dir
String fasta String fasta
File sdf


scatter (sample in inputSamples){ scatter (sample in inputSamples){
call fastqc.fastqc as fastqc { call fastqc.fastqc as fastqc {
input: input:
rtg_vcf=benchmark.rtg_vcf, rtg_vcf=benchmark.rtg_vcf,
rtg_vcf_index=benchmark.rtg_vcf_index rtg_vcf_index=benchmark.rtg_vcf_index
}

}


} }
call multiqc.multiqc as multiqc { call multiqc.multiqc as multiqc {
vcfnumber=vcfstat.vcfnumber vcfnumber=vcfstat.vcfnumber
} }


scatter (pair in inputJIpairs) {
call JI.JI as JI {
input:
vcf_a=pair[0],
vcf_b=pair[1],
dir_name=pair[2],
sdf=sdf
}
}

call mergeJI.mergeJI as mergeJI {
input:
JI_summary=JI.JI_summary,
inputJIpiarsFile=inputJIpiarsFile
}

} }



Loading…
Cancel
Save