4 年之前 · 9bee569816
--- a/codescripts/annotate_vcf.py
+++ b/codescripts/annotate_vcf.py
@@ -0,0 +1,24 @@
 import pandas as pd
 import sys, argparse, os
 from operator import itemgetter   

 # Command-line interface: annotate the records of a VCF file with the matching
 # rows of a site information table. All three options are required.
 parser = argparse.ArgumentParser(description="this script is to annotate high confidence calls")

 parser.add_argument('-info', '--info', type=str, help='The information file', required=True)
 parser.add_argument('-vcf', '--vcf', type=str, help='The vcf file', required=True)
 parser.add_argument('-prefix', '--prefix', type=str, help='The outputname', required=True)

 args = parser.parse_args()

 # Both inputs are read as tab-separated tables without a header row, so their
 # columns are addressed by integer position (0, 1, ...).
 info_df = pd.read_table(args.info, header=None)

 # VCF meta-information and header lines start with '#'. They must be skipped:
 # with header=None the column count is inferred from the first line, and a
 # single-field '##...' line makes the wider lines that follow fail to parse.
 # NOTE: comment='#' also cuts a data line at any later '#'; this assumes the
 # data fields of the VCF do not contain that character.
 vcf_df = pd.read_table(args.vcf, header=None, comment='#')

 # Inner join on the first two columns of both tables (presumably chromosome
 # and position -- confirm against the info file layout). Only VCF records
 # that have a row in the info table are kept.
 merged_df = pd.merge(vcf_df, info_df, how='inner', left_on=[0, 1], right_on=[0, 1])

 # Write the joined table as plain tab-separated text, with neither a header
 # line nor an index column.
 filename = args.prefix + '.annotated.txt'
 merged_df.to_csv(filename, header=False, index=False, sep="\t")
--- a/inputs
+++ b/inputs
@@ -2,13 +2,14 @@
  "{{ project_name }}.LCL5_vcf": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.voted.mendelian.vcf.gz",
  "{{ project_name }}.LCL8_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.consensus.merged.bed",
  "{{ project_name }}.LCL6_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.variants.bed",
  "{{ project_name }}.vcf_info": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/quartet.all.site.info.txt",
  "{{ project_name }}.LCL6_vcf_idx": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.voted.mendelian.vcf.gz.tbi",
  "{{ project_name }}.LCL5_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.variants.bed",
  "{{ project_name }}.LCL8_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.27.homo_ref.consensus.bed",
  "{{ project_name }}.disk_size": "150",
  "{{ project_name }}.LCL7_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.variants.bed",
  "{{ project_name }}.LCL7_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.consensus.merged.bed",
  "{{ project_name }}.filter_vcf.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
  "{{ project_name }}.filter_vcf.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:v1.1",
  "{{ project_name }}.docker": "registry-internal.cn-shanghai.aliyuncs.com/pgx-docker-registry/bedtools:v2.27.1",
  "{{ project_name }}.LCL5_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.27.homo_ref.consensus.bed",
  "{{ project_name }}.LCL6_vcf": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.voted.mendelian.vcf.gz",
@@ -25,4 +26,5 @@
  "{{ project_name }}.LCL7_vcf_idx": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.voted.mendelian.vcf.gz.tbi",
  "{{ project_name }}.LCL6_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.27.homo_ref.consensus.bed",
  "{{ project_name }}.bed_10X": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/linked.10x.26559.removed.vcf.bed"
 }
 }

--- a/tasks/filter_vcf.wdl
+++ b/tasks/filter_vcf.wdl
@@ -9,6 +9,7 @@ task filter_vcf {
 	File LCL6_vcf_idx
 	File LCL7_vcf_idx
 	File LCL8_vcf_idx
 	File vcf_info
 	String docker
 	String disk_size
 	String cluster_config
@@ -24,6 +25,28 @@ task filter_vcf {

 		/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcffilter -i ${LCL8_vcf} --include-bed=${benchmark_region} -o LCL8.high.confidence.calls.vcf.gz

 		zcat LCL5.high.confidence.calls.vcf.gz | grep '#' > LCL5.header
 		zcat LCL6.high.confidence.calls.vcf.gz | grep '#' > LCL6.header
 		zcat LCL7.high.confidence.calls.vcf.gz | grep '#' > LCL7.header
 		zcat LCL8.high.confidence.calls.vcf.gz | grep '#' > LCL8.header

 		gunzip LCL5.high.confidence.calls.vcf.gz
 		gunzip LCL6.high.confidence.calls.vcf.gz
 		gunzip LCL7.high.confidence.calls.vcf.gz
 		gunzip LCL8.high.confidence.calls.vcf.gz

 		python /opt/annotate_vcf.py -info ${vcf_info} -vcf LCL5.high.confidence.calls.vcf -prefix LCL5
 		python /opt/annotate_vcf.py -info ${vcf_info} -vcf LCL6.high.confidence.calls.vcf -prefix LCL6
 		python /opt/annotate_vcf.py -info ${vcf_info} -vcf LCL7.high.confidence.calls.vcf -prefix LCL7
 		python /opt/annotate_vcf.py -info ${vcf_info} -vcf LCL8.high.confidence.calls.vcf -prefix LCL8


 		cat LCL5.annotated.txt | awk '{print $1"\t"$2"\t.\t"$4"\t"$5"\t.\t.\tVOTE="$13"\tGT:ALT:DP\t"$10":"$18":"$17}' | cat LCL5.header - > LCL5.high.confidence.calls.annotated.vcf
 		cat LCL6.annotated.txt | awk '{print $1"\t"$2"\t.\t"$4"\t"$5"\t.\t.\tVOTE="$14"\tGT:ALT:DP\t"$10":"$20":"$19}' | cat LCL6.header - > LCL6.high.confidence.calls.annotated.vcf
 		cat LCL7.annotated.txt | awk '{print $1"\t"$2"\t.\t"$4"\t"$5"\t.\t.\tVOTE="$15"\tGT:ALT:DP\t"$10":"$22":"$21}' | cat LCL7.header - > LCL7.high.confidence.calls.annotated.vcf
 		cat LCL8.annotated.txt | awk '{print $1"\t"$2"\t.\t"$4"\t"$5"\t.\t.\tVOTE="$16"\tGT:ALT:DP\t"$10":"$24":"$23}' | cat LCL8.header - > LCL8.high.confidence.calls.annotated.vcf


 		>>>

 		runtime {
@@ -34,14 +57,15 @@ task filter_vcf {
 	}

 	output {
 		File LCL5_filtered_vcf = "LCL5.high.confidence.calls.vcf.gz"
 		File LCL5_filtered_vcf_idx = "LCL5.high.confidence.calls.vcf.gz.tbi"
 		File LCL6_filtered_vcf = "LCL6.high.confidence.calls.vcf.gz"
 		File LCL6_filtered_vcf_idx = "LCL6.high.confidence.calls.vcf.gz.tbi"
 		File LCL7_filtered_vcf = "LCL7.high.confidence.calls.vcf.gz"
 		File LCL7_filtered_vcf_idx = "LCL7.high.confidence.calls.vcf.gz.tbi"
 		File LCL8_filtered_vcf = "LCL8.high.confidence.calls.vcf.gz"
 		File LCL8_filtered_vcf_idx = "LCL8.high.confidence.calls.vcf.gz.tbi"
 		File LCL5_filtered_vcf = "LCL5.high.confidence.calls.vcf"
 		File LCL6_filtered_vcf = "LCL6.high.confidence.calls.vcf"
 		File LCL7_filtered_vcf = "LCL7.high.confidence.calls.vcf"
 		File LCL8_filtered_vcf = "LCL8.high.confidence.calls.vcf"
 		File LCL5_annotated_vcf = "LCL5.high.confidence.calls.annotated.vcf"
 		File LCL6_annotated_vcf = "LCL6.high.confidence.calls.annotated.vcf"
 		File LCL7_annotated_vcf = "LCL7.high.confidence.calls.annotated.vcf"
 		File LCL8_annotated_vcf = "LCL8.high.confidence.calls.annotated.vcf"

 	}
 }

--- a/workflow.wdl
+++ b/workflow.wdl
@@ -4,8 +4,6 @@ import "./tasks/homo_bed.wdl" as homo_bed
 import "./tasks/variant_bed.wdl" as variant_bed
 import "./tasks/filter_vcf.wdl" as filter_vcf



 workflow {{ project_name }} {

 	File LCL5_callable_bed
@@ -30,6 +28,7 @@ workflow {{ project_name }} {
 	File LCL6_vcf_idx
 	File LCL7_vcf_idx
 	File LCL8_vcf_idx
 	File vcf_info
 	String docker
 	String disk_size
 	String cluster_config
@@ -82,6 +81,7 @@ workflow {{ project_name }} {
 	call filter_vcf.filter_vcf as filter_vcf {
 		input:
 		benchmark_region=final_merge.benchmark_region,
 		vcf_info=vcf_info,
 		LCL5_vcf=LCL5_vcf,
 		LCL6_vcf=LCL6_vcf,
 		LCL7_vcf=LCL7_vcf,