"""Annotate high-confidence VCF calls with per-site information.

Joins a VCF with a tab-separated site-information table on their first two
columns and writes the joined rows to ``<prefix>.annotated.txt``.
"""
import pandas as pd
import sys, argparse, os
from operator import itemgetter


def _count_header_lines(path):
    """Return how many consecutive lines at the top of *path* start with '#'.

    The file is read in binary mode so that non-ASCII bytes in header lines
    cannot raise a decoding error. Only the leading run of '#' lines is
    counted; a '#' further down the file is left alone.
    """
    count = 0
    with open(path, 'rb') as handle:
        for line in handle:
            if not line.startswith(b'#'):
                break
            count += 1
    return count


def _read_table(path):
    """Read a header-less tab-separated file, skipping leading '#' lines.

    The first two columns are the merge keys and are read as strings, so both
    tables always carry the same key dtype regardless of which values happen
    to be present (pandas refuses to merge an int64 key with an object key).
    """
    return pd.read_table(
        path,
        header=None,
        skiprows=_count_header_lines(path),
        dtype={0: str, 1: str},
    )


def annotate(info_path, vcf_path, prefix):
    """Inner-join the VCF with the info table and write the result.

    Args:
        info_path: path to the tab-separated site-information file.
        vcf_path: path to the uncompressed VCF file; leading '#' header
            lines are skipped.
        prefix: output prefix; the result goes to ``<prefix>.annotated.txt``.

    Returns:
        The name of the file that was written.
    """
    info_df = _read_table(info_path)
    vcf_df = _read_table(vcf_path)
    # Keep only VCF rows whose first two columns also occur in the info table;
    # VCF columns come first in the output, followed by the info columns.
    merged_df = pd.merge(vcf_df, info_df, how='inner', left_on=[0, 1], right_on=[0, 1])
    filename = prefix + '.annotated.txt'
    merged_df.to_csv(filename, header=False, index=False, sep="\t")
    return filename


def main(argv=None):
    """Parse the command line (``sys.argv`` when *argv* is None) and annotate."""
    parser = argparse.ArgumentParser(description="this script is to annotate high confidence calls")
    parser.add_argument('-info', '--info', type=str, help='The infomation file', required=True)
    parser.add_argument('-vcf', '--vcf', type=str, help='The vcf file', required=True)
    parser.add_argument('-prefix', '--prefix', type=str, help='The outputname', required=True)
    args = parser.parse_args(argv)
    annotate(args.info, args.vcf, args.prefix)


if __name__ == '__main__':
    main()
"{{ project_name }}.LCL5_vcf": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.voted.mendelian.vcf.gz", | "{{ project_name }}.LCL5_vcf": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.voted.mendelian.vcf.gz", | ||||
"{{ project_name }}.LCL8_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.consensus.merged.bed", | "{{ project_name }}.LCL8_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.consensus.merged.bed", | ||||
"{{ project_name }}.LCL6_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.variants.bed", | "{{ project_name }}.LCL6_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.variants.bed", | ||||
"{{ project_name }}.vcf_info": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/quartet.all.site.info.txt", | |||||
"{{ project_name }}.LCL6_vcf_idx": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.voted.mendelian.vcf.gz.tbi", | "{{ project_name }}.LCL6_vcf_idx": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.voted.mendelian.vcf.gz.tbi", | ||||
"{{ project_name }}.LCL5_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.variants.bed", | "{{ project_name }}.LCL5_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.variants.bed", | ||||
"{{ project_name }}.LCL8_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.27.homo_ref.consensus.bed", | "{{ project_name }}.LCL8_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.27.homo_ref.consensus.bed", | ||||
"{{ project_name }}.disk_size": "150", | "{{ project_name }}.disk_size": "150", | ||||
"{{ project_name }}.LCL7_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.variants.bed", | "{{ project_name }}.LCL7_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.variants.bed", | ||||
"{{ project_name }}.LCL7_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.consensus.merged.bed", | "{{ project_name }}.LCL7_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.consensus.merged.bed", | ||||
"{{ project_name }}.filter_vcf.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", | |||||
"{{ project_name }}.filter_vcf.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:v1.1", | |||||
"{{ project_name }}.docker": "registry-internal.cn-shanghai.aliyuncs.com/pgx-docker-registry/bedtools:v2.27.1", | "{{ project_name }}.docker": "registry-internal.cn-shanghai.aliyuncs.com/pgx-docker-registry/bedtools:v2.27.1", | ||||
"{{ project_name }}.LCL5_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.27.homo_ref.consensus.bed", | "{{ project_name }}.LCL5_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.27.homo_ref.consensus.bed", | ||||
"{{ project_name }}.LCL6_vcf": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.voted.mendelian.vcf.gz", | "{{ project_name }}.LCL6_vcf": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.voted.mendelian.vcf.gz", | ||||
"{{ project_name }}.LCL7_vcf_idx": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.voted.mendelian.vcf.gz.tbi", | "{{ project_name }}.LCL7_vcf_idx": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.voted.mendelian.vcf.gz.tbi", | ||||
"{{ project_name }}.LCL6_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.27.homo_ref.consensus.bed", | "{{ project_name }}.LCL6_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.27.homo_ref.consensus.bed", | ||||
"{{ project_name }}.bed_10X": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/linked.10x.26559.removed.vcf.bed" | "{{ project_name }}.bed_10X": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/linked.10x.26559.removed.vcf.bed" | ||||
} | |||||
} | |||||
File LCL6_vcf_idx | File LCL6_vcf_idx | ||||
File LCL7_vcf_idx | File LCL7_vcf_idx | ||||
File LCL8_vcf_idx | File LCL8_vcf_idx | ||||
File vcf_info | |||||
String docker | String docker | ||||
String disk_size | String disk_size | ||||
String cluster_config | String cluster_config | ||||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcffilter -i ${LCL8_vcf} --include-bed=${benchmark_region} -o LCL8.high.confidence.calls.vcf.gz | /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcffilter -i ${LCL8_vcf} --include-bed=${benchmark_region} -o LCL8.high.confidence.calls.vcf.gz | ||||
zcat LCL5.high.confidence.calls.vcf.gz | grep '#' > LCL5.header | |||||
zcat LCL6.high.confidence.calls.vcf.gz | grep '#' > LCL6.header | |||||
zcat LCL7.high.confidence.calls.vcf.gz | grep '#' > LCL7.header | |||||
zcat LCL8.high.confidence.calls.vcf.gz | grep '#' > LCL8.header | |||||
gunzip LCL5.high.confidence.calls.vcf.gz | |||||
gunzip LCL6.high.confidence.calls.vcf.gz | |||||
gunzip LCL7.high.confidence.calls.vcf.gz | |||||
gunzip LCL8.high.confidence.calls.vcf.gz | |||||
python /opt/annotate_vcf.py -info ${vcf_info} -vcf LCL5.high.confidence.calls.vcf -prefix LCL5 | |||||
python /opt/annotate_vcf.py -info ${vcf_info} -vcf LCL6.high.confidence.calls.vcf -prefix LCL6 | |||||
python /opt/annotate_vcf.py -info ${vcf_info} -vcf LCL7.high.confidence.calls.vcf -prefix LCL7 | |||||
python /opt/annotate_vcf.py -info ${vcf_info} -vcf LCL8.high.confidence.calls.vcf -prefix LCL8 | |||||
cat LCL5.annotated.txt | awk '{print $1"\t"$2"\t.\t"$4"\t"$5"\t.\t.\tVOTE="$13"\tGT:ALT:DP\t"$10":"$18":"$17}' | cat LCL5.header - > LCL5.high.confidence.calls.annotated.vcf | |||||
cat LCL6.annotated.txt | awk '{print $1"\t"$2"\t.\t"$4"\t"$5"\t.\t.\tVOTE="$14"\tGT:ALT:DP\t"$10":"$20":"$19}' | cat LCL6.header - > LCL6.high.confidence.calls.annotated.vcf | |||||
cat LCL7.annotated.txt | awk '{print $1"\t"$2"\t.\t"$4"\t"$5"\t.\t.\tVOTE="$15"\tGT:ALT:DP\t"$10":"$22":"$21}' | cat LCL7.header - > LCL7.high.confidence.calls.annotated.vcf | |||||
cat LCL8.annotated.txt | awk '{print $1"\t"$2"\t.\t"$4"\t"$5"\t.\t.\tVOTE="$16"\tGT:ALT:DP\t"$10":"$24":"$23}' | cat LCL8.header - > LCL8.high.confidence.calls.annotated.vcf | |||||
>>> | >>> | ||||
runtime { | runtime { | ||||
} | } | ||||
output { | output { | ||||
File LCL5_filtered_vcf = "LCL5.high.confidence.calls.vcf.gz" | |||||
File LCL5_filtered_vcf_idx = "LCL5.high.confidence.calls.vcf.gz.tbi" | |||||
File LCL6_filtered_vcf = "LCL6.high.confidence.calls.vcf.gz" | |||||
File LCL6_filtered_vcf_idx = "LCL6.high.confidence.calls.vcf.gz.tbi" | |||||
File LCL7_filtered_vcf = "LCL7.high.confidence.calls.vcf.gz" | |||||
File LCL7_filtered_vcf_idx = "LCL7.high.confidence.calls.vcf.gz.tbi" | |||||
File LCL8_filtered_vcf = "LCL8.high.confidence.calls.vcf.gz" | |||||
File LCL8_filtered_vcf_idx = "LCL8.high.confidence.calls.vcf.gz.tbi" | |||||
File LCL5_filtered_vcf = "LCL5.high.confidence.calls.vcf" | |||||
File LCL6_filtered_vcf = "LCL6.high.confidence.calls.vcf" | |||||
File LCL7_filtered_vcf = "LCL7.high.confidence.calls.vcf" | |||||
File LCL8_filtered_vcf = "LCL8.high.confidence.calls.vcf" | |||||
File LCL5_annotated_vcf = "LCL5.high.confidence.calls.annotated.vcf" | |||||
File LCL6_annotated_vcf = "LCL6.high.confidence.calls.annotated.vcf" | |||||
File LCL7_annotated_vcf = "LCL7.high.confidence.calls.annotated.vcf" | |||||
File LCL8_annotated_vcf = "LCL8.high.confidence.calls.annotated.vcf" | |||||
} | } | ||||
} | } | ||||
import "./tasks/variant_bed.wdl" as variant_bed | import "./tasks/variant_bed.wdl" as variant_bed | ||||
import "./tasks/filter_vcf.wdl" as filter_vcf | import "./tasks/filter_vcf.wdl" as filter_vcf | ||||
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
File LCL5_callable_bed | File LCL5_callable_bed | ||||
File LCL6_vcf_idx | File LCL6_vcf_idx | ||||
File LCL7_vcf_idx | File LCL7_vcf_idx | ||||
File LCL8_vcf_idx | File LCL8_vcf_idx | ||||
File vcf_info | |||||
String docker | String docker | ||||
String disk_size | String disk_size | ||||
String cluster_config | String cluster_config | ||||
call filter_vcf.filter_vcf as filter_vcf { | call filter_vcf.filter_vcf as filter_vcf { | ||||
input: | input: | ||||
benchmark_region=final_merge.benchmark_region, | benchmark_region=final_merge.benchmark_region, | ||||
vcf_info=vcf_info, | |||||
LCL5_vcf=LCL5_vcf, | LCL5_vcf=LCL5_vcf, | ||||
LCL6_vcf=LCL6_vcf, | LCL6_vcf=LCL6_vcf, | ||||
LCL7_vcf=LCL7_vcf, | LCL7_vcf=LCL7_vcf, |