# Split the merged per-chromosome genotype tables (chr*gt) into one VCF per
# sequencing batch. Every output holds VCF columns 1-9 plus four sample
# columns, has the '#CHROM' lines removed, is sorted by chromosome and
# position, and is prefixed with the contents of the file `header`.
#
# Two column selectors are used:
#   * `cut -f1-9,a,b,c,d` when the sample columns are already in ascending
#     order (cut always emits fields in file order);
#   * `reorder_cols a b c d` when the sample columns must be emitted in a
#     different order than they appear in the input.

# reorder_cols COL...
# Reads tab-separated records on stdin and prints columns 1-9 followed by the
# requested columns, in the order given. Fields are split on tabs only so
# that the column numbering agrees with the `cut` based batches.
reorder_cols() {
    awk -F'\t' -v OFS='\t' -v picks="$*" '
        BEGIN { n = split(picks, col, " ") }
        {
            row = $1
            for (i = 2; i <= 9; i++) row = row OFS $i
            for (i = 1; i <= n; i++) row = row OFS $(col[i])
            print row
        }'
}

# build_vcf OUT SELECTOR [ARG...]
# Pipes every chr*gt file through SELECTOR, drops the '#CHROM' lines, sorts by
# column 1 then numerically by column 2, and writes `header` + records to OUT.
build_vcf() {
    local out=$1
    shift
    cat chr*gt | "$@" | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > "$out"
}

build_vcf Quartet_DNA_BGI_SEQ2000_BGI_1_20180518.vcf cut -f1-9,10,41,74,107
build_vcf Quartet_DNA_BGI_SEQ2000_BGI_2_20180518.vcf reorder_cols 11 52 85 12
build_vcf Quartet_DNA_BGI_SEQ2000_BGI_3_20180518.vcf cut -f1-9,21,30,63,96
build_vcf Quartet_DNA_BGI_T7_WGE_1_20191105.vcf cut -f1-9,88,91,94,98
build_vcf Quartet_DNA_BGI_T7_WGE_2_20191105.vcf cut -f1-9,89,92,95,99
build_vcf Quartet_DNA_BGI_T7_WGE_3_20191105.vcf cut -f1-9,90,93,97,100
build_vcf Quartet_DNA_ILM_Nova_ARD_1_20181108.vcf cut -f1-9,61,68,75,81
build_vcf Quartet_DNA_ILM_Nova_ARD_2_20181108.vcf cut -f1-9,62,69,76,82
build_vcf Quartet_DNA_ILM_Nova_ARD_3_20181108.vcf cut -f1-9,64,70,77,83
build_vcf Quartet_DNA_ILM_Nova_ARD_4_20181108.vcf cut -f1-9,65,71,78,84
build_vcf Quartet_DNA_ILM_Nova_ARD_5_20181108.vcf cut -f1-9,66,72,79,86
build_vcf Quartet_DNA_ILM_Nova_ARD_6_20181108.vcf cut -f1-9,67,73,80,87
build_vcf Quartet_DNA_ILM_Nova_BRG_1_20180930.vcf reorder_cols 114 117 15 18
build_vcf Quartet_DNA_ILM_Nova_BRG_2_20180930.vcf reorder_cols 115 13 16 19
build_vcf Quartet_DNA_ILM_Nova_BRG_3_20180930.vcf reorder_cols 116 14 17 20
build_vcf Quartet_DNA_ILM_Nova_WUX_1_20190917.vcf cut -f1-9,101,102,103,104
build_vcf Quartet_DNA_ILM_Nova_WUX_2_20190917.vcf reorder_cols 109 106 108 105
build_vcf Quartet_DNA_ILM_Nova_WUX_3_20190917.vcf reorder_cols 113 112 111 110
build_vcf Quartet_DNA_ILM_XTen_ARD_1_20170403.vcf cut -f1-9,22,25,28,32
build_vcf Quartet_DNA_ILM_XTen_ARD_2_20170403.vcf cut -f1-9,23,26,29,33
build_vcf Quartet_DNA_ILM_XTen_ARD_3_20170403.vcf cut -f1-9,24,27,31,34
build_vcf Quartet_DNA_ILM_XTen_NVG_1_20170329.vcf cut -f1-9,35,38,42,45
build_vcf Quartet_DNA_ILM_XTen_NVG_2_20170329.vcf cut -f1-9,36,39,43,46
build_vcf Quartet_DNA_ILM_XTen_NVG_3_20170329.vcf cut -f1-9,37,40,44,47
build_vcf Quartet_DNA_ILM_XTen_WUX_1_20170216.vcf cut -f1-9,48,51,55,58
build_vcf Quartet_DNA_ILM_XTen_WUX_2_20170216.vcf cut -f1-9,49,53,56,59
build_vcf Quartet_DNA_ILM_XTen_WUX_3_20170216.vcf cut -f1-9,50,54,57,60
import pandas as pd
import sys, argparse, os

# Positional inputs:
#   sys.argv[1]  headerless table joined on its columns 0 and 1
#   sys.argv[2]  table with a header row; must provide CHROM, POS and
#                LCL5_detected_num columns
#   sys.argv[3]  headerless table, loaded into `mut`
# The first table is loaded once; the earlier duplicate load without
# low_memory=False was immediately overwritten and has been dropped.
men = pd.read_table(sys.argv[1], header=None, low_memory=False)
vote = pd.read_table(sys.argv[2], low_memory=False)
mut = pd.read_table(sys.argv[3], header=None)

# Join on (chromosome, position). Both position keys are cast to str so the
# merge never compares an integer column against a string column.
# NOTE(review): this leaves POS as a string in the merged frame - confirm no
# later step relies on it being numeric.
men[1] = men[1].astype(str)
vote['POS'] = vote['POS'].astype(str)
merged_df = pd.merge(vote, men, how='inner', left_on=['CHROM', 'POS'], right_on=[0, 1])

# Rows whose column 5 is neither a missing ('./.') nor a '0/0' call.
lcl5_dat = merged_df[(merged_df[5] != './.') & (merged_df[5] != '0/0')]

# Default every site to 'MIE'; sites whose column 2 equals '1:1:1' are
# relabelled 'MP'.
merged_df['mendelian_check'] = 'MIE'
merged_df.loc[merged_df[2] == '1:1:1', 'mendelian_check'] = 'MP'

sub = merged_df[['CHROM', 'POS', 'LCL5_detected_num', 'mendelian_check', 2]]
{ | { | ||||
"{{ project_name }}.disk_size": "100", | |||||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||||
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | |||||
"{{ project_name }}.sample_name": "{{ sample_name }}", | |||||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | |||||
"{{ project_name }}.mut_file": "oss://pgx-result/renluyao/manuscript/mutation_type" | |||||
} | |||||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||||
"{{ project_name }}.family_vcf": "{{ family_vcf }}", | |||||
"{{ project_name }}.disk_size": "500", | |||||
"{{ project_name }}.SMALLcluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", | |||||
"{{ project_name }}.BIGcluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||||
"{{ project_name }}.MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||||
"{{ project_name }}.DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | |||||
"{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | |||||
} |
# Runs the VBT `mendelian` analysis twice on one family VCF: once with LCL5 as
# the child (D5) and once with LCL6 as the child (D6). In both pedigrees LCL7
# and LCL8 are the founders. The same VCF is passed as mother, father and
# child; the pedigree file written here names the samples for each role.
#
# Inputs:
#   family_vcf      VCF passed to VBT for all three roles
#   ref_dir         directory containing the reference FASTA
#   fasta           file name of the reference inside ref_dir
#   docker, cluster_config, disk_size   runtime settings
# Outputs:
#   D5_ped / D6_ped             the generated pedigree files
#   D5_mendelian / D6_mendelian every file VBT wrote to VBT_D5 / VBT_D6
#   D5_trio_vcf / D6_trio_vcf   copies of the VBT *_trio.vcf results
task mendelian {
  File family_vcf
  File ref_dir
  String family_name = basename(family_vcf,".vcf")
  String fasta
  String docker
  String cluster_config
  String disk_size

  command <<<
    # Stop at the first failing step. Without this a failed D5 run was hidden:
    # the redirect below still created an empty D5.vcf and the task exit code
    # came from the final D6 command.
    set -e

    export LD_LIBRARY_PATH=/opt/htslib-1.9
    nt=$(nproc)

    # D5 trio: child LCL5.
    echo -e "${family_name}\tLCL8\t0\t0\t2\t-9\n${family_name}\tLCL7\t0\t0\t1\t-9\n${family_name}\tLCL5\tLCL7\tLCL8\t2\t-9" > ${family_name}.D5.ped
    mkdir -p VBT_D5
    /opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${family_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_name}.D5 --output-violation-regions -thread-count $nt
    cat VBT_D5/${family_name}.D5_trio.vcf > ${family_name}.D5.vcf

    # D6 trio: child LCL6.
    echo -e "${family_name}\tLCL8\t0\t0\t2\t-9\n${family_name}\tLCL7\t0\t0\t1\t-9\n${family_name}\tLCL6\tLCL7\tLCL8\t2\t-9" > ${family_name}.D6.ped
    mkdir -p VBT_D6
    /opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${family_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_name}.D6 --output-violation-regions -thread-count $nt
    cat VBT_D6/${family_name}.D6_trio.vcf > ${family_name}.D6.vcf
  >>>

  runtime {
    docker:docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File D5_ped = "${family_name}.D5.ped"
    File D6_ped = "${family_name}.D6.ped"
    Array[File] D5_mendelian = glob("VBT_D5/*")
    Array[File] D6_mendelian = glob("VBT_D6/*")
    File D5_trio_vcf = "${family_name}.D5.vcf"
    File D6_trio_vcf = "${family_name}.D6.vcf"
  }
}
# Concatenates the per-sample SNV and INDEL tables into one table per variant
# class, then counts how often each distinct (column 3, column 4) pair occurs.
#
# Inputs:
#   mendelian_vote_snv / mendelian_vote_indel  tables to concatenate
#   sample_name                                prefix of every output file
#   docker, cluster_config, disk_size          runtime settings
# Outputs:
#   *_detail   the concatenated tables
#   summaries  `uniq -c` counts over columns 3 and 4 of each concatenated table
task merge_chromo {
  String sample_name
  Array[File] mendelian_vote_snv
  Array[File] mendelian_vote_indel
  String docker
  String cluster_config
  String disk_size

  command <<<
    cat ${sep=" " mendelian_vote_snv} > ${sample_name}.snv.txt
    cat ${sep=" " mendelian_vote_indel} > ${sample_name}.indel.txt

    # Same summary for both variant classes.
    for kind in snv indel
    do
      cut -f3,4 ${sample_name}.$kind.txt | sort | uniq -c > ${sample_name}.$kind.summary.txt
    done
  >>>

  runtime {
    docker:docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File mendelian_vote_summary_snv_detail = "${sample_name}.snv.txt"
    File mendelian_vote_summary_indel_detail = "${sample_name}.indel.txt"
    File mendelian_vote_summary_snv = "${sample_name}.snv.summary.txt"
    File mendelian_vote_summary_indel = "${sample_name}.indel.summary.txt"
  }
}
# Prepares three header-stripped tables (the D5 trio VCF, the D6 trio VCF and
# a genotype-only copy of the family VCF) and hands them to
# /opt/merge_two_family_with_genotype.py.
#
# Inputs:
#   D5_trio_vcf / D6_trio_vcf  trio VCFs for the two children
#   family_vcf                 family VCF; its name minus ".family.vcf" is
#                              used as the output prefix
#   docker, cluster_config, disk_size   runtime settings
# Outputs:
#   project_mendelian          <family_name>.txt
#   project_mendelian_summary  <family_name>.summary.txt
#   (both are expected to be written by the Python script)
task merge_mendelian {
  File D5_trio_vcf
  File D6_trio_vcf
  File family_vcf
  String family_name = basename(family_vcf,".family.vcf")
  String docker
  String cluster_config
  String disk_size

  command <<<
    # Drop only the "##" meta-information lines. The pattern is anchored so a
    # record that merely contains "##" somewhere in a field is kept.
    grep -v '^##' ${D5_trio_vcf} > ${family_name}.D5.txt
    grep -v '^##' ${D6_trio_vcf} > ${family_name}.D6.txt

    # Reduce column 9 onwards to the part before the first ":". The first
    # remaining line (the column header) is passed through untouched. Fields
    # are split on tabs only, so a space inside a field cannot shift columns.
    cat ${family_vcf} | grep -v '^##' | awk '
      BEGIN { FS = OFS = "\t" }
      NF > 2 && FNR > 1 {
        for ( i=9; i<=NF; i++ ) {
          split($i,a,":") ;$i = a[1];
        }
      }
      { print }
    ' > ${family_name}.consensus.txt

    python /opt/merge_two_family_with_genotype.py -LCL5 ${family_name}.D5.txt -LCL6 ${family_name}.D6.txt -genotype ${family_name}.consensus.txt -family ${family_name}
  >>>

  runtime {
    docker:docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File project_mendelian = "${family_name}.txt"
    File project_mendelian_summary = "${family_name}.summary.txt"
  }
}
# Runs /opt/merge_mendelian_vote.py on one sample, then splits the resulting
# table into two files by the lengths of columns 6 and 7.
#
# Inputs:
#   mendelian_file, vote_file, mut_file  passed to the script in that order
#   output_prefix                        prefix of every output file
#   docker, cluster_config, disk_size    runtime settings
# Outputs:
#   mendelian_vote        table written by the script
#   mendelian_vote_snv    rows where columns 6 and 7 are both one character
#   mendelian_vote_indel  rows where column 6 or 7 is longer than one character
task merge_mendelian_vote {
  File mendelian_file
  File vote_file
  File mut_file
  String output_prefix
  String docker
  String cluster_config
  String disk_size

  command <<<
    merged=${output_prefix}.mendelian.vote.txt
    python /opt/merge_mendelian_vote.py ${mendelian_file} ${vote_file} ${mut_file} $merged

    # A bare awk pattern prints every matching row.
    cat $merged | awk 'length($6) == 1 && length($7) == 1' > ${output_prefix}.snv
    cat $merged | awk 'length($6) > 1 || length($7) > 1' > ${output_prefix}.indel
  >>>

  runtime {
    docker:docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File mendelian_vote = "${output_prefix}.mendelian.vote.txt"
    File mendelian_vote_snv = "${output_prefix}.snv"
    File mendelian_vote_indel = "${output_prefix}.indel"
  }
}
import "./tasks/merge_mendelian_vote.wdl" as merge_mendelian_vote | |||||
import "./tasks/merge_chromo.wdl" as merge_chromo | |||||
import "./tasks/mendelian.wdl" as mendelian | |||||
import "./tasks/merge_mendelian.wdl" as merge_mendelian | |||||
# NOTE(review): this workflow body appears to interleave two variants of the
# same template - confirm which one is intended before running it:
#   * one scatters merge_mendelian_vote over the rows of inputSamplesFile
#     (columns 0, 1, 2 = vote file, mendelian file, output prefix) and then
#     passes the gathered SNV/INDEL outputs to merge_chromo;
#   * one calls mendelian on family_vcf and passes its D5/D6 trio VCFs to
#     merge_mendelian.
# The trailing pipe characters on every line look like table/diff residue and
# are not WDL syntax - TODO confirm and strip.
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
File inputSamplesFile | |||||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||||
File mut_file | |||||
String docker | |||||
String sample_name | |||||
String cluster_config | |||||
File family_vcf | |||||
File ref_dir | |||||
String fasta | |||||
String MENDELIANdocker | |||||
String DIYdocker | |||||
String BIGcluster_config | |||||
String SMALLcluster_config | |||||
String disk_size | String disk_size | ||||
scatter (sample in inputSamples){ | |||||
call merge_mendelian_vote.merge_mendelian_vote as merge_mendelian_vote { | |||||
input: | |||||
vote_file=sample[0], | |||||
mendelian_file=sample[1], | |||||
output_prefix=sample[2], | |||||
mut_file=mut_file, | |||||
docker=docker, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call mendelian.mendelian as mendelian { | |||||
input: | |||||
family_vcf=family_vcf, | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
docker=MENDELIANdocker, | |||||
cluster_config=BIGcluster_config, | |||||
disk_size=disk_size | |||||
} | } | ||||
# NOTE(review): two call headers open back to back here and share the single
# input list below; docker and cluster_config are each assigned twice in it.
call merge_chromo.merge_chromo as merge_chromo { | |||||
call merge_mendelian.merge_mendelian as merge_mendelian { | |||||
input: | input: | ||||
mendelian_vote_snv=merge_mendelian_vote.mendelian_vote_snv, | |||||
mendelian_vote_indel=merge_mendelian_vote.mendelian_vote_indel, | |||||
sample_name=sample_name, | |||||
docker=docker, | |||||
cluster_config=cluster_config, | |||||
D5_trio_vcf=mendelian.D5_trio_vcf, | |||||
D6_trio_vcf=mendelian.D6_trio_vcf, | |||||
family_vcf=family_vcf, | |||||
docker=DIYdocker, | |||||
cluster_config=SMALLcluster_config, | |||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
} | } |