@@ -0,0 +1,35 @@ | |||
# Split the concatenated genotype tables (chr*gt) into one VCF per batch.
#
# Every command follows the same pipeline:
#   1. concatenate all chr*gt files;
#   2. keep columns 1-9 plus the four sample columns of that batch;
#   3. drop lines containing '#CHROM';
#   4. sort by column 1, then numerically by column 2;
#   5. prepend the file `header` and write the batch VCF.
#
# `cut` always emits fields in file order, so it is used when the four sample
# columns are wanted in ascending column order. `awk` is used when they must
# be emitted in a different order. Note that awk splits on runs of whitespace
# while cut splits on single tabs.
#
# The BRG_1 and BRG_2 commands previously began with `cat chr*gt | cat chr*gt |`.
# The second cat has file operands and never reads stdin, so the first cat was
# dead work and has been removed.

# BGI SEQ2000, 2018-05-18
cat chr*gt | cut -f1-9,10,41,74,107 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_BGI_SEQ2000_BGI_1_20180518.vcf
cat chr*gt | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$11"\t"$52"\t"$85"\t"$12}' | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_BGI_SEQ2000_BGI_2_20180518.vcf
cat chr*gt | cut -f1-9,21,30,63,96 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_BGI_SEQ2000_BGI_3_20180518.vcf

# BGI T7 WGE, 2019-11-05
cat chr*gt | cut -f1-9,88,91,94,98 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_BGI_T7_WGE_1_20191105.vcf
cat chr*gt | cut -f1-9,89,92,95,99 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_BGI_T7_WGE_2_20191105.vcf
cat chr*gt | cut -f1-9,90,93,97,100 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_BGI_T7_WGE_3_20191105.vcf

# ILM Nova ARD, 2018-11-08
cat chr*gt | cut -f1-9,61,68,75,81 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_ARD_1_20181108.vcf
cat chr*gt | cut -f1-9,62,69,76,82 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_ARD_2_20181108.vcf
cat chr*gt | cut -f1-9,64,70,77,83 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_ARD_3_20181108.vcf
cat chr*gt | cut -f1-9,65,71,78,84 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_ARD_4_20181108.vcf
cat chr*gt | cut -f1-9,66,72,79,86 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_ARD_5_20181108.vcf
cat chr*gt | cut -f1-9,67,73,80,87 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_ARD_6_20181108.vcf

# ILM Nova BRG, 2018-09-30 (sample columns are not in ascending order, hence awk)
cat chr*gt | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$114"\t"$117"\t"$15"\t"$18}' | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_BRG_1_20180930.vcf
cat chr*gt | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$115"\t"$13"\t"$16"\t"$19}' | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_BRG_2_20180930.vcf
cat chr*gt | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$116"\t"$14"\t"$17"\t"$20}' | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_BRG_3_20180930.vcf

# ILM Nova WUX, 2019-09-17
cat chr*gt | cut -f1-9,101,102,103,104 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_WUX_1_20190917.vcf
cat chr*gt | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$109"\t"$106"\t"$108"\t"$105}' | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_WUX_2_20190917.vcf
cat chr*gt | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$113"\t"$112"\t"$111"\t"$110}' | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_Nova_WUX_3_20190917.vcf

# ILM XTen ARD, 2017-04-03
cat chr*gt | cut -f1-9,22,25,28,32 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_XTen_ARD_1_20170403.vcf
cat chr*gt | cut -f1-9,23,26,29,33 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_XTen_ARD_2_20170403.vcf
cat chr*gt | cut -f1-9,24,27,31,34 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_XTen_ARD_3_20170403.vcf

# ILM XTen NVG, 2017-03-29
cat chr*gt | cut -f1-9,35,38,42,45 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_XTen_NVG_1_20170329.vcf
cat chr*gt | cut -f1-9,36,39,43,46 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_XTen_NVG_2_20170329.vcf
cat chr*gt | cut -f1-9,37,40,44,47 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_XTen_NVG_3_20170329.vcf

# ILM XTen WUX, 2017-02-16
cat chr*gt | cut -f1-9,48,51,55,58 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_XTen_WUX_1_20170216.vcf
cat chr*gt | cut -f1-9,49,53,56,59 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_XTen_WUX_2_20170216.vcf
cat chr*gt | cut -f1-9,50,54,57,60 | grep -v '#CHROM' | sort -k1,1 -k2,2n | cat header - > Quartet_DNA_ILM_XTen_WUX_3_20170216.vcf
@@ -1,11 +1,12 @@ | |||
import pandas as pd | |||
import sys, argparse, os | |||
# Load the tab-separated inputs named on the command line:
#   argv[1] - headerless table; columns are addressed by position (0, 1, 2, 5).
#   argv[2] - table with a header row; CHROM, POS and LCL5_detected_num are
#             used below.
#   argv[3] - headerless table (presumably mutation types - confirm with caller).
# argv[1] used to be parsed twice, the second read overwriting the first; it
# is now read once, with low_memory=False so column dtypes are inferred from
# the whole file rather than chunk by chunk.
men = pd.read_table(sys.argv[1], header=None, low_memory=False)
vote = pd.read_table(sys.argv[2], low_memory=False)
mut = pd.read_table(sys.argv[3], header=None)

# Column 1 of `men` is joined against vote['POS'], so it is cast to str first.
# NOTE(review): this assumes vote['POS'] is also read as str/object - confirm.
men[1] = men[1].astype(str)

# Keep only sites present in both tables (inner join on chromosome, position).
merged_df = pd.merge(
    vote, men, how='inner', left_on=['CHROM', 'POS'], right_on=[0, 1]
)

# Rows whose column 5 is neither './.' nor '0/0'.
lcl5_dat = merged_df[(merged_df[5] != './.') & (merged_df[5] != '0/0')]

# Default every site to 'MIE', then mark rows whose column 2 equals '1:1:1'
# as 'MP'.
merged_df['mendelian_check'] = 'MIE'
merged_df.loc[merged_df[2] == '1:1:1', 'mendelian_check'] = 'MP'

sub = merged_df[['CHROM', 'POS', 'LCL5_detected_num', 'mendelian_check', 2]]
@@ -1,8 +1,10 @@ | |||
{ | |||
"{{ project_name }}.disk_size": "100", | |||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | |||
"{{ project_name }}.sample_name": "{{ sample_name }}", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.mut_file": "oss://pgx-result/renluyao/manuscript/mutation_type" | |||
} | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.family_vcf": "{{ family_vcf }}", | |||
"{{ project_name }}.disk_size": "500", | |||
"{{ project_name }}.SMALLcluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.BIGcluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | |||
"{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | |||
} |
@@ -0,0 +1,46 @@ | |||
# Task mendelian: runs `vbt mendelian` twice on the same family VCF, once with
# LCL5 as the child (D5) and once with LCL6 as the child (D6).
#
# Inputs:
#   family_vcf     - VCF passed to vbt as -mother, -father and -child alike.
#   ref_dir, fasta - the reference is addressed as ref_dir/fasta.
#   docker, cluster_config, disk_size - forwarded to the runtime section.
# family_name is derived from family_vcf by stripping a trailing ".vcf".
task mendelian { | |||
File family_vcf | |||
File ref_dir | |||
String family_name = basename(family_vcf,".vcf") | |||
String fasta | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
# Command outline, repeated for D5 (child LCL5) and D6 (child LCL6):
#   1. write a three-line pedigree file: LCL8 (sex 2) and LCL7 (sex 1) with
#      no parents, and the child with LCL7 and LCL8 in its two parent columns;
#   2. create the output directory VBT_D5 / VBT_D6;
#   3. run vbt mendelian with the thread count taken from nproc;
#   4. copy the resulting *_trio.vcf to <family_name>.D5.vcf / .D6.vcf.
# NOTE(review): there is no `set -e`; if vbt fails, the `cat ... >` redirect
# still creates an (empty) output VCF - confirm this is acceptable.
command <<< | |||
export LD_LIBRARY_PATH=/opt/htslib-1.9 | |||
nt=$(nproc) | |||
echo -e "${family_name}\tLCL8\t0\t0\t2\t-9\n${family_name}\tLCL7\t0\t0\t1\t-9\n${family_name}\tLCL5\tLCL7\tLCL8\t2\t-9" > ${family_name}.D5.ped | |||
mkdir VBT_D5 | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${family_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_name}.D5 --output-violation-regions -thread-count $nt | |||
cat VBT_D5/${family_name}.D5_trio.vcf > ${family_name}.D5.vcf | |||
echo -e "${family_name}\tLCL8\t0\t0\t2\t-9\n${family_name}\tLCL7\t0\t0\t1\t-9\n${family_name}\tLCL6\tLCL7\tLCL8\t2\t-9" > ${family_name}.D6.ped | |||
mkdir VBT_D6 | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${family_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_name}.D6 --output-violation-regions -thread-count $nt | |||
cat VBT_D6/${family_name}.D6_trio.vcf > ${family_name}.D6.vcf | |||
>>> | |||
# Backend settings; the data disk string is built from disk_size.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
# Outputs: both pedigree files, every file vbt wrote into VBT_D5 / VBT_D6,
# and the two copied trio VCFs.
output { | |||
File D5_ped = "${family_name}.D5.ped" | |||
File D6_ped = "${family_name}.D6.ped" | |||
Array[File] D5_mendelian = glob("VBT_D5/*") | |||
Array[File] D6_mendelian = glob("VBT_D6/*") | |||
File D5_trio_vcf = "${family_name}.D5.vcf" | |||
File D6_trio_vcf = "${family_name}.D6.vcf" | |||
} | |||
} | |||
@@ -1,33 +0,0 @@ | |||
# Task merge_chromo: concatenates the SNV and indel tables it is given and
# summarises each concatenated table.
#
# Inputs:
#   mendelian_vote_snv, mendelian_vote_indel - files to concatenate, in
#                                              array order.
#   sample_name - prefix for all four output files.
#   docker, cluster_config, disk_size - forwarded to the runtime section.
task merge_chromo { | |||
Array[File] mendelian_vote_snv | |||
Array[File] mendelian_vote_indel | |||
String sample_name | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
# Command outline:
#   1. cat all SNV files into <sample_name>.snv.txt, likewise for indels;
#   2. for each merged file, take fields 3 and 4, sort them, and count the
#      occurrences of each distinct pair with `uniq -c`.
command <<< | |||
cat ${sep=" " mendelian_vote_snv} > ${sample_name}.snv.txt | |||
cat ${sep=" " mendelian_vote_indel} > ${sample_name}.indel.txt | |||
cat ${sample_name}.snv.txt | cut -f3,4 | sort | uniq -c > ${sample_name}.snv.summary.txt | |||
cat ${sample_name}.indel.txt | cut -f3,4 | sort | uniq -c > ${sample_name}.indel.summary.txt | |||
>>> | |||
# Backend settings; the data disk string is built from disk_size.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
# Outputs: the two count summaries and the two concatenated detail tables.
output { | |||
File mendelian_vote_summary_snv = "${sample_name}.snv.summary.txt" | |||
File mendelian_vote_summary_indel = "${sample_name}.indel.summary.txt" | |||
File mendelian_vote_summary_snv_detail = "${sample_name}.snv.txt" | |||
File mendelian_vote_summary_indel_detail = "${sample_name}.indel.txt" | |||
} | |||
} |
@@ -0,0 +1,35 @@ | |||
# Task merge_mendelian: prepares the two trio VCFs and the family VCF as
# plain tables and hands them to /opt/merge_two_family_with_genotype.py.
#
# Inputs:
#   D5_trio_vcf, D6_trio_vcf - trio VCFs for the two children.
#   family_vcf               - family VCF providing the genotypes.
#   docker, cluster_config, disk_size - forwarded to the runtime section.
# family_name is derived from family_vcf by stripping a trailing ".family.vcf".
task merge_mendelian { | |||
File D5_trio_vcf | |||
File D6_trio_vcf | |||
File family_vcf | |||
String family_name = basename(family_vcf,".family.vcf") | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
# Command outline:
#   1. drop every line containing '##' from both trio VCFs
#      (-> <family_name>.D5.txt / .D6.txt);
#   2. drop the same lines from family_vcf, then for every line after the
#      first that has more than two fields, truncate fields 9..NF to the text
#      before the first ':' and re-join the record with tabs
#      (-> <family_name>.consensus.txt); the first line is printed unchanged;
#   3. run the merge script on the three tables.
# NOTE(review): the two declared outputs are not written by any command
# visible here; presumably the Python script creates them from -family.
command <<< | |||
cat ${D5_trio_vcf} | grep -v '##' > ${family_name}.D5.txt | |||
cat ${D6_trio_vcf} | grep -v '##' > ${family_name}.D6.txt | |||
cat ${family_vcf} | grep -v '##' | awk ' | |||
BEGIN { OFS = "\t" } | |||
NF > 2 && FNR > 1 { | |||
for ( i=9; i<=NF; i++ ) { | |||
split($i,a,":") ;$i = a[1]; | |||
} | |||
} | |||
{ print } | |||
' > ${family_name}.consensus.txt | |||
python /opt/merge_two_family_with_genotype.py -LCL5 ${family_name}.D5.txt -LCL6 ${family_name}.D6.txt -genotype ${family_name}.consensus.txt -family ${family_name} | |||
>>> | |||
# Backend settings; the data disk string is built from disk_size.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
# Outputs: the merged table and its summary, both named after family_name.
output { | |||
File project_mendelian = "${family_name}.txt" | |||
File project_mendelian_summary = "${family_name}.summary.txt" | |||
} | |||
} |
@@ -1,31 +0,0 @@ | |||
# Task merge_mendelian_vote: runs /opt/merge_mendelian_vote.py on one sample
# and splits its output into two files by the lengths of fields 6 and 7.
#
# Inputs:
#   vote_file, mendelian_file, mut_file - passed to the script in the order
#                                         mendelian_file, vote_file, mut_file.
#   output_prefix - prefix for all three output files.
#   docker, cluster_config, disk_size - forwarded to the runtime section.
task merge_mendelian_vote { | |||
File vote_file | |||
File mendelian_file | |||
File mut_file | |||
String output_prefix | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
# Command outline:
#   1. the script writes <output_prefix>.mendelian.vote.txt (fourth argument);
#   2. rows where fields 6 and 7 are both one character long go to
#      <output_prefix>.snv;
#   3. rows where either field is longer than one character go to
#      <output_prefix>.indel.
# NOTE(review): fields 6 and 7 are presumably REF and ALT - confirm against
# the script's output layout. A row with an empty field 6 or 7 and no longer
# field lands in neither file.
command <<< | |||
python /opt/merge_mendelian_vote.py ${mendelian_file} ${vote_file} ${mut_file} ${output_prefix}.mendelian.vote.txt | |||
cat ${output_prefix}.mendelian.vote.txt | awk '{ if ((length($6) == 1) && (length($7) == 1)) { print } }' > ${output_prefix}.snv | |||
cat ${output_prefix}.mendelian.vote.txt | awk '{ if ((length($6) > 1) || (length($7) > 1)) { print } }' > ${output_prefix}.indel | |||
>>> | |||
# Backend settings; the data disk string is built from disk_size.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
# Outputs: the full merged table plus its two length-based subsets.
output { | |||
File mendelian_vote = "${output_prefix}.mendelian.vote.txt" | |||
File mendelian_vote_snv = "${output_prefix}.snv" | |||
File mendelian_vote_indel = "${output_prefix}.indel" | |||
} | |||
} |
@@ -1,34 +1,34 @@ | |||
import "./tasks/merge_mendelian_vote.wdl" as merge_mendelian_vote | |||
import "./tasks/merge_chromo.wdl" as merge_chromo | |||
import "./tasks/mendelian.wdl" as mendelian | |||
import "./tasks/merge_mendelian.wdl" as merge_mendelian | |||
# Workflow template; the workflow name is filled in from {{ project_name }}.
#
# NOTE(review): this body appears to interleave two versions of the workflow:
#   - one that scatters merge_mendelian_vote over the rows of inputSamplesFile
#     and then calls merge_chromo on the collected SNV/indel outputs;
#   - one that calls mendelian on family_vcf and then merge_mendelian on its
#     D5/D6 trio VCFs.
# There are two consecutive `call` headers (merge_chromo, merge_mendelian)
# sharing one input list, so the text is not valid WDL as it stands. Resolve
# to a single version before use.
workflow {{ project_name }} { | |||
# Declarations for the scatter / merge_chromo version.
File inputSamplesFile | |||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||
File mut_file | |||
String docker | |||
String sample_name | |||
String cluster_config | |||
# Declarations for the mendelian / merge_mendelian version.
File family_vcf | |||
File ref_dir | |||
String fasta | |||
String MENDELIANdocker | |||
String DIYdocker | |||
String BIGcluster_config | |||
String SMALLcluster_config | |||
String disk_size | |||
# Each row of inputSamples supplies vote_file, mendelian_file and
# output_prefix, in that order.
scatter (sample in inputSamples){ | |||
call merge_mendelian_vote.merge_mendelian_vote as merge_mendelian_vote { | |||
input: | |||
vote_file=sample[0], | |||
mendelian_file=sample[1], | |||
output_prefix=sample[2], | |||
mut_file=mut_file, | |||
docker=docker, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
# mendelian runs with MENDELIANdocker on BIGcluster_config.
call mendelian.mendelian as mendelian { | |||
input: | |||
family_vcf=family_vcf, | |||
ref_dir=ref_dir, | |||
fasta=fasta, | |||
docker=MENDELIANdocker, | |||
cluster_config=BIGcluster_config, | |||
disk_size=disk_size | |||
} | |||
# The input list below mixes arguments for merge_chromo (mendelian_vote_*,
# sample_name, docker, cluster_config) with arguments for merge_mendelian
# (D5/D6 trio VCFs, family_vcf, DIYdocker, SMALLcluster_config).
call merge_chromo.merge_chromo as merge_chromo { | |||
call merge_mendelian.merge_mendelian as merge_mendelian { | |||
input: | |||
mendelian_vote_snv=merge_mendelian_vote.mendelian_vote_snv, | |||
mendelian_vote_indel=merge_mendelian_vote.mendelian_vote_indel, | |||
sample_name=sample_name, | |||
docker=docker, | |||
cluster_config=cluster_config, | |||
D5_trio_vcf=mendelian.D5_trio_vcf, | |||
D6_trio_vcf=mendelian.D6_trio_vcf, | |||
family_vcf=family_vcf, | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size | |||
} | |||
} |