il y a 5 ans · 9821fd89a9
--- a/codescripts/merge_two_family_with_genotype.py
+++ b/codescripts/merge_two_family_with_genotype.py
@@ -0,0 +1,81 @@
 from __future__ import division
 import pandas as pd
 import sys, argparse, os
 import fileinput
 import re

 # Merge the two trio result tables (LCL5 and LCL6) with the genotype table of the
 # four family members and write one tab-separated line per position to
 # "<family>.txt".  Output columns, in order:
 #   CHROM, POS, REF, ALT, LCL5, LCL6, LCL7, LCL8, TRIO5, TRIO6,
 #   mendelian ("<sister>:<trio5>:<trio6>"), mendelian_count, sister_count

 # input arguments
 parser = argparse.ArgumentParser(description="this script is to extract mendelian concordance information")

 parser.add_argument('-LCL5', '--LCL5', type=str, help='LCL5 family info',  required=True)
 parser.add_argument('-LCL6', '--LCL6', type=str, help='LCL6 family info',  required=True)
 parser.add_argument('-genotype', '--genotype', type=str, help='Genotype information of a set of four family members',  required=True)
 parser.add_argument('-family', '--family', type=str, help='family name',  required=True)


 args = parser.parse_args()
 lcl5 = args.LCL5
 lcl6 = args.LCL6
 genotype = args.genotype
 family = args.family


 # output file name
 family_name = family + '.txt'

 # Genotype strings that do not count as a variant call: missing or homozygous reference.
 NO_VARIANT = ('./.', '0/0')


 def _trio_status(child, father, mother, trio_info):
 	"""Return the trio component of the mendelian string for one child.

 	'noInfo' when all three genotypes are './.', 'Ref' when all three are
 	'0/0', 'unVBT' when the trio table has no entry for this position (null
 	after the outer merge); otherwise the text after the first '=' of
 	trio_info.
 	"""
 	if child == father == mother == './.':
 		return 'noInfo'
 	if child == father == mother == '0/0':
 		return 'Ref'
 	if pd.isnull(trio_info):
 		return 'unVBT'
 	return trio_info.split('=')[1]


 # input files
 lcl5_dat = pd.read_table(lcl5)
 lcl6_dat = pd.read_table(lcl6)
 genotype_dat = pd.read_table(genotype)
 merged_df = pd.merge(lcl5_dat, lcl6_dat,  how='outer', left_on=['#CHROM','POS'], right_on = ['#CHROM','POS'])
 merged_genotype_df = pd.merge(merged_df, genotype_dat,  how='outer', left_on=['#CHROM','POS'], right_on = ['#CHROM','POS'])

 # Columns are selected by position, so the layout of the inputs matters.
 # NOTE(review): positions 7/17 and 22-27 presumably assume 12-column trio tables
 # (column 8 = INFO) followed by the genotype columns REF, ALT and four samples - confirm.
 merged_genotype_df_sub = merged_genotype_df.iloc[:,[0,1,22,23,24,25,26,27,7,17]]
 merged_genotype_df_sub.columns = ['CHROM', 'POS', 'REF', 'ALT','LCL5','LCL6','LCL7','LCL8', 'TRIO5', 'TRIO6']

 # The with-block closes (and therefore flushes) the output even if a row fails;
 # the file is opened only after the inputs were read and merged successfully.
 with open(family_name, 'w') as family_file:
 	for row in merged_genotype_df_sub.itertuples():
 		# sister: compare the genotypes of the two daughters
 		if row.LCL5 == row.LCL6:
 			if row.LCL5 == './.':
 				sister = 'noInfo'
 				sister_count = "no"
 			elif row.LCL5 == '0/0':
 				sister = 'Ref'
 				sister_count = "no"
 			else:
 				sister = '1'
 				sister_count = "yes_same"
 		else:
 			sister = '0'
 			if row.LCL5 in NO_VARIANT and row.LCL6 in NO_VARIANT:
 				sister_count = "no"
 			else:
 				sister_count = "yes_diff"

 		# family trio5 and trio6 share the same parents (LCL7, LCL8)
 		trio5 = _trio_status(row.LCL5, row.LCL7, row.LCL8, row.TRIO5)
 		trio6 = _trio_status(row.LCL6, row.LCL7, row.LCL8, row.TRIO6)
 		mendelian = ':'.join([sister, trio5, trio6])

 		# not count into family: no member carries a variant call
 		if all(gt in NO_VARIANT for gt in (row.LCL5, row.LCL6, row.LCL7, row.LCL8)):
 			mendelian_count = "no"
 		else:
 			mendelian_count = "yes"

 		# CHROM, REF, ALT and the genotypes must already be strings here; a
 		# non-string value (e.g. NaN from the outer merge) raises TypeError.
 		outline = '\t'.join([
 			row.CHROM, str(row.POS), row.REF, row.ALT,
 			row.LCL5, row.LCL6, row.LCL7, row.LCL8,
 			str(row.TRIO5), str(row.TRIO6),
 			mendelian, str(mendelian_count), str(sister_count),
 		]) + '\n'
 		family_file.write(outline)
--- a/inputs
+++ b/inputs
@@ -1,8 +1,11 @@
 {
  "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
  "{{ project_name }}.two_family_merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4",
  "{{ project_name }}.family_name": "{{ family_name }}",
  "{{ project_name }}.sister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
  "{{ project_name }}.disk_size": "150",
  "{{ project_name }}.merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4",
  "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
  "{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
  "{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc",
  "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
 }
 }
--- a/tasks/VCFinfo.wdl
+++ b/tasks/VCFinfo.wdl
@@ -1,32 +0,0 @@
 # Task VCFinfo: runs /opt/variants_quality_location_intergration.py on a
 # repeat-annotated VCF and then splits the resulting table into one file per
 # chromosome (chr1-chr22 and chrX), each prefixed with the '#CHROM' header line.
 #
 # Inputs:
 #   repeat_annotated_vcf - VCF handed to the script via -vcf
 #   sample               - value of -prefix; also the stem of all output file names
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs:
 #   extracted_info - ${sample}_variant_quality_location.txt
 #   chromo_vcfInfo - every file matching *.vcfInfo.txt in the working directory
 #
 # NOTE(review): the per-chromosome split uses `grep -w $i`, which selects lines
 # containing the chromosome name as a whole word anywhere on the line, not only
 # in the first column - confirm no other column can contain a chromosome name.
 task VCFinfo {
 	File repeat_annotated_vcf
 	String sample
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 		python /opt/variants_quality_location_intergration.py -vcf ${repeat_annotated_vcf} -prefix ${sample}

 		cat ${sample}_variant_quality_location.txt | grep '#CHROM' > header

 		for i in chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX
 		do
 			cat ${sample}_variant_quality_location.txt | grep -w $i | cat header - > ${sample}.$i.vcfInfo.txt
 		done


 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File extracted_info = "${sample}_variant_quality_location.txt"
 		Array[File] chromo_vcfInfo = glob("*.vcfInfo.txt")
 	}
 }
--- a/tasks/VCFrename.wdl
+++ b/tasks/VCFrename.wdl
@@ -1,31 +0,0 @@
 # Task VCFrename: relabels the samples of a trio VCF with `rtg vcfannotate --relabel`.
 # A mapping file rename.txt is written first: MOTHER and FATHER are mapped to
 # "<mother_name>.<child>" and "<father_name>.<child>", CHILD to "<child_name>".
 #
 # Inputs:
 #   trio_vcf_gz   - VCF passed to rtg via -i
 #   trio_vcf_idx  - not referenced in the command (presumably the index that must
 #                   sit next to trio_vcf_gz - confirm)
 #   mother_name, father_name, child_name - new sample labels
 #   family_name, child - combined into the output name ${family_name}.${child}.rename.vcf.gz
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs:
 #   rename_trio_vcf_gz  - the relabelled VCF
 #   rename_trio_vcf_idx - the matching .tbi file; the command does not create it
 #                         explicitly (presumably written by rtg - confirm)
 task VCFrename {
 	File trio_vcf_gz
 	File trio_vcf_idx
 	String mother_name
 	String father_name
 	String child_name
 	String family_name
 	String child
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<
 		echo "MOTHER ${mother_name}.${child}
 		FATHER ${father_name}.${child}
 		CHILD ${child_name}" > rename.txt

 		rtg vcfannotate -i ${trio_vcf_gz} -o ${family_name}.${child}.rename.vcf.gz --relabel=rename.txt
 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File rename_trio_vcf_gz = "${family_name}.${child}.rename.vcf.gz"
 		File rename_trio_vcf_idx = "${family_name}.${child}.rename.vcf.gz.tbi"
 	}
 }
--- a/tasks/bed_annotation.wdl
+++ b/tasks/bed_annotation.wdl
@@ -1,27 +0,0 @@
 # Task bed_annotation: annotates a merged VCF with the regions of a BED file
 # (`rtg vcfannotate --bed-info`) and decompresses the result with gunzip.
 #
 # Inputs:
 #   merged_vcf_gz  - VCF passed to rtg via -i
 #   merged_vcf_idx - not referenced in the command (presumably the index that
 #                    must accompany merged_vcf_gz - confirm)
 #   repeat_bed     - BED file passed to --bed-info
 #   sample         - stem of the output file name
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Output:
 #   repeat_annotated_vcf - ${sample}.mendelian.merged.repeatAnno.vcf (uncompressed)
 task bed_annotation {
 	File merged_vcf_gz
 	File merged_vcf_idx
 	File repeat_bed
 	String sample
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 		rtg vcfannotate --bed-info=${repeat_bed} -i ${merged_vcf_gz} -o ${sample}.mendelian.merged.repeatAnno.vcf.gz

 		gunzip ${sample}.mendelian.merged.repeatAnno.vcf.gz

 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File repeat_annotated_vcf = "${sample}.mendelian.merged.repeatAnno.vcf"
 	}
 }
--- a/tasks/final_result.wdl
+++ b/tasks/final_result.wdl
@@ -1,26 +0,0 @@
 # Task FinalResult: runs /opt/FinalResult2VCF.py on the extracted VCF information
 # and the mendelian annotation table.
 #
 # Inputs:
 #   extracted_info - passed to the script via -vcfInfo
 #   annotated_txt  - passed via -mendelianInfo
 #   prefix         - defaults to the base name of annotated_txt with the
 #                    ".mendelian.txt" suffix removed; passed via -prefix
 #   sample         - passed via -sample
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs (expected to be written by the script):
 #   benchmarking_calls - ${prefix}_benchmarking_calls.vcf
 #   all_info           - ${prefix}_all_sample_information.vcf
 task FinalResult {
 	File extracted_info
 	File annotated_txt
 	String prefix = basename(annotated_txt,".mendelian.txt")
 	String sample
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 		python /opt/FinalResult2VCF.py -vcfInfo ${extracted_info} -mendelianInfo ${annotated_txt} -prefix ${prefix} -sample ${sample}

 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File benchmarking_calls = "${prefix}_benchmarking_calls.vcf"
 		File all_info = "${prefix}_all_sample_information.vcf"
 	}
 }
--- a/tasks/mendelian.wdl
+++ b/tasks/mendelian.wdl
@@ -1,34 +0,0 @@
 # Task mendelian: runs `vbt mendelian` for one child against the two parents.
 # LCL8_vcf is passed as -mother and LCL7_vcf as -father. All VBT output goes to
 # the VBT directory; VBT/${child_name}.family_trio.vcf is then copied to
 # ${child_name}.family.vcf.
 #
 # Inputs:
 #   child_vcf, LCL7_vcf, LCL8_vcf - VCFs of the child, father and mother
 #   LCL7_name, LCL8_name          - declared but not referenced in the command
 #   child_name                    - used in the VBT output prefix and output file name
 #   ref_dir, fasta                - reference is read from ${ref_dir}/${fasta}
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs:
 #   vbt_mendelian - every file in the VBT directory
 #   trio_vcf      - ${child_name}.family.vcf
 task mendelian {
 	File child_vcf
 	File LCL7_vcf
 	File LCL8_vcf
 	String LCL7_name
 	String LCL8_name
 	String child_name
 	File ref_dir
 	String fasta
 	String docker
 	String cluster_config
 	String disk_size
 	
 	# The thread count given to vbt is taken from `nproc`.
 	command <<<
 		export LD_LIBRARY_PATH=/opt/htslib-1.9
 		nt=$(nproc)
 		mkdir VBT

 		/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${LCL8_vcf} -father ${LCL7_vcf} -child ${child_vcf} -outDir VBT -out-prefix ${child_name}.family --output-violation-regions -thread-count $nt

 		cat VBT/${child_name}.family_trio.vcf > ${child_name}.family.vcf
 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		Array[File] vbt_mendelian = glob("VBT/*")
 		File trio_vcf = "${child_name}.family.vcf"
 	}
 }
--- a/tasks/merge.wdl
+++ b/tasks/merge.wdl
@@ -1,16 +1,19 @@
 task merge {
 	Array[File] family_vcf_gz
 	Array[File] family_vcf_idx
 	String sample
 	Array[File] family_mendelian_info
 	String family_name
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 		rtg vcfmerge --force-merge-all -o ${sample}.merged.vcf.gz ${sep=" " family_vcf_gz}
 		cat ${sep=" " family_mendelian_info} | sort -k1,1 -k2,2n > ${family_name}.mendelian.txt

 		zcat ${sample}.merged.vcf.gz | grep -v '#' | cut -f1-2 | sed s'/\t/_/g' | sort | uniq -c | sed 's/\s\+/\t/g' | awk '{ if ($1 != 1) { print } }' | cut -f3 > ${sample}.vcf_dup.txt
 		cat ${family_name}.mendelian.txt | cut -f13 | sort | uniq -c > ${family_name}.sister.reproducibility.txt

 		cat ${family_name}.mendelian.txt | cut -f11 | sort | uniq -c | grep '1:1:1' > ${family_name}.mendelian.txt
 		cat ${family_name}.mendelian.txt | cut -f11 | sort | uniq -c | grep 'Ref:1:1' >> ${family_name}.mendelian.txt
 		cat ${family_name}.mendelian.txt | cut -f12 | sort | uniq -c | grep 'yes' >> ${family_name}.mendelian.txt

 	>>>

@@ -21,8 +24,8 @@ task merge {
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File merged_vcf_gz = "${sample}.merged.vcf.gz"
 		File merged_vcf_idx = "${sample}.merged.vcf.gz.tbi"
 		File vcf_dup = "${sample}.vcf_dup.txt"
 		File family_all_info = "${family_name}.mendelian.txt"
 		File sister_consistency = "${family_name}.sister.reproducibility.txt"
 		File family_mendelian = "${family_name}.mendelian.txt"
 	}
 }
--- a/tasks/mergeSister.wdl
+++ b/tasks/mergeSister.wdl
@@ -1,34 +0,0 @@
 # Task mergeSister: combines the LCL5 and LCL6 trio VCFs into one file.
 # The two VCFs are merged twice with `rtg vcfmerge`, once in each input order.
 # The '##' header lines and column 8 are taken from the LCL5-first merge; that
 # column is appended as an extra trailing column to the non-'##' lines of the
 # LCL6-first merge, and header plus body are concatenated.
 #
 # Inputs:
 #   LCL5_trio_vcf_gz, LCL6_trio_vcf_gz   - the two trio VCFs
 #   LCL5_trio_vcf_idx, LCL6_trio_vcf_idx - not referenced in the command
 #                                          (presumably the accompanying indexes - confirm)
 #   family_name                          - stem of the output file name
 #   docker, cluster_config, disk_size    - used only in the runtime section
 # Output:
 #   family_mendelian_info - ${family_name}.trio.info.vcf
 #
 # NOTE(review): pasting the two bodies line by line presumably relies on both
 # merges producing the same records in the same order - confirm.
 task mergeSister {
 	File LCL5_trio_vcf_gz
 	File LCL5_trio_vcf_idx
 	File LCL6_trio_vcf_gz
 	File LCL6_trio_vcf_idx
 	String family_name
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<
 		rtg vcfmerge -o LCL5.LCL6.merged.vcf.gz ${LCL5_trio_vcf_gz} ${LCL6_trio_vcf_gz}

 		rtg vcfmerge -o LCL6.LCL5.merged.vcf.gz ${LCL6_trio_vcf_gz} ${LCL5_trio_vcf_gz}

 		zcat LCL5.LCL6.merged.vcf.gz | grep '##' > header
 		zcat LCL5.LCL6.merged.vcf.gz | grep -v '##' | cut -f8 > LCL5.mendelian
 		zcat LCL6.LCL5.merged.vcf.gz | grep -v '##' | paste - LCL5.mendelian > body

 		cat header body > ${family_name}.trio.info.vcf
 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}

 	output {
 		File family_mendelian_info = "${family_name}.trio.info.vcf"
 	}

 }
--- a/tasks/mergeVCFInfo.wdl
+++ b/tasks/mergeVCFInfo.wdl
@@ -1,25 +0,0 @@
 # Task mergeVCFInfo: merges all input VCFs into one with
 # `rtg vcfmerge --force-merge-all`.
 #
 # Inputs:
 #   vcf_gz  - VCFs to merge, passed space-separated on the command line
 #   vcf_idx - not referenced in the command (presumably the accompanying
 #             indexes - confirm)
 #   sample  - stem of the output file name
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs:
 #   merged_vcf     - ${sample}.merged.info.vcf.gz
 #   merged_vcf_idx - the matching .tbi file; not created explicitly by the command
 #                    (presumably written by rtg - confirm)
 task mergeVCFInfo {
 	Array[File] vcf_gz
 	Array[File] vcf_idx
 	String sample
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 		rtg vcfmerge --force-merge-all -o ${sample}.merged.info.vcf.gz ${sep=" " vcf_gz}
 
 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File merged_vcf = "${sample}.merged.info.vcf.gz"
 		File merged_vcf_idx = "${sample}.merged.info.vcf.gz.tbi"
 	}
 }
--- a/tasks/merge_info.wdl
+++ b/tasks/merge_info.wdl
@@ -1,24 +0,0 @@
 # Task merge_info: runs /opt/merge_mendelian_vcfinfo.py to combine the VCF
 # information table with the mendelian information table of one sample.
 #
 # Inputs:
 #   vcfInfo       - passed to the script via -vcfInfo
 #   mendelianInfo - passed via -mendelianInfo
 #   sample        - passed via -sample; also the stem of the output file name
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Output (expected to be written by the script):
 #   all_info - ${sample}_mendelian_vcfInfo.vcf
 task merge_info {
 	File vcfInfo
 	File mendelianInfo
 	String sample
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 		python /opt/merge_mendelian_vcfinfo.py -vcfInfo ${vcfInfo} -mendelianInfo ${mendelianInfo} -sample ${sample}

 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File all_info = "${sample}_mendelian_vcfInfo.vcf"
 	}
 }
--- a/tasks/oneClass.wdl
+++ b/tasks/oneClass.wdl
@@ -1,39 +0,0 @@
 # Task oneClass: runs /opt/oneClass.py twice, once on the SNV train/test pair
 # and once on the INDEL train/test pair.
 #
 # Inputs:
 #   snv_train_vcf, snv_test_vcf     - passed as -train / -test for the SNV run
 #   indel_train_vcf, indel_test_vcf - passed as -train / -test for the INDEL run
 #   sampleName - defaults to the base name of snv_train_vcf with the
 #                ".normed.snv.train.txt" suffix removed; the runs are named
 #                ${sampleName}_snv and ${sampleName}_indel
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs (expected to be written by the script), for each of snv and indel:
 #   *_predicted_true.txt, *_predicted_false.txt,
 #   *_predicted_true.bed, *_predicted_false.bed, *_padding.bed
 task oneClass {
 	File snv_train_vcf
 	File snv_test_vcf
 	File indel_train_vcf
 	File indel_test_vcf
 	String sampleName = basename(snv_train_vcf,".normed.snv.train.txt")
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 	python /opt/oneClass.py -train ${snv_train_vcf} -test ${snv_test_vcf} -name ${sampleName}_snv

 	python /opt/oneClass.py -train ${indel_train_vcf} -test ${indel_test_vcf} -name ${sampleName}_indel	

 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}

 	output {
 		File snv_true_txt = "${sampleName}_snv_predicted_true.txt"
 		File snv_false_txt = "${sampleName}_snv_predicted_false.txt"
 		File snv_true_bed = "${sampleName}_snv_predicted_true.bed"
 		File snv_false_bed = "${sampleName}_snv_predicted_false.bed"
 		File snv_padding = "${sampleName}_snv_padding.bed"
 		File indel_true_txt = "${sampleName}_indel_predicted_true.txt"
 		File indel_false_txt = "${sampleName}_indel_predicted_false.txt"
 		File indel_true_bed = "${sampleName}_indel_predicted_true.bed"
 		File indel_false_bed = "${sampleName}_indel_predicted_false.bed"
 		File indel_padding = "${sampleName}_indel_padding.bed"
 	}
 }

--- a/tasks/reformVCF.wdl
+++ b/tasks/reformVCF.wdl
@@ -1,38 +0,0 @@
 # Task reformVCF: runs /opt/reformVCF.py on the family mendelian VCF and then
 # derives one filtered text table per sample (LCL5-LCL8). The filter drops
 # every line that contains '##', '0/0' or './.' anywhere on the line.
 #
 # Inputs:
 #   family_mendelian_info - passed to the script via -vcf
 #   family_name           - passed via -name; also the stem of all output names
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs:
 #   LCL5..LCL8_family_info     - ${family_name}.LCLn.vcf (expected from the script)
 #   family_info                - ${family_name}.vcf (expected from the script)
 #   LCL5..LCL8_family_info_txt - ${family_name}.LCLn.txt (written by the grep filters)
 #
 # NOTE(review): family_name is declared as File although it is only used as a
 # name prefix in the command and output paths - String looks intended; confirm.
 task reformVCF {
 	File family_mendelian_info
 	File family_name
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 	python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name}

 	cat ${family_name}.LCL5.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL5.txt
 	cat ${family_name}.LCL6.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL6.txt
 	cat ${family_name}.LCL7.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL7.txt
 	cat ${family_name}.LCL8.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL8.txt

 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}

 	output {
 		File LCL5_family_info = "${family_name}.LCL5.vcf"
 		File LCL6_family_info = "${family_name}.LCL6.vcf"
 		File LCL7_family_info = "${family_name}.LCL7.vcf"
 		File LCL8_family_info = "${family_name}.LCL8.vcf"
 		File family_info = "${family_name}.vcf"
 		File LCL5_family_info_txt = "${family_name}.LCL5.txt"
 		File LCL6_family_info_txt = "${family_name}.LCL6.txt"
 		File LCL7_family_info_txt = "${family_name}.LCL7.txt"
 		File LCL8_family_info_txt = "${family_name}.LCL8.txt"
 	}
 }

--- a/tasks/sister.wdl
+++ b/tasks/sister.wdl
@@ -9,7 +9,7 @@ task sister {
 	String LCL7_name
 	String LCL8_name
 	String fasta
 	String family_name
 	String family_chromo_name
 	String docker
 	String cluster_config
 	String disk_size
@@ -21,21 +21,25 @@ task sister {
 		cat ${LCL7_vcf} | grep -v '##' | cut -f10 > F7
 		cat ${LCL8_vcf} | grep -v '##' | cut -f10 > M8
 		cat ${LCL5_vcf} | grep -v '##' | paste - D6 F7 M8 > body
 		cat ${LCL5_vcf} | grep '##' | cat - body > ${family_name}.vcf
 		cat ${LCL5_vcf} | grep '##' | cat - body > ${family_chromo_name}.vcf
 	# prepare ped file, D5
 		echo "${family_name}	${LCL8_name}	0	0	2	-9
 		${family_name}	${LCL7_name}	0	0	1	-9
 		${family_name}	${LCL5_name}	${LCL7_name}	${LCL8_name}	2	-9" > ${family_name}.D5.ped
 		echo "${family_chromo_name}	${LCL8_name}	0	0	2	-9
 		${family_chromo_name}	${LCL7_name}	0	0	1	-9
 		${family_chromo_name}	${LCL5_name}	${LCL7_name}	${LCL8_name}	2	-9" > ${family_chromo_name}.D5.ped

 		mkdir VBT_D5
 		/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_name}.vcf -father ${family_name}.vcf -child ${family_name}.vcf -pedigree ${family_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_name}.D5 --output-violation-regions -thread-count $nt
 		/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_chromo_name}.vcf -father ${family_chromo_name}.vcf -child ${family_chromo_name}.vcf -pedigree ${family_chromo_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_chromo_name}.D5 --output-violation-regions -thread-count $nt

 		cat VBT_D5/${family_chromo_name}.D5_trio.vcf > ${family_chromo_name}.D5.vcf
 	# prepare ped file, D6
 		echo "${family_name}	${LCL8_name}	0	0	2	-9
 		${family_name}	${LCL7_name}	0	0	1	-9
 		${family_name}	${LCL6_name}	${LCL7_name}	${LCL8_name}	2	-9" > ${family_name}.D6.ped
 		echo "${family_chromo_name}	${LCL8_name}	0	0	2	-9
 		${family_chromo_name}	${LCL7_name}	0	0	1	-9
 		${family_chromo_name}	${LCL6_name}	${LCL7_name}	${LCL8_name}	2	-9" > ${family_chromo_name}.D6.ped

 		mkdir VBT_D6
 		/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_name}.vcf -father ${family_name}.vcf -child ${family_name}.vcf -pedigree ${family_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_name}.D6 --output-violation-regions -thread-count $nt
 		/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_chromo_name}.vcf -father ${family_chromo_name}.vcf -child ${family_chromo_name}.vcf -pedigree ${family_chromo_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_chromo_name}.D6 --output-violation-regions -thread-count $nt

 		cat VBT_D6/${family_chromo_name}.D6_trio.vcf > ${family_chromo_name}.D6.vcf
 	>>>

 	runtime {
@@ -47,6 +51,8 @@ task sister {
 	output {
 		Array[File] D5_mendelian = glob("VBT_D5/*")
 		Array[File] D6_mendelian = glob("VBT_D6/*")
 		File family_vcf = "${family_name}.vcf"
 		File D5_trio_vcf = "${family_chromo_name}.D5.vcf"
 		File D6_trio_vcf = "${family_chromo_name}.D6.vcf"
 		File family_vcf = "${family_chromo_name}.vcf"
 	}
 }
--- a/tasks/two_family_merge.wdl
+++ b/tasks/two_family_merge.wdl
@@ -1,15 +1,25 @@
 task two_family_merge {
 	File LCL5_trio_vcf
 	File LCL6_trio_vcf
 	String family_name
 	File genotype_file
 	String family_chromo_name
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<
 		cat ${LCL5_trio_vcf} | grep -v '##' > ${family_name}.LCL5.txt
 		cat ${LCL6_trio_vcf} | grep -v '##' > ${family_name}.LCL6.txt
 		python /opt/merge_two_family.py -LCL5 ${family_name}.LCL5.txt -LCL6 ${family_name}.LCL6.txt -family ${family_name}
 		cat ${LCL5_trio_vcf} | grep -v '##' > ${family_chromo_name}.LCL5.txt
 		cat ${LCL6_trio_vcf} | grep -v '##' > ${family_chromo_name}.LCL6.txt
 		cat ${genotype_file} | grep -v '##' | awk '
 			BEGIN { OFS = "\t" }
 			NF > 2 && FNR > 1 { 
 				for ( i=9; i<=NF; i++ ) { 
 					split($i,a,":") ;$i = a[1];
 				} 
 			} 
 			{ print }
 		' | cut -f1,2,4,5,10- > ${family_chromo_name}.genotype.txt
 		python /opt/merge_two_family_with_genotype.py -LCL5 ${family_chromo_name}.LCL5.txt -LCL6 ${family_chromo_name}.LCL6.txt -genotype ${family_chromo_name}.genotype.txt -family ${family_chromo_name}
 	>>>

 	runtime {
@@ -20,7 +30,7 @@ task two_family_merge {
 	}

 	output {
 		File family_mendelian_info = "${family_name}.txt"
 		File family_mendelian_info = "${family_chromo_name}.txt"
 	}

 }
--- a/tasks/variantsNorm.wdl
+++ b/tasks/variantsNorm.wdl
@@ -1,33 +0,0 @@
 # Task variantsNorm: restricts a VCF to chr1-chr22 and chrX, normalizes it with
 # `bcftools norm` against the reference, and writes a copy without '##' lines.
 #
 # Steps of the command:
 #   1. lines containing '#' go to "header", all other lines to "body"
 #   2. body lines starting with one of the listed chromosome names are kept
 #   3. header + filtered body form ${sampleName}.filtered.vcf
 #   4. bcftools norm -f ${ref_dir}/${fasta} writes ${sampleName}.normed.vcf
 #   5. the normalized VCF minus lines containing '##' is ${sampleName}.normed.txt
 #
 # Inputs:
 #   vcf            - VCF to filter and normalize
 #   ref_dir, fasta - reference is read from ${ref_dir}/${fasta}
 #   sampleName     - stem of the intermediate and output file names
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs:
 #   normed_vcf - ${sampleName}.normed.vcf
 #   normed_txt - ${sampleName}.normed.txt
 task variantsNorm {
 	File vcf
 	File ref_dir
 	String fasta
 	String sampleName
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 		cat ${vcf} | grep '#' > header
 		cat ${vcf} | grep -v '#' > body
 		cat body | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > body.filtered
 		cat header body.filtered > ${sampleName}.filtered.vcf

 		/opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf

 		cat ${sampleName}.normed.vcf | grep -v '##' > ${sampleName}.normed.txt

 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File normed_vcf = "${sampleName}.normed.vcf"
 		File normed_txt = "${sampleName}.normed.txt"
 	}
 }
--- a/tasks/votes.wdl
+++ b/tasks/votes.wdl
@@ -1,41 +0,0 @@
 # Task votes: runs /opt/voted_by_vcfinfo_mendelianinfo.py on a folder of family
 # mendelian tables plus the VCF information of one chromosome, then copies the
 # four per-sample results to chromosome-specific names.
 #
 # Steps of the command:
 #   1. every file of family_mendelian_info is copied into ./temp
 #   2. the VCF minus lines containing '##' is written to vcf_info.txt
 #   3. the script is called with -folder ./temp -vcf vcf_info.txt
 #   4. LCLn_voted.vcf is copied to LCLn.${chromo}.voted.vcf for n = 5..8
 #
 # Inputs:
 #   family_mendelian_info - tables copied into ./temp
 #   vcf                   - source of vcf_info.txt
 #   chromo                - inserted into the output file names
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs:
 #   LCL5..LCL8_voted_vcf - LCLn.${chromo}.voted.vcf
 #   all_sample_info      - all_sample_information.txt (expected from the script)
 #
 # NOTE(review): input files sharing a base name would overwrite each other in
 # ./temp - confirm the callers always pass distinct file names.
 task votes {
 	Array[File] family_mendelian_info
 	File vcf 
 	String chromo
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<
 		mkdir temp
 		for i in ${sep=" " family_mendelian_info}
 		do
 			cp $i temp
 		done

 		cat ${vcf} | grep -v '##' > vcf_info.txt

 		python /opt/voted_by_vcfinfo_mendelianinfo.py -folder ./temp -vcf vcf_info.txt

 		cp LCL5_voted.vcf LCL5.${chromo}.voted.vcf
 		cp LCL6_voted.vcf LCL6.${chromo}.voted.vcf
 		cp LCL7_voted.vcf LCL7.${chromo}.voted.vcf
 		cp LCL8_voted.vcf LCL8.${chromo}.voted.vcf

 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File LCL5_voted_vcf = "LCL5.${chromo}.voted.vcf"
 		File LCL6_voted_vcf = "LCL6.${chromo}.voted.vcf"
 		File LCL7_voted_vcf = "LCL7.${chromo}.voted.vcf"
 		File LCL8_voted_vcf = "LCL8.${chromo}.voted.vcf"
 		File all_sample_info = "all_sample_information.txt"
 	}
 }

--- a/tasks/zipIndex.wdl
+++ b/tasks/zipIndex.wdl
@@ -1,24 +0,0 @@
 # Task zipIndex: compresses a VCF with `rtg bgzip` and indexes the result with
 # `rtg index -f vcf`.
 #
 # Inputs:
 #   vcf      - uncompressed VCF
 #   vcf_name - defaults to the base name of vcf with the ".vcf" suffix removed;
 #              stem of the output file names
 #   docker, cluster_config, disk_size - used only in the runtime section
 # Outputs:
 #   vcf_gz  - ${vcf_name}.vcf.gz, written to the working directory via -c and
 #             shell redirection
 #   vcf_idx - ${vcf_name}.vcf.gz.tbi (expected from `rtg index`)
 task zipIndex {
 	File vcf
 	String vcf_name = basename(vcf,".vcf")
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<
 		rtg bgzip ${vcf} -c > ${vcf_name}.vcf.gz
 		rtg index -f vcf ${vcf_name}.vcf.gz

 	>>>

 	# The dataDisk string is assembled from the disk_size input.
 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File vcf_gz = "${vcf_name}.vcf.gz"
 		File vcf_idx = "${vcf_name}.vcf.gz.tbi"
 	}
 }
--- a/workflow.wdl
+++ b/workflow.wdl
@@ -1,10 +1,12 @@
 import "./tasks/sister.wdl" as sister
 import "./tasks/two_family_merge.wdl" as two_family_merge
 import "./tasks/merge.wdl" as merge

 workflow {{ project_name }} {
 	File inputSamplesFile
 	Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
 	File ref_dir
 	String docker
 	String family_name
 	String fasta
 	String cluster_config
 	String disk_size
@@ -22,10 +24,25 @@ workflow {{ project_name }} {
 			LCL7_name=quartet[6],
 			LCL8_name=quartet[7],
 			fasta=fasta,
 			family_name=quartet[8],
 			docker=docker,
 			family_chromo_name=quartet[8],
 			cluster_config=cluster_config,
 			disk_size=disk_size
 		}
 		call two_family_merge.two_family_merge as two_family_merge {
 			input:
 			LCL5_trio_vcf=sister.D5_trio_vcf,
 			LCL6_trio_vcf=sister.D6_trio_vcf,
 			genotype_file=sister.family_vcf,
 			family_chromo_name=quartet[8],
 			cluster_config=cluster_config,
 			disk_size=disk_size
 		}
 	}
 	call merge.merge as merge {
 		input:
 		family_mendelian_info=two_family_merge.family_mendelian_info,
 		family_name=family_name,
 		cluster_config=cluster_config,
 		disk_size=disk_size
 	}
 }