@@ -0,0 +1,81 @@ | |||
from __future__ import division | |||
import pandas as pd | |||
import sys, argparse, os | |||
import fileinput | |||
import re | |||
# Extract Mendelian-concordance information for one quartet family.
#
# The script joins the two per-sister trio tables (LCL5 and LCL6) and the
# four-sample genotype table on (#CHROM, POS) and writes one tab-separated
# line per position to "<family>.txt" with the columns:
#   CHROM POS REF ALT LCL5 LCL6 LCL7 LCL8 TRIO5 TRIO6 mendelian
#   mendelian_count sister_count
# where `mendelian` is "<sister tag>:<trio5 tag>:<trio6 tag>".

# Genotype strings that carry no variant call: missing ('./.') and
# homozygous reference ('0/0').
NON_VARIANT_GENOTYPES = ('./.', '0/0')


def classify_sisters(lcl5, lcl6):
    """Compare the genotypes of the two sisters.

    Returns a ``(tag, sister_count)`` tuple:

    * identical and './.'      -> ('noInfo', 'no')
    * identical and '0/0'      -> ('Ref', 'no')
    * identical, anything else -> ('1', 'yes_same')
    * different, both non-variant ('./.' or '0/0') -> ('0', 'no')
    * different otherwise      -> ('0', 'yes_diff')
    """
    if lcl5 == lcl6:
        if lcl5 == './.':
            return 'noInfo', 'no'
        if lcl5 == '0/0':
            return 'Ref', 'no'
        return '1', 'yes_same'
    if lcl5 in NON_VARIANT_GENOTYPES and lcl6 in NON_VARIANT_GENOTYPES:
        return '0', 'no'
    return '0', 'yes_diff'


def trio_tag(child, father, mother, trio_info):
    """Return the Mendelian tag for one child/father/mother trio.

    * all three genotypes './.' -> 'noInfo'
    * all three genotypes '0/0' -> 'Ref'
    * ``trio_info`` is null (position absent from the trio table) -> 'unVBT'
    * otherwise the text after the first '=' in ``trio_info``

    NOTE(review): ``trio_info`` is presumably the INFO field of the VBT trio
    VCF (a single KEY=VALUE entry) -- confirm against the VBT output.
    """
    if child == father == mother == './.':
        return 'noInfo'
    if child == father == mother == '0/0':
        return 'Ref'
    if pd.isnull(trio_info):
        return 'unVBT'
    return trio_info.split('=')[1]


def mendelian_count_flag(genotypes):
    """Return 'no' when every genotype is './.' or '0/0', else 'yes'."""
    if all(gt in NON_VARIANT_GENOTYPES for gt in genotypes):
        return 'no'
    return 'yes'


def build_parser():
    """Build the command-line parser (all four options are required)."""
    parser = argparse.ArgumentParser(description="this script is to extract mendelian concordance information")
    parser.add_argument('-LCL5', '--LCL5', type=str, help='LCL5 family info', required=True)
    parser.add_argument('-LCL6', '--LCL6', type=str, help='LCL6 family info', required=True)
    parser.add_argument('-genotype', '--genotype', type=str, help='Genotype information of a set of four family members', required=True)
    parser.add_argument('-family', '--family', type=str, help='family name', required=True)
    return parser


def main():
    """Read the three input tables, merge them and write '<family>.txt'."""
    args = build_parser().parse_args()

    # input files
    lcl5_dat = pd.read_table(args.LCL5)
    lcl6_dat = pd.read_table(args.LCL6)
    genotype_dat = pd.read_table(args.genotype)

    merged_df = pd.merge(lcl5_dat, lcl6_dat, how='outer',
                         left_on=['#CHROM', 'POS'], right_on=['#CHROM', 'POS'])
    merged_genotype_df = pd.merge(merged_df, genotype_dat, how='outer',
                                  left_on=['#CHROM', 'POS'], right_on=['#CHROM', 'POS'])

    # Columns are picked by position, so the input layouts must not change.
    # NOTE(review): indices 7 and 17 presumably land on the INFO column of the
    # LCL5 and LCL6 trio tables, and 22-27 on REF, ALT and the four genotype
    # columns of the genotype table -- confirm against the upstream files.
    merged_genotype_df_sub = merged_genotype_df.iloc[:, [0, 1, 22, 23, 24, 25, 26, 27, 7, 17]]
    merged_genotype_df_sub.columns = ['CHROM', 'POS', 'REF', 'ALT', 'LCL5', 'LCL6', 'LCL7', 'LCL8', 'TRIO5', 'TRIO6']

    # output file; the context manager guarantees the handle is flushed and
    # closed even if a row raises part-way through.
    with open(args.family + '.txt', 'w') as family_file:
        for row in merged_genotype_df_sub.itertuples():
            # sister
            sister_tag, sister_count = classify_sisters(row.LCL5, row.LCL6)
            # family trio5 / family trio6 (LCL7 and LCL8 are shared by both trios)
            mendelian = ':'.join([
                sister_tag,
                trio_tag(row.LCL5, row.LCL7, row.LCL8, row.TRIO5),
                trio_tag(row.LCL6, row.LCL7, row.LCL8, row.TRIO6),
            ])
            # not count into family
            mendelian_count = mendelian_count_flag(
                (row.LCL5, row.LCL6, row.LCL7, row.LCL8))

            fields = [
                row.CHROM, str(row.POS), row.REF, row.ALT,
                row.LCL5, row.LCL6, row.LCL7, row.LCL8,
                str(row.TRIO5), str(row.TRIO6),
                str(mendelian), str(mendelian_count), str(sister_count),
            ]
            family_file.write('\t'.join(fields) + '\n')


if __name__ == '__main__':
    main()
@@ -1,8 +1,11 @@ | |||
{ | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.two_family_merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | |||
"{{ project_name }}.family_name": "{{ family_name }}", | |||
"{{ project_name }}.sister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.disk_size": "150", | |||
"{{ project_name }}.merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | |||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | |||
} | |||
} |
@@ -1,32 +0,0 @@ | |||
# VCFinfo: run /opt/variants_quality_location_intergration.py on the
# repeat-annotated VCF, then split its "<sample>_variant_quality_location.txt"
# output into one file per chromosome.
#
# Inputs:
#   repeat_annotated_vcf  VCF passed to the script via -vcf
#   sample                prefix passed via -prefix; also prefixes all outputs
#   docker, cluster_config, disk_size  runtime settings
#
# Command notes:
#   * the line(s) containing '#CHROM' are saved to a file named `header` and
#     prepended to every per-chromosome file;
#   * only chr1..chr22 and chrX are split out;
#   * `grep -w $i` matches the chromosome name as a whole word anywhere on the
#     line, not only in the first column.
#
# Outputs:
#   extracted_info   the full "<sample>_variant_quality_location.txt"
#   chromo_vcfInfo   every "*.vcfInfo.txt" file found in the working directory
task VCFinfo { | |||
File repeat_annotated_vcf | |||
String sample | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
python /opt/variants_quality_location_intergration.py -vcf ${repeat_annotated_vcf} -prefix ${sample} | |||
cat ${sample}_variant_quality_location.txt | grep '#CHROM' > header | |||
for i in chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX | |||
do | |||
cat ${sample}_variant_quality_location.txt | grep -w $i | cat header - > ${sample}.$i.vcfInfo.txt | |||
done | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File extracted_info = "${sample}_variant_quality_location.txt" | |||
Array[File] chromo_vcfInfo = glob("*.vcfInfo.txt") | |||
} | |||
} |
@@ -1,31 +0,0 @@ | |||
# VCFrename: relabel the samples of a trio VCF with `rtg vcfannotate --relabel`.
#
# The command writes a three-line mapping file rename.txt:
#   MOTHER -> "<mother_name>.<child>"
#   FATHER -> "<father_name>.<child>"
#   CHILD  -> "<child_name>"
# and passes it to rtg, writing "<family_name>.<child>.rename.vcf.gz".
#
# Inputs:
#   trio_vcf_gz   compressed trio VCF to relabel
#   trio_vcf_idx  declared but not referenced in the command
#                 (presumably localised so the index sits next to the VCF -- confirm)
#   mother_name, father_name, child_name, child, family_name  naming strings
#   docker, cluster_config, disk_size  runtime settings
#
# Outputs:
#   rename_trio_vcf_gz   the relabelled VCF
#   rename_trio_vcf_idx  its ".tbi"; the command never creates it explicitly
#                        (presumably rtg writes it alongside the VCF -- confirm)
task VCFrename { | |||
File trio_vcf_gz | |||
File trio_vcf_idx | |||
String mother_name | |||
String father_name | |||
String child_name | |||
String family_name | |||
String child | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
echo "MOTHER ${mother_name}.${child} | |||
FATHER ${father_name}.${child} | |||
CHILD ${child_name}" > rename.txt | |||
rtg vcfannotate -i ${trio_vcf_gz} -o ${family_name}.${child}.rename.vcf.gz --relabel=rename.txt | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File rename_trio_vcf_gz = "${family_name}.${child}.rename.vcf.gz" | |||
File rename_trio_vcf_idx = "${family_name}.${child}.rename.vcf.gz.tbi" | |||
} | |||
} |
@@ -1,27 +0,0 @@ | |||
# bed_annotation: annotate a merged VCF with `rtg vcfannotate --bed-info`
# using the given BED file, then decompress the result with gunzip.
#
# Inputs:
#   merged_vcf_gz   compressed VCF to annotate
#   merged_vcf_idx  declared but not referenced in the command
#                   (presumably localised next to the VCF -- confirm)
#   repeat_bed      BED file passed to --bed-info
#   sample          prefix of the output file name
#   docker, cluster_config, disk_size  runtime settings
#
# Output:
#   repeat_annotated_vcf  "<sample>.mendelian.merged.repeatAnno.vcf"
#                         (uncompressed: gunzip replaces the .vcf.gz)
task bed_annotation { | |||
File merged_vcf_gz | |||
File merged_vcf_idx | |||
File repeat_bed | |||
String sample | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
rtg vcfannotate --bed-info=${repeat_bed} -i ${merged_vcf_gz} -o ${sample}.mendelian.merged.repeatAnno.vcf.gz | |||
gunzip ${sample}.mendelian.merged.repeatAnno.vcf.gz | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File repeat_annotated_vcf = "${sample}.mendelian.merged.repeatAnno.vcf" | |||
} | |||
} |
@@ -1,26 +0,0 @@ | |||
# FinalResult: run /opt/FinalResult2VCF.py on the extracted VCF information
# and the Mendelian annotation table.
#
# Inputs:
#   extracted_info  passed to the script as -vcfInfo
#   annotated_txt   passed as -mendelianInfo
#   prefix          derived: basename of annotated_txt with the
#                   ".mendelian.txt" suffix removed; passed as -prefix
#   sample          passed as -sample
#   docker, cluster_config, disk_size  runtime settings
#
# Outputs (both expected to be written by the script under <prefix>):
#   benchmarking_calls  "<prefix>_benchmarking_calls.vcf"
#   all_info            "<prefix>_all_sample_information.vcf"
task FinalResult { | |||
File extracted_info | |||
File annotated_txt | |||
String prefix = basename(annotated_txt,".mendelian.txt") | |||
String sample | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
python /opt/FinalResult2VCF.py -vcfInfo ${extracted_info} -mendelianInfo ${annotated_txt} -prefix ${prefix} -sample ${sample} | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File benchmarking_calls = "${prefix}_benchmarking_calls.vcf" | |||
File all_info = "${prefix}_all_sample_information.vcf" | |||
} | |||
} |
@@ -1,34 +0,0 @@ | |||
# mendelian: run `vbt mendelian` on one child/father/mother trio.
#
# Inputs:
#   child_vcf, LCL7_vcf, LCL8_vcf  VCFs passed as -child, -father (LCL7)
#                                  and -mother (LCL8)
#   LCL7_name, LCL8_name           declared but not referenced in the command
#   child_name                     prefix for the VBT output ("<child_name>.family")
#   ref_dir, fasta                 reference is read from "<ref_dir>/<fasta>"
#   docker, cluster_config, disk_size  runtime settings
#
# Command notes:
#   * LD_LIBRARY_PATH is pointed at /opt/htslib-1.9 before vbt is started;
#   * the thread count is taken from `nproc`;
#   * VBT writes into the directory VBT/; its "<child_name>.family_trio.vcf"
#     is then copied to "<child_name>.family.vcf" in the working directory.
#
# Outputs:
#   vbt_mendelian  every file under VBT/
#   trio_vcf       "<child_name>.family.vcf"
task mendelian { | |||
File child_vcf | |||
File LCL7_vcf | |||
File LCL8_vcf | |||
String LCL7_name | |||
String LCL8_name | |||
String child_name | |||
File ref_dir | |||
String fasta | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
export LD_LIBRARY_PATH=/opt/htslib-1.9 | |||
nt=$(nproc) | |||
mkdir VBT | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${LCL8_vcf} -father ${LCL7_vcf} -child ${child_vcf} -outDir VBT -out-prefix ${child_name}.family --output-violation-regions -thread-count $nt | |||
cat VBT/${child_name}.family_trio.vcf > ${child_name}.family.vcf | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
Array[File] vbt_mendelian = glob("VBT/*") | |||
File trio_vcf = "${child_name}.family.vcf" | |||
} | |||
} |
@@ -1,16 +1,19 @@ | |||
task merge { | |||
Array[File] family_vcf_gz | |||
Array[File] family_vcf_idx | |||
String sample | |||
Array[File] family_mendelian_info | |||
String family_name | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
# Command of task `merge`.
#
# 1. Merge the family VCFs with `rtg vcfmerge --force-merge-all`.
# 2. Concatenate the per-chromosome Mendelian tables and sort them by
#    column 1, then numerically by column 2, into "<family_name>.mendelian.txt".
# 3. List positions (CHROM_POS) that occur more than once in the merged VCF
#    into "<sample>.vcf_dup.txt".
# 4. Count the values of column 13 (sister_count) into
#    "<family_name>.sister.reproducibility.txt".
# 5. Write the summary counts ('1:1:1' and 'Ref:1:1' from column 11, 'yes'
#    from column 12) to "<family_name>.mendelian.summary.txt".
#
# Step 5 used to redirect into "<family_name>.mendelian.txt" while reading
# from it. The shell truncates a redirection target before the pipeline
# starts, so the full table was emptied and the later greps read a clobbered
# file. The summary now goes to its own file and the full table stays intact.
#
# NOTE(review): the `family_mendelian` output of this task still points at
# "<family_name>.mendelian.txt" (the same path as `family_all_info`); it
# should be repointed to "<family_name>.mendelian.summary.txt".
command <<<
  rtg vcfmerge --force-merge-all -o ${sample}.merged.vcf.gz ${sep=" " family_vcf_gz}
  cat ${sep=" " family_mendelian_info} | sort -k1,1 -k2,2n > ${family_name}.mendelian.txt
  zcat ${sample}.merged.vcf.gz | grep -v '#' | cut -f1-2 | sed s'/\t/_/g' | sort | uniq -c | sed 's/\s\+/\t/g' | awk '{ if ($1 != 1) { print } }' | cut -f3 > ${sample}.vcf_dup.txt
  cat ${family_name}.mendelian.txt | cut -f13 | sort | uniq -c > ${family_name}.sister.reproducibility.txt
  cat ${family_name}.mendelian.txt | cut -f11 | sort | uniq -c | grep '1:1:1' > ${family_name}.mendelian.summary.txt
  cat ${family_name}.mendelian.txt | cut -f11 | sort | uniq -c | grep 'Ref:1:1' >> ${family_name}.mendelian.summary.txt
  cat ${family_name}.mendelian.txt | cut -f12 | sort | uniq -c | grep 'yes' >> ${family_name}.mendelian.summary.txt
>>>
@@ -21,8 +24,8 @@ task merge { | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File merged_vcf_gz = "${sample}.merged.vcf.gz" | |||
File merged_vcf_idx = "${sample}.merged.vcf.gz.tbi" | |||
File vcf_dup = "${sample}.vcf_dup.txt" | |||
File family_all_info = "${family_name}.mendelian.txt" | |||
File sister_consistency = "${family_name}.sister.reproducibility.txt" | |||
File family_mendelian = "${family_name}.mendelian.txt" | |||
} | |||
} |
@@ -1,34 +0,0 @@ | |||
# mergeSister: combine the LCL5 and LCL6 trio VCFs into one table that
# carries column 8 of the LCL5-first merge as an extra trailing column.
#
# Command steps:
#   1. `rtg vcfmerge` the two trio VCFs twice, once LCL5-first and once
#      LCL6-first;
#   2. keep the '##' lines of the LCL5-first merge as `header`;
#   3. take column 8 of every non-'##' line of the LCL5-first merge
#      (the '#CHROM' line is included, so its column-8 label comes first);
#   4. paste that column onto the end of the non-'##' lines of the LCL6-first
#      merge;
#   5. write header + body to "<family_name>.trio.info.vcf".
#
# NOTE(review): step 4 pairs lines purely by position, so it assumes both
# merges emit the same records in the same order -- confirm.
#
# Inputs:
#   LCL5_trio_vcf_gz, LCL6_trio_vcf_gz    compressed trio VCFs
#   LCL5_trio_vcf_idx, LCL6_trio_vcf_idx  declared but not referenced in the
#                                         command (presumably localised next
#                                         to the VCFs -- confirm)
#   family_name                           prefix of the output file
#   docker, cluster_config, disk_size     runtime settings
#
# Output:
#   family_mendelian_info  "<family_name>.trio.info.vcf"
task mergeSister { | |||
File LCL5_trio_vcf_gz | |||
File LCL5_trio_vcf_idx | |||
File LCL6_trio_vcf_gz | |||
File LCL6_trio_vcf_idx | |||
String family_name | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
rtg vcfmerge -o LCL5.LCL6.merged.vcf.gz ${LCL5_trio_vcf_gz} ${LCL6_trio_vcf_gz} | |||
rtg vcfmerge -o LCL6.LCL5.merged.vcf.gz ${LCL6_trio_vcf_gz} ${LCL5_trio_vcf_gz} | |||
zcat LCL5.LCL6.merged.vcf.gz | grep '##' > header | |||
zcat LCL5.LCL6.merged.vcf.gz | grep -v '##' | cut -f8 > LCL5.mendelian | |||
zcat LCL6.LCL5.merged.vcf.gz | grep -v '##' | paste - LCL5.mendelian > body | |||
cat header body > ${family_name}.trio.info.vcf | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File family_mendelian_info = "${family_name}.trio.info.vcf" | |||
} | |||
} |
@@ -1,25 +0,0 @@ | |||
# mergeVCFInfo: merge several compressed VCFs into one with
# `rtg vcfmerge --force-merge-all`.
#
# Inputs:
#   vcf_gz   VCFs to merge, passed space-separated on the command line
#   vcf_idx  declared but not referenced in the command
#            (presumably localised next to the VCFs -- confirm)
#   sample   prefix of the output file
#   docker, cluster_config, disk_size  runtime settings
#
# Outputs:
#   merged_vcf      "<sample>.merged.info.vcf.gz"
#   merged_vcf_idx  its ".tbi"; not created explicitly by the command
#                   (presumably written by rtg alongside the VCF -- confirm)
task mergeVCFInfo { | |||
Array[File] vcf_gz | |||
Array[File] vcf_idx | |||
String sample | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
rtg vcfmerge --force-merge-all -o ${sample}.merged.info.vcf.gz ${sep=" " vcf_gz} | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File merged_vcf = "${sample}.merged.info.vcf.gz" | |||
File merged_vcf_idx = "${sample}.merged.info.vcf.gz.tbi" | |||
} | |||
} |
@@ -1,24 +0,0 @@ | |||
# merge_info: run /opt/merge_mendelian_vcfinfo.py to combine the VCF
# information table with the Mendelian information table for one sample.
#
# Inputs:
#   vcfInfo        passed to the script as -vcfInfo
#   mendelianInfo  passed as -mendelianInfo
#   sample         passed as -sample; also prefixes the output file
#   docker, cluster_config, disk_size  runtime settings
#
# Output:
#   all_info  "<sample>_mendelian_vcfInfo.vcf" (expected to be written by the script)
task merge_info { | |||
File vcfInfo | |||
File mendelianInfo | |||
String sample | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
python /opt/merge_mendelian_vcfinfo.py -vcfInfo ${vcfInfo} -mendelianInfo ${mendelianInfo} -sample ${sample} | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File all_info = "${sample}_mendelian_vcfInfo.vcf" | |||
} | |||
} |
@@ -1,39 +0,0 @@ | |||
# oneClass: run /opt/oneClass.py twice, once on the SNV train/test pair and
# once on the INDEL train/test pair.
#
# Inputs:
#   snv_train_vcf, snv_test_vcf      passed as -train / -test for the SNV run
#   indel_train_vcf, indel_test_vcf  passed as -train / -test for the INDEL run
#   sampleName   derived: basename of snv_train_vcf with the
#                ".normed.snv.train.txt" suffix removed; the runs are named
#                "<sampleName>_snv" and "<sampleName>_indel"
#   docker, cluster_config, disk_size  runtime settings
#
# Outputs (five per run, all expected to be written by the script):
#   *_predicted_true.txt / *_predicted_false.txt
#   *_predicted_true.bed / *_predicted_false.bed
#   *_padding.bed
task oneClass { | |||
File snv_train_vcf | |||
File snv_test_vcf | |||
File indel_train_vcf | |||
File indel_test_vcf | |||
String sampleName = basename(snv_train_vcf,".normed.snv.train.txt") | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
python /opt/oneClass.py -train ${snv_train_vcf} -test ${snv_test_vcf} -name ${sampleName}_snv | |||
python /opt/oneClass.py -train ${indel_train_vcf} -test ${indel_test_vcf} -name ${sampleName}_indel | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File snv_true_txt = "${sampleName}_snv_predicted_true.txt" | |||
File snv_false_txt = "${sampleName}_snv_predicted_false.txt" | |||
File snv_true_bed = "${sampleName}_snv_predicted_true.bed" | |||
File snv_false_bed = "${sampleName}_snv_predicted_false.bed" | |||
File snv_padding = "${sampleName}_snv_padding.bed" | |||
File indel_true_txt = "${sampleName}_indel_predicted_true.txt" | |||
File indel_false_txt = "${sampleName}_indel_predicted_false.txt" | |||
File indel_true_bed = "${sampleName}_indel_predicted_true.bed" | |||
File indel_false_bed = "${sampleName}_indel_predicted_false.bed" | |||
File indel_padding = "${sampleName}_indel_padding.bed" | |||
} | |||
} | |||
@@ -1,38 +0,0 @@ | |||
# reformVCF: run /opt/reformVCF.py on the family Mendelian VCF, then derive a
# filtered text table for each of the four samples.
#
# Inputs:
#   family_mendelian_info  VCF passed to the script as -vcf
#   family_name            output-name prefix passed as -name. It is a plain
#                          string, not a path, so it is declared String (as in
#                          the other tasks that take family_name); a File
#                          declaration makes the engine treat it as a file to
#                          localise.
#   docker, cluster_config, disk_size  runtime settings
#
# Command notes:
#   For each of LCL5..LCL8 the per-sample VCF written by the script is
#   filtered into "<family_name>.<sample>.txt" by dropping every line that
#   contains '##', '0/0' or './.' anywhere on the line.
#
# Outputs:
#   LCL5..LCL8_family_info      "<family_name>.<sample>.vcf"
#   family_info                 "<family_name>.vcf"
#   LCL5..LCL8_family_info_txt  "<family_name>.<sample>.txt"
task reformVCF {
    File family_mendelian_info
    String family_name
    String docker
    String cluster_config
    String disk_size

    command <<<
        python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name}
        cat ${family_name}.LCL5.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL5.txt
        cat ${family_name}.LCL6.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL6.txt
        cat ${family_name}.LCL7.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL7.txt
        cat ${family_name}.LCL8.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL8.txt
    >>>

    # System disk is fixed at "cloud_ssd 40"; the data disk size comes from
    # disk_size and is mounted at /cromwell_root/.
    runtime {
        docker:docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File LCL5_family_info = "${family_name}.LCL5.vcf"
        File LCL6_family_info = "${family_name}.LCL6.vcf"
        File LCL7_family_info = "${family_name}.LCL7.vcf"
        File LCL8_family_info = "${family_name}.LCL8.vcf"
        File family_info = "${family_name}.vcf"
        File LCL5_family_info_txt = "${family_name}.LCL5.txt"
        File LCL6_family_info_txt = "${family_name}.LCL6.txt"
        File LCL7_family_info_txt = "${family_name}.LCL7.txt"
        File LCL8_family_info_txt = "${family_name}.LCL8.txt"
    }
}
@@ -9,7 +9,7 @@ task sister { | |||
String LCL7_name | |||
String LCL8_name | |||
String fasta | |||
String family_name | |||
String family_chromo_name | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
@@ -21,21 +21,25 @@ task sister { | |||
cat ${LCL7_vcf} | grep -v '##' | cut -f10 > F7 | |||
cat ${LCL8_vcf} | grep -v '##' | cut -f10 > M8 | |||
cat ${LCL5_vcf} | grep -v '##' | paste - D6 F7 M8 > body | |||
cat ${LCL5_vcf} | grep '##' | cat - body > ${family_name}.vcf | |||
cat ${LCL5_vcf} | grep '##' | cat - body > ${family_chromo_name}.vcf | |||
# prepare ped file, D5 | |||
echo "${family_name} ${LCL8_name} 0 0 2 -9 | |||
${family_name} ${LCL7_name} 0 0 1 -9 | |||
${family_name} ${LCL5_name} ${LCL7_name} ${LCL8_name} 2 -9" > ${family_name}.D5.ped | |||
echo "${family_chromo_name} ${LCL8_name} 0 0 2 -9 | |||
${family_chromo_name} ${LCL7_name} 0 0 1 -9 | |||
${family_chromo_name} ${LCL5_name} ${LCL7_name} ${LCL8_name} 2 -9" > ${family_chromo_name}.D5.ped | |||
mkdir VBT_D5 | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_name}.vcf -father ${family_name}.vcf -child ${family_name}.vcf -pedigree ${family_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_name}.D5 --output-violation-regions -thread-count $nt | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_chromo_name}.vcf -father ${family_chromo_name}.vcf -child ${family_chromo_name}.vcf -pedigree ${family_chromo_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_chromo_name}.D5 --output-violation-regions -thread-count $nt | |||
cat VBT_D5/${family_chromo_name}.D5_trio.vcf > ${family_chromo_name}.D5.vcf | |||
# prepare ped file, D6 | |||
echo "${family_name} ${LCL8_name} 0 0 2 -9 | |||
${family_name} ${LCL7_name} 0 0 1 -9 | |||
${family_name} ${LCL6_name} ${LCL7_name} ${LCL8_name} 2 -9" > ${family_name}.D6.ped | |||
echo "${family_chromo_name} ${LCL8_name} 0 0 2 -9 | |||
${family_chromo_name} ${LCL7_name} 0 0 1 -9 | |||
${family_chromo_name} ${LCL6_name} ${LCL7_name} ${LCL8_name} 2 -9" > ${family_chromo_name}.D6.ped | |||
mkdir VBT_D6 | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_name}.vcf -father ${family_name}.vcf -child ${family_name}.vcf -pedigree ${family_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_name}.D6 --output-violation-regions -thread-count $nt | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_chromo_name}.vcf -father ${family_chromo_name}.vcf -child ${family_chromo_name}.vcf -pedigree ${family_chromo_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_chromo_name}.D6 --output-violation-regions -thread-count $nt | |||
cat VBT_D6/${family_chromo_name}.D6_trio.vcf > ${family_chromo_name}.D6.vcf | |||
>>> | |||
runtime { | |||
@@ -47,6 +51,8 @@ task sister { | |||
output { | |||
Array[File] D5_mendelian = glob("VBT_D5/*") | |||
Array[File] D6_mendelian = glob("VBT_D6/*") | |||
File family_vcf = "${family_name}.vcf" | |||
File D5_trio_vcf = "${family_chromo_name}.D5.vcf" | |||
File D6_trio_vcf = "${family_chromo_name}.D6.vcf" | |||
File family_vcf = "${family_chromo_name}.vcf" | |||
} | |||
} |
@@ -1,15 +1,25 @@ | |||
task two_family_merge { | |||
File LCL5_trio_vcf | |||
File LCL6_trio_vcf | |||
String family_name | |||
File genotype_file | |||
String family_chromo_name | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
cat ${LCL5_trio_vcf} | grep -v '##' > ${family_name}.LCL5.txt | |||
cat ${LCL6_trio_vcf} | grep -v '##' > ${family_name}.LCL6.txt | |||
python /opt/merge_two_family.py -LCL5 ${family_name}.LCL5.txt -LCL6 ${family_name}.LCL6.txt -family ${family_name} | |||
cat ${LCL5_trio_vcf} | grep -v '##' > ${family_chromo_name}.LCL5.txt | |||
cat ${LCL6_trio_vcf} | grep -v '##' > ${family_chromo_name}.LCL6.txt | |||
cat ${genotype_file} | grep -v '##' | awk ' | |||
BEGIN { OFS = "\t" } | |||
NF > 2 && FNR > 1 { | |||
for ( i=9; i<=NF; i++ ) { | |||
split($i,a,":") ;$i = a[1]; | |||
} | |||
} | |||
{ print } | |||
' | cut -f1,2,4,5,10- > ${family_chromo_name}.genotype.txt | |||
python /opt/merge_two_family_with_genotype.py -LCL5 ${family_chromo_name}.LCL5.txt -LCL6 ${family_chromo_name}.LCL6.txt -genotype ${family_chromo_name}.genotype.txt -family ${family_chromo_name} | |||
>>> | |||
runtime { | |||
@@ -20,7 +30,7 @@ task two_family_merge { | |||
} | |||
output { | |||
File family_mendelian_info = "${family_name}.txt" | |||
File family_mendelian_info = "${family_chromo_name}.txt" | |||
} | |||
} |
@@ -1,33 +0,0 @@ | |||
# variantsNorm: restrict a VCF to chr1..chr22 and chrX, normalise it with
# `bcftools norm` against the reference, and also emit a copy without the
# '##' meta lines.
#
# Inputs:
#   vcf            VCF to filter and normalise
#   ref_dir, fasta reference is read from "<ref_dir>/<fasta>"
#   sampleName     prefix of all output files
#   docker, cluster_config, disk_size  runtime settings
#
# Command steps:
#   1. lines containing '#' go to `header`, all other lines to `body`;
#   2. body lines starting with one of chr1..chr22 or chrX (whole-word match)
#      are kept in `body.filtered`;
#   3. header + body.filtered form "<sampleName>.filtered.vcf";
#   4. `bcftools norm -f` writes "<sampleName>.normed.vcf";
#   5. the normed VCF without its '##' lines is "<sampleName>.normed.txt".
#
# Outputs:
#   normed_vcf  "<sampleName>.normed.vcf"
#   normed_txt  "<sampleName>.normed.txt"
task variantsNorm { | |||
File vcf | |||
File ref_dir | |||
String fasta | |||
String sampleName | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
cat ${vcf} | grep '#' > header | |||
cat ${vcf} | grep -v '#' > body | |||
cat body | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > body.filtered | |||
cat header body.filtered > ${sampleName}.filtered.vcf | |||
/opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf | |||
cat ${sampleName}.normed.vcf | grep -v '##' > ${sampleName}.normed.txt | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File normed_vcf = "${sampleName}.normed.vcf" | |||
File normed_txt = "${sampleName}.normed.txt" | |||
} | |||
} |
@@ -1,41 +0,0 @@ | |||
# votes: run /opt/voted_by_vcfinfo_mendelianinfo.py over a folder of family
# Mendelian tables plus a VCF information table, then rename the per-sample
# results by chromosome.
#
# Inputs:
#   family_mendelian_info  files copied one by one into ./temp, which is
#                          passed to the script as -folder
#   vcf                    its non-'##' lines are written to vcf_info.txt,
#                          which is passed as -vcf
#   chromo                 label inserted into the output file names
#   docker, cluster_config, disk_size  runtime settings
#
# Command notes:
#   After the script runs, LCL5..LCL8 "_voted.vcf" files are copied to
#   "<sample>.<chromo>.voted.vcf".
#
# Outputs:
#   LCL5..LCL8_voted_vcf  "<sample>.<chromo>.voted.vcf"
#   all_sample_info       "all_sample_information.txt" (not created by the
#                         shell commands; presumably written by the script --
#                         confirm)
task votes { | |||
Array[File] family_mendelian_info | |||
File vcf | |||
String chromo | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
mkdir temp | |||
for i in ${sep=" " family_mendelian_info} | |||
do | |||
cp $i temp | |||
done | |||
cat ${vcf} | grep -v '##' > vcf_info.txt | |||
python /opt/voted_by_vcfinfo_mendelianinfo.py -folder ./temp -vcf vcf_info.txt | |||
cp LCL5_voted.vcf LCL5.${chromo}.voted.vcf | |||
cp LCL6_voted.vcf LCL6.${chromo}.voted.vcf | |||
cp LCL7_voted.vcf LCL7.${chromo}.voted.vcf | |||
cp LCL8_voted.vcf LCL8.${chromo}.voted.vcf | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File LCL5_voted_vcf = "LCL5.${chromo}.voted.vcf" | |||
File LCL6_voted_vcf = "LCL6.${chromo}.voted.vcf" | |||
File LCL7_voted_vcf = "LCL7.${chromo}.voted.vcf" | |||
File LCL8_voted_vcf = "LCL8.${chromo}.voted.vcf" | |||
File all_sample_info = "all_sample_information.txt" | |||
} | |||
} | |||
@@ -1,24 +0,0 @@ | |||
# zipIndex: compress a VCF with `rtg bgzip` and index it with `rtg index`.
#
# Inputs:
#   vcf       uncompressed VCF
#   vcf_name  derived: basename of vcf with the ".vcf" suffix removed
#   docker, cluster_config, disk_size  runtime settings
#
# Command notes:
#   `rtg bgzip -c` writes to stdout, which is redirected to
#   "<vcf_name>.vcf.gz" in the working directory; `rtg index -f vcf` is then
#   run on that file.
#
# Outputs:
#   vcf_gz   "<vcf_name>.vcf.gz"
#   vcf_idx  "<vcf_name>.vcf.gz.tbi" (presumably the file rtg index writes --
#            confirm)
task zipIndex { | |||
File vcf | |||
String vcf_name = basename(vcf,".vcf") | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
rtg bgzip ${vcf} -c > ${vcf_name}.vcf.gz | |||
rtg index -f vcf ${vcf_name}.vcf.gz | |||
>>> | |||
# System disk is fixed at "cloud_ssd 40"; the data disk size comes from
# disk_size and is mounted at /cromwell_root/.
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File vcf_gz = "${vcf_name}.vcf.gz" | |||
File vcf_idx = "${vcf_name}.vcf.gz.tbi" | |||
} | |||
} |
@@ -1,10 +1,12 @@ | |||
import "./tasks/sister.wdl" as sister | |||
import "./tasks/two_family_merge.wdl" as two_family_merge | |||
import "./tasks/merge.wdl" as merge | |||
workflow {{ project_name }} { | |||
File inputSamplesFile | |||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||
File ref_dir | |||
String docker | |||
String family_name | |||
String fasta | |||
String cluster_config | |||
String disk_size | |||
@@ -22,10 +24,25 @@ workflow {{ project_name }} { | |||
LCL7_name=quartet[6], | |||
LCL8_name=quartet[7], | |||
fasta=fasta, | |||
family_name=quartet[8], | |||
docker=docker, | |||
family_chromo_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call two_family_merge.two_family_merge as two_family_merge { | |||
input: | |||
LCL5_trio_vcf=sister.D5_trio_vcf, | |||
LCL6_trio_vcf=sister.D6_trio_vcf, | |||
genotype_file=sister.family_vcf, | |||
family_chromo_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} | |||
call merge.merge as merge { | |||
input: | |||
family_mendelian_info=two_family_merge.family_mendelian_info, | |||
family_name=family_name, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} |