LUYAO REN il y a 5 ans
Parent
révision
9821fd89a9
19 fichiers modifiés avec 149 ajouts et 437 suppressions
  1. +81
    -0
      codescripts/merge_two_family_with_genotype.py
  2. +5
    -2
      inputs
  3. +0
    -32
      tasks/VCFinfo.wdl
  4. +0
    -31
      tasks/VCFrename.wdl
  5. +0
    -27
      tasks/bed_annotation.wdl
  6. +0
    -26
      tasks/final_result.wdl
  7. +0
    -34
      tasks/mendelian.wdl
  8. +11
    -8
      tasks/merge.wdl
  9. +0
    -34
      tasks/mergeSister.wdl
  10. +0
    -25
      tasks/mergeVCFInfo.wdl
  11. +0
    -24
      tasks/merge_info.wdl
  12. +0
    -39
      tasks/oneClass.wdl
  13. +0
    -38
      tasks/reformVCF.wdl
  14. +17
    -11
      tasks/sister.wdl
  15. +15
    -5
      tasks/two_family_merge.wdl
  16. +0
    -33
      tasks/variantsNorm.wdl
  17. +0
    -41
      tasks/votes.wdl
  18. +0
    -24
      tasks/zipIndex.wdl
  19. +20
    -3
      workflow.wdl

+ 81
- 0
codescripts/merge_two_family_with_genotype.py Voir le fichier

@@ -0,0 +1,81 @@
from __future__ import division
import pandas as pd
import sys, argparse, os
import fileinput
import re

# Merge the LCL5 and LCL6 trio tables with the family genotype table and write
# one tab-separated summary line per variant position to "<family>.txt".

# Genotype strings that carry no variant evidence for a sample.
_NO_CALL = './.'
_HOM_REF = '0/0'


def _is_uninformative(gt):
    """Return True when a genotype is a no-call ('./.') or hom-ref ('0/0')."""
    return gt == _NO_CALL or gt == _HOM_REF


def _sister_status(lcl5, lcl6):
    """Compare the two sisters' genotypes.

    Returns a (mendelian_prefix, sister_count) pair:
      * identical genotypes -> 'noInfo'/'Ref' with 'no', otherwise '1' with 'yes_same'
      * different genotypes -> '0' with 'no' when both are uninformative,
        otherwise '0' with 'yes_diff'
    """
    if lcl5 == lcl6:
        if lcl5 == _NO_CALL:
            return 'noInfo', 'no'
        if lcl5 == _HOM_REF:
            return 'Ref', 'no'
        return '1', 'yes_same'
    if _is_uninformative(lcl5) and _is_uninformative(lcl6):
        return '0', 'no'
    return '0', 'yes_diff'


def _trio_status(child, lcl7, lcl8, trio_info):
    """Return the trio tag for one child given both parents and the trio INFO value.

    'noInfo' when all three genotypes are './.', 'Ref' when all three are '0/0',
    'unVBT' when the trio INFO value is missing (position absent from that trio
    table after the outer merge), otherwise the text after the first '=' of the
    INFO value.
    """
    if child == lcl7 == lcl8 == _NO_CALL:
        return 'noInfo'
    if child == lcl7 == lcl8 == _HOM_REF:
        return 'Ref'
    if pd.isnull(trio_info):
        return 'unVBT'
    # NOTE(review): assumes the INFO value always looks like "KEY=value";
    # a value without '=' raises IndexError here, as in the original script.
    return trio_info.split('=')[1]


def main(argv=None):
    """Parse the command line, merge the three tables and write "<family>.txt".

    argv: optional list of command-line tokens; None means sys.argv[1:].
    """
    # input arguments
    parser = argparse.ArgumentParser(description="this script is to extract mendelian concordance information")
    parser.add_argument('-LCL5', '--LCL5', type=str, help='LCL5 family info', required=True)
    parser.add_argument('-LCL6', '--LCL6', type=str, help='LCL6 family info', required=True)
    parser.add_argument('-genotype', '--genotype', type=str, help='Genotype information of a set of four family members', required=True)
    parser.add_argument('-family', '--family', type=str, help='family name', required=True)
    args = parser.parse_args(argv)

    # input files
    lcl5_dat = pd.read_table(args.LCL5)
    lcl6_dat = pd.read_table(args.LCL6)
    genotype_dat = pd.read_table(args.genotype)

    # Outer joins keep positions that are present in only some of the tables.
    merged_df = pd.merge(lcl5_dat, lcl6_dat, how='outer', on=['#CHROM', 'POS'])
    merged_genotype_df = pd.merge(merged_df, genotype_dat, how='outer', on=['#CHROM', 'POS'])

    # Columns are picked by position.
    # NOTE(review): presumably each trio table has 12 columns (so 7 and 17 are
    # the two INFO columns and 22-27 are REF, ALT and the four genotypes from
    # the genotype table) -- confirm against the files produced upstream.
    # .copy() so that renaming the columns never touches a view of the merge.
    merged_genotype_df_sub = merged_genotype_df.iloc[:, [0, 1, 22, 23, 24, 25, 26, 27, 7, 17]].copy()
    merged_genotype_df_sub.columns = ['CHROM', 'POS', 'REF', 'ALT', 'LCL5', 'LCL6', 'LCL7', 'LCL8', 'TRIO5', 'TRIO6']

    # output file; the context manager guarantees flush/close even on error
    with open(args.family + '.txt', 'w') as family_file:
        for row in merged_genotype_df_sub.itertuples(index=False):
            # sister
            sister_tag, sister_count = _sister_status(row.LCL5, row.LCL6)
            # family trio5 / trio6
            mendelian = ':'.join([
                sister_tag,
                _trio_status(row.LCL5, row.LCL7, row.LCL8, row.TRIO5),
                _trio_status(row.LCL6, row.LCL7, row.LCL8, row.TRIO6),
            ])
            # not count into family: every member is a no-call or hom-ref
            if all(_is_uninformative(gt) for gt in (row.LCL5, row.LCL6, row.LCL7, row.LCL8)):
                mendelian_count = "no"
            else:
                mendelian_count = "yes"
            # Only POS and the two trio INFO values are stringified, so a missing
            # REF/ALT/genotype still fails loudly (TypeError) instead of writing 'nan'.
            outline = '\t'.join([
                row.CHROM, str(row.POS), row.REF, row.ALT,
                row.LCL5, row.LCL6, row.LCL7, row.LCL8,
                str(row.TRIO5), str(row.TRIO6),
                mendelian, mendelian_count, sister_count,
            ]) + '\n'
            family_file.write(outline)


if __name__ == '__main__':
    main()

+ 5
- 2
inputs Voir le fichier

@@ -1,8 +1,11 @@
{
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.two_family_merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4",
"{{ project_name }}.family_name": "{{ family_name }}",
"{{ project_name }}.sister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.disk_size": "150",
"{{ project_name }}.merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc",
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
}
}

+ 0
- 32
tasks/VCFinfo.wdl Voir le fichier

@@ -1,32 +0,0 @@
# Runs /opt/variants_quality_location_intergration.py on an annotated VCF and
# then splits the resulting "<sample>_variant_quality_location.txt" table into
# one file per chromosome (chr1..chr22 and chrX), each starting with the
# '#CHROM' header line.
# NOTE(review): the per-chromosome split uses `grep -w $i`, which matches the
# chromosome name anywhere on a line, not only in the first column.
task VCFinfo {
File repeat_annotated_vcf
String sample
String docker
String cluster_config
String disk_size
command <<<

python /opt/variants_quality_location_intergration.py -vcf ${repeat_annotated_vcf} -prefix ${sample}

cat ${sample}_variant_quality_location.txt | grep '#CHROM' > header

for i in chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX
do
cat ${sample}_variant_quality_location.txt | grep -w $i | cat header - > ${sample}.$i.vcfInfo.txt
done


>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# extracted_info: the full table; chromo_vcfInfo: every per-chromosome file
# matched by the glob.
output {
File extracted_info = "${sample}_variant_quality_location.txt"
Array[File] chromo_vcfInfo = glob("*.vcfInfo.txt")
}
}

+ 0
- 31
tasks/VCFrename.wdl Voir le fichier

@@ -1,31 +0,0 @@
# Relabels the samples of a trio VCF with `rtg vcfannotate --relabel`.
# The mapping written to rename.txt is:
#   MOTHER -> <mother_name>.<child>, FATHER -> <father_name>.<child>,
#   CHILD  -> <child_name>
# trio_vcf_idx is not referenced by the command; presumably it is declared so
# the index is localized next to the VCF -- confirm.
task VCFrename {
File trio_vcf_gz
File trio_vcf_idx
String mother_name
String father_name
String child_name
String family_name
String child
String docker
String cluster_config
String disk_size
command <<<
echo "MOTHER ${mother_name}.${child}
FATHER ${father_name}.${child}
CHILD ${child_name}" > rename.txt

rtg vcfannotate -i ${trio_vcf_gz} -o ${family_name}.${child}.rename.vcf.gz --relabel=rename.txt
>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# The .tbi index is not created explicitly by the command; it is expected to be
# written by rtg alongside the output VCF.
output {
File rename_trio_vcf_gz = "${family_name}.${child}.rename.vcf.gz"
File rename_trio_vcf_idx = "${family_name}.${child}.rename.vcf.gz.tbi"
}
}

+ 0
- 27
tasks/bed_annotation.wdl Voir le fichier

@@ -1,27 +0,0 @@
# Annotates a merged VCF with regions from a BED file via
# `rtg vcfannotate --bed-info`, then decompresses the result so the task
# output is a plain (uncompressed) VCF.
# merged_vcf_idx is not referenced by the command; presumably it is declared so
# the index is localized next to the VCF -- confirm.
task bed_annotation {
File merged_vcf_gz
File merged_vcf_idx
File repeat_bed
String sample
String docker
String cluster_config
String disk_size
command <<<

rtg vcfannotate --bed-info=${repeat_bed} -i ${merged_vcf_gz} -o ${sample}.mendelian.merged.repeatAnno.vcf.gz

gunzip ${sample}.mendelian.merged.repeatAnno.vcf.gz

>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# The file left behind by gunzip (the .gz suffix removed).
output {
File repeat_annotated_vcf = "${sample}.mendelian.merged.repeatAnno.vcf"
}
}

+ 0
- 26
tasks/final_result.wdl Voir le fichier

@@ -1,26 +0,0 @@
# Runs /opt/FinalResult2VCF.py on the extracted VCF info and the mendelian
# annotation table. The output prefix is the basename of annotated_txt with the
# ".mendelian.txt" suffix removed.
task FinalResult {
File extracted_info
File annotated_txt
String prefix = basename(annotated_txt,".mendelian.txt")
String sample
String docker
String cluster_config
String disk_size
command <<<

python /opt/FinalResult2VCF.py -vcfInfo ${extracted_info} -mendelianInfo ${annotated_txt} -prefix ${prefix} -sample ${sample}

>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# Both files are expected to be written by the Python script under <prefix>.
output {
File benchmarking_calls = "${prefix}_benchmarking_calls.vcf"
File all_info = "${prefix}_all_sample_information.vcf"
}
}

+ 0
- 34
tasks/mendelian.wdl Voir le fichier

@@ -1,34 +0,0 @@
# Runs `vbt mendelian` on one trio: LCL8_vcf is passed as the mother, LCL7_vcf
# as the father and child_vcf as the child. All VBT output goes to ./VBT and the
# "<child_name>.family_trio.vcf" result is copied to "<child_name>.family.vcf".
# LCL7_name and LCL8_name are declared but not used by the command.
task mendelian {
File child_vcf
File LCL7_vcf
File LCL8_vcf
String LCL7_name
String LCL8_name
String child_name
File ref_dir
String fasta
String docker
String cluster_config
String disk_size
command <<<
export LD_LIBRARY_PATH=/opt/htslib-1.9
nt=$(nproc)
mkdir VBT

/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${LCL8_vcf} -father ${LCL7_vcf} -child ${child_vcf} -outDir VBT -out-prefix ${child_name}.family --output-violation-regions -thread-count $nt

cat VBT/${child_name}.family_trio.vcf > ${child_name}.family.vcf
>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# vbt_mendelian: everything VBT wrote into ./VBT; trio_vcf: the copied trio VCF.
output {
Array[File] vbt_mendelian = glob("VBT/*")
File trio_vcf = "${child_name}.family.vcf"
}
}

+ 11
- 8
tasks/merge.wdl Voir le fichier

@@ -1,16 +1,19 @@
task merge {
Array[File] family_vcf_gz
Array[File] family_vcf_idx
String sample
Array[File] family_mendelian_info
String family_name
String docker
String cluster_config
String disk_size
command <<<

rtg vcfmerge --force-merge-all -o ${sample}.merged.vcf.gz ${sep=" " family_vcf_gz}
cat ${sep=" " family_mendelian_info} | sort -k1,1 -k2,2n > ${family_name}.mendelian.txt

zcat ${sample}.merged.vcf.gz | grep -v '#' | cut -f1-2 | sed s'/\t/_/g' | sort | uniq -c | sed 's/\s\+/\t/g' | awk '{ if ($1 != 1) { print } }' | cut -f3 > ${sample}.vcf_dup.txt
cat ${family_name}.mendelian.txt | cut -f13 | sort | uniq -c > ${family_name}.sister.reproducibility.txt

cat ${family_name}.mendelian.txt | cut -f11 | sort | uniq -c | grep '1:1:1' > ${family_name}.mendelian.txt
cat ${family_name}.mendelian.txt | cut -f11 | sort | uniq -c | grep 'Ref:1:1' >> ${family_name}.mendelian.txt
cat ${family_name}.mendelian.txt | cut -f12 | sort | uniq -c | grep 'yes' >> ${family_name}.mendelian.txt

>>>

@@ -21,8 +24,8 @@ task merge {
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File merged_vcf_gz = "${sample}.merged.vcf.gz"
File merged_vcf_idx = "${sample}.merged.vcf.gz.tbi"
File vcf_dup = "${sample}.vcf_dup.txt"
File family_all_info = "${family_name}.mendelian.txt"
File sister_consistency = "${family_name}.sister.reproducibility.txt"
File family_mendelian = "${family_name}.mendelian.txt"
}
}

+ 0
- 34
tasks/mergeSister.wdl Voir le fichier

@@ -1,34 +0,0 @@
# Combines the LCL5 and LCL6 trio VCFs into one "<family_name>.trio.info.vcf".
# `rtg vcfmerge` is run twice with the inputs in opposite order; column 8 of the
# LCL5-first merge is appended as an extra trailing column to every non-'##'
# line of the LCL6-first merge, and the '##' header lines of the LCL5-first
# merge are put on top.
# The *_idx inputs are not referenced by the command; presumably they are
# declared so the indexes are localized next to the VCFs -- confirm.
task mergeSister {
File LCL5_trio_vcf_gz
File LCL5_trio_vcf_idx
File LCL6_trio_vcf_gz
File LCL6_trio_vcf_idx
String family_name
String docker
String cluster_config
String disk_size
command <<<
rtg vcfmerge -o LCL5.LCL6.merged.vcf.gz ${LCL5_trio_vcf_gz} ${LCL6_trio_vcf_gz}

rtg vcfmerge -o LCL6.LCL5.merged.vcf.gz ${LCL6_trio_vcf_gz} ${LCL5_trio_vcf_gz}

zcat LCL5.LCL6.merged.vcf.gz | grep '##' > header
zcat LCL5.LCL6.merged.vcf.gz | grep -v '##' | cut -f8 > LCL5.mendelian
zcat LCL6.LCL5.merged.vcf.gz | grep -v '##' | paste - LCL5.mendelian > body

cat header body > ${family_name}.trio.info.vcf
>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}

# The concatenated header + body file.
output {
File family_mendelian_info = "${family_name}.trio.info.vcf"
}

}

+ 0
- 25
tasks/mergeVCFInfo.wdl Voir le fichier

@@ -1,25 +0,0 @@
# Merges all input VCFs into "<sample>.merged.info.vcf.gz" with
# `rtg vcfmerge --force-merge-all`.
# vcf_idx is not referenced by the command; presumably it is declared so the
# indexes are localized next to the VCFs -- confirm.
task mergeVCFInfo {
Array[File] vcf_gz
Array[File] vcf_idx
String sample
String docker
String cluster_config
String disk_size
command <<<

rtg vcfmerge --force-merge-all -o ${sample}.merged.info.vcf.gz ${sep=" " vcf_gz}
>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# The .tbi index is not created explicitly by the command; it is expected to be
# written by rtg alongside the merged VCF.
output {
File merged_vcf = "${sample}.merged.info.vcf.gz"
File merged_vcf_idx = "${sample}.merged.info.vcf.gz.tbi"
}
}

+ 0
- 24
tasks/merge_info.wdl Voir le fichier

@@ -1,24 +0,0 @@
# Runs /opt/merge_mendelian_vcfinfo.py with the VCF info table, the mendelian
# info table and the sample name.
task merge_info {
File vcfInfo
File mendelianInfo
String sample
String docker
String cluster_config
String disk_size
command <<<

python /opt/merge_mendelian_vcfinfo.py -vcfInfo ${vcfInfo} -mendelianInfo ${mendelianInfo} -sample ${sample}

>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# Expected to be written by the Python script in the working directory.
output {
File all_info = "${sample}_mendelian_vcfInfo.vcf"
}
}

+ 0
- 39
tasks/oneClass.wdl Voir le fichier

@@ -1,39 +0,0 @@
# Runs /opt/oneClass.py twice: once on the SNV train/test pair and once on the
# INDEL train/test pair. The sample name is the basename of snv_train_vcf with
# the ".normed.snv.train.txt" suffix removed; the two runs are named
# "<sampleName>_snv" and "<sampleName>_indel".
task oneClass {
File snv_train_vcf
File snv_test_vcf
File indel_train_vcf
File indel_test_vcf
String sampleName = basename(snv_train_vcf,".normed.snv.train.txt")
String docker
String cluster_config
String disk_size
command <<<

python /opt/oneClass.py -train ${snv_train_vcf} -test ${snv_test_vcf} -name ${sampleName}_snv

python /opt/oneClass.py -train ${indel_train_vcf} -test ${indel_test_vcf} -name ${sampleName}_indel

>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}

# Five files per run (true/false txt, true/false bed, padding bed), all expected
# to be written by oneClass.py under the run name.
output {
File snv_true_txt = "${sampleName}_snv_predicted_true.txt"
File snv_false_txt = "${sampleName}_snv_predicted_false.txt"
File snv_true_bed = "${sampleName}_snv_predicted_true.bed"
File snv_false_bed = "${sampleName}_snv_predicted_false.bed"
File snv_padding = "${sampleName}_snv_padding.bed"
File indel_true_txt = "${sampleName}_indel_predicted_true.txt"
File indel_false_txt = "${sampleName}_indel_predicted_false.txt"
File indel_true_bed = "${sampleName}_indel_predicted_true.bed"
File indel_false_bed = "${sampleName}_indel_predicted_false.bed"
File indel_padding = "${sampleName}_indel_padding.bed"
}
}


+ 0
- 38
tasks/reformVCF.wdl Voir le fichier

@@ -1,38 +0,0 @@
# Runs /opt/reformVCF.py on the family mendelian-info VCF and then, for each of
# LCL5..LCL8, writes a "<family_name>.<sample>.txt" table containing the lines
# of "<family_name>.<sample>.vcf" that have no '##' header marker, no '0/0' and
# no './.' anywhere on the line.
task reformVCF {
File family_mendelian_info
# family_name is only used as a filename prefix (-name argument and output
# names), so it is a String like in the sibling tasks; declaring it as File
# would make the engine treat the name as a path to localize.
String family_name
String docker
String cluster_config
String disk_size
command <<<

python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name}

cat ${family_name}.LCL5.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL5.txt
cat ${family_name}.LCL6.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL6.txt
cat ${family_name}.LCL7.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL7.txt
cat ${family_name}.LCL8.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL8.txt

>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}

# The *.vcf files are expected to be written by reformVCF.py; the *.txt files
# are the filtered tables produced by the grep pipelines above.
output {
File LCL5_family_info = "${family_name}.LCL5.vcf"
File LCL6_family_info = "${family_name}.LCL6.vcf"
File LCL7_family_info = "${family_name}.LCL7.vcf"
File LCL8_family_info = "${family_name}.LCL8.vcf"
File family_info = "${family_name}.vcf"
File LCL5_family_info_txt = "${family_name}.LCL5.txt"
File LCL6_family_info_txt = "${family_name}.LCL6.txt"
File LCL7_family_info_txt = "${family_name}.LCL7.txt"
File LCL8_family_info_txt = "${family_name}.LCL8.txt"
}
}


+ 17
- 11
tasks/sister.wdl Voir le fichier

@@ -9,7 +9,7 @@ task sister {
String LCL7_name
String LCL8_name
String fasta
String family_name
String family_chromo_name
String docker
String cluster_config
String disk_size
@@ -21,21 +21,25 @@ task sister {
cat ${LCL7_vcf} | grep -v '##' | cut -f10 > F7
cat ${LCL8_vcf} | grep -v '##' | cut -f10 > M8
cat ${LCL5_vcf} | grep -v '##' | paste - D6 F7 M8 > body
cat ${LCL5_vcf} | grep '##' | cat - body > ${family_name}.vcf
cat ${LCL5_vcf} | grep '##' | cat - body > ${family_chromo_name}.vcf
# prepare ped file, D5
echo "${family_name} ${LCL8_name} 0 0 2 -9
${family_name} ${LCL7_name} 0 0 1 -9
${family_name} ${LCL5_name} ${LCL7_name} ${LCL8_name} 2 -9" > ${family_name}.D5.ped
echo "${family_chromo_name} ${LCL8_name} 0 0 2 -9
${family_chromo_name} ${LCL7_name} 0 0 1 -9
${family_chromo_name} ${LCL5_name} ${LCL7_name} ${LCL8_name} 2 -9" > ${family_chromo_name}.D5.ped

mkdir VBT_D5
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_name}.vcf -father ${family_name}.vcf -child ${family_name}.vcf -pedigree ${family_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_name}.D5 --output-violation-regions -thread-count $nt
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_chromo_name}.vcf -father ${family_chromo_name}.vcf -child ${family_chromo_name}.vcf -pedigree ${family_chromo_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_chromo_name}.D5 --output-violation-regions -thread-count $nt

cat VBT_D5/${family_chromo_name}.D5_trio.vcf > ${family_chromo_name}.D5.vcf
# prepare ped file, D6
echo "${family_name} ${LCL8_name} 0 0 2 -9
${family_name} ${LCL7_name} 0 0 1 -9
${family_name} ${LCL6_name} ${LCL7_name} ${LCL8_name} 2 -9" > ${family_name}.D6.ped
echo "${family_chromo_name} ${LCL8_name} 0 0 2 -9
${family_chromo_name} ${LCL7_name} 0 0 1 -9
${family_chromo_name} ${LCL6_name} ${LCL7_name} ${LCL8_name} 2 -9" > ${family_chromo_name}.D6.ped

mkdir VBT_D6
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_name}.vcf -father ${family_name}.vcf -child ${family_name}.vcf -pedigree ${family_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_name}.D6 --output-violation-regions -thread-count $nt
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_chromo_name}.vcf -father ${family_chromo_name}.vcf -child ${family_chromo_name}.vcf -pedigree ${family_chromo_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_chromo_name}.D6 --output-violation-regions -thread-count $nt

cat VBT_D6/${family_chromo_name}.D6_trio.vcf > ${family_chromo_name}.D6.vcf
>>>

runtime {
@@ -47,6 +51,8 @@ task sister {
output {
Array[File] D5_mendelian = glob("VBT_D5/*")
Array[File] D6_mendelian = glob("VBT_D6/*")
File family_vcf = "${family_name}.vcf"
File D5_trio_vcf = "${family_chromo_name}.D5.vcf"
File D6_trio_vcf = "${family_chromo_name}.D6.vcf"
File family_vcf = "${family_chromo_name}.vcf"
}
}

+ 15
- 5
tasks/two_family_merge.wdl Voir le fichier

@@ -1,15 +1,25 @@
task two_family_merge {
File LCL5_trio_vcf
File LCL6_trio_vcf
String family_name
File genotype_file
String family_chromo_name
String docker
String cluster_config
String disk_size
command <<<
cat ${LCL5_trio_vcf} | grep -v '##' > ${family_name}.LCL5.txt
cat ${LCL6_trio_vcf} | grep -v '##' > ${family_name}.LCL6.txt
python /opt/merge_two_family.py -LCL5 ${family_name}.LCL5.txt -LCL6 ${family_name}.LCL6.txt -family ${family_name}
cat ${LCL5_trio_vcf} | grep -v '##' > ${family_chromo_name}.LCL5.txt
cat ${LCL6_trio_vcf} | grep -v '##' > ${family_chromo_name}.LCL6.txt
cat ${genotype_file} | grep -v '##' | awk '
BEGIN { OFS = "\t" }
NF > 2 && FNR > 1 {
for ( i=9; i<=NF; i++ ) {
split($i,a,":") ;$i = a[1];
}
}
{ print }
' | cut -f1,2,4,5,10- > ${family_chromo_name}.genotype.txt
python /opt/merge_two_family_with_genotype.py -LCL5 ${family_chromo_name}.LCL5.txt -LCL6 ${family_chromo_name}.LCL6.txt -genotype ${family_chromo_name}.genotype.txt -family ${family_chromo_name}
>>>

runtime {
@@ -20,7 +30,7 @@ task two_family_merge {
}

output {
File family_mendelian_info = "${family_name}.txt"
File family_mendelian_info = "${family_chromo_name}.txt"
}

}

+ 0
- 33
tasks/variantsNorm.wdl Voir le fichier

@@ -1,33 +0,0 @@
# Restricts a VCF to records on chr1..chr22 and chrX, normalizes it with
# `bcftools norm` against <ref_dir>/<fasta>, and also writes a copy of the
# normalized VCF without the '##' header lines.
task variantsNorm {
File vcf
File ref_dir
String fasta
String sampleName
String docker
String cluster_config
String disk_size
command <<<

cat ${vcf} | grep '#' > header
cat ${vcf} | grep -v '#' > body
cat body | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > body.filtered
cat header body.filtered > ${sampleName}.filtered.vcf

/opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf

cat ${sampleName}.normed.vcf | grep -v '##' > ${sampleName}.normed.txt

>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# normed_vcf: bcftools output; normed_txt: the same content without '##' lines.
output {
File normed_vcf = "${sampleName}.normed.vcf"
File normed_txt = "${sampleName}.normed.txt"
}
}

+ 0
- 41
tasks/votes.wdl Voir le fichier

@@ -1,41 +0,0 @@
# Copies every family mendelian-info file into ./temp, strips the '##' header
# lines from the VCF into vcf_info.txt, runs
# /opt/voted_by_vcfinfo_mendelianinfo.py on both, and then copies the four
# LCL*_voted.vcf results to names that include the chromosome.
task votes {
Array[File] family_mendelian_info
File vcf
String chromo
String docker
String cluster_config
String disk_size
command <<<
mkdir temp
for i in ${sep=" " family_mendelian_info}
do
cp $i temp
done

cat ${vcf} | grep -v '##' > vcf_info.txt

python /opt/voted_by_vcfinfo_mendelianinfo.py -folder ./temp -vcf vcf_info.txt

cp LCL5_voted.vcf LCL5.${chromo}.voted.vcf
cp LCL6_voted.vcf LCL6.${chromo}.voted.vcf
cp LCL7_voted.vcf LCL7.${chromo}.voted.vcf
cp LCL8_voted.vcf LCL8.${chromo}.voted.vcf

>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# all_sample_information.txt is not created by the shell commands; it is
# expected to be written by the Python script.
output {
File LCL5_voted_vcf = "LCL5.${chromo}.voted.vcf"
File LCL6_voted_vcf = "LCL6.${chromo}.voted.vcf"
File LCL7_voted_vcf = "LCL7.${chromo}.voted.vcf"
File LCL8_voted_vcf = "LCL8.${chromo}.voted.vcf"
File all_sample_info = "all_sample_information.txt"
}
}


+ 0
- 24
tasks/zipIndex.wdl Voir le fichier

@@ -1,24 +0,0 @@
# Compresses a VCF with `rtg bgzip` and indexes it with `rtg index -f vcf`.
# The output name is the basename of the input with the ".vcf" suffix removed,
# plus ".vcf.gz".
task zipIndex {
File vcf
String vcf_name = basename(vcf,".vcf")
String docker
String cluster_config
String disk_size
command <<<
rtg bgzip ${vcf} -c > ${vcf_name}.vcf.gz
rtg index -f vcf ${vcf_name}.vcf.gz

>>>

# Container image and cluster spec come from the caller; the data disk size is
# taken from disk_size.
runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
# vcf_gz: the bgzipped VCF; vcf_idx: the index expected from `rtg index`.
output {
File vcf_gz = "${vcf_name}.vcf.gz"
File vcf_idx = "${vcf_name}.vcf.gz.tbi"
}
}

+ 20
- 3
workflow.wdl Voir le fichier

@@ -1,10 +1,12 @@
import "./tasks/sister.wdl" as sister
import "./tasks/two_family_merge.wdl" as two_family_merge
import "./tasks/merge.wdl" as merge

workflow {{ project_name }} {
File inputSamplesFile
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
File ref_dir
String docker
String family_name
String fasta
String cluster_config
String disk_size
@@ -22,10 +24,25 @@ workflow {{ project_name }} {
LCL7_name=quartet[6],
LCL8_name=quartet[7],
fasta=fasta,
family_name=quartet[8],
docker=docker,
family_chromo_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call two_family_merge.two_family_merge as two_family_merge {
input:
LCL5_trio_vcf=sister.D5_trio_vcf,
LCL6_trio_vcf=sister.D6_trio_vcf,
genotype_file=sister.family_vcf,
family_chromo_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
}
call merge.merge as merge {
input:
family_mendelian_info=two_family_merge.family_mendelian_info,
family_name=family_name,
cluster_config=cluster_config,
disk_size=disk_size
}
}

Chargement…
Annuler
Enregistrer