LUYAO REN 5 anni fa
parent
commit
4123aef78e
4 ha cambiato i file con 58 aggiunte e 7 eliminazioni
  1. +0
    -0
      codescripts/filter_indel_over_50_cluster.py
  2. +42
    -0
      codescripts/filter_indel_over_50_mendelian.py
  3. +0
    -1
      codescripts/merge_two_family_with_genotype.py
  4. +16
    -6
      tasks/merge.wdl

codescripts/filter_indel_over_50.py → codescripts/filter_indel_over_50_cluster.py Vedi File


+ 42
- 0
codescripts/filter_indel_over_50_mendelian.py Vedi File

@@ -0,0 +1,42 @@
from itertools import islice
import sys, argparse, os


# Command-line interface: the merged table to filter and the prefix that
# names the result file. Both options are mandatory.
parser = argparse.ArgumentParser(
    description="this script is to exclude indel over 50bp",
)
parser.add_argument(
    '-i', '--mergedGVCF',
    type=str,
    required=True,
    help='merged gVCF txt with only chr, pos, ref, alt and genotypes',
)
parser.add_argument(
    '-prefix', '--prefix',
    type=str,
    required=True,
    help='prefix of output file',
)

args = parser.parse_args()
input_dat, prefix = args.mergedGVCF, args.prefix


# Destination for the rows that pass the length filter. The handle is opened
# once at module level and is the one process() writes to.
output_name = prefix + '.indel.lessthan50bp.txt'
outfile = open(output_name, 'w')


def process(line, out=None, max_len=50):
    """Write *line* to the output unless its REF or any ALT allele is too long.

    The line is expected to be tab-separated with the reference allele in the
    third column and the alternate allele(s) in the fourth; multiple ALT
    alleles are comma-separated and the longest one decides the outcome.

    Args:
        line: One raw line of the input table, including its newline.
        out: Writable text handle that receives kept lines. Defaults to the
            module-level ``outfile``.
        max_len: Longest allele length (in characters) that is still kept.
            A line is dropped only when REF or an ALT is strictly longer.

    Blank or whitespace-only lines are skipped instead of raising
    ``IndexError``. Non-blank lines with fewer than four columns still raise
    ``IndexError`` so that malformed input is not silently ignored.
    """
    stripped = line.strip()
    if not stripped:
        # e.g. a trailing empty line at the end of the file
        return

    strings = stripped.split('\t')

    # Longest ALT allele; a value without commas is simply one allele.
    alt_max = max(len(allele) for allele in strings[3].split(','))
    ref_len = len(strings[2])

    if alt_max > max_len or ref_len > max_len:
        return

    # Write the original, unmodified line so the output keeps the input format.
    target = outfile if out is None else out
    target.write(line)

# Stream the input table through the filter one line at a time. The file
# handle is iterated directly: fileinput is not imported in this script, and
# fileinput.input() expects file names rather than an already-open handle.
with open(input_dat) as input_file:
    for line in input_file:
        process(line)

# Flush and release the output handle once every line has been processed.
outfile.close()



+ 0
- 1
codescripts/merge_two_family_with_genotype.py Vedi File

@@ -47,7 +47,6 @@ for row in merged_genotype_df_sub.itertuples():
else:
mendelian = '1'
sister_count = "yes_same"

else:
mendelian = '0'
if (row.LCL5 == './.' or row.LCL5 == '0/0') and (row.LCL6 == './.' or row.LCL6 == '0/0'):

+ 16
- 6
tasks/merge.wdl Vedi File

@@ -9,11 +9,19 @@ task merge {

cat ${sep=" " family_mendelian_info} | sort -k1,1 -k2,2n > ${family_name}.mendelian.txt

cat ${family_name}.mendelian.txt | cut -f13 | sort | uniq -c > ${family_name}.sister.reproducibility.txt
cat ${family_name}.mendelian.txt | awk '{ if ((length($3) == 1) && (length($4) == 1)) { print } }' > ${family_name}.mendelian.snv.txt
cat ${family_name}.mendelian.txt | awk '{ if ((length($3) != 1) || (length($4) != 1)) { print } }' > ${family_name}.mendelian.indel.txt

python /opt/filter_indel_over_50_mendelian.py -i ${family_name}.mendelian.indel.txt -prefix ${family_name}.mendelian

cat ${family_name}.mendelian.snv.txt | cut -f11 | sort | uniq -c | grep '1:1:1' > ${family_name}.mendelian.snv.summary.txt
cat ${family_name}.mendelian.snv.txt | cut -f11 | sort | uniq -c | grep 'Ref:1:1' >> ${family_name}.mendelian.snv.summary.txt
cat ${family_name}.mendelian.snv.txt | cut -f12 | sort | uniq -c | grep 'yes' >> ${family_name}.mendelian.snv.summary.txt

cat ${family_name}.mendelian.indel.lessthan50bp.txt | cut -f11 | sort | uniq -c | grep '1:1:1' > ${family_name}.mendelian.indel.summary.txt
cat ${family_name}.mendelian.indel.lessthan50bp.txt | cut -f11 | sort | uniq -c | grep 'Ref:1:1' >> ${family_name}.mendelian.indel.summary.txt
cat ${family_name}.mendelian.indel.lessthan50bp.txt | cut -f12 | sort | uniq -c | grep 'yes' >> ${family_name}.mendelian.indel.summary.txt

cat ${family_name}.mendelian.txt | cut -f11 | sort | uniq -c | grep '1:1:1' > ${family_name}.mendelian.summary.txt
cat ${family_name}.mendelian.txt | cut -f11 | sort | uniq -c | grep 'Ref:1:1' >> ${family_name}.mendelian.summary.txt
cat ${family_name}.mendelian.txt | cut -f12 | sort | uniq -c | grep 'yes' >> ${family_name}.mendelian.summary.txt

>>>

@@ -25,7 +33,9 @@ task merge {
}
output {
    # Files written by the command section; every name is derived from ${family_name}.

    # Sorted concatenation of the per-family mendelian info files.
    File family_all_info = "${family_name}.mendelian.txt"
    # Value counts (sort | uniq -c) of column 13 of the merged table.
    File sister_consistency = "${family_name}.sister.reproducibility.txt"
    # Summary counts over all variants.
    File family_mendelian = "${family_name}.mendelian.summary.txt"
    # Rows whose REF and ALT are both a single character.
    File snv = "${family_name}.mendelian.snv.txt"
    # Indel rows kept by filter_indel_over_50_mendelian.py.
    # The type was misspelled "FIle", which is not a valid WDL type.
    File indel = "${family_name}.mendelian.indel.lessthan50bp.txt"
    # Summary counts restricted to the SNV rows.
    File family_snv = "${family_name}.mendelian.snv.summary.txt"
    # Summary counts restricted to the length-filtered indel rows.
    File family_indel = "${family_name}.mendelian.indel.summary.txt"
}
}

Loading…
Annulla
Salva