"""Label voted variants as Mendelian-consistent (MP) or inconsistent (MIE).

Usage:
    python merge_mendelian_vote.py MENDELIAN_FILE VOTE_FILE OUTPUT_FILE

MENDELIAN_FILE is a headerless tab-separated table whose first three columns
are chromosome, position and a Mendelian detail string.  VOTE_FILE is a
tab-separated table with a header containing at least ``CHROM``, ``POS`` and
``LCL5_detected_num``.  OUTPUT_FILE receives one headerless, tab-separated
row per site present in both inputs:

    CHROM  POS  LCL5_detected_num  MP|MIE  detail
"""
import argparse
import os
import sys

import pandas as pd

# Detail string that marks a site as Mendelian-consistent ("MP"); any other
# value is reported as "MIE".
_MENDELIAN_PASS = '1:1:1'


def _parse_args(argv):
    """Parse the three positional paths (mendelian file, vote file, output)."""
    parser = argparse.ArgumentParser(
        description='Annotate a vote table with MP/MIE Mendelian labels.')
    parser.add_argument(
        'mendelian_file',
        help='headerless TSV: chromosome, position, mendelian detail')
    parser.add_argument(
        'vote_file',
        help='TSV with header containing CHROM, POS and LCL5_detected_num')
    parser.add_argument(
        'output_file',
        help='path of the headerless TSV to write')
    return parser.parse_args(argv)


def main(argv=None):
    """Join the two tables on (chromosome, position) and write the labels.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]`` when ``None``.
    """
    args = _parse_args(argv)

    # Read the chromosome key as text on both sides.  With inferred dtypes a
    # file mixing numeric and non-numeric chromosome names can end up with
    # int keys on one side and str keys on the other, and the merge would
    # then drop (or refuse to join) sites that really are present in both.
    men = pd.read_table(args.mendelian_file, header=None,
                        dtype={0: str}, low_memory=False)
    vote = pd.read_table(args.vote_file,
                         dtype={'CHROM': str}, low_memory=False)

    # Inner join: only sites found in both tables are reported.
    merged = pd.merge(vote, men, how='inner',
                      left_on=['CHROM', 'POS'], right_on=[0, 1])

    merged['mendelian_check'] = 'MIE'
    merged.loc[merged[2] == _MENDELIAN_PASS, 'mendelian_check'] = 'MP'

    # No header is written, so the columns are selected in output order
    # without being renamed first.
    report = merged[['CHROM', 'POS', 'LCL5_detected_num',
                     'mendelian_check', 2]]
    report.to_csv(args.output_file, header=False, sep='\t', index=False)


if __name__ == '__main__':
    main()
{
  "{{ project_name }}.disk_size": "100",
  "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
  "{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4",
  "{{ project_name }}.sample_name": "{{ sample_name }}",
  "{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc"
}
# Concatenate all given summary files, in the order supplied, into a single
# file named "<sample_name>.all_chromo.mendelian.vote.txt".
task merge_chromo {
    # Files to concatenate.
    Array[File] mendelian_vote_summary
    # Prefix of the merged output file name.
    String sample_name

    # Execution settings, passed straight through to the runtime section.
    String docker
    String cluster_config
    String disk_size

    command <<<
        cat ${sep=" " mendelian_vote_summary} \
            > ${sample_name}.all_chromo.mendelian.vote.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File mendelian_vote_summary_all_chromo = "${sample_name}.all_chromo.mendelian.vote.txt"
    }
}
# Run /opt/merge_mendelian_vote.py from the container image on one
# vote/mendelian file pair and expose the table it writes.
task merge_mendelian_vote {
    # Data inputs. The script takes the mendelian file first, then the vote
    # file, then the output path.
    File vote_file
    File mendelian_file
    # Prefix of the output file name ("<output_prefix>.mendelian.vote.txt").
    String output_prefix

    # Execution settings, passed straight through to the runtime section.
    String docker
    String cluster_config
    String disk_size

    command <<<
        python /opt/merge_mendelian_vote.py \
            ${mendelian_file} \
            ${vote_file} \
            ${output_prefix}.mendelian.vote.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File mendelian_vote = "${output_prefix}.mendelian.vote.txt"
    }
}
# Count how often each distinct (column 3, column 4) combination occurs in a
# tab-separated table and write the counts, in `uniq -c` format, to
# "<output_prefix>.summary.txt".
task summary_mendelian {
    # Tab-separated table to summarise; only columns 3 and 4 are read.
    File mendelian_vote
    # Prefix of the summary file name.
    String output_prefix

    # Execution settings, passed straight through to the runtime section.
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Without pipefail the exit status of the pipeline is that of `uniq`
        # alone, so a failing `cut` or `sort` would still be reported as
        # success and leave an empty or truncated summary behind.
        set -e
        set -o pipefail
        cut -f3,4 ${mendelian_vote} | sort | uniq -c > ${output_prefix}.summary.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File mendelian_vote_summary = "${output_prefix}.summary.txt"
    }
}
import "./tasks/merge_mendelian_vote.wdl" as merge_mendelian_vote | |||||
import "./tasks/summary_mendelian.wdl" as summary_mendelian | |||||
import "./tasks/merge_chromo.wdl" as merge_chromo | |||||
# Workflow template ("{{ project_name }}" is substituted when it is rendered).
#
# For every row of inputSamplesFile: merge the vote and mendelian files, then
# summarise the merged table. Once all rows are done, concatenate the
# per-row summaries into one file.
workflow {{ project_name }} {
    # Tab-separated sample sheet. Each row is used as:
    #   column 0 -> vote file, column 1 -> mendelian file, column 2 -> output prefix
    File inputSamplesFile
    Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)

    # Prefix of the final merged file.
    String sample_name

    # Execution settings shared by every task call.
    String docker
    String cluster_config
    String disk_size

    scatter (sample in inputSamples) {
        call merge_mendelian_vote.merge_mendelian_vote as merge_mendelian_vote {
            input:
                vote_file = sample[0],
                mendelian_file = sample[1],
                output_prefix = sample[2],
                docker = docker,
                cluster_config = cluster_config,
                disk_size = disk_size
        }

        call summary_mendelian.summary_mendelian as summary_mendelian {
            input:
                mendelian_vote = merge_mendelian_vote.mendelian_vote,
                output_prefix = sample[2],
                docker = docker,
                cluster_config = cluster_config,
                disk_size = disk_size
        }
    }

    # Outside the scatter, the summary output is the array of all per-row
    # summary files.
    call merge_chromo.merge_chromo as merge_chromo {
        input:
            mendelian_vote_summary = summary_mendelian.mendelian_vote_summary,
            sample_name = sample_name,
            docker = docker,
            cluster_config = cluster_config,
            disk_size = disk_size
    }
}