# Join the vote table with the Mendelian table and the mutation table on
# (chromosome, position) and write the combined rows as a header-less TSV.
#
# Positional arguments:
#   sys.argv[1]  Mendelian table, no header (columns 0 and 1 are the join keys,
#                column 2 is the value compared against '1:1:1')
#   sys.argv[2]  vote table with a header containing CHROM, POS and
#                LCL5_detected_num
#   sys.argv[3]  mutation table, no header (columns 0 and 1 are the join keys,
#                columns 2 and 3 are carried into the output)
#   sys.argv[4]  output path
men = pd.read_table(sys.argv[1], header=None)
vote = pd.read_table(sys.argv[2], low_memory=False)
mut = pd.read_table(sys.argv[3], header=None)

merged_df = pd.merge(
    vote, men, how='inner', left_on=['CHROM', 'POS'], right_on=[0, 1]
)

# Every row is labelled 'MIE' unless column 2 of the Mendelian table is
# exactly '1:1:1', in which case it is labelled 'MP'.
merged_df['mendelian_check'] = 'MIE'
merged_df.loc[merged_df[2] == '1:1:1', 'mendelian_check'] = 'MP'

# Work on an explicit copy so renaming the columns never touches merged_df.
sub = merged_df[
    ['CHROM', 'POS', 'LCL5_detected_num', 'mendelian_check', 2]
].copy()
sub.columns = ['CHROM', 'POS', 'detected_num', 'mendelian', 'detail']

# NOTE: the intermediate table is deliberately NOT written to sys.argv[3];
# that path is the mutation input read above and must not be overwritten.
genotype_sub = pd.merge(
    sub, mut, how='inner', left_on=['CHROM', 'POS'], right_on=[0, 1]
)
genotype_sub = genotype_sub[
    ['CHROM', 'POS', 'detected_num', 'mendelian', 'detail', 2, 3]
]
genotype_sub.to_csv(sys.argv[4], header=False, sep="\t", index=False)
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | ||||
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | "{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | ||||
"{{ project_name }}.sample_name": "{{ sample_name }}", | "{{ project_name }}.sample_name": "{{ sample_name }}", | ||||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc" | |||||
} | |||||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | |||||
"{{ project_name }}.mut_file": "{{ mut_file }}", | |||||
} |
# Combine the per-scatter outputs into sample-level files:
#   * the per-shard summaries are pasted side by side,
#   * the per-shard SNV / indel detail tables are concatenated,
#   * columns 3 and 4 of each concatenated table are tallied with `uniq -c`.
#
# NOTE(review): the runtime block and the `disk_size` declaration were not
# visible in the reviewed text; they mirror the summary_mendelian task, whose
# dataDisk line is identical. Confirm against the deployed task.
task merge_chromo {
    Array[File] mendelian_vote_summary
    Array[File] mendelian_vote_snv
    Array[File] mendelian_vote_indel
    String sample_name
    String docker
    String cluster_config
    String disk_size

    command <<<
        paste ${sep=" " mendelian_vote_summary} > ${sample_name}.all_chromo.mendelian.vote.txt
        paste ${sep=" " mendelian_vote_summary} > ${sample_name}.colnames

        # Write the detail tables under the exact names declared in `output`,
        # then summarise those same files (the names previously disagreed:
        # `.snv.txt` was written but `.snv` read, and `.indel` was written
        # but `.indel.txt` declared as output).
        cat ${sep=" " mendelian_vote_snv} > ${sample_name}.snv.txt
        cat ${sep=" " mendelian_vote_indel} > ${sample_name}.indel.txt

        cut -f3,4 ${sample_name}.snv.txt | sort | uniq -c > ${sample_name}.snv.summary.txt
        cut -f3,4 ${sample_name}.indel.txt | sort | uniq -c > ${sample_name}.indel.summary.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File mendelian_vote_summary_all_chromo = "${sample_name}.all_chromo.mendelian.vote.txt"
        File colnames = "${sample_name}.colnames"
        File mendelian_vote_summary_snv = "${sample_name}.snv.summary.txt"
        File mendelian_vote_summary_indel = "${sample_name}.indel.summary.txt"
        File mendelian_vote_summary_snv_detail = "${sample_name}.snv.txt"
        File mendelian_vote_summary_indel_detail = "${sample_name}.indel.txt"
    }
}
# Run /opt/merge_mendelian_vote.py on one (Mendelian, vote, mutation) triple
# and split its output into `.snv` and `.indel` files by the length of
# fields 6 and 7.
#
# NOTE(review): the runtime block and the `disk_size` declaration were not
# visible in the reviewed text; they mirror the summary_mendelian task.
# Confirm against the deployed task.
task merge_mendelian_vote {
    File vote_file
    File mendelian_file
    File mut_file
    String output_prefix
    String docker
    String cluster_config
    String disk_size

    command <<<
        # The script takes four arguments: Mendelian table, vote table,
        # mutation table, output path. It is invoked exactly once.
        python /opt/merge_mendelian_vote.py ${mendelian_file} ${vote_file} ${mut_file} ${output_prefix}.mendelian.vote.txt

        # awk reads the merged table directly; a bare filename at the start of
        # a pipeline would be executed as a command instead of being read.
        # NOTE(review): the two filters are not complements - a row with one
        # short and one long field is written to both files. Confirm intent.
        awk '{ if ((length($6) < 51) || (length($7) < 51)) { print } }' ${output_prefix}.mendelian.vote.txt > ${output_prefix}.snv
        awk '{ if ((length($6) > 50) || (length($7) > 50)) { print } }' ${output_prefix}.mendelian.vote.txt > ${output_prefix}.indel
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File mendelian_vote = "${output_prefix}.mendelian.vote.txt"
        File mendelian_vote_snv = "${output_prefix}.snv"
        File mendelian_vote_indel = "${output_prefix}.indel"
    }
}
# Count how often each distinct (field 3, field 4) pair occurs in the
# tab-delimited `mendelian_vote` file and write the `uniq -c` tally to
# <output_prefix>.summary.txt.
task summary_mendelian {
    File mendelian_vote
    String output_prefix
    String docker
    String cluster_config
    String disk_size

    command <<<
        cut -f3,4 ${mendelian_vote} | sort | uniq -c > ${output_prefix}.summary.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File mendelian_vote_summary = "${output_prefix}.summary.txt"
    }
}
import "./tasks/merge_mendelian_vote.wdl" as merge_mendelian_vote | import "./tasks/merge_mendelian_vote.wdl" as merge_mendelian_vote | ||||
import "./tasks/summary_mendelian.wdl" as summary_mendelian | |||||
import "./tasks/merge_chromo.wdl" as merge_chromo | import "./tasks/merge_chromo.wdl" as merge_chromo | ||||
# Workflow template; the workflow name is filled in through the
# `{{ project_name }}` placeholder. It reads a TSV of samples, runs the
# per-row tasks, and hands their outputs to merge_chromo.
# NOTE(review): this text carries side-by-side diff residue (` | ... | ||||`),
# and the lines that introduce `sample`, `disk_size` and the first `call` are
# not visible here - confirm against the real workflow file.
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
File inputSamplesFile | File inputSamplesFile | ||||
# Each row of the TSV becomes one Array[File].
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | ||||
File mut_file | |||||
String docker | String docker | ||||
String sample_name | String sample_name | ||||
String cluster_config | String cluster_config | ||||
# Columns 0, 1 and 2 of `sample` supply the vote file, the Mendelian file and
# the output prefix. Presumably `sample` is one row of inputSamples - verify.
vote_file=sample[0], | vote_file=sample[0], | ||||
mendelian_file=sample[1], | mendelian_file=sample[1], | ||||
output_prefix=sample[2], | output_prefix=sample[2], | ||||
docker=docker, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
# Summarise the merged vote file produced by merge_mendelian_vote.
call summary_mendelian.summary_mendelian as summary_mendelian { | |||||
input: | |||||
mendelian_vote=merge_mendelian_vote.mendelian_vote, | |||||
output_prefix=sample[2], | |||||
# NOTE(review): the summary_mendelian task shown in this text declares no
# `mut_file` input, while merge_mendelian_vote does; this argument looks like
# it belongs to the merge_mendelian_vote call - confirm.
mut_file=mut_file, | |||||
docker=docker, | docker=docker, | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
# Collect the summary, SNV and indel outputs of the calls above into
# sample-level files.
call merge_chromo.merge_chromo as merge_chromo { | call merge_chromo.merge_chromo as merge_chromo { | ||||
input: | input: | ||||
mendelian_vote_summary=summary_mendelian.mendelian_vote_summary, | |||||
mendelian_vote_snv=merge_mendelian_vote.mendelian_vote_snv, | |||||
mendelian_vote_indel=merge_mendelian_vote.mendelian_vote_indel, | |||||
sample_name=sample_name, | sample_name=sample_name, | ||||
docker=docker, | docker=docker, | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
} | |||||
} | |||||