浏览代码

split fastq and vcf

tags/v0.1.1
LUYAO REN 3 年前
父节点
当前提交
48c3f31d75
共有 2 个文件被更改,包括 34 次插入10 次删除
  1. +32
    -8
      tasks/benchmark.wdl
  2. +2
    -2
      tasks/mendelian.wdl

+ 32
- 8
tasks/benchmark.wdl 查看文件

@@ -19,17 +19,41 @@ task benchmark {

export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${filtered_vcf} -c > ${sample}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz

echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL5" > LCL5_name
echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL6" > LCL6_name
echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL7" > LCL7_name
echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL8" > LCL8_name


if [[ ${sample} =~ "LCL5" ]];then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.vcf ${filtered_vcf} -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
cat ${filtered_vcf} | grep '##' > header
cat ${filtered_vcf} | grep -v '#' > body
cat header LCL5_name body > LCL5.vcf
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL5.vcf -c > ${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.reformed.vcf.gz
elif [[ ${sample} =~ "LCL6" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.vcf ${filtered_vcf} -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
cat ${filtered_vcf} | grep '##' > header
cat ${filtered_vcf} | grep -v '#' > body
cat header LCL6_name body > LCL6.vcf
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL6.vcf -c > ${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.reformed.vcf.gz
elif [[ ${sample} =~ "LCL7" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.vcf ${filtered_vcf} -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
cat ${filtered_vcf} | grep '##' > header
cat ${filtered_vcf} | grep -v '#' > body
cat header LCL7_name body > LCL7.vcf
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL7.vcf -c > ${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.reformed.vcf.gz
elif [[ ${sample} =~ "LCL8" ]]; then
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.vcf ${filtered_vcf} -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
cat ${filtered_vcf} | grep '##' > header
cat ${filtered_vcf} | grep -v '#' > body
cat header LCL8_name body > LCL8.vcf
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL8.vcf -c > ${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.reformed.vcf.gz
else
echo "only for quartet samples"
fi
@@ -43,8 +67,8 @@ task benchmark {
}

output {
File rtg_vcf = "${sample}.rtg.vcf.gz"
File rtg_vcf_index = "${sample}.rtg.vcf.gz.tbi"
File rtg_vcf = "${sample}.reformed.vcf.gz"
File rtg_vcf_index = "${sample}.reformed.vcf.gz.tbi"
File gzip_vcf = "${sample}.vcf.gz"
File gzip_vcf_index = "${sample}.vcf.gz.tbi"
File roc_all_csv = "${sample}.roc.all.csv.gz"

+ 2
- 2
tasks/mendelian.wdl 查看文件

@@ -11,14 +11,14 @@ task mendelian {
export LD_LIBRARY_PATH=/opt/htslib-1.9
nt=$(nproc)

echo -e "${family_name}\tM8\t0\t0\t2\t-9\n${family_name}\tF7\t0\t0\t1\t-9\n${family_name}\tD5\tF7\tM8\t2\t-9" > ${family_name}.D5.ped
echo -e "${family_name}\tLCL8\t0\t0\t2\t-9\n${family_name}\tLCL7\t0\t0\t1\t-9\n${family_name}\tLCL5\tLCL7\tLCL8\t2\t-9" > ${family_name}.D5.ped

mkdir VBT_D5
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${family_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_name}.D5 --output-violation-regions -thread-count $nt

cat VBT_D5/${family_name}.D5_trio.vcf > ${family_name}.D5.vcf

echo -e "${family_name}\tM8\t0\t0\t2\t-9\n${family_name}\tF7\t0\t0\t1\t-9\n${family_name}\tD6\tF7\tM8\t2\t-9" > ${family_name}.D6.ped
echo -e "${family_name}\tLCL8\t0\t0\t2\t-9\n${family_name}\tLCL7\t0\t0\t1\t-9\n${family_name}\tLCL6\tLCL7\tLCL8\t2\t-9" > ${family_name}.D6.ped

mkdir VBT_D6
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${family_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_name}.D6 --output-violation-regions -thread-count $nt

正在加载...
取消
保存