lutingxi 3 роки тому
джерело
коміт
fb932d90cd
8 змінених файлів з 100542 додано та 0 видалено
  1. +6
    -0
      defaults
  2. +9
    -0
      inputs
  3. +68
    -0
      schema.json
  4. +42
    -0
      tasks/fusioncatcher.wdl
  5. +23
    -0
      test_input_output/final-list_candidate-fusion-genes.txt
  6. +50184
    -0
      test_input_output/input/reads_1.fq
  7. +50184
    -0
      test_input_output/input/reads_2.fq
  8. +26
    -0
      workflow.wdl

+ 6
- 0
defaults Переглянути файл

@@ -0,0 +1,6 @@
{
"fusioncatcher_database_dir":"/data/minio/reference-data/reference/fusioncatcher/human_v102",
"fusioncatcher_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fusioncatcher:v1.33_2",
"fusioncatcher_cluster":"OnDemand bcs.ps.r.xlarge img-ubuntu-vpc",
"disk_size":"200"
}

+ 9
- 0
inputs Переглянути файл

@@ -0,0 +1,9 @@
{
"{{ project_name }}.fastq1": "{{ fastq1 }}",
"{{ project_name }}.fastq2": "{{ fastq2 }}",
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.fusioncatcher_docker": "{{ fusioncatcher_docker }}",
"{{ project_name }}.fusioncatcher_database_dir": "{{ fusioncatcher_database_dir }}",
"{{ project_name }}.fusioncatcher_cluster": "{{ fusioncatcher_cluster }}",
"{{ project_name }}.disk_size": "{{ disk_size }}"
}

+ 68
- 0
schema.json Переглянути файл

@@ -0,0 +1,68 @@
{
"formMode":"batch",
"fields":[
{
"tmplType":"sample_id",
"type":"number",
"label":"Sample ID",
"name":"sample_id",
"question":"How many samples? For auto generating sample id.",
"placeholder":"How many samples? For auto generating sample id.",
"model":"sample_id",
"disabled":false,
"min":1,
"max":10,
"config":{
"rules":[{"type":"number","required":true,"message":"Please input a number."}]
}
},
{
"tmplType":"file",
"label":"Read1 Files",
"model":"read1",
"readOnly":false,
"multiple":true,
"filterType":"fastq|fastq.gz|fq|fq.gz",
"placeholder":"Select files for read1.",
"config":{
"rules":[
{
"type":"array",
"required":true,
"message":"Please select files for read1."
}
]
}
},
{
"tmplType":"file",
"label":"Read2 Files",
"model":"read2",
"readOnly":false,
"multiple":true,
"filterType":"fastq|fastq.gz|fq|fq.gz",
"placeholder":"Select files for read2.",
"config":{
"rules":[
{
"type":"array",
"required":true,
"message":"Please select files for read2."
}
]
}
},
{
"tmplType":"actions",
"buttons":[
{"type":"cancel","buttonType":"default","buttonLabel":"Previous Step"},
{
"type":"submit",
"buttonType":"primary",
"buttonLabel":"Next Step",
"validate":true
}
]
}
]
}

+ 42
- 0
tasks/fusioncatcher.wdl Переглянути файл

@@ -0,0 +1,42 @@
# Runs FusionCatcher on one pair of FASTQ files and collects the files it
# writes into ./output, grouped by extension.
#
# Inputs:
#   sample_id    - accepted for interface compatibility; not referenced by the
#                  command or the outputs below.
#   fastq1       - first FASTQ file passed to -i.
#   fastq2       - second FASTQ file passed to -i.
#   database_dir - value passed to -d.
#                  NOTE(review): declared as File but presumably points at a
#                  directory of reference data - confirm the backend localizes
#                  directories for File inputs.
#   docker       - container image used for the task.
#   cluster      - backend cluster specification string.
#   disk_size    - size inserted into the dataDisk runtime string.
task fusioncatcher{
    String sample_id
    File fastq1
    File fastq2
    File database_dir
    String docker
    String cluster
    String disk_size

    command <<<
        # Abort on the first failing command, including failures inside pipes.
        set -eo pipefail

        # -p: do not fail (and trip "set -e") when the directory already exists.
        mkdir -p ./output/

        # Paths are quoted so that localized paths containing whitespace are
        # passed to fusioncatcher as single arguments.
        /software/fusioncatcher/bin/fusioncatcher \
            -d "${database_dir}" \
            -p 4 \
            -i "${fastq1},${fastq2}" \
            --output ./output
    >>>

    # NOTE(review): cluster/systemDisk/dataDisk look like backend-specific
    # runtime attributes (not core WDL) - confirm against the target backend.
    runtime {
        docker : docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    # Each output is every file in ./output with the given extension; an array
    # is empty when the run produced no file of that type.
    output {
        Array[File] fusioncatcher_result=glob("./output/*.txt")
        Array[File] fusioncatcher_zip=glob("./output/*.zip")
        Array[File] fusioncatcher_log=glob("./output/*.log")
        Array[File] fusioncatcher_vcf=glob("./output/*.vcf")
    }

}



+ 23
- 0
test_input_output/final-list_candidate-fusion-genes.txt Переглянути файл

@@ -0,0 +1,23 @@
Gene_1_symbol(5end_fusion_partner) Gene_2_symbol(3end_fusion_partner) Fusion_description Counts_of_common_mapping_reads Spanning_pairs Spanning_unique_reads Longest_anchor_found Fusion_finding_method Fusion_point_for_gene_1(5end_fusion_partner) Fusion_point_for_gene_2(3end_fusion_partner) Gene_1_id(5end_fusion_partner) Gene_2_id(3end_fusion_partner) Exon_1_id(5end_fusion_partner) Exon_2_id(3end_fusion_partner) Fusion_sequence Predicted_effect
FGFR3 TACC3 known,adjacent,oncogene,cosmic,ticdb,tcga,ccle2,18cancers,gliomas,chimer4kb,chimer4pub,chimer4seq,cancer,tcga-cancer,tcga2,oncokb,mitelman,pcawg,ccle,tcga3,t37,tcga-oesophagus,10K<gap<100K 0 857 77 42 BOWTIE+BLAT;BOWTIE+STAR 4:1806934:+ 4:1727977:+ ENSG00000068078 ENSG00000013810 AGCAGCTGGTGGAGGACCTGGACCGTGTCCTTACCGTGACGTCCACCGAC*ACAGAAGAGTGACACCCGCCTCTGAGACCCTAGAAGACCCTTGCAGGACA in-frame
FGFR3 TACC3 known,adjacent,oncogene,cosmic,ticdb,tcga,ccle2,18cancers,gliomas,chimer4kb,chimer4pub,chimer4seq,cancer,tcga-cancer,tcga2,oncokb,mitelman,pcawg,ccle,tcga3,t37,tcga-oesophagus,10K<gap<100K 0 857 10 42 BOWTIE+BLAT;BOWTIE+STAR 4:1807033:+ 4:1727832:+ ENSG00000068078 ENSG00000013810 CCGTCCCCGGCCATCCTGCCCCCCAGAGTGCTGAGGTGTGGGGCGGGCCT*TCTGGCCCAGGTGCCCTGGCTGACCTGGACTGCTCAAGCTCTTCCCAGAG intronic/CDS(truncated)
FIP1L1 PDGFRA known,oncogene,chimer2,cgp,ticdb,cacg,tcga,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,tcga2,oncokb,mitelman,t71 0 118 49 47 BOWTIE+BLAT;BOWTIE+STAR 4:53425965:+ 4:54274925:+ ENSG00000145216 ENSG00000134853 TCAGCCGAGTAGAAGGCAGGCGACGGGCAAATGAGAACAGCAACATACAG*CTGCCTTATGACTCAAGATGGGAGTTTCCAAGAGATGGACTAGTGCTTGG in-frame
GOPC ROS1 known,oncogene,cosmic,chimer2,cgp,ticdb,cacg,chimer4kb,chimer4pub,cancer,tumor,oncokb,mitelman,ccle,100K<gap<200K,exon-exon 0 73 20 29 BOWTIE;BOWTIE+BLAT;BOWTIE+STAR 6:117566854:- 6:117321394:- ENSG00000047932 ENSG00000047936 ENSE00003526662 ENSE00000442460 GCTAGTTGCAAAGACACAAGTGGGGAAATCAAAGTATTACAAG*TCTGGCATAGAAGATTAAAGAATCAAAAAAGTGCCAAGGAAGG in-frame
GOPC ROS1 known,oncogene,cosmic,chimer2,cgp,ticdb,cacg,chimer4kb,chimer4pub,cancer,tumor,oncokb,mitelman,ccle,100K<gap<200K,exon-exon 0 73 2 26 BOWTIE 6:117566854:- 6:117320030:- ENSG00000047932 ENSG00000047936 ENSE00003526662 ENSE00003478828 GCTAGTTGCAAAGACACAAGTGGGGAAATCAAAGTATTACAAG*TACTCTTCCAACCCAAGAGGAGATTGAAAATCTTCCTGCCTTC out-of-frame
IGH@ CRLF2 known,oncogene,cancer,oncokb,mitelman 0 32 10 95 BOWTIE+SPOTLIGHT 14:105863258:+ X:105799296:- ENSG09000000014 ENSG00000205755 CGACGGGGAATTCTCACAGGAGACGAGGGGGAAAAGGGTTGGGGCGGATGCACTCCCTGAGGAGACGGTGACCGTGGTCCCTTTGCCCCAGACGTCCATGTAGTAGTAGTAGTATCA*GGGGCCCGGGGAATAGAAGCTGTGAGCTGGGGAGAGGATATATAAGAACTTTCTGTACTTTCTGCAAATTTTGCTCTCAA ---/intergenic
IGH@ CRLF2 known,oncogene,cancer,oncokb,mitelman 0 32 5 32 BOWTIE+STAR 14:105863262:+ X:1228687:- ENSG09000000014 ENSG00000205755 CGTGGTCCCTTTGCCCCAGACGTCCATGTAGTAGTAGTAGTAGTAATCA*NNNNNNNNNNNNNNNNNNNNNGAAGCTGTGAGCTGGGGAGAGGATATATAAGAACTTTCTGTACTTTCTG ---/intergenic
IGH@ CRLF2 known,oncogene,cancer,oncokb,mitelman 0 32 3 96 BOWTIE+SPOTLIGHT 14:105863343:+ X:105812546:- ENSG09000000014 ENSG00000205755 TGCTGATGTCAGAGTTGTTCTTGTATTTCCAGGAGAAAGTGATGGAGTCGGGAAGGAAGTCCTGTGCGAGGCAGCCAACGGCCACGCTGCTCGTATCCGACGGGGAATTCTCACAGGAGACGAGGGGGAAAAGGGTTGGGGCGGATGCACTCCCTGAGGAGACGGTGACCGTGGTCCCTTTGCCCCAGACGTCCATGTAGTAGTAGTAGTATCAGGGGCCCGGGGAATAGAAG*CTGTGAGGGTTAATCCAATCTTCCTGCTGTCAAGCAAAAGAACACCTAAAATTCTTCCGTCACGTACTG ---/intergenic
HOOK3 RET known,oncogene,cosmic,cgp,ticdb,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,mitelman,t3,exon-exon 0 13 10 29 BOWTIE;BOWTIE+BLAT;BOWTIE+STAR 8:42968214:+ 10:43116584:+ ENSG00000168172 ENSG00000165731 ENSE00003485179 ENSE00001095944 GGCCAACGCAGCGCGAAGTCAACTTGAAACCTACAAGAGACAG*GAGGATCCAAAGTGGGAATTCCCTCGGAAGAACTTGGTTCTTG in-frame
AKAP9 BRAF known,oncogene,cosmic,chimer2,cgp,ticdb,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,oncokb,mitelman,t2,exon-exon 0 12 13 30 BOWTIE;BOWTIE+BLAT;BOWTIE+STAR 7:92003235:+ 7:140787584:- ENSG00000127914 ENSG00000157764 ENSE00003484877 ENSE00003680515 TGATAAACTTCAGAAAGAACTCAATGTACTTAAATCAGAACAG*GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAA in-frame
EWSR1 ATF1 known,oncogene,cosmic,cgp,ticdb,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,m5,oncokb,mitelman,t1,exon-exon 0 11 10 30 BOWTIE;BOWTIE+BLAT;BOWTIE+STAR 22:29287134:+ 12:50814280:+ ENSG00000182944 ENSG00000123268 ENSE00003570564 ENSE00003675908 CCAAGTCAATATAGCCAACAGAGCAGCAGCTACGGGCAGCAGA*CTGCATCAGGAGATATGCAAACATATCAGATCCGAACTACACC in-frame
TMPRSS2 ETV1 known,oncogene,cosmic,chimer2,cgp,ticdb,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,tcga-cancer,oncokb,mitelman,t3 0 10 11 37 BOWTIE+STAR 21:41494381:- 7:13935844:- ENSG00000184012 ENSG00000006468 GGGTCCTGACGCAGGCTTCCAACCCCGTCGTCTGCACGCAGCCCAAATCC*CCATCCAGCACGCCAGTGTCCCCACTGCATCATGCATCTCCAAACTCAAC in-frame
EWSR1 FLI1 known,oncogene,cosmic,cgp,ticdb,ccle2,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,m6,oncokb,mitelman,ccle,exon-exon 0 9 12 29 BOWTIE;BOWTIE+BLAT;BOWTIE+STAR 22:29287134:+ 11:128807180:+ ENSG00000182944 ENSG00000151702 ENSE00003570564 ENSE00003702967 CCAAGTCAATATAGCCAACAGAGCAGCAGCTACGGGCAGCAGA*GTCCTCCCCTTGGAGGGGCACAAACGATCAGTAAGAATACAGA in-frame
ETV6 NTRK3 known,oncogene,cosmic,chimer2,cgp,ticdb,tcga,ccle2,18cancers,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,tcga-cancer,tcga2,oncokb,mitelman,pcawg,tcga3,t1,exon-exon 0 7 9 29 BOWTIE 12:11869969:+ 15:87940753:- ENSG00000139083 ENSG00000140538 ENSE00001788162 ENSE00001134154 TCCCCGCCTGAAGAGCACGCCATGCCCATTGGGAGAATAGCAG*ATGTGCAGCACATTAAGAGGAGAGACATCGTGCTGAAGCGAGA in-frame
BRD4 NUTM1 known,oncogene,cosmic,chimer2,cgp,ticdb,ccle2,chimer4kb,chimer4pub,chimer4seq,cancer,oncokb,mitelman,ccle,t1,exon-exon 0 6 9 29 BOWTIE;BOWTIE+BLAT;BOWTIE+STAR 19:15254152:- 15:34347969:+ ENSG00000141867 ENSG00000184507 ENSE00001187704 ENSE00001367212 TCCAGTGAGTCCAGCTCCTCTGACAGCGAAGACTCCGAAACAG*CATCTGCATTGCCGGGACCGGATATGAGCATGAAACCTAGTGC in-frame
CD74 ROS1 known,oncogene,cosmic,chimer2,cgp,ticdb,tcga,ccle2,18cancers,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,tcga-cancer,tcga2,oncokb,mitelman,pcawg,t3,exon-exon 0 6 7 30 BOWTIE;BOWTIE+BLAT;BOWTIE+STAR 5:150404680:- 6:117324415:- ENSG00000019582 ENSG00000047936 ENSE00000841201 ENSE00000762702 AGGCACTCCTTGGAGCAAAAGCCCACTGACGCTCCACCGAAAG*ATGATTTTTGGATACCAGAAACAAGTTTCATACTTACTATTAT in-frame
CIC DUX4 known,cosmic,chimer2,cgp,ticdb,chimer4kb,chimer4pub,cancer,oncokb,mitelman,ccle 0 5 5 34 BOWTIE+STAR 19:42295047:+ 4:190174446:+ ENSG00000079432 ENSG00000260596 CTGCCCCTGCCCCCACTCCCAGCCCCGCAGGGGGCCCTGACCCCACCTCA*CCGGCAGAGGGGATCTCCCAACCTGCCCCGGCGCGCGGGGATTTCGCCTA in-frame
DUX4 IGH@ known,cancer,mitelman 0 5 5 84 BOWTIE+SPOTLIGHT 4:190174997:+ 14:190287409:+ ENSG00000260596 ENSG09000000014 CGCAACCTCTCCTAGAAACGGAGGCCCCGGGGGAGCTGGAGGCCTCGGAAGAGGCCGCCTCGCTG*CGATCCCTCAGGGTTGGCTGTAGTCACCGTATGGGGGAAGCCTGTGTCATCATTTATTTACCCAACCTGGGTTAATGTCTTTGCTATTATGAATAGTGCTGG CDS(truncated)/---
EML4 ALK known,oncogene,cosmic,chimer2,cgp,ticdb,tcga,ccle2,18cancers,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,tcga-cancer,tcga2,oncokb,mitelman,pcawg,ccle,ccle3,tcga3,t1 0 5 4 33 BOWTIE+STAR 2:42301391:+ 2:29223587:- ENSG00000143924 ENSG00000171094 AGAAAAATAATTCTGTGGGATCATGATCTGAATCCTGAAAGAGAAATAGA*GTGTAGTGCTTCAAGGGCCAGGCTGCCAGGCCATGTTGCAGCTGACCACC CDS(truncated)/UTR
DUX4 IGH@ known,cancer,mitelman 0 4 8 37 BOWTIE+BLAT;BOWTIE+STAR 4:190175284:+ 14:106638525:- ENSG00000260596 ENSG09000001017 TTCTTTCCTGGGCATCCCGGGGATCCCAGAGCCGGCCCAGGTACCAGCAG*GCAGAGAAGAGGCCATGTGGAGACATAGTGCACTAGAAGGTGGCCCAGTG UTR/---
MALT1 IGH@ known,oncogene,chimer2,cancer,oncokb,mitelman 0 3 3 34 BOWTIE+STAR 18:58669909:+ 14:105888559:+ ENSG00000172175 ENSG09000000014 AGCTATATTGGCTAAGAGTAAATAACACTGGCACCCATTTAACTCGTATG*AGTCACCACCACAATATGCTCACAGTGACACGAACCCCCACAAAATCCTC intergenic/---
NPM1 ALK known,oncogene,cosmic,chimer2,cgp,ticdb,ccle2,chimer4kb,chimer4pub,chimer4seq,cancer,tumor,oncokb,mitelman,ccle,ccle3,exon-exon 0 3 3 30 BOWTIE 5:171391799:+ 2:29223528:- ENSG00000181163 ENSG00000171094 ENSE00003625205 ENSE00001154407 TGTGGTTCAGGGCCAGTGCATATTAGTGGACAGCACTTAGTAG*TGTACCGCCGGAAGCACCAGGAGCTGCAAGCCATGCAGATGGA in-frame

+ 50184
- 0
test_input_output/input/reads_1.fq
Різницю між файлами не показано, бо вона завелика
Переглянути файл


+ 50184
- 0
test_input_output/input/reads_2.fq
Різницю між файлами не показано, бо вона завелика
Переглянути файл


+ 26
- 0
workflow.wdl Переглянути файл

@@ -0,0 +1,26 @@
import "./tasks/fusioncatcher.wdl" as fusioncatcher

# Workflow template wrapping the single fusioncatcher task.
# "{{ project_name }}" is a template placeholder for the workflow name; it has
# to be substituted before this file is valid WDL.
workflow {{ project_name }} {
    # Execution settings forwarded to the task.
    String fusioncatcher_docker
    String fusioncatcher_cluster
    String disk_size

    # Sample inputs and reference data forwarded to the task.
    String sample_id
    File fastq1
    File fastq2
    File fusioncatcher_database_dir

    # Every workflow input is passed straight through; the workflow-level
    # fusioncatcher_* names map onto the task's shorter parameter names.
    call fusioncatcher.fusioncatcher as fusioncatcher {
        input:
            docker=fusioncatcher_docker,
            cluster=fusioncatcher_cluster,
            disk_size=disk_size,
            database_dir=fusioncatcher_database_dir,
            sample_id=sample_id,
            fastq1=fastq1,
            fastq2=fastq2
    }
}


Завантаження…
Відмінити
Зберегти