4 years ago · 7d75ff2611
--- a/.DS_Store
+++ b/.DS_Store
--- a/README.md
+++ b/README.md
@@ -0,0 +1,12 @@
 # Pipeline for Generating RNA Count Matrices from GTF Files

 > Author:  Li Zhihui
 >
 > E-mail：18210700119@fudan.edu.cn
 >
 > Git: 
 >
 > Last Updates: 2020/08/23

 ## 安装指南

--- a/conf/fastq_screen.conf
+++ b/conf/fastq_screen.conf
@@ -0,0 +1,94 @@
 # This is an example configuration file for FastQ Screen

 ############################
 ## Bowtie, Bowtie 2 or BWA #
 ############################
 ## If the Bowtie, Bowtie 2 or BWA binary is not in your PATH, you can set 
 ## this value to tell the program where to find your chosen aligner.  Uncomment 
 ## the relevant line below and set the appropriate location.  Please note, 
 ## this path should INCLUDE the executable filename.

 #BOWTIE	/usr/local/bin/bowtie/bowtie
 #BOWTIE2 /usr/local/bowtie2/bowtie2
 #BWA /usr/local/bwa/bwa



 ############################################
 ## Bismark (for bisulfite sequencing only) #
 ############################################
 ## If the Bismark binary is not in your PATH then you can set this value to 
 ## tell the program where to find it.  Uncomment the line below and set the 
 ## appropriate location. Please note, this path should INCLUDE the executable 
 ## filename.

 #BISMARK	/usr/local/bin/bismark/bismark



 ############
 ## Threads #
 ############
 ## Genome aligners can be made to run across multiple CPU cores to speed up 
 ## searches.  Set this value to the number of cores you want for mapping reads.

 THREADS		32



 ##############
 ## DATABASES #
 ##############
 ## This section enables you to configure multiple genomes databases (aligner index 
 ## files) to search against in your screen.  For each genome you need to provide a 
 ## database name (which can't contain spaces) and the location of the aligner index 
 ## files.
 ##
 ## The path to the index files SHOULD INCLUDE THE BASENAME of the index, e.g:
 ## /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
 ## Thus, the index files (Homo_sapiens.GRCh37.1.bt2, Homo_sapiens.GRCh37.2.bt2, etc.) 
 ## are found in a folder named 'GRCh37'.
 ##
 ## If, for example, the Bowtie, Bowtie2 and BWA indices of a given genome reside in 
 ## the SAME FOLDER, a SINGLE path may be provided to ALL of the indices.  The index 
 ## used will be the one compatible with the chosen aligner (as specified using the 
 ## --aligner flag).  
 ##
 ## The entries shown below are only suggested examples, you can add as many DATABASE 
 ## sections as required, and you can comment out or remove as many of the existing 
 ## entries as desired.  We suggest including genomes and sequences that may be sources 
 ## of contamination either because they were run on your sequencer previously, or may 
 ## have contaminated your sample during the library preparation step.
 ##
 ## Human - sequences available from
 ## ftp://ftp.ensembl.org/pub/current/fasta/homo_sapiens/dna/
 #DATABASE	Human	/data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
 ##
 ## Mouse - sequence available from
 ## ftp://ftp.ensembl.org/pub/current/fasta/mus_musculus/dna/
 #DATABASE	Mouse	/data/public/Genomes/Mouse/NCBIM37/Mus_musculus.NCBIM37
 ##
 ## Ecoli- sequence available from EMBL accession U00096.2
 #DATABASE	Ecoli	/data/public/Genomes/Ecoli/Ecoli
 ##
 ## PhiX - sequence available from Refseq accession NC_001422.1
 #DATABASE	PhiX	/data/public/Genomes/PhiX/phi_plus_SNPs
 ##
 ## Adapters - sequence derived from the FastQC contaminants file found at: www.bioinformatics.babraham.ac.uk/projects/fastqc
 #DATABASE	Adapters	/data/public/Genomes/Contaminants/Contaminants
 ##
 ## Vector - Sequence taken from the UniVec database
 ## http://www.ncbi.nlm.nih.gov/VecScreen/UniVec.html
 #DATABASE	Vectors		/data/public/Genomes/Vectors/Vectors

 DATABASE	Human	/cromwell_root/tmp/fastq_screen_reference/genome
 DATABASE	Mouse	/cromwell_root/tmp/fastq_screen_reference/mouse
 DATABASE	ERCC	/cromwell_root/tmp/fastq_screen_reference/ERCC
 DATABASE	EColi	/cromwell_root/tmp/fastq_screen_reference/ecoli
 DATABASE	Adapter	/cromwell_root/tmp/fastq_screen_reference/adapters
 DATABASE	Vector	/cromwell_root/tmp/fastq_screen_reference/vector
 DATABASE	rRNA	/cromwell_root/tmp/fastq_screen_reference/rRNARef
 DATABASE	Virus	/cromwell_root/tmp/fastq_screen_reference/viral
 DATABASE	Yeast	/cromwell_root/tmp/fastq_screen_reference/GCF_000146045.2_R64_genomic_modify
 DATABASE	Mitoch	/cromwell_root/tmp/fastq_screen_reference/Human_mitoch
 DATABASE	Phix	/cromwell_root/tmp/fastq_screen_reference/phix
--- a/defaults
+++ b/defaults
@@ -0,0 +1,5 @@
 {   
    "count_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/count:v1.0",
    "count_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "count_length": "150"
    }
--- a/inputs
+++ b/inputs
@@ -0,0 +1,8 @@
 {
 	"{{ project_name }}.sample_id": "{{ sample_id }}",
 	"{{ project_name }}.gtf": "{{ gtf }}",
 	"{{ project_name }}.count_docker": "{{ count_docker }}",
 	"{{ project_name }}.count_cluster": "{{ count_cluster }}",
 	"{{ project_name }}.count_length": "{{ count_length }}",
 	"{{ project_name }}.disk_size": "{{ disk_size if disk_size != '' else 200}}"
 	}
--- a/tasks/.DS_Store
+++ b/tasks/.DS_Store
--- a/tasks/count.wdl
+++ b/tasks/count.wdl
@@ -0,0 +1,26 @@
 # Task: stage one sample's GTF under a per-sample directory and run the
 # `count` executable on the parent directory to produce gene-level and
 # transcript-level count matrices (CSV) named after the sample.
 #
 # Inputs:
 #   gtf          - GTF file for the sample; copied into
 #                  /cromwell_root/tmp/ballgown/<sample_id>/
 #   sample_id    - used as the staging sub-directory name and as the prefix
 #                  of both output file names
 #   docker       - container image the task runs in
 #   cluster      - value forwarded to the backend `cluster` runtime attribute
 #   disk_size    - size appended to the `dataDisk` runtime attribute string
 #   count_length - forwarded to `count -l`
 #                  (NOTE(review): presumably the read length - confirm
 #                  against the `count` tool shipped in the image)
 task count {
    File gtf
    String sample_id
    String docker
    String cluster
    String disk_size
    Int count_length

    command <<<
      # Abort on the first failing step; without this a failed mkdir/cp is
      # ignored and only the exit status of the final `count` call is reported.
      set -e
      mkdir -p "/cromwell_root/tmp/ballgown/${sample_id}"
      cp -r "${gtf}" "/cromwell_root/tmp/ballgown/${sample_id}"
      count -i /cromwell_root/tmp/ballgown -l ${count_length} -g "${sample_id}_gene_count_matrix.csv" -t "${sample_id}_transcript_count_matrix.csv"
    >>>
    
    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      # The data disk is mounted at /cromwell_root/, the tree the command stages files under.
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    
    output {
      # Both files are written to the task's working directory by `count` (-g / -t).
      File mat_expression_genecount = "${sample_id}_gene_count_matrix.csv"
      File mat_expression_transcriptcount = "${sample_id}_transcript_count_matrix.csv"
    }
 }
--- a/workflow.wdl
+++ b/workflow.wdl
@@ -0,0 +1,19 @@
 import "./tasks/count.wdl" as count

 # Workflow: runs the single `count` task for one sample.
 # The workflow name is filled in from the `project_name` template variable;
 # every declaration below is a workflow-level input supplied through the
 # inputs JSON as "<project_name>.<name>".
 workflow {{ project_name }} {
 	File gtf
 	# sample_id is forwarded to the count task and must be declared here;
 	# the inputs template already provides "<project_name>.sample_id".
 	String sample_id
 	Int count_length
 	String disk_size
 	String count_docker
 	String count_cluster

 	call count.count as count {
 		input: 
 		sample_id = sample_id,
 		docker = count_docker,
 		cluster = count_cluster,
 		gtf = gtf,
 		disk_size = disk_size,
 		count_length = count_length
 	} 
 }