@@ -0,0 +1,89 @@ | |||
# Byte-compiled / optimized / DLL files | |||
__pycache__/ | |||
*.py[cod] | |||
*$py.class | |||
# C extensions | |||
*.so | |||
# Distribution / packaging | |||
.Python | |||
env/ | |||
build/ | |||
develop-eggs/ | |||
dist/ | |||
downloads/ | |||
eggs/ | |||
.eggs/ | |||
lib/ | |||
lib64/ | |||
parts/ | |||
sdist/ | |||
var/ | |||
*.egg-info/ | |||
.installed.cfg | |||
*.egg | |||
# PyInstaller | |||
# Usually these files are written by a python script from a template | |||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | |||
*.manifest | |||
*.spec | |||
# Installer logs | |||
pip-log.txt | |||
pip-delete-this-directory.txt | |||
# Unit test / coverage reports | |||
htmlcov/ | |||
.tox/ | |||
.coverage | |||
.coverage.* | |||
.cache | |||
nosetests.xml | |||
coverage.xml | |||
*,cover | |||
.hypothesis/ | |||
# Translations | |||
*.mo | |||
*.pot | |||
# Django stuff: | |||
*.log | |||
local_settings.py | |||
# Flask stuff: | |||
instance/ | |||
.webassets-cache | |||
# Scrapy stuff: | |||
.scrapy | |||
# Sphinx documentation | |||
docs/_build/ | |||
# PyBuilder | |||
target/ | |||
# IPython Notebook | |||
.ipynb_checkpoints | |||
# pyenv | |||
.python-version | |||
# celery beat schedule file | |||
celerybeat-schedule | |||
# dotenv | |||
.env | |||
# virtualenv | |||
venv/ | |||
ENV/ | |||
# Spyder project settings | |||
.spyderproject | |||
# Rope project settings | |||
.ropeproject |
@@ -0,0 +1,21 @@ | |||
The MIT License (MIT) | |||
Copyright (c) 2016 Shifu Chen | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
in the Software without restriction, including without limitation the rights | |||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
copies of the Software, and to permit persons to whom the Software is | |||
furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. |
@@ -0,0 +1,14 @@ | |||
# q30 | |||
A simple script that calculates the Q20/Q30 base percentages of a FASTQ file (plain or gzipped).
```shell | |||
python q30.py <fastq_file_name> | |||
``` | |||
# output | |||
``` | |||
('total bases:', 1386659) | |||
('q20 bases:', 1280980) | |||
('q30 bases:', 1232024) | |||
('q20 percents:', 92.37887613320939) | |||
('q30 percents:', 88.84837584438567) | |||
Time used: 0.221389055252 | |||
``` |
@@ -0,0 +1,102 @@ | |||
#!/usr/bin/env python | |||
import gzip | |||
import os,sys | |||
def isFastq(f):
    """Return True when the file name *f* ends with a FASTQ extension.

    Recognised suffixes are ``.fq``, ``.fastq``, ``.fq.gz`` and
    ``.fastq.gz``. The comparison is case-sensitive and only inspects the
    name; the file itself is never opened.
    """
    # Every suffix carries its leading dot so that names such as
    # "barfq.gz" are not mistaken for FASTQ files.
    return f.endswith((".fq", ".fastq", ".fq.gz", ".fastq.gz"))
################################ | |||
#fastq.reader | |||
class Reader:
    """Sequential reader for FASTQ files, plain or gzip-compressed.

    A file whose name ends in ``.gz`` is opened through ``gzip.open``;
    any other name is opened with the built-in ``open``. Both use mode
    ``"r"``, so on Python 3 a gzipped file yields ``bytes`` lines while a
    plain file yields ``str`` lines.
    """

    def __init__(self, fname):
        """Open *fname* for reading.

        Errors from the underlying ``open`` call propagate to the caller.
        """
        self.__file = None
        self.__gz = False
        self.__eof = False
        self.filename = fname
        if self.filename.endswith(".gz"):
            self.__gz = True
            self.__file = gzip.open(self.filename, "r")
        else:
            self.__gz = False
            self.__file = open(self.filename, "r")
        if self.__file is None:
            print("Failed to open file " + self.filename)
            sys.exit(1)

    def __del__(self):
        # Safety net only; call close() explicitly for deterministic release.
        self.close()

    def close(self):
        """Close the underlying file; calling it more than once is harmless."""
        if self.__file is not None:
            self.__file.close()
            self.__file = None

    def nextRead(self):
        """Return the next record as a list of four lines, or None.

        The four lines are name, sequence, strand and quality, each with
        trailing whitespace removed. None is returned once the input is
        exhausted, when an empty line is met, or when the file is closed;
        a trailing record with fewer than four lines is discarded.
        """
        if self.__eof or self.__file is None:
            return None
        lines = []
        for _ in range(4):
            line = self.__file.readline().rstrip()
            # readline() returns an empty value at end of file; a blank
            # line inside the file is treated the same way.
            if len(line) == 0:
                self.__eof = True
                return None
            lines.append(line)
        return lines

    def isEOF(self):
        """Return True once nextRead() has run out of records."""
        return self.__eof
################################ | |||
#fastq.writer | |||
class Writer:
    """Line-oriented writer for FASTQ files, plain or gzip-compressed.

    A file whose name ends in ``.gz`` is written through ``gzip.open``
    (a binary handle); any other name is written as a regular text file.
    Lines may be given as ``str`` or ``bytes``; each one is converted to
    whatever the underlying handle expects before being written.
    """

    # Class-level defaults keep __del__ safe when __init__ raises before
    # the instance attributes have been assigned.
    filename = ""
    __file = None
    __gz = False

    def __init__(self, fname):
        """Open *fname* for writing, truncating any existing file.

        Errors from the underlying ``open`` call propagate to the caller.
        """
        self.filename = fname
        if self.filename.endswith(".gz"):
            self.__gz = True
            self.__file = gzip.open(self.filename, "w")
        else:
            self.__gz = False
            self.__file = open(self.filename, "w")
        if self.__file is None:
            print("Failed to open file " + self.filename + " to write")
            sys.exit(1)

    def __del__(self):
        # Safety net only; call close() explicitly for deterministic release.
        self.close()

    def close(self):
        """Flush and close the file; calling it more than once is harmless."""
        if self.__file is not None:
            self.__file.flush()
            self.__file.close()
            self.__file = None

    def flush(self):
        """Flush buffered output to the underlying file, if it is open."""
        if self.__file is not None:
            self.__file.flush()

    def _put_line(self, line):
        """Write one line plus a newline in the type the handle expects."""
        if self.__gz:
            # The gzip handle is binary: text must be encoded first.
            if not isinstance(line, bytes):
                line = line.encode("utf-8")
            self.__file.write(line + b"\n")
        else:
            # The plain handle is text: decode bytes lines (on Python 2
            # bytes is str, so nothing is converted there).
            if isinstance(line, bytes) and not isinstance(line, str):
                line = line.decode("utf-8")
            self.__file.write(line + "\n")

    def writeLines(self, lines):
        """Write every item of *lines* on its own line.

        Returns False when the file is not open, True otherwise.
        """
        if self.__file is None:
            return False
        for line in lines:
            self._put_line(line)
        return True

    def writeRead(self, name, seqence, strand, quality):
        """Write one four-line FASTQ record.

        Returns False when the file is not open, True otherwise.
        """
        if self.__file is None:
            return False
        for line in (name, seqence, strand, quality):
            self._put_line(line)
        return True
@@ -0,0 +1,49 @@ | |||
#!/usr/bin/env python | |||
import os,sys | |||
import fastq | |||
import time | |||
def qual_stat(qstr):
    """Count the bases of one quality string that reach Q20 and Q30.

    Each character's code point minus 33 is taken as its quality score.
    *qstr* may be a text string or a bytes object.

    Returns a ``(q20, q30)`` tuple; bases counted in ``q30`` are also
    counted in ``q20``.
    """
    q20 = 0
    q30 = 0
    for q in qstr:
        # Iterating bytes on Python 3 already yields integers, while a
        # text string yields one-character strings that need ord().
        code = q if isinstance(q, int) else ord(q)
        qual = code - 33
        if qual >= 30:
            q30 += 1
            q20 += 1
        elif qual >= 20:
            q20 += 1
    return q20, q30
def stat(filename):
    """Print base totals and Q20/Q30 percentages for a FASTQ file.

    Every record is read through ``fastq.Reader``; the fourth line of each
    record (the quality string) is measured with ``qual_stat``. Five lines
    are printed: total bases, Q20 bases, Q30 bases and the two percentages.
    A file without any reads reports both percentages as 0.0 instead of
    failing with a division by zero.
    """
    reader = fastq.Reader(filename)
    total_count = 0
    q20_count = 0
    q30_count = 0
    while True:
        read = reader.nextRead()
        if read is None:
            break
        quality = read[3]
        total_count += len(quality)
        q20, q30 = qual_stat(quality)
        q20_count += q20
        q30_count += q30

    if total_count > 0:
        q20_percent = 100 * float(q20_count) / float(total_count)
        q30_percent = 100 * float(q30_count) / float(total_count)
    else:
        # No bases at all: a ratio is undefined, report zero.
        q20_percent = 0.0
        q30_percent = 0.0

    print("total bases:", total_count)
    print("q20 bases:", q20_count)
    print("q30 bases:", q30_count)
    print("q20 percents:", q20_percent)
    print("q30 percents:", q30_percent)
def main():
    """Command-line entry point: run ``stat`` on the first argument.

    Prints a usage line and exits with status 1 when no file name is given.
    """
    arguments = sys.argv[1:]
    if not arguments:
        print("usage: python q30.py <fastq_file>")
        sys.exit(1)
    stat(arguments[0])
if __name__ == "__main__":
    # Time the whole run and report the elapsed seconds afterwards.
    started = time.time()
    main()
    elapsed = time.time() - started
    print('Time used: ' + str(elapsed))
@@ -1,7 +1,11 @@ | |||
{ | |||
"{{ project_name }}.disk_size": "100", | |||
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | |||
"{{ project_name }}.fastq": "{{ fastq }}", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.sample": "{{ sample }}" | |||
} | |||
"{{ project_name }}.SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics", | |||
"{{ project_name }}.Dedup_bam": "{{ Dedup_bam }}", | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.disk_size": "500", | |||
"{{ project_name }}.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2018.08.01", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.Dedup_bam_index": "{{ Dedup_bam_index }}", | |||
"{{ project_name }}.sample": "{{ sample }}", | |||
"{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | |||
} |
@@ -0,0 +1,36 @@ | |||
# Runs the Sentieon driver with the QualityYield algorithm on a BAM file.
task deduped_Metrics {
    # Reference: directory plus the name of the FASTA file inside it
    File ref_dir
    String fasta

    # Installation prefix of the Sentieon tools
    String SENTIEON_INSTALL_DIR

    # Sample name; used as the prefix of the metrics file name
    String sample

    # Input alignment and its index. The index is not referenced by the
    # command; presumably it is declared so it is localized with the BAM.
    File Dedup_bam
    File Dedup_bam_index

    # Values consumed by the runtime section
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Abort on the first failing command, including inside pipelines
        set -eo pipefail
        export SENTIEON_LICENSE=192.168.0.55:8990
        # Use every processor reported by nproc
        nt=$(nproc)
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo QualityYield ${sample}_deduped_QualityYield
        # Record the working-directory listing; exposed as the "files" output
        ls > file
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File deduped_QualityYield = "${sample}_deduped_QualityYield"
        File files = "file"
    }
}
@@ -1,22 +0,0 @@ | |||
# Runs /opt/q30.py on one FASTQ file and captures its report.
task q30 {
    # FASTQ file handed to the script
    File fastq

    # Sample name; used as the prefix of the report file name
    String sample

    # Values consumed by the runtime section
    String docker
    String cluster_config
    String disk_size

    command <<<
        python /opt/q30.py ${fastq} > ${sample}.q30.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Standard output of the script, redirected by the command above
        File result = "${sample}.q30.txt"
    }
}
@@ -1,16 +1,24 @@ | |||
import "./tasks/q30.wdl" as q30 | |||
import "./tasks/deduped_Metrics.wdl" as deduped_Metrics | |||
workflow {{ project_name }} { | |||
File fastq | |||
File ref_dir | |||
String SENTIEON_INSTALL_DIR | |||
String sample | |||
String fasta | |||
File Dedup_bam | |||
File Dedup_bam_index | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
call q30.q30 as q30 { | |||
call deduped_Metrics.deduped_Metrics as deduped_Metrics { | |||
input: | |||
fastq=fastq, | |||
ref_dir=ref_dir, | |||
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | |||
sample=sample, | |||
fasta=fasta, | |||
Dedup_bam=Dedup_bam, | |||
Dedup_bam_index=Dedup_bam_index, | |||
docker=docker, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size |