# Byte-compiled / optimized / DLL files | |||||
__pycache__/ | |||||
*.py[cod] | |||||
*$py.class | |||||
# C extensions | |||||
*.so | |||||
# Distribution / packaging | |||||
.Python | |||||
env/ | |||||
build/ | |||||
develop-eggs/ | |||||
dist/ | |||||
downloads/ | |||||
eggs/ | |||||
.eggs/ | |||||
lib/ | |||||
lib64/ | |||||
parts/ | |||||
sdist/ | |||||
var/ | |||||
*.egg-info/ | |||||
.installed.cfg | |||||
*.egg | |||||
# PyInstaller | |||||
# Usually these files are written by a python script from a template | |||||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | |||||
*.manifest | |||||
*.spec | |||||
# Installer logs | |||||
pip-log.txt | |||||
pip-delete-this-directory.txt | |||||
# Unit test / coverage reports | |||||
htmlcov/ | |||||
.tox/ | |||||
.coverage | |||||
.coverage.* | |||||
.cache | |||||
nosetests.xml | |||||
coverage.xml | |||||
*,cover | |||||
.hypothesis/ | |||||
# Translations | |||||
*.mo | |||||
*.pot | |||||
# Django stuff: | |||||
*.log | |||||
local_settings.py | |||||
# Flask stuff: | |||||
instance/ | |||||
.webassets-cache | |||||
# Scrapy stuff: | |||||
.scrapy | |||||
# Sphinx documentation | |||||
docs/_build/ | |||||
# PyBuilder | |||||
target/ | |||||
# IPython Notebook | |||||
.ipynb_checkpoints | |||||
# pyenv | |||||
.python-version | |||||
# celery beat schedule file | |||||
celerybeat-schedule | |||||
# dotenv | |||||
.env | |||||
# virtualenv | |||||
venv/ | |||||
ENV/ | |||||
# Spyder project settings | |||||
.spyderproject | |||||
# Rope project settings | |||||
.ropeproject |
The MIT License (MIT) | |||||
Copyright (c) 2016 Shifu Chen | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
of this software and associated documentation files (the "Software"), to deal | |||||
in the Software without restriction, including without limitation the rights | |||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
copies of the Software, and to permit persons to whom the Software is | |||||
furnished to do so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. |
# q30 | |||||
A simple script to calculate q20/q30 percentages of a fastq file (can be gzipped) | |||||
```shell | |||||
python q30.py <fastq_file_name> | |||||
``` | |||||
# output | |||||
``` | |||||
('total bases:', 1386659) | |||||
('q20 bases:', 1280980) | |||||
('q30 bases:', 1232024) | |||||
('q20 percents:', 92.37887613320939) | |||||
('q30 percents:', 88.84837584438567) | |||||
Time used: 0.221389055252 | |||||
``` |
#!/usr/bin/env python | |||||
import gzip | |||||
import os,sys | |||||
def isFastq(f):
    """Return True when the file name *f* ends with a FASTQ extension.

    Recognised suffixes are ``.fq`` and ``.fastq`` plus their gzipped forms
    ``.fq.gz`` and ``.fastq.gz``.  The comparison is case-sensitive.
    """
    # str.endswith accepts a tuple, so one call covers every extension.  The
    # gzipped short form carries its leading dot like the other entries;
    # without it a name such as "notfq.gz" would be accepted.
    return f.endswith((".fq", ".fastq", ".fq.gz", ".fastq.gz"))
################################ | |||||
#fastq.reader | |||||
class Reader:
    """Sequential reader for FASTQ files, plain or gzip-compressed.

    The file is opened on construction.  Names ending in ``.gz`` are opened
    through ``gzip.open`` in mode ``"r"``, which is binary on Python 3, so
    records from gzipped files are ``bytes`` while records from plain files
    are ``str``.

    Attributes:
        filename: Path given to the constructor.
    """

    def __init__(self, fname):
        """Open *fname* for reading; exits the process if no handle results."""
        self.__file = None
        self.__gz = False
        self.__eof = False
        self.filename = fname
        if self.filename.endswith(".gz"):
            self.__gz = True
            self.__file = gzip.open(self.filename, "r")
        else:
            self.__gz = False
            self.__file = open(self.filename, "r")
        if self.__file is None:
            print("Failed to open file " + self.filename)
            sys.exit(1)

    def __del__(self):
        self.close()

    def close(self):
        """Close the underlying file; safe to call more than once."""
        if self.__file is not None:
            self.__file.close()
            self.__file = None

    def nextRead(self):
        """Return the next record as a list of four right-stripped lines.

        The lines are name, sequence, strand and quality, in that order.
        Returns None once the end of the file has been reached, or when any
        of the four lines is empty after stripping (which also covers a
        truncated final record).
        """
        if self.__eof or self.__file is None:
            return None

        lines = []
        # read 4 lines: name, sequence, strand, quality
        for _ in range(4):
            line = self.__file.readline().rstrip()
            if len(line) == 0:
                self.__eof = True
                return None
            lines.append(line)
        return lines

    def isEOF(self):
        """Return True once nextRead() has hit the end of the input."""
        # Report the flag maintained by nextRead() instead of a constant.
        return self.__eof
################################ | |||||
#fastq.writer | |||||
class Writer:
    """FASTQ writer for plain or gzip-compressed output.

    The target is opened on construction.  Names ending in ``.gz`` are
    written through ``gzip.open`` in text mode so that ``str`` lines can be
    written on Python 3 (binary mode rejects ``str`` with a TypeError).

    Attributes:
        filename: Path given to the constructor.
    """

    # Class-level defaults keep __del__/close() safe even when __init__
    # raises before the instance attributes are assigned.
    filename = ""
    __file = None
    __gz = False

    def __init__(self, fname):
        """Open *fname* for writing; exits the process if no handle results."""
        self.filename = fname
        if self.filename.endswith(".gz"):
            self.__gz = True
            self.__file = gzip.open(self.filename, "wt")
        else:
            self.__gz = False
            self.__file = open(self.filename, "w")
        if self.__file is None:
            print("Failed to open file " + self.filename + " to write")
            sys.exit(1)

    def __del__(self):
        self.close()

    @staticmethod
    def _text(value):
        """Return *value* as text, decoding bytes (e.g. gzip-read lines)."""
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode()
        return value

    def close(self):
        """Flush and close the output file; safe to call more than once."""
        if self.__file is not None:
            self.__file.flush()
            self.__file.close()
            self.__file = None

    def flush(self):
        """Flush buffered output to the underlying file, if it is open."""
        if self.__file is not None:
            self.__file.flush()

    def writeLines(self, lines):
        """Write each item of *lines* followed by a newline.

        Returns True on success, False when the file is not open.
        """
        if self.__file is None:
            return False
        for line in lines:
            self.__file.write(self._text(line) + "\n")
        return True

    def writeRead(self, name, seqence, strand, quality):
        """Write one four-line FASTQ record.

        Returns True on success, False when the file is not open.
        """
        return self.writeLines((name, seqence, strand, quality))
#!/usr/bin/env python | |||||
import os,sys | |||||
import fastq | |||||
import time | |||||
def qual_stat(qstr):
    """Count the bases in a quality string that reach Q20 and Q30.

    Each symbol is decoded with an offset of 33 (score = code point - 33).

    Args:
        qstr: Quality line of a FASTQ record, as ``str`` or ``bytes``.

    Returns:
        Tuple ``(q20, q30)``: the number of symbols scoring >= 20 and the
        number scoring >= 30.  Bases counted in q30 are also counted in q20.
    """
    q20 = 0
    q30 = 0
    for q in qstr:
        # Iterating bytes on Python 3 yields ints already; iterating str
        # yields one-character strings that need ord().  int(q) would raise
        # on symbols such as "I" and misread digit characters.
        code = q if isinstance(q, int) else ord(q)
        qual = code - 33
        if qual >= 30:
            q30 += 1
            q20 += 1
        elif qual >= 20:
            q20 += 1
    return q20, q30
def stat(filename):
    """Print total, Q20 and Q30 base counts and percentages for a FASTQ file.

    Reads every record through ``fastq.Reader`` and accumulates the length of
    the quality line plus the counts returned by ``qual_stat``.

    Args:
        filename: Path of the FASTQ file to analyse.
    """
    reader = fastq.Reader(filename)
    total_count = 0
    q20_count = 0
    q30_count = 0
    while True:
        read = reader.nextRead()
        if read is None:
            break
        # read[3] is the quality line of the four-line record.
        total_count += len(read[3])
        q20, q30 = qual_stat(read[3])
        q20_count += q20
        q30_count += q30

    # An empty file (or one with no complete record) has no bases; report
    # 0.0 percent rather than dividing by zero.
    if total_count > 0:
        q20_percent = 100 * float(q20_count) / float(total_count)
        q30_percent = 100 * float(q30_count) / float(total_count)
    else:
        q20_percent = 0.0
        q30_percent = 0.0

    print("total bases:", total_count)
    print("q20 bases:", q20_count)
    print("q30 bases:", q30_count)
    print("q20 percents:", q20_percent)
    print("q30 percents:", q30_percent)
def main():
    """Command-line entry point: run stat() on the first argument.

    Prints a usage line and exits with status 1 when no file is given.
    """
    arguments = sys.argv[1:]
    if not arguments:
        print("usage: python q30.py <fastq_file>")
        sys.exit(1)
    stat(arguments[0])
if __name__ == "__main__":
    # Time the whole run and report the elapsed seconds afterwards.
    started = time.time()
    main()
    elapsed = time.time() - started
    print('Time used: ' + str(elapsed))
{ | { | ||||
"{{ project_name }}.disk_size": "100", | |||||
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | |||||
"{{ project_name }}.fastq": "{{ fastq }}", | |||||
"{{ project_name }}.cluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", | |||||
"{{ project_name }}.sample": "{{ sample }}" | |||||
} | |||||
"{{ project_name }}.SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics", | |||||
"{{ project_name }}.Dedup_bam": "{{ Dedup_bam }}", | |||||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||||
"{{ project_name }}.disk_size": "500", | |||||
"{{ project_name }}.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2018.08.01", | |||||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||||
"{{ project_name }}.Dedup_bam_index": "{{ Dedup_bam_index }}", | |||||
"{{ project_name }}.sample": "{{ sample }}", | |||||
"{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | |||||
} |
# Runs the Sentieon driver with the QualityYield algorithm on a deduplicated
# BAM and writes the metrics to "<sample>_deduped_QualityYield".
task deduped_Metrics {
    # Reference: directory plus the FASTA file name inside it.
    File ref_dir
    String fasta

    # Deduplicated alignment and its index.  The index is not referenced in
    # the command; presumably it is declared so that it is localized next to
    # the BAM -- confirm against the backend's localization behaviour.
    File Dedup_bam
    File Dedup_bam_index

    # Sample name (prefix of the output file) and Sentieon install location.
    String sample
    String SENTIEON_INSTALL_DIR

    # Execution settings consumed by the runtime section below.
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=192.168.0.55:8990
        nt=$(nproc)
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo QualityYield ${sample}_deduped_QualityYield
        ls > file
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Metrics file produced by the QualityYield algorithm.
        File deduped_QualityYield = "${sample}_deduped_QualityYield"
        # Listing of the working directory captured by "ls > file".
        File files = "file"
    }
}
# Runs /opt/q30.py on a FASTQ file and captures its standard output in
# "<sample>.q30.txt".
task q30 {
    # Input FASTQ and the sample name used to name the result file.
    File fastq
    String sample

    # Execution settings consumed by the runtime section below.
    String docker
    String cluster_config
    String disk_size

    command <<<
        python /opt/q30.py ${fastq} > ${sample}.q30.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Text report written by the redirect in the command section.
        File result = "${sample}.q30.txt"
    }
}
import "./tasks/q30.wdl" as q30 | |||||
import "./tasks/deduped_Metrics.wdl" as deduped_Metrics | |||||
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
File fastq | |||||
File ref_dir | |||||
String SENTIEON_INSTALL_DIR | |||||
String sample | String sample | ||||
String fasta | |||||
File Dedup_bam | |||||
File Dedup_bam_index | |||||
String docker | String docker | ||||
String cluster_config | String cluster_config | ||||
String disk_size | String disk_size | ||||
call q30.q30 as q30 { | |||||
call deduped_Metrics.deduped_Metrics as deduped_Metrics { | |||||
input: | input: | ||||
fastq=fastq, | |||||
ref_dir=ref_dir, | |||||
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | |||||
sample=sample, | sample=sample, | ||||
fasta=fasta, | |||||
Dedup_bam=Dedup_bam, | |||||
Dedup_bam_index=Dedup_bam_index, | |||||
docker=docker, | docker=docker, | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size |