@@ -0,0 +1,312 @@ | |||
Mozilla Public License Version 2.0 | |||
1. Definitions | |||
1.1. "Contributor" means each individual or legal entity that creates, contributes | |||
to the creation of, or owns Covered Software. | |||
1.2. "Contributor Version" means the combination of the Contributions of others | |||
(if any) used by a Contributor and that particular Contributor's Contribution. | |||
1.3. "Contribution" means Covered Software of a particular Contributor. | |||
1.4. "Covered Software" means Source Code Form to which the initial Contributor | |||
has attached the notice in Exhibit A, the Executable Form of such Source Code | |||
Form, and Modifications of such Source Code Form, in each case including portions | |||
thereof. | |||
1.5. "Incompatible With Secondary Licenses" means | |||
(a) that the initial Contributor has attached the notice described in Exhibit | |||
B to the Covered Software; or | |||
(b) that the Covered Software was made available under the terms of version | |||
1.1 or earlier of the License, but not also under the terms of a Secondary | |||
License. | |||
1.6. "Executable Form" means any form of the work other than Source Code Form. | |||
1.7. "Larger Work" means a work that combines Covered Software with other | |||
material, in a separate file or files, that is not Covered Software. | |||
1.8. "License" means this document. | |||
1.9. "Licensable" means having the right to grant, to the maximum extent possible, | |||
whether at the time of the initial grant or subsequently, any and all of the | |||
rights conveyed by this License. | |||
1.10. "Modifications" means any of the following: | |||
(a) any file in Source Code Form that results from an addition to, deletion | |||
from, or modification of the contents of Covered Software; or | |||
(b) any new file in Source Code Form that contains any Covered Software. | |||
1.11. "Patent Claims" of a Contributor means any patent claim(s), including | |||
without limitation, method, process, and apparatus claims, in any patent Licensable | |||
by such Contributor that would be infringed, but for the grant of the License, | |||
by the making, using, selling, offering for sale, having made, import, or | |||
transfer of either its Contributions or its Contributor Version. | |||
1.12. "Secondary License" means either the GNU General Public License, Version | |||
2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General | |||
Public License, Version 3.0, or any later versions of those licenses. | |||
1.13. "Source Code Form" means the form of the work preferred for making modifications. | |||
1.14. "You" (or "Your") means an individual or a legal entity exercising rights | |||
under this License. For legal entities, "You" includes any entity that controls, | |||
is controlled by, or is under common control with You. For purposes of this | |||
definition, "control" means (a) the power, direct or indirect, to cause the | |||
direction or management of such entity, whether by contract or otherwise, | |||
or (b) ownership of more than fifty percent (50%) of the outstanding shares | |||
or beneficial ownership of such entity. | |||
2. License Grants and Conditions | |||
2.1. Grants | |||
Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive | |||
license: | |||
(a) under intellectual property rights (other than patent or trademark) Licensable | |||
by such Contributor to use, reproduce, make available, modify, display, perform, | |||
distribute, and otherwise exploit its Contributions, either on an unmodified | |||
basis, with Modifications, or as part of a Larger Work; and | |||
(b) under Patent Claims of such Contributor to make, use, sell, offer for | |||
sale, have made, import, and otherwise transfer either its Contributions or | |||
its Contributor Version. | |||
2.2. Effective Date | |||
The licenses granted in Section 2.1 with respect to any Contribution become | |||
effective for each Contribution on the date the Contributor first distributes | |||
such Contribution. | |||
2.3. Limitations on Grant Scope | |||
The licenses granted in this Section 2 are the only rights granted under this | |||
License. No additional rights or licenses will be implied from the distribution | |||
or licensing of Covered Software under this License. Notwithstanding Section | |||
2.1(b) above, no patent license is granted by a Contributor: | |||
(a) for any code that a Contributor has removed from Covered Software; or | |||
(b) for infringements caused by: (i) Your and any other third party's modifications | |||
of Covered Software, or (ii) the combination of its Contributions with other | |||
software (except as part of its Contributor Version); or | |||
(c) under Patent Claims infringed by Covered Software in the absence of its | |||
Contributions. | |||
This License does not grant any rights in the trademarks, service marks, or | |||
logos of any Contributor (except as may be necessary to comply with the notice | |||
requirements in Section 3.4). | |||
2.4. Subsequent Licenses | |||
No Contributor makes additional grants as a result of Your choice to distribute | |||
the Covered Software under a subsequent version of this License (see Section | |||
10.2) or under the terms of a Secondary License (if permitted under the terms | |||
of Section 3.3). | |||
2.5. Representation | |||
Each Contributor represents that the Contributor believes its Contributions | |||
are its original creation(s) or it has sufficient rights to grant the rights | |||
to its Contributions conveyed by this License. | |||
2.6. Fair Use | |||
This License is not intended to limit any rights You have under applicable | |||
copyright doctrines of fair use, fair dealing, or other equivalents. | |||
2.7. Conditions | |||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in | |||
Section 2.1. | |||
3. Responsibilities | |||
3.1. Distribution of Source Form | |||
All distribution of Covered Software in Source Code Form, including any Modifications | |||
that You create or to which You contribute, must be under the terms of this | |||
License. You must inform recipients that the Source Code Form of the Covered | |||
Software is governed by the terms of this License, and how they can obtain | |||
a copy of this License. You may not attempt to alter or restrict the recipients' | |||
rights in the Source Code Form. | |||
3.2. Distribution of Executable Form | |||
If You distribute Covered Software in Executable Form then: | |||
(a) such Covered Software must also be made available in Source Code Form, | |||
as described in Section 3.1, and You must inform recipients of the Executable | |||
Form how they can obtain a copy of such Source Code Form by reasonable means | |||
in a timely manner, at a charge no more than the cost of distribution to the | |||
recipient; and | |||
(b) You may distribute such Executable Form under the terms of this License, | |||
or sublicense it under different terms, provided that the license for the | |||
Executable Form does not attempt to limit or alter the recipients' rights | |||
in the Source Code Form under this License. | |||
3.3. Distribution of a Larger Work | |||
You may create and distribute a Larger Work under terms of Your choice, provided | |||
that You also comply with the requirements of this License for the Covered | |||
Software. If the Larger Work is a combination of Covered Software with a work | |||
governed by one or more Secondary Licenses, and the Covered Software is not | |||
Incompatible With Secondary Licenses, this License permits You to additionally | |||
distribute such Covered Software under the terms of such Secondary License(s), | |||
so that the recipient of the Larger Work may, at their option, further distribute | |||
the Covered Software under the terms of either this License or such Secondary | |||
License(s). | |||
3.4. Notices | |||
You may not remove or alter the substance of any license notices (including | |||
copyright notices, patent notices, disclaimers of warranty, or limitations | |||
of liability) contained within the Source Code Form of the Covered Software, | |||
except that You may alter any license notices to the extent required to remedy | |||
known factual inaccuracies. | |||
3.5. Application of Additional Terms | |||
You may choose to offer, and to charge a fee for, warranty, support, indemnity | |||
or liability obligations to one or more recipients of Covered Software. However, | |||
You may do so only on Your own behalf, and not on behalf of any Contributor. | |||
You must make it absolutely clear that any such warranty, support, indemnity, | |||
or liability obligation is offered by You alone, and You hereby agree to indemnify | |||
every Contributor for any liability incurred by such Contributor as a result | |||
of warranty, support, indemnity or liability terms You offer. You may include | |||
additional disclaimers of warranty and limitations of liability specific to | |||
any jurisdiction. | |||
4. Inability to Comply Due to Statute or Regulation | |||
If it is impossible for You to comply with any of the terms of this License | |||
with respect to some or all of the Covered Software due to statute, judicial | |||
order, or regulation then You must: (a) comply with the terms of this License | |||
to the maximum extent possible; and (b) describe the limitations and the code | |||
they affect. Such description must be placed in a text file included with | |||
all distributions of the Covered Software under this License. Except to the | |||
extent prohibited by statute or regulation, such description must be sufficiently | |||
detailed for a recipient of ordinary skill to be able to understand it. | |||
5. Termination | |||
5.1. The rights granted under this License will terminate automatically if | |||
You fail to comply with any of its terms. However, if You become compliant, | |||
then the rights granted under this License from a particular Contributor are | |||
reinstated (a) provisionally, unless and until such Contributor explicitly | |||
and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor | |||
fails to notify You of the non-compliance by some reasonable means prior to | |||
60 days after You have come back into compliance. Moreover, Your grants from | |||
a particular Contributor are reinstated on an ongoing basis if such Contributor | |||
notifies You of the non-compliance by some reasonable means, this is the first | |||
time You have received notice of non-compliance with this License from such | |||
Contributor, and You become compliant prior to 30 days after Your receipt | |||
of the notice. | |||
5.2. If You initiate litigation against any entity by asserting a patent infringement | |||
claim (excluding declaratory judgment actions, counter-claims, and cross-claims) | |||
alleging that a Contributor Version directly or indirectly infringes any patent, | |||
then the rights granted to You by any and all Contributors for the Covered | |||
Software under Section 2.1 of this License shall terminate. | |||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end | |||
user license agreements (excluding distributors and resellers) which have | |||
been validly granted by You or Your distributors under this License prior | |||
to termination shall survive termination. | |||
6. Disclaimer of Warranty | |||
Covered Software is provided under this License on an "as is" basis, without | |||
warranty of any kind, either expressed, implied, or statutory, including, | |||
without limitation, warranties that the Covered Software is free of defects, | |||
merchantable, fit for a particular purpose or non-infringing. The entire risk | |||
as to the quality and performance of the Covered Software is with You. Should | |||
any Covered Software prove defective in any respect, You (not any Contributor) | |||
assume the cost of any necessary servicing, repair, or correction. This disclaimer | |||
of warranty constitutes an essential part of this License. No use of any Covered | |||
Software is authorized under this License except under this disclaimer. | |||
7. Limitation of Liability | |||
Under no circumstances and under no legal theory, whether tort (including | |||
negligence), contract, or otherwise, shall any Contributor, or anyone who | |||
distributes Covered Software as permitted above, be liable to You for any | |||
direct, indirect, special, incidental, or consequential damages of any character | |||
including, without limitation, damages for lost profits, loss of goodwill, | |||
work stoppage, computer failure or malfunction, or any and all other commercial | |||
damages or losses, even if such party shall have been informed of the possibility | |||
of such damages. This limitation of liability shall not apply to liability | |||
for death or personal injury resulting from such party's negligence to the | |||
extent applicable law prohibits such limitation. Some jurisdictions do not | |||
allow the exclusion or limitation of incidental or consequential damages, | |||
so this exclusion and limitation may not apply to You. | |||
8. Litigation | |||
Any litigation relating to this License may be brought only in the courts | |||
of a jurisdiction where the defendant maintains its principal place of business | |||
and such litigation shall be governed by laws of that jurisdiction, without | |||
reference to its conflict-of-law provisions. Nothing in this Section shall | |||
prevent a party's ability to bring cross-claims or counter-claims. | |||
9. Miscellaneous | |||
This License represents the complete agreement concerning the subject matter | |||
hereof. If any provision of this License is held to be unenforceable, such | |||
provision shall be reformed only to the extent necessary to make it enforceable. | |||
Any law or regulation which provides that the language of a contract shall | |||
be construed against the drafter shall not be used to construe this License | |||
against a Contributor. | |||
10. Versions of the License | |||
10.1. New Versions | |||
Mozilla Foundation is the license steward. Except as provided in Section 10.3, | |||
no one other than the license steward has the right to modify or publish new | |||
versions of this License. Each version will be given a distinguishing version | |||
number. | |||
10.2. Effect of New Versions | |||
You may distribute the Covered Software under the terms of the version of | |||
the License under which You originally received the Covered Software, or under | |||
the terms of any subsequent version published by the license steward. | |||
10.3. Modified Versions | |||
If you create software not governed by this License, and you want to create | |||
a new license for such software, you may create and use a modified version | |||
of this License if you rename the license and remove any references to the | |||
name of the license steward (except to note that such modified license differs | |||
from this License). | |||
10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses | |||
If You choose to distribute Source Code Form that is Incompatible With Secondary | |||
Licenses under the terms of this version of the License, the notice described | |||
in Exhibit B of this License must be attached. Exhibit A - Source Code Form | |||
License Notice | |||
This Source Code Form is subject to the terms of the Mozilla Public License, | |||
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain | |||
one at http://mozilla.org/MPL/2.0/. | |||
If it is not possible or desirable to put the notice in a particular file, | |||
then You may include the notice in a location (such as a LICENSE file in a | |||
relevant directory) where a recipient would be likely to look for such a notice. | |||
You may add additional accurate notices of copyright ownership. | |||
Exhibit B - "Incompatible With Secondary Licenses" Notice | |||
This Source Code Form is "Incompatible With Secondary Licenses", as defined | |||
by the Mozilla Public License, v. 2.0. |
@@ -0,0 +1,3 @@ | |||
# poi-upstream
Automated, integrated analysis pipeline for cancer genomics data: somatic SNV/indel, structural variant, CNV, MSI, HRD, and TMB calling and annotation from tumor/normal BAMs.
@@ -0,0 +1,36 @@ | |||
{
"tumor_bam": "",
"tumor_bam_index": "",
"normal_bam": "",
"normal_bam_index": "",
"read_structure": "",
"duplex_umi": "",
"platform": "MGI",
"regions": "oss://ivd-product/reference/bed/Dynegene/SureSelectXT_HS_V8_S33266340_hg38_Regions.bed",
"interval_padding": "0",
"SENTIEON_LICENSE": "172.25.164.226:8990",
"fasta": "GRCh38_full_analysis_set_plus_decoy_hla.fa",
"ref_dir": "oss://ivd-product/reference/refGenome/",
"ref_flat": "oss://ivd-product/reference/refGenome/refFlat.hg38.txt",
"dbmills_dir": "oss://genomics-platform-reference-data/GRCh38.d1.vd1/",
"db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf",
"dbsnp": "dbsnp_146.hg38.vcf",
"dbsnp_dir": "oss://genomics-platform-reference-data/GRCh38.d1.vd1/",
"annovar_database": "oss://genomics-platform-reference-data/annovar/",
"annotsv_database": "oss://ivd-product/reference/AnnotSV/",
"germline_resource": "oss://genomics-platform-reference-data/gnomAD/af-only-gnomad.v3.1.1.vcf.gz",
"germline_resource_tbi": "oss://genomics-platform-reference-data/gnomAD/af-only-gnomad.v3.1.1.vcf.gz.tbi",
"gc": "oss://ivd-product/reference/Sequenza/GRCh38.gc50Base.wig.gz",
"baseline": "oss://ivd-product/reference/MSIsensor/hg38_reference.list_baseline",
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/sentieon-genomics:v202112.05",
"manta_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/manta:1.6.0",
"bcftools_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/bcftools:v1.9",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/annovar:v20191024",
"annotsv_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/annotsv:3.1.3",
"cnvkit_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/cnvkit:0.9.8",
"sequenza_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/sequenza:3.0.0",
"msisensor_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/msisensor-pro:1.2.0",
"tmb_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/tmb:1.0.0",
"cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"disk_size": "200"
}
@@ -0,0 +1,38 @@ | |||
{
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.hrd": "{{ hrd }}",
"{{ project_name }}.tumor_bam": "{{ tumor_bam }}",
"{{ project_name }}.tumor_bam_index": "{{ tumor_bam_index }}",
"{{ project_name }}.normal_bam": "{{ normal_bam }}",
"{{ project_name }}.normal_bam_index": "{{ normal_bam_index }}",
"{{ project_name }}.read_structure": "{{ read_structure }}",
"{{ project_name }}.duplex_umi": "{{ duplex_umi }}",
"{{ project_name }}.SENTIEON_LICENSE": "{{ SENTIEON_LICENSE }}",
"{{ project_name }}.sentieon_docker": "{{ sentieon_docker }}",
"{{ project_name }}.manta_docker": "{{ manta_docker }}",
"{{ project_name }}.bcftools_docker": "{{ bcftools_docker }}",
"{{ project_name }}.annovar_docker": "{{ annovar_docker }}",
"{{ project_name }}.annotsv_docker": "{{ annotsv_docker }}",
"{{ project_name }}.cnvkit_docker": "{{ cnvkit_docker }}",
"{{ project_name }}.sequenza_docker": "{{ sequenza_docker }}",
"{{ project_name }}.msisensor_docker": "{{ msisensor_docker }}",
"{{ project_name }}.tmb_docker": "{{ tmb_docker }}",
"{{ project_name }}.platform": "{{ platform }}",
"{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.ref_dir": "{{ ref_dir }}",
"{{ project_name }}.dbsnp": "{{ dbsnp }}",
"{{ project_name }}.dbsnp_dir": "{{ dbsnp_dir }}",
"{{ project_name }}.dbmills_dir": "{{ dbmills_dir }}",
"{{ project_name }}.db_mills": "{{ db_mills }}",
"{{ project_name }}.germline_resource": "{{ germline_resource }}",
"{{ project_name }}.germline_resource_tbi": "{{ germline_resource_tbi }}",
"{{ project_name }}.regions": "{{ regions }}",
"{{ project_name }}.interval_padding": "{{ interval_padding }}",
"{{ project_name }}.annovar_database": "{{ annovar_database }}",
"{{ project_name }}.annotsv_database": "{{ annotsv_database }}",
"{{ project_name }}.gc": "{{ gc }}",
"{{ project_name }}.baseline": "{{ baseline }}",
"{{ project_name }}.ref_flat": "{{ ref_flat }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.cluster_config": "{{ cluster_config }}"
}
@@ -0,0 +1,35 @@ | |||
task ANNOVAR { | |||
File vcf | |||
String basename = basename(vcf,".vcf") | |||
File annovar_database | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
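# table_annovar.pl pairs each -protocol entry with the matching -operation flag:
# g = gene-based (refGene), r = region-based (cytoBand, genomicSuperDups),
# f = filter-based (ClinVar, InterVar, COSMIC, gnomAD exome, dbNSFP, avsnp150).
# -vcfinput keeps the VCF and also writes the hg38_multianno .txt/.vcf tables.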
/installations/annovar/table_annovar.pl ${vcf} \ | |||
${annovar_database} -buildver hg38 \ | |||
-out ${basename} -remove \ | |||
-protocol refGene,cytoBand,genomicSuperDups,clinvar_20220320,intervar_20180118,cosmic95_coding,cosmic95_noncoding,gnomad211_exome,dbnsfp42c,avsnp150 \ | |||
-operation g,r,r,f,f,f,f,f,f,f \ | |||
-nastring . -vcfinput -polish -thread $nt | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File avinput = "${basename}.avinput" | |||
File multianno_txt = "${basename}.hg38_multianno.txt" | |||
File multianno_vcf = "${basename}.hg38_multianno.vcf" | |||
} | |||
} |
@@ -0,0 +1,36 @@ | |||
task AnnotSV { | |||
String sample | |||
File somatic_vcf | |||
File? germline_vcf | |||
File annotsv_database | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
export ANNOTSV=/opt/AnnotSV | |||
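# Annotate the somatic (and, when provided, germline) SV VCFs against the GRCh38 AnnotSV annotation bundle.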
if [ ${somatic_vcf} ]; then | |||
$ANNOTSV/bin/AnnotSV -SVinputFile ${somatic_vcf} -outputFile ${sample}.somatic.SV.annotated.tsv -genomeBuild GRCh38 -annotationsDir ${annotsv_database} -outputDir . | |||
fi | |||
if [ ${germline_vcf} ]; then | |||
$ANNOTSV/bin/AnnotSV -SVinputFile ${germline_vcf} -outputFile ${sample}.germline.SV.annotated.tsv -genomeBuild GRCh38 -annotationsDir ${annotsv_database} -outputDir . | |||
fi | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File AnnotSV_somatic_SV = "${sample}.somatic.SV.annotated.tsv" | |||
File? AnnotSV_germline_SV = "${sample}.germline.SV.annotated.tsv" | |||
} | |||
} |
@@ -0,0 +1,56 @@ | |||
task BQSR { | |||
File ref_dir | |||
File dbsnp_dir | |||
File dbmills_dir | |||
String sample | |||
String SENTIEON_LICENSE | |||
String fasta | |||
String dbsnp | |||
String db_mills | |||
File deduped_bam | |||
File deduped_bam_index | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
export SENTIEON_LICENSE=${SENTIEON_LICENSE} | |||
nt=$(nproc) | |||
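# Pass 1 (QualCal) builds the recalibration table from known sites (dbSNP, Mills indels).
# Pass 2 applies it (-q), emits the post-recalibration table and, via ReadWriter, the recalibrated BAM.
# The last two commands compare before/after covariates and render the BQSR report PDF.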
sentieon driver -t $nt \ | |||
-r ${ref_dir}/${fasta} -i ${deduped_bam} \ | |||
--algo QualCal \ | |||
-k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \ | |||
${sample}_recal_data.table | |||
sentieon driver -t $nt \ | |||
-r ${ref_dir}/${fasta} -i ${deduped_bam} -q ${sample}_recal_data.table \ | |||
--algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \ | |||
${sample}_recal_data.table.post --algo ReadWriter ${sample}.sorted.deduped.recaled.bam | |||
sentieon driver -t $nt --algo QualCal \ | |||
--plot --before ${sample}_recal_data.table --after ${sample}_recal_data.table.post ${sample}_recal_data.csv | |||
sentieon plot bqsr -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File recal_table = "${sample}_recal_data.table" | |||
File recal_post = "${sample}_recal_data.table.post" | |||
File recaled_bam = "${sample}.sorted.deduped.recaled.bam" | |||
File recaled_bam_index = "${sample}.sorted.deduped.recaled.bam.bai" | |||
File recal_csv = "${sample}_recal_data.csv" | |||
File bqsrreport_pdf = "${sample}_bqsrreport.pdf" | |||
} | |||
} |
@@ -0,0 +1,96 @@ | |||
task CNVkit { | |||
String sample | |||
File ref_dir | |||
String fasta | |||
File ref_flat | |||
File regions | |||
File hrd | |||
File tumor_bam | |||
File tumor_bam_index | |||
File? normal_bam | |||
File? normal_bam_index | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
cnvkit.py access ${ref_dir}/${fasta} -o access.bed | |||
# Prepare the target bed | |||
cnvkit.py target ${regions} --annotate ${ref_flat} --split --short-names -o my_baits.bed | |||
if [ ${normal_bam} ]; then | |||
cnvkit.py autobin ${tumor_bam} ${normal_bam} -t my_baits.bed -g access.bed | |||
else | |||
cnvkit.py autobin ${tumor_bam} -t my_baits.bed -g access.bed | |||
fi | |||
# For each sample... | |||
cnvkit.py coverage ${tumor_bam} my_baits.target.bed -o ${sample}.T.targetcoverage.cnn | |||
cnvkit.py coverage ${tumor_bam} my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn | |||
if [ ${normal_bam} ]; then | |||
cnvkit.py coverage ${normal_bam} my_baits.target.bed -o ${sample}.N.targetcoverage.cnn | |||
cnvkit.py coverage ${normal_bam} my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn | |||
# With paired or pooled normals | |||
cnvkit.py reference *.N.{,anti}targetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn | |||
else | |||
# With no control sample | |||
cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed | |||
fi | |||
# For each tumor sample... | |||
cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr | |||
cnvkit.py segment ${sample}.cnr -o ${sample}.cns | |||
# Check noise | |||
cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats | |||
# Derive each segment's absolute integer copy number, ploidy must be int value | |||
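# Tumor purity is read from row 2, column 6 of the HRD/Sequenza summary table
# (assumed to hold the cellularity estimate produced by the HRD task).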
PURITY=`awk -F'\t' '{print $6}' ${hrd} | sed -n '2p'` | |||
cnvkit.py call ${sample}.cns -y -m clonal --purity $PURITY -o ${sample}.call.cns | |||
cnvkit.py call ${sample}.cnr -y -m clonal --purity $PURITY -o ${sample}.call.cnr | |||
# Plot the results | |||
cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf | |||
cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf | |||
cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf | |||
# Genemetrics | |||
mkdir gainloss | |||
cnvkit.py genemetrics ${sample}.call.cnr -t 0.2 -m 3 -o ${sample}.ratio_cnv.txt | |||
cnvkit.py genemetrics ${sample}.call.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment_cnv.txt | |||
# Filter genes | |||
cat ${sample}.ratio_cnv.txt | tail -n+2 | cut -f1 | sort | uniq > ratio_cnv.txt | |||
cat ${sample}.segment_cnv.txt | tail -n+2 | cut -f1 | sort | uniq > segment_cnv.txt | |||
comm -12 ratio_cnv.txt segment_cnv.txt > ${sample}.trusted_genes.txt | |||
for gene in `cat ${sample}.trusted_genes.txt` | |||
do | |||
cnvkit.py scatter ${sample}.call.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf | |||
done | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File scatter_pdf = "${sample}.scatter.pdf" | |||
File diagram_pdf = "${sample}.diagram.pdf" | |||
File heatmap_pdf = "${sample}.heatmap.pdf" | |||
File cnr = "${sample}.cnr" | |||
File cns = "${sample}.cns" | |||
File stats = "${sample}.stats" | |||
File call_cnr = "${sample}.call.cnr" | |||
File call_cns = "${sample}.call.cns" | |||
File ratio_cnv = "${sample}.ratio_cnv.txt" | |||
File segment_cnv = "${sample}.segment_cnv.txt" | |||
File gainloss_genes = "${sample}.trusted_genes.txt" | |||
Array[File] gainloss = glob("./gainloss/*") | |||
} | |||
} |
@@ -0,0 +1,34 @@ | |||
task Dedup { | |||
String SENTIEON_LICENSE | |||
String sample | |||
File sorted_bam | |||
File sorted_bam_index | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
export SENTIEON_LICENSE=${SENTIEON_LICENSE} | |||
nt=$(nproc) | |||
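# LocusCollector scores candidate duplicates; Dedup --rmdup then removes them,
# writing duplication metrics and the deduplicated, indexed BAM.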
sentieon driver -t $nt -i ${sorted_bam} --algo LocusCollector --fun score_info ${sample}_score.txt | |||
sentieon driver -t $nt -i ${sorted_bam} --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File score = "${sample}_score.txt" | |||
File dedup_metrics = "${sample}_dedup_metrics.txt" | |||
File deduped_bam = "${sample}.sorted.deduped.bam" | |||
File deduped_bam_index = "${sample}.sorted.deduped.bam.bai" | |||
} | |||
} |
@@ -0,0 +1,67 @@ | |||
task HRD { | |||
String sample | |||
File ref_dir | |||
String fasta | |||
File gc | |||
File tumor_bam | |||
File tumor_bam_index | |||
File? normal_bam | |||
File? normal_bam_index | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
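# Sequenza/scarHRD flow: generate per-chromosome seqz files from the tumor/normal BAMs,
# merge and index them, bin to a smaller seqz, then estimate purity/ploidy and HRD scores in R.
# The bundled sequenza.r script is expected to write ${sample}.HRD.txt plus the tables and
# plots listed in the output block.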
seqz=${sample}'.seqz.gz' | |||
small=${sample}'.small.seqz.gz' | |||
# bam2seqz | |||
sequenza-utils bam2seqz -gc ${gc} --fasta ${ref_dir}/${fasta} -n ${normal_bam} -t ${tumor_bam} -o $seqz -C chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY --parallel 24 | |||
# merge and remove | |||
zcat ${sample}_*.seqz.gz | awk '{if (NR == 1 || (NR != 1 && $1 != "chromosome")) {print $0}}' | bgzip > $seqz | |||
tabix -f -s 1 -b 2 -e 2 -S 1 $seqz | |||
rm ${sample}_*.seqz.gz; rm ${sample}_*.seqz.gz.tbi | |||
# seqz_binning: WES: 50; WGS: 200 | |||
sequenza-utils seqz_binning --seqz $seqz -w 50 -o $small | |||
# analysis in r | |||
Rscript /home/sequenza/sequenza.r '.' ${sample} | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File hrd="${sample}.HRD.txt" | |||
File alternative_fit="${sample}_alternative_fit.pdf" | |||
File alternative_solutions="${sample}_alternative_solutions.txt" | |||
File chromosome_depths="${sample}_chromosome_depths.pdf" | |||
File chromosome_view="${sample}_chromosome_view.pdf" | |||
File CN_bars="${sample}_CN_bars.pdf" | |||
File confints_CP="${sample}_confints_CP.txt" | |||
File contours_CP="${sample}_contours_CP.pdf" | |||
File CP_contours="${sample}_CP_contours.pdf" | |||
File gc_plots="${sample}_gc_plots.pdf" | |||
File genome_view="${sample}_genome_view.pdf" | |||
File model_fit="${sample}_model_fit.pdf" | |||
File mutations="${sample}_mutations.txt" | |||
File scarHRD_input="${sample}_scarHRD_input.txt" | |||
File segments="${sample}_segments.txt" | |||
File sequenza_cp_table="${sample}_sequenza_cp_table.RData" | |||
File sequenza_extract="${sample}_sequenza_extract.RData" | |||
File sequenza_log="${sample}_sequenza_log.txt" | |||
File small_seqz="${sample}.small.seqz.gz" | |||
File small_seqz_index="${sample}.small.seqz.gz.tbi" | |||
} | |||
} |
@@ -0,0 +1,51 @@ | |||
task Haplotyper { | |||
File ref_dir | |||
String fasta | |||
File dbsnp_dir | |||
String SENTIEON_LICENSE | |||
File recaled_bam | |||
File recaled_bam_index | |||
String dbsnp | |||
String sample | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
File? regions | |||
Int? interval_padding | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
export SENTIEON_LICENSE=${SENTIEON_LICENSE} | |||
nt=$(nproc) | |||
if [ ${regions} ]; then | |||
INTERVAL="--interval ${regions} --interval_padding ${interval_padding}" | |||
else | |||
INTERVAL="" | |||
fi | |||
sentieon driver -t $nt \ | |||
$INTERVAL -r ${ref_dir}/${fasta} \
-i ${recaled_bam} \ | |||
--algo Haplotyper -d ${dbsnp_dir}/${dbsnp} \ | |||
${sample}.Haplotyper.vcf | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File vcf = "${sample}.Haplotyper.vcf" | |||
File vcf_idx = "${sample}.Haplotyper.vcf.idx" | |||
} | |||
} | |||
@@ -0,0 +1,41 @@ | |||
task MSIsensor { | |||
String sample | |||
File ref_dir | |||
String fasta | |||
File tumor_bam | |||
File tumor_bam_index | |||
File? normal_bam | |||
File? normal_bam_index | |||
File baseline | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
# MSI | |||
mkdir -p /cromwell_root/tmp/ | |||
msisensor-pro scan -d ${ref_dir}/${fasta} -o reference.list | |||
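# 'scan' builds the microsatellite site list from the reference; with a matched normal,
# 'msi' scores tumor vs. normal, otherwise 'pro' scores the tumor against the pre-built panel baseline.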
if [ ${normal_bam} ]; then | |||
msisensor-pro msi -d reference.list -n ${normal_bam} -t ${tumor_bam} -o /cromwell_root/tmp/${sample} | |||
else | |||
msisensor-pro pro -d ${baseline} -t ${tumor_bam} -o /cromwell_root/tmp/${sample} | |||
fi | |||
cp /cromwell_root/tmp/${sample} ${sample}.MSI.txt | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File msi = "${sample}.MSI.txt" | |||
} | |||
} |
@@ -0,0 +1,61 @@ | |||
task Manta { | |||
File ref_dir | |||
File fasta | |||
File regions | |||
File tumor_bam | |||
File tumor_bam_index | |||
File? normal_bam | |||
File? normal_bam_index | |||
String sample | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
MANTA_INSTALL_PATH="/opt/manta-1.6.0.centos6_x86_64" | |||
MANTA_ANALYSIS_PATH="/cromwell_root/tmp" | |||
mkdir -p $MANTA_ANALYSIS_PATH | |||
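# configManta's --callRegions option requires a bgzip-compressed, tabix-indexed BED.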
cp ${regions} my_baits.bed | |||
bgzip -c my_baits.bed > my_baits.bed.gz | |||
tabix -p bed my_baits.bed.gz | |||
# input files | |||
if [ ${normal_bam} ]; then | |||
INPUT="--normalBam ${normal_bam} --tumorBam ${tumor_bam}" | |||
else | |||
INPUT="--tumorBam ${tumor_bam}" | |||
fi | |||
# configManta | |||
$MANTA_INSTALL_PATH/bin/configManta.py \ | |||
$INPUT \ | |||
--callRegions my_baits.bed.gz --exome \ | |||
--referenceFasta ${ref_dir}/${fasta} \ | |||
--runDir $MANTA_ANALYSIS_PATH | |||
# runWorkflow | |||
$MANTA_ANALYSIS_PATH/runWorkflow.py -j $nt | |||
# results | |||
if [ ${normal_bam} ]; then | |||
cp $MANTA_ANALYSIS_PATH/results/variants/somaticSV.vcf.gz ${sample}.Manta.somaticSV.vcf.gz | |||
cp $MANTA_ANALYSIS_PATH/results/variants/diploidSV.vcf.gz ${sample}.Manta.germlineSV.vcf.gz | |||
else | |||
cp $MANTA_ANALYSIS_PATH/results/variants/tumorSV.vcf.gz ${sample}.Manta.somaticSV.vcf.gz | |||
fi | |||
>>> | |||
runtime { | |||
docker: docker
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File somatic_vcf = "${sample}.Manta.somaticSV.vcf.gz" | |||
File? germline_vcf = "${sample}.Manta.germlineSV.vcf.gz" | |||
} | |||
} |
@@ -0,0 +1,69 @@ | |||
task Metrics { | |||
File ref_dir | |||
String SENTIEON_LICENSE | |||
String sample | |||
String docker | |||
String cluster_config | |||
String fasta | |||
File sorted_bam | |||
File sorted_bam_index | |||
String disk_size | |||
File? regions | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
export SENTIEON_LICENSE=${SENTIEON_LICENSE} | |||
nt=$(nproc) | |||
if [ ${regions} ]; then | |||
INTERVAL="--interval ${regions}" | |||
else | |||
INTERVAL="" | |||
fi | |||
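# One driver pass computes coverage, base-quality, GC-bias, alignment, insert-size,
# quality-yield and WGS-style metrics; 'sentieon plot metrics' then renders them into a single PDF report.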
sentieon driver -t $nt \ | |||
-r ${ref_dir}/${fasta} $INTERVAL \ | |||
-i ${sorted_bam} \ | |||
--algo CoverageMetrics --omit_base_output ${sample}_coverage_metrics \ | |||
--algo MeanQualityByCycle ${sample}_mq_metrics.txt \ | |||
--algo QualDistribution ${sample}_qd_metrics.txt \ | |||
--algo GCBias --summary ${sample}_gc_summary.txt ${sample}_gc_metrics.txt \ | |||
--algo AlignmentStat ${sample}_aln_metrics.txt \ | |||
--algo InsertSizeMetricAlgo ${sample}_is_metrics.txt \ | |||
--algo QualityYield ${sample}_QualityYield.txt \ | |||
--algo WgsMetricsAlgo ${sample}_WgsMetricsAlgo.txt | |||
sentieon plot metrics -o ${sample}_metrics_report.pdf gc=${sample}_gc_metrics.txt qd=${sample}_qd_metrics.txt mq=${sample}_mq_metrics.txt isize=${sample}_is_metrics.txt | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File coverage_metrics_sample_summary = "${sample}_coverage_metrics.sample_summary" | |||
File coverage_metrics_sample_statistics = "${sample}_coverage_metrics.sample_statistics" | |||
File coverage_metrics_sample_interval_statistics = "${sample}_coverage_metrics.sample_interval_statistics" | |||
File coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_coverage_metrics.sample_cumulative_coverage_proportions" | |||
File coverage_metrics_sample_cumulative_coverage_counts = "${sample}_coverage_metrics.sample_cumulative_coverage_counts" | |||
File qd_metrics = "${sample}_qd_metrics.txt" | |||
File mq_metrics = "${sample}_mq_metrics.txt" | |||
File is_metrics = "${sample}_is_metrics.txt" | |||
File gc_summary = "${sample}_gc_summary.txt" | |||
File gc_metrics = "${sample}_gc_metrics.txt" | |||
File aln_metrics = "${sample}_aln_metrics.txt" | |||
File QualityYield = "${sample}_QualityYield.txt" | |||
File wgsmetrics = "${sample}_WgsMetricsAlgo.txt" | |||
File metrics_report_pdf = "${sample}_metrics_report.pdf"
} | |||
} |
@@ -0,0 +1,33 @@ | |||
task TMB { | |||
String sample | |||
File regions | |||
File snpindel_txt | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
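# TMB = somatic SNVs/indels per megabase of the merged (non-overlapping) target region.
# tmb.py (bundled in the docker image) is expected to take the somatic SNV/indel table,
# the target size in bp and the sample name, and to write ${sample}.snp_indel.txt and ${sample}.TMB.txt.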
sort -k1,1 -k2,2n ${regions} | bedtools merge -i - > merged.bed | |||
size=`awk -F'\t' 'BEGIN{SUM=0}{SUM+=$3-$2}END{print SUM}' merged.bed` | |||
# analysis in python | |||
python ~/tmb.py ${snpindel_txt} $size ${sample} | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File snp_indel="${sample}.snp_indel.txt" | |||
File tmb="${sample}.TMB.txt" | |||
} | |||
} |
@@ -0,0 +1,79 @@ | |||
task TNseq { | |||
String sample | |||
String SENTIEON_LICENSE | |||
File tumor_bam | |||
File tumor_bam_index | |||
File? normal_bam | |||
File? normal_bam_index | |||
String tumor_name | |||
String normal_name | |||
File ref_dir | |||
String fasta | |||
File germline_resource | |||
File germline_resource_tbi | |||
File? regions | |||
Int? interval_padding | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
export SENTIEON_LICENSE=${SENTIEON_LICENSE} | |||
nt=$(nproc) | |||
if [ ${regions} ]; then | |||
INTERVAL="--interval ${regions} --interval_padding ${interval_padding}" | |||
else | |||
INTERVAL="" | |||
fi | |||
if [ ${normal_bam} ]; then | |||
INPUT="-i ${tumor_bam} -i ${normal_bam}" | |||
SAMPLE="--tumor_sample ${tumor_name} --normal_sample ${normal_name}" | |||
else | |||
INPUT="-i ${tumor_bam}" | |||
SAMPLE="--tumor_sample ${tumor_name}" | |||
fi | |||
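# One driver pass runs TNhaplotyper2 (raw somatic calls) together with OrientationBias
# (read-orientation artifact priors) and ContaminationModel (contamination estimate plus tumor segments);
# TNfilter then applies those estimates to produce the filtered ${sample}.TNseq.vcf.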
sentieon driver -t $nt -r ${ref_dir}/${fasta} \ | |||
$INPUT $INTERVAL \ | |||
--algo TNhaplotyper2 $SAMPLE \ | |||
--germline_vcf ${germline_resource} \ | |||
${sample}.TNseq.raw.vcf \ | |||
--algo OrientationBias --tumor_sample ${tumor_name} \ | |||
${sample}.orientation \ | |||
--algo ContaminationModel $SAMPLE \ | |||
--vcf ${germline_resource} \ | |||
--tumor_segments ${sample}.contamination.segments \ | |||
${sample}.contamination | |||
sentieon driver -t $nt \ | |||
-r ${ref_dir}/${fasta} \ | |||
--algo TNfilter $SAMPLE \ | |||
-v ${sample}.TNseq.raw.vcf \ | |||
--contamination ${sample}.contamination \ | |||
--tumor_segments ${sample}.contamination.segments \ | |||
--orientation_priors ${sample}.orientation \ | |||
${sample}.TNseq.vcf | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File vcf = "${sample}.TNseq.vcf" | |||
File vcf_index = "${sample}.TNseq.vcf.idx" | |||
File contamination = "${sample}.contamination" | |||
File contamination_segments = "${sample}.contamination.segments" | |||
File orientation = "${sample}.orientation" | |||
} | |||
} |
@@ -0,0 +1,29 @@ | |||
task bcftools { | |||
File ref_dir | |||
String fasta | |||
File vcf | |||
String basename = basename(vcf,".vcf") | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
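# Split multi-allelic records into biallelic ones (-m -both), then left-align and
# normalize indels against the reference (norm -f).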
bcftools norm -m -both ${vcf} | bcftools norm -f ${ref_dir}/${fasta} -Ov -o ${basename}.norm.vcf | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File norm_vcf = "${basename}.norm.vcf" | |||
} | |||
} |
@@ -0,0 +1,62 @@ | |||
task deduped_Metrics { | |||
File ref_dir | |||
String SENTIEON_LICENSE | |||
String sample | |||
String fasta | |||
File deduped_bam | |||
File deduped_bam_index | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
File? regions | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
export SENTIEON_LICENSE=${SENTIEON_LICENSE} | |||
nt=$(nproc) | |||
if [ ${regions} ]; then | |||
INTERVAL="--interval ${regions}" | |||
else | |||
INTERVAL="" | |||
fi | |||
sentieon driver -t $nt \ | |||
-r ${ref_dir}/${fasta} $INTERVAL \ | |||
-i ${deduped_bam} \ | |||
--algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics \ | |||
--algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt \ | |||
--algo QualDistribution ${sample}_deduped_qd_metrics.txt \ | |||
--algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt \ | |||
--algo AlignmentStat ${sample}_deduped_aln_metrics.txt \ | |||
--algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt \ | |||
--algo QualityYield ${sample}_deduped_QualityYield.txt \ | |||
--algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary" | |||
File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics" | |||
File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics" | |||
File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions" | |||
File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts" | |||
File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt" | |||
File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt" | |||
File deduped_gc_summary = "${sample}_deduped_gc_summary.txt" | |||
File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt" | |||
File deduped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
File deduped_is_metrics = "${sample}_deduped_is_metrics.txt" | |||
File deduped_QualityYield = "${sample}_deduped_QualityYield.txt" | |||
File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt" | |||
} | |||
} |
@@ -0,0 +1,52 @@ | |||
task mapping { | |||
File ref_dir | |||
String fasta | |||
File fastq_1 | |||
File fastq_2 | |||
String SENTIEON_LICENSE | |||
String group | |||
String sample | |||
String platform | |||
String? duplex_umi | |||
String? read_structure | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
export SENTIEON_LICENSE=${SENTIEON_LICENSE} | |||
nt=$(nproc) | |||
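# With a UMI read structure: extract UMIs, align, collapse reads into consensus, re-align the
# consensus reads and sort with UMI-aware post-processing. Without one: plain BWA-MEM alignment + sort.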
if [ -n "${read_structure}" ]; then
if [ "${duplex_umi}" == "true" ]; then
READ_STRUCTURE="-d ${read_structure}"
else
READ_STRUCTURE="${read_structure}"
fi
sentieon umi extract $READ_STRUCTURE ${fastq_1} ${fastq_2} | \ | |||
sentieon bwa mem -p -C -R "@RG\tID:${group}\tSM:${sample}\tPL:${platform}" -t $nt -K 10000000 ${ref_dir}/${fasta} - | \ | |||
sentieon umi consensus -o ${sample}.umi_consensus.fastq.gz | |||
sentieon bwa mem -p -C -R "@RG\tID:${group}\tSM:${sample}\tPL:${platform}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${sample}.umi_consensus.fastq.gz | \
sentieon util sort --umi_post_process --sam2bam -i - -o ${sample}.sorted.bam | |||
else | |||
sentieon bwa mem -R "@RG\tID:${group}\tSM:${sample}\tPL:${platform}" \ | |||
-t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} | \ | |||
sentieon util sort -o ${sample}.sorted.bam -t $nt --sam2bam -i - | |||
fi | |||
>>> | |||
runtime { | |||
docker: docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File sorted_bam = "${sample}.sorted.bam" | |||
File sorted_bam_index = "${sample}.sorted.bam.bai" | |||
} | |||
} |
@@ -0,0 +1,71 @@ | |||
import "./tasks/Haplotyper.wdl" as Haplotyper | |||
import "./tasks/TNseq.wdl" as TNseq | |||
import "./tasks/bcftools.wdl" as bcftools | |||
import "./tasks/ANNOVAR.wdl" as ANNOVAR | |||
import "./tasks/Manta.wdl" as Manta | |||
import "./tasks/AnnotSV.wdl" as AnnotSV | |||
import "./tasks/CNVkit.wdl" as CNVkit | |||
import "./tasks/MSIsensor.wdl" as MSIsensor | |||
import "./tasks/HRD.wdl" as HRD | |||
import "./tasks/TMB.wdl" as TMB | |||
workflow {{ project_name }} { | |||
String sample_id | |||
File hrd | |||
File? tumor_bam | |||
File? tumor_bam_index | |||
File? normal_bam | |||
File? normal_bam_index | |||
String? duplex_umi | |||
String? read_structure | |||
String SENTIEON_LICENSE | |||
String sentieon_docker | |||
String manta_docker | |||
String bcftools_docker | |||
String annovar_docker | |||
String annotsv_docker | |||
String cnvkit_docker | |||
String sequenza_docker | |||
String msisensor_docker | |||
String tmb_docker | |||
String platform | |||
File ref_dir | |||
String fasta | |||
File dbmills_dir | |||
String db_mills | |||
File dbsnp_dir | |||
String dbsnp | |||
File germline_resource | |||
File germline_resource_tbi | |||
File annovar_database | |||
File annotsv_database | |||
File gc | |||
File baseline | |||
File ref_flat | |||
File? regions | |||
Int? interval_padding | |||
String disk_size | |||
String cluster_config | |||
call CNVkit.CNVkit as CNVkit { | |||
input: | |||
sample=sample_id, | |||
fasta=fasta, | |||
ref_dir=ref_dir, | |||
regions=regions, | |||
ref_flat=ref_flat, | |||
normal_bam=normal_bam, | |||
normal_bam_index=normal_bam_index, | |||
tumor_bam=tumor_bam, | |||
tumor_bam_index=tumor_bam_index, | |||
hrd=hrd, | |||
docker=cnvkit_docker, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} |