diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b340f61..a9150cd 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@6c2e22b4d901f0c42ca66c5069f8026df026d165 # v2 - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: diff --git a/.github/workflows/twistgp_ci.yml b/.github/workflows/twistgp_ci.yml index e277f90..42a2600 100644 --- a/.github/workflows/twistgp_ci.yml +++ b/.github/workflows/twistgp_ci.yml @@ -38,13 +38,13 @@ jobs: fetch-depth: 0 - name: Set up Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@6c2e22b4d901f0c42ca66c5069f8026df026d165 # v2 with: version: "${{ matrix.NXF_VER }}" - name: Set up Apptainer if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-apptainer@main + uses: eWaterCycle/setup-apptainer@3f706d898c9db585b1d741b4692e66755f3a1b40 # main - name: Set up Singularity if: matrix.profile == 'singularity' diff --git a/CHANGELOG.md b/CHANGELOG.md index ad92ea7..3c1d6ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added GATK4 FilterMutectCalls after Mutect2 variant calling - Added BCFTOOLS_VIEW pre-filtering step prior to TMB calculation - Added `--tmb_popaf_cutoff` and `--tmb_vaf_cutoff` parameters +- Added `--skip_cnv`, `--skip_msi`, and `--skip_tmb` parameters to allow skipping CNV calling, MSI analysis, and TMB calculation respectively ## 1.1.0dev diff --git a/README.md b/README.md index 5ea2d84..bdf9586 100644 --- a/README.md +++ b/README.md @@ -183,10 +183,35 @@ Prior to TMB calculation, annotated variants are pre-filtered using `bcftools vi The following parameters control these thresholds: - `--tmb_popaf_cutoff` (default: `3.0`): Minimum POPAF 
value (negative log10 of population allele frequency) to include a variant. The default of `3.0` corresponds to a population allele frequency of ≤ 0.001 (0.1%), excluding common germline variants that are unlikely to be somatic. This value is derived from the Mutect2 `POPAF` INFO field. -- `--tmb_vaf_cutoff` (default: `0.05`): Minimum variant allele frequency (FORMAT/AF) to include a variant. The default of `0.05` (5%) excludes very low frequency variants that may represent sequencing artifacts or sub-clonal noise, consistent with the [Friends of Cancer Research TMB Harmonization Project](https://friendsofcancerresearch.org/publication/in-silico-assessment-of-variation-in-tmb-quantification-across-diagnostic-platforms-phase-1-of-the-friends-of-cancer-research-harmonization-project/) recommendations. +- `--tmb_vaf_cutoff` (default: `0.10`): Minimum variant allele frequency (FORMAT/AF) to include a variant. The [Friends of Cancer Research TMB Harmonization Project](https://friendsofcancerresearch.org/publication/in-silico-assessment-of-variation-in-tmb-quantification-across-diagnostic-platforms-phase-1-of-the-friends-of-cancer-research-harmonization-project/) recommends a minimum of 0.05 (5%). The default of 0.10 (10%) provides additional stringency to reduce sub-clonal noise in tumor-only analyses. The same cutoff is also passed to pyTMB as `--vaf`, so the pre-filter and the TMB calculation always use one threshold. 
+### Skipping Analysis Steps + +Individual analysis steps can be skipped using the following parameters: + +| Parameter | Description | +| ---------------- | ------------------------------------------------------------------- | +| `--skip_cnv` | Skip CNV calling with CNVkit | +| `--skip_msi` | Skip microsatellite instability analysis (MSIsensor2/MSIsensor-pro) | +| `--skip_tmb` | Skip tumor mutational burden calculation (pyTMB) | +| `--skip_civicpy` | Skip CIViCpy variant annotation | + +For example, to run the pipeline without MSI and TMB: + +```console +nextflow run twistcgp/main.nf \ + -profile docker \ + --input samplesheet.csv \ + --fasta hg38_giab.fa \ + --baits baits.bed \ + --targets targets.bed \ + --outdir results \ + --skip_msi \ + --skip_tmb +``` + ### Variant Filtering with FilterMutectCalls Following variant calling with Mutect2, this pipeline applies [`FilterMutectCalls`](https://gatk.broadinstitute.org/hc/en-us/articles/360036856831-FilterMutectCalls) to annotate variant quality, consistent with [GATK Best Practices for somatic variant discovery](https://www.biorxiv.org/content/biorxiv/early/2019/12/02/861054/DC1/embed/media-1.pdf?download=true) (Benjamin et al., 2019). 
diff --git a/assets/pytmb_vep.yml b/assets/pytmb_vep.yml index 3e758be..e2f8707 100644 --- a/assets/pytmb_vep.yml +++ b/assets/pytmb_vep.yml @@ -89,12 +89,13 @@ polymDb: gnomad: - gnomADe_AF - gnomADe_AFR_AF - gnomADe_AMR_AF - gnomADe_ASJ_AF - gnomADe_EAS_AF - gnomADe_FIN_AF - gnomADe_MID_AF - gnomADe_NFE_AF + - gnomADe_REMAINING_AF - gnomADe_SAS_AF - gnomADg_AF - gnomADg_AFR_AF diff --git a/conf/modules.config b/conf/modules.config index d1915bc..aabbe60 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,6 +27,7 @@ process { } withName: BCFTOOLS_VIEW { + ext.when = { params.skip_tmb == false } // NB: ext.args must include --write-index=tbi for downstream processes to run successfully ext.args = { [ @@ -53,8 +54,15 @@ process { saveAs: { filename -> filename != 'versions.yml' && params.save_reference ? filename : null }, ] } - withName: CIVICPY { - ext.when = { params.skip_civicpy == false } + + withName: CIVICPY_UPDATE_CACHE { + publishDir = [ + path: { params.save_reference ? "${params.outdir}/reference" : params.outdir }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename != 'versions.yml' && params.save_reference ? filename : null }, + ] + } + withName: CIVICPY_ANNOTATE_VCF { publishDir = [ path: { "${params.outdir}/${meta.id}" }, mode: params.publish_dir_mode, @@ -63,6 +71,7 @@ process { } withName: CNVKIT_BATCH { + ext.when = { params.skip_cnv == false } // hmm-tumor performs well for relatively pure, high coverage tumor samples // alternative segmentation methods can be explored here: // https://cnvkit.readthedocs.io/en/stable/pipeline.html#segmentation-methods @@ -85,13 +94,13 @@ process { } withName: 'ENSEMBLVEP_VEP' { - ext.args = "--vcf --af_gnomade --af_1kg" + (params.cosmic_vcf ? " --custom ${params.cosmic_vcf},COSMIC,vcf,exact,0,ID" : "") + ext.args = "--format vcf --vcf --af_gnomade --af_1kg" + (params.cosmic_vcf ? 
" --custom ${params.cosmic_vcf},COSMIC,vcf,exact,0,ID" : "") ext.prefix = { "${meta.id}.vep" } publishDir = [ [ mode: params.publish_dir_mode, path: { "${params.outdir}/${meta.id}/" }, - pattern: "*{gz,html}", + pattern: "*{gz,gz.tbi,html}", ] ] } @@ -106,8 +115,9 @@ process { } withName: GATK4_FILTERMUTECTCALLS { + ext.prefix = { "${meta.id}.labeled" } publishDir = [ - path: { "${params.outdir}/${meta.id}.labeled" }, + path: { "${params.outdir}/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*{vcf.gz,vcf.gz.tbi,filteringStats.tsv}", ] @@ -148,6 +158,7 @@ process { } withName: MSISENSOR2_MSI { + ext.when = { params.skip_msi == false } // NB: The module outputs the summary file as "${prefix}" ext.prefix = { "${meta.id}.msi" } publishDir = [ @@ -158,6 +169,7 @@ process { } withName: MSISENSORPRO_PRO { + ext.when = { params.skip_msi == false } // NB: The module outputs the summary file as "${prefix}" ext.prefix = { "${meta.id}.msi" } publishDir = [ @@ -266,8 +278,11 @@ process { } withName: TMB { + ext.when = { params.skip_tmb == false } + errorStrategy = { task.attempt <= task.maxRetries ? 
'retry' : 'finish' } + maxRetries = 3 // VAF, minDepth, and minAltDepth recommended by: https://friendsofcancerresearch.org/publication/in-silico-assessment-of-variation-in-tmb-quantification-across-diagnostic-platforms-phase-1-of-the-friends-of-cancer-research-harmonization-project/ - ext.args = "--polymDb gnomad --filterPolym --vaf 0.05 --minDepth 25 --minAltDepth 3 --filterLowQual --filterIndels --filterNonCoding --filterSyn --maf 0.01" + ext.args = { "--polymDb gnomad --filterPolym --vaf ${params.tmb_vaf_cutoff} --minDepth 25 --minAltDepth 3 --filterLowQual --filterIndels --filterNonCoding --filterSyn --maf 0.01" } publishDir = [ path: { "${params.outdir}/${meta.id}" }, mode: params.publish_dir_mode, @@ -284,7 +299,15 @@ process { ] } - withName: TABIX_BGZIPTABIX { + withName: '.*VCF_ANNOTATE_ENSEMBLVEP:TABIX_TABIX' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/${meta.id}/" }, + pattern: "*.tbi", + ] + } + + withName: '.*VCF_ANNOTATE_SNPEFF:TABIX_BGZIPTABIX' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ mode: params.publish_dir_mode, @@ -293,8 +316,8 @@ process { ] } - withName: TABIX_TABIX { - ext.prefix = { "${meta.id}.snpeff" } + withName: '.*TWISTCGP:TABIX_BGZIPTABIX' { + ext.prefix = { "${meta.id}.civic" } publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/${meta.id}/" }, diff --git a/conf/test.config b/conf/test.config index db3e5ea..a523d62 100644 --- a/conf/test.config +++ b/conf/test.config @@ -42,22 +42,15 @@ process { // FilterMutectCalls fails with the minimal test data ext.when = { false } } - withName: "TMB" { - // This process runs after SNPEFF_SNPEFF, which we are also skipping - ext.when = { false } - } - withName: "MSISENSOR2_MSI" { - // TODO: Create small test data to use in integration testing - ext.when = { false } - } - withName: "MSISENSORPRO_PRO" { - // This process fails with the empty output from MSISENSOR2_SCAN - // TODO: Create small test data to use in integration testing + 
withName: "CIVICPY_UPDATE_CACHE" { + // Downloading the CIViC cache is too slow for CI ext.when = { false } } } params { + skip_tmb = true + skip_msi = true config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' diff --git a/modules/local/civicpy/environment.yml b/modules/local/civicpy/annotate/environment.yml similarity index 100% rename from modules/local/civicpy/environment.yml rename to modules/local/civicpy/annotate/environment.yml diff --git a/modules/local/civicpy/main.nf b/modules/local/civicpy/annotate/main.nf similarity index 84% rename from modules/local/civicpy/main.nf rename to modules/local/civicpy/annotate/main.nf index 29125da..d3d2a6f 100644 --- a/modules/local/civicpy/main.nf +++ b/modules/local/civicpy/annotate/main.nf @@ -1,15 +1,16 @@ -process CIVICPY { +process CIVICPY_ANNOTATE_VCF { tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/civicpy:5.1.0--pyhdfd78af_0' - : 'docker.io/griffithlab/civicpy:5.1.0' }" + ? 
'https://depot.galaxyproject.org/singularity/civicpy:5.2.0--pyhdfd78af_0' + : 'docker.io/griffithlab/civicpy:v5.2.0' }" input: tuple val(meta), path(vcf), path(tbi) val annotation_genome_version + path cache output: tuple val(meta), path("*.vcf"), emit: vcf @@ -23,12 +24,13 @@ process CIVICPY { def prefix = task.ext.prefix ?: "${meta.id}.civic" """ - export CIVICPY_CACHE_FILE=\$PWD/.civicpy + export CIVICPY_CACHE_FILE=\$PWD/${cache} civicpy annotate-vcf --input-vcf ${vcf} \\ --output-vcf ${prefix}.vcf \\ --reference ${annotation_genome_version} \\ - --include-status accepted + --include-status accepted \\ + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/civicpy/meta.yml b/modules/local/civicpy/annotate/meta.yml similarity index 100% rename from modules/local/civicpy/meta.yml rename to modules/local/civicpy/annotate/meta.yml diff --git a/modules/local/civicpy/update_cache/main.nf b/modules/local/civicpy/update_cache/main.nf new file mode 100644 index 0000000..9937a65 --- /dev/null +++ b/modules/local/civicpy/update_cache/main.nf @@ -0,0 +1,37 @@ +process CIVICPY_UPDATE_CACHE { + label 'process_single' + + conda "${moduleDir}/../annotate/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/civicpy:5.2.0--pyhdfd78af_0' + : 'docker.io/griffithlab/civicpy:v5.2.0' }" + + output: + path "civicpy_cache.pkl", emit: cache + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + export CIVICPY_CACHE_FILE=\$PWD/civicpy_cache.pkl + + civicpy update + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + civicpy: \$(civicpy --version | sed 's/.*version //') + END_VERSIONS + """ + + stub: + """ + touch civicpy_cache.pkl + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + civicpy: \$(civicpy --version | sed 's/.*version //') + END_VERSIONS + """ +} diff --git a/modules/local/civicpy/update_cache/meta.yml b/modules/local/civicpy/update_cache/meta.yml new file mode 100644 index 0000000..33bf7ea --- /dev/null +++ b/modules/local/civicpy/update_cache/meta.yml @@ -0,0 +1,38 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "civicpy_update_cache" + +description: Downloads the CIViC nightly cache for use with civicpy annotation +keywords: + - clinical interpretation + - variant annotation + - genomics +tools: + - "civicpy": + description: "CIViC variant knowledgebase analysis toolkit." 
+ homepage: "https://docs.civicpy.org/en/latest/" + documentation: "https://docs.civicpy.org/en/latest/" + tool_dev_url: "https://github.com/griffithlab/civicpy" + doi: "10.1200/CCI.19.00127" + licence: ["MIT"] + identifier: biotools:CIViCpy + +input: [] + +output: + cache: + - "civicpy_cache.pkl": + type: file + description: CIViC database cache file + pattern: "*.pkl" + + versions_yml: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emmcauley" +maintainers: + - "@emmcauley" diff --git a/modules/local/tmb/main.nf b/modules/local/tmb/main.nf index 47a688a..4de019c 100644 --- a/modules/local/tmb/main.nf +++ b/modules/local/tmb/main.nf @@ -37,7 +37,7 @@ process TMB { > ${prefix}.tmb.log cat <<-END_VERSIONS > versions.yml "${task.process}": - tmb: \$(echo \$(pyTMB.py --version 2>&1) | sed 's/^.*pyTMB.py //; s/.*\$//' | sed 's|[()]||g') + tmb: \$(pyTMB.py --version 2>&1 | sed 's/pyTMB.py (//; s/)//') END_VERSIONS """ @@ -50,7 +50,7 @@ process TMB { cat <<-END_VERSIONS > versions.yml "${task.process}": - tmb: \$(echo \$(pyTMB.py --version 2>&1) | sed 's/^.*pyTMB.py //; s/.*\$//' | sed 's|[()]||g') + tmb: \$(pyTMB.py --version 2>&1 | sed 's/pyTMB.py (//; s/)//') END_VERSIONS """ } diff --git a/nextflow.config b/nextflow.config index df7792f..ab4bec4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,12 +60,15 @@ params { // TMB options tmb_popaf_cutoff = 3.0 - tmb_vaf_cutoff = 0.05 + tmb_vaf_cutoff = 0.10 tmb_vep_config = "${projectDir}/assets/pytmb_vep.yml" tmb_mutect2_config = "${projectDir}/assets/pytmb_mutect2.yml" - //CIVICpy + // Skip options skip_civicpy = false + skip_cnv = false + skip_msi = false + skip_tmb = false // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 95f886b..2c28995 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -171,6 +171,21 @@ "default": false, "description": "If true, skip the CIVICpy 
annotation step." }, + "skip_cnv": { + "type": "boolean", + "default": false, + "description": "If true, skip CNV calling with CNVkit." + }, + "skip_msi": { + "type": "boolean", + "default": false, + "description": "If true, skip microsatellite instability analysis (MSIsensor2 or MSIsensor-pro)." + }, + "skip_tmb": { + "type": "boolean", + "default": false, + "description": "If true, skip tumor mutational burden (TMB) calculation." + }, "tmb_mutect2_config": { "type": "string", "exists": true, @@ -189,9 +204,9 @@ }, "tmb_vaf_cutoff": { "type": "number", - "default": 0.05, + "default": 0.1, "description": "Minimum variant allele frequency (FORMAT/AF) for TMB pre-filtering.", - "help_text": "Used by bcftools view to pre-filter variants before TMB calculation. A value of 0.05 (5%) excludes very low frequency variants that may represent sequencing artifacts." + "help_text": "Used by bcftools view to pre-filter variants before TMB calculation, and passed to pyTMB as --vaf. The Friends of Cancer Research TMB Harmonization Project recommends a minimum of 0.05 (5%). The default of 0.10 (10%) provides additional stringency to reduce sub-clonal noise in tumor-only analyses." 
} } }, diff --git a/workflows/twistcgp.nf b/workflows/twistcgp.nf index dcdb6c0..d9a17ae 100644 --- a/workflows/twistcgp.nf +++ b/workflows/twistcgp.nf @@ -5,7 +5,8 @@ */ include { ALIGNBAM } from '../modules/local/alignbam' include { BCFTOOLS_VIEW } from '../modules/nf-core/bcftools/view/main' -include { CIVICPY } from '../modules/local/civicpy/main' +include { CIVICPY_ANNOTATE_VCF } from '../modules/local/civicpy/annotate/main' +include { CIVICPY_UPDATE_CACHE } from '../modules/local/civicpy/update_cache/main' include { FASTP } from '../modules/nf-core/fastp/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { FGBIO_FASTQTOBAM } from '../modules/nf-core/fgbio/fastqtobam/main' @@ -20,7 +21,7 @@ include { PICARD_MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/picard/collectmultiplemetrics' include { PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard/collecthsmetrics/main' include { PICARD_INTERVALLISTTOBED } from '../modules/local/picard/intervallisttobed' -include { TABIX_TABIX } from '../modules/nf-core/tabix/tabix' +include { TABIX_BGZIPTABIX } from '../modules/nf-core/tabix/bgziptabix' include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -161,89 +162,98 @@ workflow TWISTCGP { ch_multiqc_files = ch_multiqc_files.mix(VCF_ANNOTATE.out.reports) // - // MODULE: CIVICPY - CIVICPY(VCF_ANNOTATE.out.vcf_ann, params.annotation_genome_version) - ch_versions = ch_versions.mix(CIVICPY.out.versions.first()) + // MODULE: CIVICPY_UPDATE_CACHE and CIVICPY_ANNOTATE_VCF + if (!params.skip_civicpy) { + CIVICPY_UPDATE_CACHE() + CIVICPY_ANNOTATE_VCF(VCF_ANNOTATE.out.vcf_ann, params.annotation_genome_version, CIVICPY_UPDATE_CACHE.out.cache.collect()) + ch_versions = 
ch_versions.mix(CIVICPY_UPDATE_CACHE.out.versions.first()) + ch_versions = ch_versions.mix(CIVICPY_ANNOTATE_VCF.out.versions.first()) + } // - // MODULE: BCFTOOLS_VIEW (pre-filter for TMB) + // MODULE: BCFTOOLS_VIEW (pre-filter for TMB) and TMB // Excludes CIVIC-annotated cancer hotspots (if not --skip_civicpy); // applies quality/variant filters // - if (!params.skip_civicpy) { - TABIX_TABIX(CIVICPY.out.vcf) + if (!params.skip_tmb) { + if (!params.skip_civicpy) { + TABIX_BGZIPTABIX(CIVICPY_ANNOTATE_VCF.out.vcf) - ch_bcftools_in = CIVICPY.out.vcf - .join(TABIX_TABIX.out.tbi, by: 0) + ch_bcftools_in = TABIX_BGZIPTABIX.out.gz_tbi } else { ch_bcftools_in = VCF_ANNOTATE.out.vcf_ann } - BCFTOOLS_VIEW( - ch_bcftools_in, - [], // regions (unused) - targets[1], // targets BED file - [], // samples (unused) - ) - ch_pre_tmb_vcf_tbi = BCFTOOLS_VIEW.out.vcf - .join(BCFTOOLS_VIEW.out.tbi) + BCFTOOLS_VIEW( + ch_bcftools_in, + [], // regions (unused) + targets[1], // targets BED file + [], // samples (unused) + ) + ch_pre_tmb_vcf_tbi = BCFTOOLS_VIEW.out.vcf + .join(BCFTOOLS_VIEW.out.tbi) - // - // MODULE: TMB - // - TMB(ch_pre_tmb_vcf_tbi, targets, tmb_vep_config, tmb_mutect2_config) - ch_versions = ch_versions.mix(TMB.out.versions.first()) + // + // MODULE: TMB + // + TMB(ch_pre_tmb_vcf_tbi, targets, tmb_vep_config, tmb_mutect2_config) + ch_versions = ch_versions.mix(TMB.out.versions.first()) + } // // CNVKIT_BATCH // // Currently the pipeline does not support matched tumor-normal analysis, so an empty // list is supplied for the normal BAM. - baits_are_bed = baits[1].getExtension() == "bed" - if (!baits_are_bed) { - BAITS_TO_BED(baits) + if (!params.skip_cnv) { + baits_are_bed = baits[1].getExtension() == "bed" + if (!baits_are_bed) { + BAITS_TO_BED(baits) + } + ch_baits_bed = baits_are_bed ? 
baits : BAITS_TO_BED.out.bed.collect() + ch_cnv_bam_pair = PICARD_MARKDUPLICATES.out.bam.map { meta, bam -> tuple(meta, bam, []) } + CNVKIT_BATCH( + ch_cnv_bam_pair, + ch_fasta, + ch_fasta_fai, + ch_baits_bed, // note the process labels this "targets", however CNVkit documentation recommends using baits + tuple([], pon_cnn), // no metadata supplied for the optional panel of normal reference cnn file + false // boolean, true indicates no tumor sample, multiple normal samples, only output a PON reference + ) + ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions.first()) } - ch_baits_bed = baits_are_bed ? baits : BAITS_TO_BED.out.bed.collect() - ch_cnv_bam_pair = PICARD_MARKDUPLICATES.out.bam.map { meta, bam -> tuple(meta, bam, []) } - CNVKIT_BATCH( - ch_cnv_bam_pair, - ch_fasta, - ch_fasta_fai, - ch_baits_bed, // note the process labels this "targets", however CNVkit documentation recommends using baits - tuple([], pon_cnn), // no metadata supplied for the optional panel of normal reference cnn file - false // boolean, true indicates no tumor sample, multiple normal samples, only output a PON reference - ) - ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions.first()) // // MODULE: MSISENSOR2_MSI or MSISENSORPRO_PRO // // MSIsensor-pro is free for non-profit use but a license is required for commercial use // https://github.com/xjtu-omics/msisensor-pro/blob/master/docs/2_License.md - if (use_msi_pro) { - MSISENSORPRO_PRO( - ch_bam_and_index, - ch_msi_scan, - [[:], []], // fasta and fai are only required for CRAM format - [[:], []], - ) - ch_versions = ch_versions.mix(MSISENSORPRO_PRO.out.versions.first()) - } - else { - // Currently the pipeline does not support matched tumor-normal analysis, so an empty - // list is supplied for the normal BAM. No interval list is passed. - // An optional scan file can be provided via --msisensor_scan (e.g. for non-human panels). 
- ch_bam_for_msi = ch_bam_and_index.map { meta, bam, bai -> tuple(meta, bam, bai, [], [], []) } - ch_msi_scan_file = ch_msi_scan.map { _meta, scan -> scan } - GIT_CLONEMSISENSOR2MODEL(msi_sensor2_model_name) - ch_versions = ch_versions.mix(GIT_CLONEMSISENSOR2MODEL.out.versions.first()) - MSISENSOR2_MSI( - ch_bam_for_msi, - ch_msi_scan_file, - GIT_CLONEMSISENSOR2MODEL.out.model.collect(), - ) - ch_versions = ch_versions.mix(MSISENSOR2_MSI.out.versions.first()) + if (!params.skip_msi) { + if (use_msi_pro) { + MSISENSORPRO_PRO( + ch_bam_and_index, + ch_msi_scan, + [[:], []], // fasta and fai are only required for CRAM format + [[:], []], + ) + ch_versions = ch_versions.mix(MSISENSORPRO_PRO.out.versions.first()) + } + else { + // Currently the pipeline does not support matched tumor-normal analysis, so an empty + // list is supplied for the normal BAM. No interval list is passed. + // An optional scan file can be provided via --msisensor_scan (e.g. for non-human panels). + ch_bam_for_msi = ch_bam_and_index.map { meta, bam, bai -> tuple(meta, bam, bai, [], [], []) } + ch_msi_scan_file = ch_msi_scan.map { _meta, scan -> scan } + GIT_CLONEMSISENSOR2MODEL(msi_sensor2_model_name) + ch_versions = ch_versions.mix(GIT_CLONEMSISENSOR2MODEL.out.versions.first()) + MSISENSOR2_MSI( + ch_bam_for_msi, + ch_msi_scan_file, + GIT_CLONEMSISENSOR2MODEL.out.model.collect(), + ) + ch_versions = ch_versions.mix(MSISENSOR2_MSI.out.versions.first()) + } }