diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy
index 1f1cbbb712..5d894e8bfd 100644
--- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy
+++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy
@@ -39,6 +39,7 @@ import nextflow.config.ConfigBuilder
 import nextflow.config.ConfigMap
 import nextflow.config.ConfigValidator
 import nextflow.config.Manifest
+import nextflow.config.SchemaParamsHelper
 import nextflow.exception.AbortOperationException
 import nextflow.file.FileHelper
 import nextflow.plugin.Plugins
@@ -339,6 +340,8 @@ class CmdRun extends CmdBase implements HubOptions {
         // -- load command line params
         final baseDir = scriptFile.parent
         final cliParams = parsedParams(ConfigBuilder.getConfigVars(baseDir, null))
+        // under v2 syntax parser, CLI args arrive as strings; coerce via nextflow_schema.json types if available
+        SchemaParamsHelper.applySchemaTypes(baseDir, cliParams)
 
         /*
          * 2-PHASE CONFIGURATION LOADING STRATEGY
diff --git a/modules/nextflow/src/main/groovy/nextflow/config/SchemaParamsHelper.groovy b/modules/nextflow/src/main/groovy/nextflow/config/SchemaParamsHelper.groovy
new file mode 100644
index 0000000000..d978746d67
--- /dev/null
+++ b/modules/nextflow/src/main/groovy/nextflow/config/SchemaParamsHelper.groovy
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2013-2026, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.config
+
+import java.nio.file.Files
+import java.nio.file.Path
+
+import groovy.json.JsonSlurper
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import nextflow.NF
+import nextflow.SysEnv
+
+/**
+ * Coerces CLI param string values to typed values using a
+ * {@code nextflow_schema.json} file (JSON Schema, as used by nf-core
+ * pipelines) as a fallback type source.
+ *
+ * Under syntax parser v2, CLI params arrive as strings. When the pipeline
+ * has not declared typed params in main.nf or non-null defaults in
+ * nextflow.config, those strings stay strings -- and may break param logic
+ * that expects e.g. numeric comparison. If a {@code nextflow_schema.json}
+ * lives next to main.nf, this helper reads property types from it and
+ * coerces CLI values accordingly, giving the pipeline typed params for
+ * free without requiring main.nf changes.
+ *
+ * Coercion is best-effort and non-destructive: values that don't match a
+ * declared type are left as strings, a missing schema is silently ignored,
+ * and a malformed schema is skipped after logging a warning.
+ *
+ * @author Phil Ewels
+ */
+@Slf4j
+@CompileStatic
+class SchemaParamsHelper {
+
+    static final String SCHEMA_FILENAME = 'nextflow_schema.json'
+
+    /**
+     * Apply schema-based type coercion in place on a CLI params map. Does
+     * nothing when the map is empty, {@code baseDir} is null, syntax parser v2
+     * is not active, {@code NXF_DISABLE_PARAMS_TYPE_DETECTION} is set, or no
+     * schema declaring at least one type is found in {@code baseDir}.
+     *
+     * @param baseDir pipeline project base directory (where main.nf lives)
+     * @param cliParams CLI params map; mutated in place
+     */
+    static void applySchemaTypes(Path baseDir, Map cliParams) {
+        if( !cliParams || baseDir == null )
+            return
+        if( !NF.isSyntaxParserV2() )
+            return
+        if( SysEnv.get('NXF_DISABLE_PARAMS_TYPE_DETECTION') )
+            return
+
+        final schemaFile = baseDir.resolve(SCHEMA_FILENAME)
+        if( !Files.exists(schemaFile) )
+            return
+
+        final types = readSchemaTypes(schemaFile)
+        if( !types )
+            return
+
+        log.debug "Applying types from ${schemaFile} to ${cliParams.size()} CLI param(s) -- ${types.size()} param type(s) declared in schema"
+        coerceInPlace(cliParams, types)
+    }
+
+    /**
+     * Parse a JSON schema file and return a map of {@code paramName -> jsonType}
+     * (e.g. {@code "integer"}, {@code "number"}, {@code "boolean"}).
+     *
+     * @param schemaFile path of the JSON schema file to read
+     * @return the declared types, or an empty map (after logging a warning)
+     *         when the file cannot be parsed
+     */
+    static Map<String,String> readSchemaTypes(Path schemaFile) {
+        try {
+            final root = new JsonSlurper().parse(schemaFile)
+            final Map<String,String> types = new LinkedHashMap<>()
+            collectProperties(root, types)
+            return types
+        }
+        catch( Exception e ) {
+            log.warn "Unable to parse ${schemaFile} for fallback param typing -- ${e.message}"
+            return Collections.<String,String>emptyMap()
+        }
+    }
+
+    /**
+     * Recursively walk a JSON Schema fragment, collecting top-level property
+     * names and their declared {@code type}. Handles nf-core-style schemas
+     * that nest properties under {@code definitions} or {@code $defs}, plus
+     * any {@code allOf}/{@code oneOf}/{@code anyOf} compositions. The first
+     * declaration encountered for a name wins.
+     */
+    private static void collectProperties(Object node, Map<String,String> types) {
+        if( node !instanceof Map )
+            return
+        final map = (Map) node
+
+        final props = map.get('properties')
+        if( props instanceof Map ) {
+            for( final entry in ((Map<String,Object>) props).entrySet() ) {
+                final spec = entry.value
+                final type = spec instanceof Map ? declaredType(((Map) spec).get('type')) : null
+                if( type != null && !types.containsKey(entry.key) )
+                    types.put(entry.key, type)
+            }
+        }
+
+        for( final key in ['definitions', '$defs', 'allOf', 'oneOf', 'anyOf'] ) {
+            final sub = map.get(key)
+            final children = sub instanceof Map ? ((Map) sub).values()
+                : sub instanceof List ? (List) sub
+                : null
+            if( children == null )
+                continue
+            for( final child in children )
+                collectProperties(child, types)
+        }
+    }
+
+    /**
+     * Resolve a schema {@code type} keyword to a single type name. JSON Schema
+     * allows either a string or an array of strings; an array is accepted only
+     * when it names exactly one type besides {@code "null"}, e.g.
+     * {@code ["integer", "null"]}. Anything else yields {@code null}.
+     */
+    private static String declaredType(Object type) {
+        if( type instanceof String )
+            return (String) type
+        if( type !instanceof List )
+            return null
+        final names = ((List) type).findAll { it instanceof String && it != 'null' }
+        return names.size() == 1 ? (String) names[0] : null
+    }
+
+    /**
+     * Replace each string value whose param name has a schema type with its
+     * coerced form; all other entries are left untouched.
+     */
+    private static void coerceInPlace(Map params, Map<String,String> types) {
+        // loop over a copy of the key set, since the map is updated along the way
+        for( final name : new ArrayList(params.keySet()) ) {
+            final value = params.get(name)
+            final coerced = coerceValue(value, types.get(name))
+            // coerceValue returns the same reference when no coercion applied
+            if( coerced !== value )
+                params.put(name, coerced)
+        }
+    }
+
+    /**
+     * Convert a string to the given JSON Schema type ({@code boolean},
+     * {@code integer} or {@code number}). Non-string values, unknown types and
+     * strings that do not parse as the requested type are returned unchanged.
+     */
+    private static Object coerceValue(Object value, String type) {
+        if( value !instanceof CharSequence || !type )
+            return value
+        final str = value.toString()
+        switch( type ) {
+            case 'boolean':
+                if( str.equalsIgnoreCase('true') ) return Boolean.TRUE
+                if( str.equalsIgnoreCase('false') ) return Boolean.FALSE
+                break
+            case 'integer':
+                if( str.isInteger() ) return str.toInteger()
+                if( str.isLong() ) return str.toLong()
+                if( str.isBigInteger() ) return str.toBigInteger()
+                break
+            case 'number':
+                if( str.isInteger() ) return str.toInteger()
+                if( str.isLong() ) return str.toLong()
+                // never narrow to Float: '0.1' would become 0.1f, which differs from 0.1
+                if( str.isDouble() && Double.isFinite(str.toDouble()) ) return str.toDouble()
+                // magnitudes beyond the Double range (parsed as Infinity) are kept exact
+                if( str.isBigDecimal() ) return str.toBigDecimal()
+                break
+        }
+        return value
+    }
+}
diff --git a/modules/nextflow/src/test/groovy/nextflow/config/SchemaParamsHelperTest.groovy b/modules/nextflow/src/test/groovy/nextflow/config/SchemaParamsHelperTest.groovy
new file mode 100644
index 0000000000..664b2bc154
--- /dev/null
+++ b/modules/nextflow/src/test/groovy/nextflow/config/SchemaParamsHelperTest.groovy
@@ -0,0 +1,321 @@
+/*
+ * Copyright 2013-2026, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.config
+
+import java.nio.file.Files
+import java.nio.file.Path
+
+import nextflow.SysEnv
+import spock.lang.Specification
+
+/**
+ * @author Phil Ewels
+ */
+class SchemaParamsHelperTest extends Specification {
+
+    Path tempDir
+
+    def setup() {
+        tempDir = Files.createTempDirectory('nf-schema-test')
+        SysEnv.push([:])
+    }
+
+    def cleanup() {
+        SysEnv.pop()
+        tempDir.toFile().deleteDir()
+    }
+
+    private Path writeSchema(String content) {
+        final f = tempDir.resolve('nextflow_schema.json')
+        f.text = content
+        return f
+    }
+
+    def 'reads top-level types from a flat schema'() {
+        given:
+        writeSchema('''
+            {
+              "properties": {
+                "input": { "type": "string" },
+                "cpus":  { "type": "integer" },
+                "ratio": { "type": "number" },
+                "skip":  { "type": "boolean" }
+              }
+            }
+            '''.stripIndent())
+
+        when:
+        def types = SchemaParamsHelper.readSchemaTypes(tempDir.resolve('nextflow_schema.json'))
+
+        then:
+        types == [input: 'string', cpus: 'integer', ratio: 'number', skip: 'boolean']
+    }
+
+    def 'reads types nested under definitions / $defs / allOf'() {
+        given:
+        writeSchema('''
+            {
+              "allOf": [
+                { "$ref": "#/$defs/group_one" }
+              ],
+              "definitions": {
+                "group_one": {
+                  "properties": {
+                    "input":   { "type": "string" },
+                    "max_cpus":{ "type": "integer" }
+                  }
+                }
+              },
+              "$defs": {
+                "group_two": {
+                  "properties": {
+                    "skip_qc": { "type": "boolean" }
+                  }
+                }
+              }
+            }
+            '''.stripIndent())
+
+        when:
+        def types = SchemaParamsHelper.readSchemaTypes(tempDir.resolve('nextflow_schema.json'))
+
+        then:
+        types == [input: 'string', max_cpus: 'integer', skip_qc: 'boolean']
+    }
+
+    def 'reads the single non-null member of a union type'() {
+        given:
+        writeSchema('''
+            {
+              "properties": {
+                "cpus":  { "type": ["integer", "null"] },
+                "mixed": { "type": ["string", "number"] }
+              }
+            }
+            '''.stripIndent())
+
+        when:
+        def types = SchemaParamsHelper.readSchemaTypes(tempDir.resolve('nextflow_schema.json'))
+
+        then:
+        types == [cpus: 'integer']
+    }
+
+    def 'coerces CLI string params to schema-declared types'() {
+        given:
+        writeSchema('''
+            {
+              "properties": {
+                "input": { "type": "string" },
+                "cpus":  { "type": "integer" },
+                "ratio": { "type": "number" },
+                "skip":  { "type": "boolean" }
+              }
+            }
+            '''.stripIndent())
+        def cli = [input: 'data.csv', cpus: '8', ratio: '0.5', skip: 'true', extra: 'hello']
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.input == 'data.csv'
+        cli.cpus == 8
+        cli.cpus instanceof Integer
+        cli.ratio == 0.5
+        cli.ratio instanceof Double
+        cli.skip == Boolean.TRUE
+        cli.extra == 'hello'   // not in schema -- left untouched
+    }
+
+    def 'coerces decimal numbers to Double without losing precision'() {
+        given:
+        writeSchema('{ "properties": { "ratio": { "type": "number" } } }')
+        def cli = [ratio: '0.1']
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.ratio instanceof Double
+        cli.ratio == 0.1
+    }
+
+    def 'leaves non-finite number literals as strings'() {
+        given:
+        writeSchema('{ "properties": { "ratio": { "type": "number" } } }')
+        def cli = [ratio: 'NaN']
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.ratio == 'NaN'
+    }
+
+    def 'leaves un-coercible values as strings'() {
+        given:
+        writeSchema('''
+            {
+              "properties": {
+                "cpus": { "type": "integer" },
+                "skip": { "type": "boolean" }
+              }
+            }
+            '''.stripIndent())
+        def cli = [cpus: 'abc', skip: 'maybe']
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.cpus == 'abc'
+        cli.skip == 'maybe'
+    }
+
+    def 'does nothing when schema file is missing'() {
+        given:
+        def cli = [cpus: '8']
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.cpus == '8'
+    }
+
+    def 'does nothing when schema is malformed'() {
+        given:
+        writeSchema('{ broken')
+        def cli = [cpus: '8']
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.cpus == '8'
+    }
+
+    def 'is a no-op when params type detection is disabled'() {
+        given:
+        writeSchema('{ "properties": { "cpus": { "type": "integer" } } }')
+        SysEnv.push(NXF_DISABLE_PARAMS_TYPE_DETECTION: 'true')
+        def cli = [cpus: '8']
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.cpus == '8'
+
+        cleanup:
+        SysEnv.pop()
+    }
+
+    def 'is a no-op under syntax parser v1'() {
+        given:
+        writeSchema('{ "properties": { "cpus": { "type": "integer" } } }')
+        SysEnv.push(NXF_SYNTAX_PARSER: 'v1')
+        def cli = [cpus: '8']
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.cpus == '8'
+
+        cleanup:
+        SysEnv.pop()
+    }
+
+    def 'ignores schema properties that have no type field'() {
+        given:
+        writeSchema('''
+            {
+              "properties": {
+                "input": { "description": "no type given here" },
+                "cpus":  { "type": "integer" }
+              }
+            }
+            '''.stripIndent())
+        def cli = [input: '42', cpus: '8']
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.input == '42'   // schema didn't declare a type -- left as string
+        cli.cpus == 8
+    }
+
+    def 'first declaration wins when a property is repeated across definitions'() {
+        given:
+        writeSchema('''
+            {
+              "definitions": {
+                "first": {
+                  "properties": { "size": { "type": "integer" } }
+                },
+                "second": {
+                  "properties": { "size": { "type": "string" } }
+                }
+              }
+            }
+            '''.stripIndent())
+
+        when:
+        def types = SchemaParamsHelper.readSchemaTypes(tempDir.resolve('nextflow_schema.json'))
+
+        then:
+        types == [size: 'integer']
+    }
+
+    def 'coerces booleans regardless of letter casing'() {
+        given:
+        writeSchema('{ "properties": { "flag": { "type": "boolean" } } }')
+
+        expect:
+        coerceFlag(input) == expected
+
+        where:
+        input   | expected
+        'TRUE'  | Boolean.TRUE
+        'False' | Boolean.FALSE
+        'true'  | Boolean.TRUE
+        'false' | Boolean.FALSE
+    }
+
+    private Object coerceFlag(String input) {
+        def cli = [flag: input]
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+        return cli.flag
+    }
+
+    def 'preserves non-string values already typed by upstream parsing'() {
+        given:
+        writeSchema('{ "properties": { "cpus": { "type": "integer" } } }')
+        // value supplied as Integer (e.g. from a JSON params file) is left alone
+        def cli = [cpus: 16] as Map
+
+        when:
+        SchemaParamsHelper.applySchemaTypes(tempDir, cli)
+
+        then:
+        cli.cpus == 16
+        cli.cpus instanceof Integer
+    }
+}