diff --git a/modules/nextflow/build.gradle b/modules/nextflow/build.gradle index de5240774c..4a19c65843 100644 --- a/modules/nextflow/build.gradle +++ b/modules/nextflow/build.gradle @@ -73,6 +73,7 @@ dependencies { api 'org.apache.commons:commons-compress:1.27.1' // For tar.gz extraction api 'io.seqera:npr-api:0.22.0' api 'io.seqera:npr-client:0.22.0' + api 'com.networknt:json-schema-validator:1.5.6' testImplementation 'org.subethamail:subethasmtp:3.1.7' testImplementation (project(':nf-lineage')) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/module/CmdModuleValidate.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/module/CmdModuleValidate.groovy index 3802a9ac2e..4556cba5cc 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/module/CmdModuleValidate.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/module/CmdModuleValidate.groovy @@ -26,6 +26,7 @@ import groovy.util.logging.Slf4j import nextflow.cli.CmdBase import nextflow.exception.AbortOperationException import nextflow.module.ModuleReference +import nextflow.module.ModuleSchemaValidator import nextflow.module.ModuleStorage import nextflow.module.ModuleValidator import nextflow.util.TestOnly @@ -43,6 +44,9 @@ class CmdModuleValidate extends CmdBase { @Parameter(description = "[namespace/name or path]", required = true) List args + @Parameter(names = '--schema', description = 'URL or local path of the JSON schema used to validate meta.yml') + String schema + @TestOnly protected Path root @@ -57,7 +61,8 @@ class CmdModuleValidate extends CmdBase { throw new AbortOperationException("Incorrect number of arguments -- usage: nextflow module validate ") final moduleDir = determineModuleDir(args[0]) - final errors = ModuleValidator.validate(moduleDir) + final schemaLocation = schema ?: ModuleSchemaValidator.DEFAULT_SCHEMA_URL + final errors = ModuleValidator.validate(moduleDir, schemaLocation) if( errors ) { throw new AbortOperationException( diff --git 
a/modules/nextflow/src/main/groovy/nextflow/module/ModuleSchemaValidator.groovy b/modules/nextflow/src/main/groovy/nextflow/module/ModuleSchemaValidator.groovy
new file mode 100644
index 0000000000..60dea2d737
--- /dev/null
+++ b/modules/nextflow/src/main/groovy/nextflow/module/ModuleSchemaValidator.groovy
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2013-2026, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.module
+
+import java.nio.file.Files
+import java.nio.file.Path
+import java.nio.file.Paths
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.ObjectMapper
+import com.networknt.schema.JsonSchema
+import com.networknt.schema.JsonSchemaFactory
+import com.networknt.schema.SpecVersion
+import com.networknt.schema.SpecVersionDetector
+import com.networknt.schema.ValidationMessage
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import nextflow.exception.AbortOperationException
+import org.yaml.snakeyaml.Yaml
+
+/**
+ * Validates a module spec (meta.yml) against the Nextflow module JSON schema.
+ *
+ * The schema is loaded and parsed on every call to {@code validate}.
+ *
+ * @author Paolo Di Tommaso
+ */
+@Slf4j
+@CompileStatic
+class ModuleSchemaValidator {
+
+    /** Schema location used by the single-argument {@code validate} overload. */
+    static final String DEFAULT_SCHEMA_URL =
+        'https://raw.githubusercontent.com/nextflow-io/schemas/refs/heads/main/module/v1/schema.json'
+
+    private static final ObjectMapper JSON_MAPPER = new ObjectMapper()
+
+    /**
+     * Validate a meta.yml file against the JSON schema located at the given
+     * URL or local file path.
+     *
+     * @param metaYaml Path to the meta.yml file to validate
+     * @param schemaLocation URL (http/https), file: URI, or local file path of the schema
+     * @return List of validation error messages, empty if the spec is valid
+     * @throws AbortOperationException if the schema cannot be loaded, parsed or built, or meta.yml cannot be read
+     */
+    static List<String> validate(Path metaYaml, String schemaLocation) {
+        final schemaNode = parseSchema(loadSchema(schemaLocation), schemaLocation)
+        final specVersion = detectSpecVersion(schemaNode, schemaLocation)
+        final schema = buildSchema(schemaNode, specVersion, schemaLocation)
+        final metaNode = loadMeta(metaYaml)
+        // element types are declared so that `message` resolves under @CompileStatic
+        final Set<ValidationMessage> messages = schema.validate(metaNode)
+        return messages.collect { ValidationMessage msg -> msg.message }
+    }
+
+    /**
+     * Validate a meta.yml file against the schema at {@link #DEFAULT_SCHEMA_URL}.
+     *
+     * @param metaYaml Path to the meta.yml file to validate
+     * @return List of validation error messages, empty if the spec is valid
+     */
+    static List<String> validate(Path metaYaml) {
+        return validate(metaYaml, DEFAULT_SCHEMA_URL)
+    }
+
+    /** Parse the schema text as JSON; a parse failure aborts and names the schema location. */
+    private static JsonNode parseSchema(String schemaText, String schemaLocation) {
+        try {
+            return JSON_MAPPER.readTree(schemaText)
+        }
+        catch( Exception e ) {
+            throw new AbortOperationException("Invalid module schema at '${schemaLocation}': ${e.message}", e)
+        }
+    }
+
+    /** Detect the JSON Schema draft of the schema document; a detection failure aborts. */
+    private static SpecVersion.VersionFlag detectSpecVersion(JsonNode schemaNode, String schemaLocation) {
+        try {
+            return SpecVersionDetector.detect(schemaNode)
+        }
+        catch( Exception e ) {
+            throw new AbortOperationException(
+                "Cannot determine JSON Schema draft for '${schemaLocation}': ${e.message}. " +
+                "The schema must declare a supported \$schema (e.g. https://json-schema.org/draft/2020-12/schema).", e)
+        }
+    }
+
+    /** Build the validator schema for the detected draft; a build failure aborts. */
+    private static JsonSchema buildSchema(JsonNode schemaNode, SpecVersion.VersionFlag specVersion, String schemaLocation) {
+        try {
+            return JsonSchemaFactory.getInstance(specVersion).getSchema(schemaNode)
+        }
+        catch( Exception e ) {
+            throw new AbortOperationException("Invalid module schema at '${schemaLocation}': ${e.message}", e)
+        }
+    }
+
+    /** Load meta.yml as YAML and convert the loaded value into a JSON tree. */
+    private static JsonNode loadMeta(Path metaYaml) {
+        try( final stream = Files.newInputStream(metaYaml) ) {
+            // NOTE(review): meta.yml is parsed with a default-constructed Yaml(); confirm the
+            // SnakeYAML version in use restricts object construction before validating untrusted specs
+            return JSON_MAPPER.valueToTree(new Yaml().load(stream))
+        }
+        catch( Exception e ) {
+            throw new AbortOperationException("Failed to read module spec '${metaYaml}': ${e.message}", e)
+        }
+    }
+
+    /**
+     * Load the JSON schema text from a remote URL, file: URI, or local file path.
+     * Hard-fails with AbortOperationException on any I/O error.
+     */
+    private static String loadSchema(String location) {
+        try {
+            if( location.startsWith('http://') || location.startsWith('https://') ) {
+                final url = new URL(location)
+                final conn = url.openConnection()
+                conn.setConnectTimeout(10_000)
+                conn.setReadTimeout(20_000)
+                try( final stream = conn.getInputStream() ) {
+                    return new String(stream.readAllBytes(), 'UTF-8')
+                }
+            }
+            if( location.startsWith('file:') ) {
+                return Files.readString(Paths.get(URI.create(location)))
+            }
+            return Files.readString(Paths.get(location))
+        }
+        catch( Exception e ) {
+            // Suggest --schema only when the default location failed: a location that the
+            // caller supplied explicitly cannot be fixed by pointing back at the same option.
+            final hint = location == DEFAULT_SCHEMA_URL ? ' Pass --schema to override.' : ''
+            throw new AbortOperationException(
+                "Failed to load module schema from '${location}': ${e.message}.${hint}", e)
+        }
+    }
+}
diff --git a/modules/nextflow/src/main/groovy/nextflow/module/ModuleSpec.groovy b/modules/nextflow/src/main/groovy/nextflow/module/ModuleSpec.groovy index abd9e10c50..aa7a863cc6 100644 --- a/modules/nextflow/src/main/groovy/nextflow/module/ModuleSpec.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/module/ModuleSpec.groovy @@ -71,22 +71,17 @@ class ModuleSpec { Map _passthrough /** - * Validate the module spec for required fields + * Validate Nextflow-specific module spec rules that are not expressed by + * the JSON schema (see ModuleSchemaValidator). * * @return List of validation errors (empty if valid) */ List validate() { final List errors = [] - if( !name ) - errors << "Missing required field: name" - if( !version ) errors << "Missing required field: version" - if( !description ) - errors << "Missing required field: description" - if( !license ) errors << "Missing required field: license" @@ -123,11 +118,8 @@ class ModuleSpec { return } - if( !param.type || param.type == TODO_TYPE ) - errors << "Missing type for ${name}${param.name ? " ($param.name)" : ''}".toString() - - if( !param.description || param.description == TODO_DESCRIPTION ) - errors << "Missing description for ${name}${param.name ? " ($param.name)" : ''}".toString() + if( param.description == TODO_DESCRIPTION ) + errors << "Placeholder description for ${name}${param.name ? 
" ($param.name)" : ''}".toString() } /** @@ -169,7 +161,7 @@ class ModuleSpec { */ Map asMap() { final result = new LinkedHashMap() - result['$schema'] = 'https://raw.githubusercontent.com/nextflow-io/schemas/refs/heads/main/module/v1/schema.json' + result['$schema'] = ModuleSchemaValidator.DEFAULT_SCHEMA_URL if( name ) result['name'] = name if( version ) diff --git a/modules/nextflow/src/main/groovy/nextflow/module/ModuleValidator.groovy b/modules/nextflow/src/main/groovy/nextflow/module/ModuleValidator.groovy index 0cdf5c4eef..d3601c2969 100644 --- a/modules/nextflow/src/main/groovy/nextflow/module/ModuleValidator.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/module/ModuleValidator.groovy @@ -37,8 +37,9 @@ class ModuleValidator { * An empty list means the module is valid. * * @param moduleDir + * @param schemaLocation URL or local path of the JSON schema used to validate meta.yml */ - static List validate(Path moduleDir) { + static List validate(Path moduleDir, String schemaLocation) { final errors = new ArrayList() // Level 1: validate module structure @@ -46,8 +47,13 @@ class ModuleValidator { if( errors ) return errors // can't proceed without required files - // Level 2: validate module spec (meta.yml) + // Level 2a: validate module spec (meta.yml) against the JSON schema final manifestPath = moduleDir.resolve(ModuleStorage.MODULE_MANIFEST_FILE) + errors.addAll(ModuleSchemaValidator.validate(manifestPath, schemaLocation)) + if( errors ) + return errors + + // Level 2b: validate Nextflow-specific rules not expressed by the schema final spec = ModuleSpecFactory.fromYaml(manifestPath) errors.addAll(spec.validate()) if( errors ) @@ -61,6 +67,10 @@ class ModuleValidator { return errors } + static List validate(Path moduleDir) { + return validate(moduleDir, ModuleSchemaValidator.DEFAULT_SCHEMA_URL) + } + /** * Check that required files exist. 
* diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/module/CmdModuleValidateTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/module/CmdModuleValidateTest.groovy index 5877edbc70..b2b064dc45 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/module/CmdModuleValidateTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/module/CmdModuleValidateTest.groovy @@ -32,6 +32,42 @@ class CmdModuleValidateTest extends Specification { @TempDir Path tempDir + private static final String SCHEMA_JSON = '''\ + { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "name": { "type": "string" }, + "version": { "type": "string" }, + "description": { "type": "string" }, + "license": { "type": "string" }, + "input": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "type": { + "type": "string", + "enum": ["boolean", "float", "integer", "string", "list", "map", "file", "directory"] + }, + "description": { "type": "string" } + }, + "required": ["type", "description"] + } + } + }, + "required": ["name", "description"] + } + '''.stripIndent() + + private Path schemaPath() { + final p = tempDir.resolve('schema.json') + if( !Files.exists(p) ) + Files.writeString(p, SCHEMA_JSON) + return p + } + private Path createValidModule(String namespace='myorg', String name='hello') { def moduleDir = tempDir.resolve("modules/$namespace/$name") Files.createDirectories(moduleDir) @@ -73,7 +109,7 @@ class CmdModuleValidateTest extends Specification { def moduleDir = createValidModule() when: - def errors = ModuleValidator.validate(moduleDir) + def errors = ModuleValidator.validate(moduleDir, schemaPath().toString()) then: errors.isEmpty() @@ -85,7 +121,7 @@ class CmdModuleValidateTest extends Specification { Files.delete(moduleDir.resolve('main.nf')) when: - def errors = ModuleValidator.validate(moduleDir) + def errors = ModuleValidator.validate(moduleDir, 
schemaPath().toString()) then: errors.any { it.contains('main.nf') } @@ -97,7 +133,7 @@ class CmdModuleValidateTest extends Specification { Files.delete(moduleDir.resolve('meta.yml')) when: - def errors = ModuleValidator.validate(moduleDir) + def errors = ModuleValidator.validate(moduleDir, schemaPath().toString()) then: errors.any { it.contains('meta.yml') } @@ -109,13 +145,13 @@ class CmdModuleValidateTest extends Specification { Files.delete(moduleDir.resolve('README.md')) when: - def errors = ModuleValidator.validate(moduleDir) + def errors = ModuleValidator.validate(moduleDir, schemaPath().toString()) then: errors.any { it.contains('README.md') } } - def 'should fail when meta.yml has missing required fields'() { + def 'should fail when meta.yml is missing schema-required fields'() { given: def moduleDir = createValidModule() moduleDir.resolve('meta.yml').text = '''\ @@ -124,10 +160,31 @@ class CmdModuleValidateTest extends Specification { '''.stripIndent() when: - def errors = ModuleValidator.validate(moduleDir) + def errors = ModuleValidator.validate(moduleDir, schemaPath().toString()) then: + // schema-level validation runs first; reports missing required `description` errors.any { it.contains('description') } + } + + def 'should fail when meta.yml is missing nextflow-only fields'() { + given: + def moduleDir = createValidModule() + moduleDir.resolve('meta.yml').text = '''\ + name: myorg/hello + description: A test module + input: + - name: greeting + type: string + description: A greeting string + '''.stripIndent() + + when: + def errors = ModuleValidator.validate(moduleDir, schemaPath().toString()) + + then: + // schema passes, then ModuleSpec.validate() reports missing version + license + errors.any { it.contains('version') } errors.any { it.contains('license') } } @@ -142,7 +199,7 @@ class CmdModuleValidateTest extends Specification { '''.stripIndent() when: - def errors = ModuleValidator.validate(moduleDir) + def errors = 
ModuleValidator.validate(moduleDir, schemaPath().toString()) then: errors.any { it.contains('version') }
diff --git a/modules/nextflow/src/test/groovy/nextflow/module/ModuleSchemaValidatorTest.groovy b/modules/nextflow/src/test/groovy/nextflow/module/ModuleSchemaValidatorTest.groovy
new file mode 100644
index 0000000000..318e1d6307
--- /dev/null
+++ b/modules/nextflow/src/test/groovy/nextflow/module/ModuleSchemaValidatorTest.groovy
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2013-2026, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.module
+
+import java.nio.file.Files
+import java.nio.file.Path
+
+import nextflow.exception.AbortOperationException
+import spock.lang.Specification
+import spock.lang.TempDir
+
+/**
+ * Specification for ModuleSchemaValidator, driven by schema and meta.yml
+ * fixtures written into a per-test temporary directory.
+ *
+ * @author Paolo Di Tommaso
+ */
+class ModuleSchemaValidatorTest extends Specification {
+
+    @TempDir
+    Path tempDir
+
+    // Draft 2020-12 fixture: name and description are required at the top level,
+    // each input entry requires type and description, and type is restricted by an enum
+    private static final String MINIMAL_SCHEMA = '''\
+        {
+          "$schema": "https://json-schema.org/draft/2020-12/schema",
+          "type": "object",
+          "properties": {
+            "name": { "type": "string" },
+            "description": { "type": "string" },
+            "input": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "type": { "type": "string", "enum": ["string", "file", "directory"] },
+                  "description": { "type": "string" }
+                },
+                "required": ["type", "description"]
+              }
+            }
+          },
+          "required": ["name", "description"]
+        }
+        '''.stripIndent()
+
+    /** Writes the schema text to schema.json under the temp dir and returns that path. */
+    private Path writeSchema(String text = MINIMAL_SCHEMA) {
+        return Files.writeString(tempDir.resolve('schema.json'), text)
+    }
+
+    /** Writes the YAML text to meta.yml under the temp dir and returns that path. */
+    private Path writeMeta(String yaml) {
+        return Files.writeString(tempDir.resolve('meta.yml'), yaml)
+    }
+
+    def 'should pass validation when meta.yml satisfies the schema' () {
+        given:
+        def schemaFile = writeSchema()
+        def metaFile = writeMeta('''\
+            name: nf-core/fastqc
+            description: Run FastQC
+            '''.stripIndent())
+
+        expect:
+        ModuleSchemaValidator.validate(metaFile, schemaFile.toString()).isEmpty()
+    }
+
+    def 'should report missing required fields against the schema' () {
+        given:
+        def schemaFile = writeSchema()
+        def metaFile = writeMeta('''\
+            name: nf-core/fastqc
+            '''.stripIndent())
+
+        when:
+        def problems = ModuleSchemaValidator.validate(metaFile, schemaFile.toString())
+
+        then:
+        !problems.isEmpty()
+        problems.any { String msg -> msg.contains('description') }
+    }
+
+    def 'should report invalid input type against the schema enum' () {
+        given:
+        def schemaFile = writeSchema()
+        def metaFile = writeMeta('''\
+            name: nf-core/fastqc
+            description: Run FastQC
+            input:
+              - name: reads
+                type: bogus
+                description: input reads
+            '''.stripIndent())
+
+        when:
+        def problems = ModuleSchemaValidator.validate(metaFile, schemaFile.toString())
+
+        then:
+        !problems.isEmpty()
+        problems.any { String msg ->
+            final lowered = msg.toLowerCase()
+            lowered.contains('type') || lowered.contains('enum')
+        }
+    }
+
+    def 'should accept a file: URI for the schema location' () {
+        given:
+        def schemaUri = writeSchema().toUri().toString()
+        def metaFile = writeMeta('''\
+            name: nf-core/fastqc
+            description: Run FastQC
+            '''.stripIndent())
+
+        expect:
+        ModuleSchemaValidator.validate(metaFile, schemaUri).isEmpty()
+    }
+
+    def 'should hard-fail when the schema cannot be loaded' () {
+        given:
+        def metaFile = writeMeta('name: x\ndescription: y\n')
+        def missingSchema = tempDir.resolve('does-not-exist.json').toString()
+
+        when:
+        ModuleSchemaValidator.validate(metaFile, missingSchema)
+
+        then:
+        def failure = thrown(AbortOperationException)
+        failure.message.contains('Failed to load module schema')
+    }
+
+    def 'should hard-fail when the schema does not declare a supported draft' () {
+        given:
+        def schemaFile = writeSchema('''\
+            {
+              "type": "object",
+              "properties": {
+                "name": { "type": "string" }
+              }
+            }
+            '''.stripIndent())
+        def metaFile = writeMeta('name: x\n')
+
+        when:
+        ModuleSchemaValidator.validate(metaFile, schemaFile.toString())
+
+        then:
+        def failure = thrown(AbortOperationException)
+        failure.message.contains('Cannot determine JSON Schema draft')
+    }
+}
diff --git a/modules/nextflow/src/test/groovy/nextflow/module/ModuleSpecFactoryTest.groovy b/modules/nextflow/src/test/groovy/nextflow/module/ModuleSpecFactoryTest.groovy index c538586706..67dada8269 100644 --- a/modules/nextflow/src/test/groovy/nextflow/module/ModuleSpecFactoryTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/module/ModuleSpecFactoryTest.groovy @@ -359,7 +359,6 @@ class ModuleSpecFactoryTest extends Specification { !parsed.containsKey('version') yaml.contains('# TODO:') yaml.contains('Missing required field: version') - yaml.contains('Missing required field: description') yaml.contains('Missing required field: license') and: diff --git a/modules/nextflow/src/test/groovy/nextflow/module/ModuleSpecTest.groovy 
b/modules/nextflow/src/test/groovy/nextflow/module/ModuleSpecTest.groovy index 5262e53b78..17909f68aa 100644 --- a/modules/nextflow/src/test/groovy/nextflow/module/ModuleSpecTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/module/ModuleSpecTest.groovy @@ -53,20 +53,19 @@ class ModuleSpecTest extends Specification { spec.isValid() } - def 'should detect missing required fields' () { + def 'should detect missing required fields not covered by schema' () { given: def spec = new ModuleSpec( name: 'nf-core/fastqc' - // missing version, description, license + // missing version and license — description is checked by the JSON schema ) when: def errors = spec.validate() then: - errors.size() == 3 + errors.size() == 2 errors.any { it.contains('version') } - errors.any { it.contains('description') } errors.any { it.contains('license') } !spec.isValid() } @@ -145,7 +144,7 @@ class ModuleSpecTest extends Specification { def 'should render TODO list for missing required fields'() { given: def spec = new ModuleSpec(name: 'my-namespace/fastqc') - // version, description, license are all missing + // version and license are missing (description is validated by the JSON schema) when: def yaml = spec.toYaml() @@ -153,7 +152,6 @@ class ModuleSpecTest extends Specification { then: yaml.contains('# TODO:') yaml.contains('Missing required field: version') - yaml.contains('Missing required field: description') yaml.contains('Missing required field: license') }