From 9d6c826ae5594a2ff9a1cfb2ab11cbdcffd32d7a Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 25 Sep 2024 13:37:37 -0500 Subject: [PATCH 01/28] Update docs and tests with strict syntax Signed-off-by: Ben Sherman --- docs/channel.md | 2 +- docs/process.md | 16 ++--- docs/reference/channel.md | 10 +-- docs/reference/operator.md | 14 ++--- docs/reference/process.md | 8 +-- docs/script.md | 62 +++++++++++-------- docs/snippets/branch-criteria.nf | 14 ++--- docs/snippets/branch-with-fallback.nf | 12 ++-- docs/snippets/branch-with-mapper.nf | 16 ++--- docs/snippets/branch.nf | 10 +-- docs/snippets/buffer-with-closing.nf | 2 +- docs/snippets/collect-with-mapper.nf | 2 +- docs/snippets/collectfile-closure.nf | 6 +- docs/snippets/collectfile.nf | 6 +- docs/snippets/count-with-filter-closure.nf | 2 +- docs/snippets/cross-with-mapper.nf | 2 +- docs/snippets/distinct-with-mapper.nf | 2 +- docs/snippets/dump.nf | 4 +- docs/snippets/filter-closure.nf | 2 +- docs/snippets/first.nf | 2 +- docs/snippets/flatmap-map.nf | 2 +- docs/snippets/map.nf | 2 +- docs/snippets/max-with-comparator.nf | 2 +- docs/snippets/max-with-mapper.nf | 2 +- docs/snippets/max.nf | 2 +- docs/snippets/min-with-comparator.nf | 2 +- docs/snippets/min-with-mapper.nf | 2 +- docs/snippets/min.nf | 2 +- docs/snippets/multimap-criteria.nf | 6 +- docs/snippets/multimap-shared.nf | 6 +- docs/snippets/multimap.nf | 10 +-- docs/snippets/process-out-env.nf | 2 +- docs/snippets/process-stdout.nf | 2 +- docs/snippets/reduce-with-initial-value.nf | 8 +-- docs/snippets/reduce.nf | 2 +- docs/snippets/splitjson-array.nf | 2 +- docs/snippets/splitjson-object.nf | 2 +- docs/snippets/splitjson-with-path.nf | 2 +- docs/snippets/subscribe-with-on-complete.nf | 2 +- docs/snippets/subscribe-with-param.nf | 4 +- docs/snippets/subscribe.nf | 2 +- docs/snippets/sum-with-mapper.nf | 4 +- docs/snippets/sum.nf | 2 +- docs/snippets/tap.nf | 10 +-- docs/snippets/unique-with-mapper.nf | 2 +- docs/snippets/until.nf | 2 +- 
docs/snippets/view-with-mapper.nf | 2 +- docs/snippets/your-first-script.nf | 2 +- docs/workflow.md | 6 +- tests/blast-dsl2.nf | 18 +++--- tests/blast-parallel-dsl2.nf | 31 ++++++---- tests/collect_and_merge.nf | 2 +- tests/complex-names-dsl2.nf | 8 +-- ...included => config-labels-included.config} | 0 tests/config-labels.config | 2 +- tests/config-labels.nf | 16 ++--- tests/dynamic-filename.nf | 5 +- tests/env-out.nf | 4 +- tests/env2.nf | 2 +- tests/error-finish.nf | 11 ++-- tests/files.nf | 3 +- tests/output-dsl.nf | 4 ++ tests/output-globs.nf | 24 +++---- tests/output-val-dsl2.nf | 4 +- tests/profiles.config | 3 +- tests/publish-saveas.nf | 4 +- tests/race.nf | 4 +- tests/rnaseq-toy-dsl2.nf | 7 +-- tests/sets.nf | 12 ++-- tests/singleton.nf | 2 +- tests/subworkflow-dsl2.nf | 5 +- tests/task-escape-path-dsl2.nf | 20 +++--- tests/task-retry.nf | 6 +- tests/template-dyn.nf | 4 +- tests/tuples-dsl2.nf | 24 ++++--- tests/watch-dsl2.nf | 16 +++-- tests/when-block.nf | 11 ++-- tests/workdir-with-blank.nf | 2 +- validation/test-complexpaths.nf | 10 +-- validation/test-overwrite.nf | 2 +- 80 files changed, 291 insertions(+), 265 deletions(-) rename tests/{config-labels.included => config-labels-included.config} (100%) diff --git a/docs/channel.md b/docs/channel.md index 83baca700c..f63d123abc 100644 --- a/docs/channel.md +++ b/docs/channel.md @@ -52,7 +52,7 @@ process foo { workflow { result = foo(1) - result.view { "Result: ${it}" } + result.view { txt -> "Result: ${txt}" } } ``` diff --git a/docs/process.md b/docs/process.md index d89f3b62e1..de5f1986d3 100644 --- a/docs/process.md +++ b/docs/process.md @@ -584,7 +584,7 @@ The `env` qualifier allows you to define an environment variable in the process ```groovy process printEnv { input: - env HELLO + env 'HELLO' ''' echo $HELLO world! 
@@ -619,7 +619,7 @@ process printAll { workflow { Channel.of('hello', 'hola', 'bonjour', 'ciao') - | map { it + '\n' } + | map { v -> v + '\n' } | printAll } ``` @@ -841,7 +841,7 @@ workflow { methods = ['prot', 'dna', 'rna'] receiver = foo(methods) - receiver.view { "Received: $it" } + receiver.view { method -> "Received: $method" } } ``` @@ -868,9 +868,9 @@ workflow { ch_dummy = Channel.fromPath('*').first() (ch_var, ch_str, ch_exp) = foo(ch_dummy) - ch_var.view { "ch_var: $it" } - ch_str.view { "ch_str: $it" } - ch_exp.view { "ch_exp: $it" } + ch_var.view { var -> "ch_var: $var" } + ch_str.view { str -> "ch_str: $str" } + ch_exp.view { exp -> "ch_exp: $exp" } } ``` @@ -890,7 +890,7 @@ process randomNum { workflow { numbers = randomNum() - numbers.view { "Received: ${it.text}" } + numbers.view { v -> "Received: ${v.text}" } } ``` @@ -931,7 +931,7 @@ process splitLetters { workflow { splitLetters | flatten - | view { "File: ${it.name} => ${it.text}" } + | view { chunk -> "File: ${chunk.name} => ${chunk.text}" } } ``` diff --git a/docs/reference/channel.md b/docs/reference/channel.md index c43cac2d2e..1f6d9e9ed4 100644 --- a/docs/reference/channel.md +++ b/docs/reference/channel.md @@ -22,7 +22,7 @@ The `channel.from` method allows you to create a channel emitting any sequence o ```groovy ch = channel.from( 1, 3, 5, 7 ) -ch.subscribe { println "value: $it" } +ch.subscribe { v -> println "value: $v" } ``` The first line in this example creates a variable `ch` which holds a channel object. This channel emits the values specified as a parameter in the `from` method. 
Thus the second line will print the following: @@ -70,7 +70,7 @@ The `channel.fromList` method allows you to create a channel emitting the values ```groovy channel .fromList( ['a', 'b', 'c', 'd'] ) - .view { "value: $it" } + .view { v -> "value: $v" } ``` Prints: @@ -350,7 +350,7 @@ The `channel.of` method allows you to create a channel that emits the arguments ```groovy ch = channel.of( 1, 3, 5, 7 ) -ch.view { "value: $it" } +ch.view { v -> "value: $v" } ``` The first line in this example creates a variable `ch` which holds a channel object. This channel emits the arguments @@ -461,7 +461,7 @@ For example: ```groovy channel .watchPath( '/path/*.fa' ) - .subscribe { println "Fasta file: $it" } + .subscribe { fa -> println "Fasta file: $fa" } ``` By default it watches only for new files created in the specified folder. Optionally, it is possible to provide a second @@ -476,7 +476,7 @@ You can specify more than one of these events by using a comma separated string ```groovy channel .watchPath( '/path/*.fa', 'create,modify' ) - .subscribe { println "File created or modified: $it" } + .subscribe { fa -> println "File created or modified: $fa" } ``` :::{warning} diff --git a/docs/reference/operator.md b/docs/reference/operator.md index 4cf6d3973f..36f8f0d794 100644 --- a/docs/reference/operator.md +++ b/docs/reference/operator.md @@ -253,10 +253,10 @@ The following example shows how to use a closure to collect and sort all sequenc Channel .fromPath('/data/sequences.fa') .splitFasta( record: [id: true, sequence: true] ) - .collectFile( name: 'result.fa', sort: { it.size() } ) { - it.sequence + .collectFile( name: 'result.fa', sort: { v -> v.size() } ) { + v -> v.sequence } - .view { it.text } + .view { v -> v.text } ``` :::{warning} @@ -1342,8 +1342,8 @@ The `by` option can be used to emit chunks of *N* lines: Channel .fromPath('/some/path/*.txt') .splitText( by: 10 ) - .subscribe { - print it; + .subscribe { chunk -> + print chunk print "--- end of the chunk ---\n" } 
``` @@ -1353,7 +1353,7 @@ An optional {ref}`closure ` can be used to transform each text c ```groovy Channel .fromPath('/some/path/*.txt') - .splitText( by: 10 ) { it.toUpperCase() } + .splitText( by: 10 ) { v -> v.toUpperCase() } .view() ``` @@ -1518,7 +1518,7 @@ The `toInteger` operator converts string values from a source channel to integer `toInteger` is equivalent to: ```groovy -map { it -> it as Integer } +map { v -> v as Integer } ``` ::: diff --git a/docs/reference/process.md b/docs/reference/process.md index 5a7193e088..b66f2f31fe 100644 --- a/docs/reference/process.md +++ b/docs/reference/process.md @@ -72,9 +72,9 @@ Additionally, the [directive values](#directives) for the given task can be acce `stageAs` : Alias of `name`. -`env( identifier )` +`env( name )` -: Declare an environment variable input. The received value should be a string, and it will be exported to the task environment as an environment variable given by `identifier`. +: Declare an environment variable input. The received value should be a string, and it will be exported to the task environment as an environment variable given by `name`. `stdin` @@ -139,9 +139,9 @@ Additionally, the [directive values](#directives) for the given task can be acce `type` : Type of paths returned, either `file`, `dir` or `any` (default: `any`, or `file` if the specified file name pattern contains a double star (`**`)). -`env( identifier )` +`env( name )` -: Declare an environment variable output. It receives the value of the environment variable (given by the identifier) from the task environment. +: Declare an environment variable output. It receives the value of the environment variable (given by `name`) from the task environment. : :::{versionchanged} 23.12.0-edge Prior to this version, if the environment variable contained multiple lines of output, the output would be compressed to a single line by converting newlines to spaces. 
diff --git a/docs/script.md b/docs/script.md index 98e49584ed..fe3b2f2801 100644 --- a/docs/script.md +++ b/docs/script.md @@ -371,46 +371,48 @@ def fib( x ) { ## Closures -Briefly, a closure is a block of code that can be passed as an argument to a function. Thus, you can define a chunk of code and then pass it around as if it were a string or an integer. +A closure is a function that can be used like a regular value. Typically, closures are passed as arguments to *higher-order functions* to express computations in a declarative manner. -More formally, you can create functions that are defined as *first-class objects*. +For example: ```groovy -square = { it * it } +square = { v -> v * v } ``` -The curly brackets around the expression `it * it` tells the script interpreter to treat this expression as code. The `it` identifier is an implicit variable that represents the value that is passed to the function when it is invoked. +The above example defines a closure, which takes one parameter named `v` and returns the "square" of `v` (`v * v`), and assigns the closure to the variable `square`. -Once compiled the function object is assigned to the variable `square` as any other variable assignments shown previously. Now we can do something like this: +Now we can call `square` like a function: ```groovy println square(9) ``` -and get the value 81. +which prints `81`. -This is not very interesting until we find that we can pass the function `square` as an argument to other functions or methods. Some built-in functions take a function like this as an argument. One example is the `collect` method on lists: +The main use case for a closure, however, is as an argument to a higher-order function: ```groovy [ 1, 2, 3, 4 ].collect(square) ``` -This expression says: Create an array with the values 1, 2, 3 and 4, then call its `collect` method, passing in the closure we defined above. 
The `collect` method runs through each item in the array, calls the closure on the item, then puts the result in a new array, resulting in: +The `collect` method of a list applies a mapping function to each value in the list and produces a new list. The above example produces: ```groovy [ 1, 4, 9, 16 ] ``` -For more methods that you can call with closures as arguments, see the [Groovy GDK documentation](http://docs.groovy-lang.org/latest/html/groovy-jdk/). +The example can be expressed more concisely as: + +```groovy +[ 1, 2, 3, 4 ].collect { v -> v * v } +``` -By default, closures take a single parameter called `it`, but you can also create closures with multiple, custom-named parameters. For example, the method `Map.each()` can take a closure with two arguments, to which it binds the `key` and the associated `value` for each key-value pair in the `Map`. Here, we use the obvious variable names `key` and `value` in our closure: +Another example is the `each` method of a map, which takes a closure with two arguments corresponding to the key and value of each map entry: ```groovy -printMapClosure = { key, value -> +[ "Yue" : "Wu", "Mark" : "Williams", "Sudha" : "Kumari" ].each { key, value -> println "$key = $value" } - -[ "Yue" : "Wu", "Mark" : "Williams", "Sudha" : "Kumari" ].each(printMapClosure) ``` Prints: @@ -421,31 +423,41 @@ Mark = Williams Sudha = Kumari ``` -Closures can also access variables outside of their scope, and they can be used anonymously, that is without assigning them to a variable. 
Here is an example that demonstrates both of these things: +Closures can access variables outside of their scope: ```groovy -myMap = ["China": 1, "India": 2, "USA": 3] +counts = ["China": 1, "India": 2, "USA": 3] result = 0 -myMap.keySet().each { result += myMap[it] } +counts.keySet().each { v -> + result += counts[v] +} println result ``` -A closure can also declare local variables that exist only for the lifetime of the closure: +A closure can also declare local variables that exist only for the lifetime of each closure invocation: ```groovy result = 0 -myMap.keySet().each { - def count = myMap[it] - result += count +counts.keySet().each { v -> + def count = counts[v] + result += count } ``` :::{warning} -Local variables should be declared using a qualifier such as `def` or a type name, otherwise they will be interpreted as global variables, which could lead to a {ref}`race condition `. +Local variables should be declared using `def`, otherwise they will be interpreted as global variables, which could lead to a {ref}`race condition `. ::: +While the `each` method is a convenient way to iterate through a collection and build up some result, a more idiomatic way to do this is to use the `inject` method: + +```groovy +result = counts.values().inject { sum, v -> sum + v } +``` + +This way, the closure is fully "self-contained" because it doesn't access or mutate any variables outside of its scope. 
+ Learn more about closures in the [Groovy documentation](http://groovy-lang.org/closures.html) ## Syntax sugar @@ -472,20 +484,20 @@ It is especially useful when calling a function with a closure parameter: ```groovy // full syntax -[1, 2, 3].each({ println it }) +[1, 2, 3].each({ v -> println v }) // shorthand -[1, 2, 3].each { println it } +[1, 2, 3].each { v -> println v } ``` If the last argument is a closure, the closure can be written outside of the parentheses: ```groovy // full syntax -[1, 2, 3].inject('result:', { accum, v -> accum + ' ' + v }) +[1, 2, 3].inject('result:', { acc, v -> acc + ' ' + v }) // shorthand -[1, 2, 3].inject('result:') { accum, v -> accum + ' ' + v } +[1, 2, 3].inject('result:') { acc, v -> acc + ' ' + v } ``` :::{note} diff --git a/docs/snippets/branch-criteria.nf b/docs/snippets/branch-criteria.nf index 9730317433..741d3210b2 100644 --- a/docs/snippets/branch-criteria.nf +++ b/docs/snippets/branch-criteria.nf @@ -1,12 +1,12 @@ -def criteria = branchCriteria { - small: it < 10 - large: it > 10 +def criteria = branchCriteria { v -> + small: v < 10 + large: v > 10 } Channel.of(1, 2, 30).branch(criteria).set { ch1 } Channel.of(10, 20, 3).branch(criteria).set { ch2 } -ch1.small.view { "$it is small" } -ch1.large.view { "$it is large" } -ch2.small.view { "$it is small" } -ch2.large.view { "$it is large" } \ No newline at end of file +ch1.small.view { v -> "$v is small" } +ch1.large.view { v -> "$v is large" } +ch2.small.view { v -> "$v is small" } +ch2.large.view { v -> "$v is large" } \ No newline at end of file diff --git a/docs/snippets/branch-with-fallback.nf b/docs/snippets/branch-with-fallback.nf index b9ed430843..4d4f8f3168 100644 --- a/docs/snippets/branch-with-fallback.nf +++ b/docs/snippets/branch-with-fallback.nf @@ -1,11 +1,11 @@ Channel.of(1, 2, 3, 40, 50) - .branch { - small: it < 10 - large: it < 50 + .branch { v -> + small: v < 10 + large: v < 50 other: true } .set { result } -result.small.view { "$it is small" } 
-result.large.view { "$it is large" } -result.other.view { "$it is other" } \ No newline at end of file +result.small.view { v -> "$v is small" } +result.large.view { v -> "$v is large" } +result.other.view { v -> "$v is other" } \ No newline at end of file diff --git a/docs/snippets/branch-with-mapper.nf b/docs/snippets/branch-with-mapper.nf index b7e98adb1a..b22bb36009 100644 --- a/docs/snippets/branch-with-mapper.nf +++ b/docs/snippets/branch-with-mapper.nf @@ -1,16 +1,16 @@ Channel.of(1, 2, 3, 40, 50) - .branch { - foo: it < 10 - return it+2 + .branch { v -> + foo: v < 10 + return v + 2 - bar: it < 50 - return it-2 + bar: v < 50 + return v - 2 other: true return 0 } .set { result } -result.foo.view { "$it is foo" } -result.bar.view { "$it is bar" } -result.other.view { "$it is other" } \ No newline at end of file +result.foo.view { v -> "$v is foo" } +result.bar.view { v -> "$v is bar" } +result.other.view { v -> "$v is other" } \ No newline at end of file diff --git a/docs/snippets/branch.nf b/docs/snippets/branch.nf index 973fd46800..98a03d5df2 100644 --- a/docs/snippets/branch.nf +++ b/docs/snippets/branch.nf @@ -1,9 +1,9 @@ Channel.of(1, 2, 3, 40, 50) - .branch { - small: it < 10 - large: it > 10 + .branch { v -> + small: v < 10 + large: v > 10 } .set { result } -result.small.view { "$it is small" } -result.large.view { "$it is large" } \ No newline at end of file +result.small.view { v -> "$v is small" } +result.large.view { v -> "$v is large" } \ No newline at end of file diff --git a/docs/snippets/buffer-with-closing.nf b/docs/snippets/buffer-with-closing.nf index 36739fecf8..ebe119ff59 100644 --- a/docs/snippets/buffer-with-closing.nf +++ b/docs/snippets/buffer-with-closing.nf @@ -1,3 +1,3 @@ Channel.of( 1, 2, 3, 1, 2, 3 ) - .buffer { it == 2 } + .buffer { v -> v == 2 } .view() \ No newline at end of file diff --git a/docs/snippets/collect-with-mapper.nf b/docs/snippets/collect-with-mapper.nf index ba9eda1ab3..b8908a7984 100644 --- 
a/docs/snippets/collect-with-mapper.nf +++ b/docs/snippets/collect-with-mapper.nf @@ -1,3 +1,3 @@ Channel.of( 'hello', 'ciao', 'bonjour' ) - .collect { it.length() } + .collect { v -> v.length() } .view() \ No newline at end of file diff --git a/docs/snippets/collectfile-closure.nf b/docs/snippets/collectfile-closure.nf index c31e4e45f6..3bec5e06fa 100644 --- a/docs/snippets/collectfile-closure.nf +++ b/docs/snippets/collectfile-closure.nf @@ -2,7 +2,7 @@ Channel.of('Hola', 'Ciao', 'Hello', 'Bonjour', 'Halo') .collectFile { item -> [ "${item[0]}.txt", item + '\n' ] } - .subscribe { - println "File '${it.name}' contains:" - println it.text + .subscribe { txt -> + println "File '${txt.name}' contains:" + println txt.text } \ No newline at end of file diff --git a/docs/snippets/collectfile.nf b/docs/snippets/collectfile.nf index c2295baa4c..00dc5342b2 100644 --- a/docs/snippets/collectfile.nf +++ b/docs/snippets/collectfile.nf @@ -1,6 +1,6 @@ Channel.of('alpha', 'beta', 'gamma') .collectFile(name: 'sample.txt', newLine: true) - .subscribe { - println "Entries are saved to file: $it" - println "File content is: ${it.text}" + .subscribe { txt -> + println "Entries are saved to file: $txt" + println "File content is: ${txt.text}" } \ No newline at end of file diff --git a/docs/snippets/count-with-filter-closure.nf b/docs/snippets/count-with-filter-closure.nf index 7282cce013..afed9721eb 100644 --- a/docs/snippets/count-with-filter-closure.nf +++ b/docs/snippets/count-with-filter-closure.nf @@ -1,3 +1,3 @@ Channel.of('a', 'c', 'c', 'q', 'b') - .count { it <= 'c' } + .count { v -> v <= 'c' } .view() \ No newline at end of file diff --git a/docs/snippets/cross-with-mapper.nf b/docs/snippets/cross-with-mapper.nf index 7e20c3d6cc..4234e9cd06 100644 --- a/docs/snippets/cross-with-mapper.nf +++ b/docs/snippets/cross-with-mapper.nf @@ -1,4 +1,4 @@ source = Channel.of( [1, 'alpha'], [2, 'beta'] ) target = Channel.of( [1, 'a'], [1, 'b'], [2, 'a'], [2, 'b'] ) -source .cross(target) 
{ it[1][0] } .view() \ No newline at end of file +source .cross(target) { v -> v[1][0] } .view() \ No newline at end of file diff --git a/docs/snippets/distinct-with-mapper.nf b/docs/snippets/distinct-with-mapper.nf index b35422e4ee..386ee152ca 100644 --- a/docs/snippets/distinct-with-mapper.nf +++ b/docs/snippets/distinct-with-mapper.nf @@ -1,3 +1,3 @@ Channel.of( 1, 1, 2, 2, 2, 3, 1, 1, 2, 4, 6 ) - .distinct { it % 2 } + .distinct { v -> v % 2 } .view() \ No newline at end of file diff --git a/docs/snippets/dump.nf b/docs/snippets/dump.nf index 7f3b482698..d2ae0686c6 100644 --- a/docs/snippets/dump.nf +++ b/docs/snippets/dump.nf @@ -1,7 +1,7 @@ Channel.of( 1, 2, 3 ) - .map { it+1 } + .map { v -> v + 1 } .dump(tag: 'foo') Channel.of( 1, 2, 3 ) - .map { it^2 } + .map { v -> v ^ 2 } .dump(tag: 'bar') \ No newline at end of file diff --git a/docs/snippets/filter-closure.nf b/docs/snippets/filter-closure.nf index 59578268e7..43fbfd6454 100644 --- a/docs/snippets/filter-closure.nf +++ b/docs/snippets/filter-closure.nf @@ -1,3 +1,3 @@ Channel.of( 1, 2, 3, 4, 5 ) - .filter { it % 2 == 1 } + .filter { v -> v % 2 == 1 } .view() \ No newline at end of file diff --git a/docs/snippets/first.nf b/docs/snippets/first.nf index d4ec5d15df..740c8fc132 100644 --- a/docs/snippets/first.nf +++ b/docs/snippets/first.nf @@ -15,5 +15,5 @@ Channel.of( 1, 2, 'a', 'b', 3 ) // emits the first item for which the predicate evaluates to true: 4 Channel.of( 1, 2, 3, 4, 5 ) - .first { it > 3 } + .first { v -> v > 3 } .view() \ No newline at end of file diff --git a/docs/snippets/flatmap-map.nf b/docs/snippets/flatmap-map.nf index fd78c38674..2bb3f01ab0 100644 --- a/docs/snippets/flatmap-map.nf +++ b/docs/snippets/flatmap-map.nf @@ -1,3 +1,3 @@ Channel.of( 1, 2, 3 ) .flatMap { n -> [ number: n, square: n*n, cube: n*n*n ] } - .view { "${it.key}: ${it.value}" } \ No newline at end of file + .view { entry -> "${entry.key}: ${entry.value}" } \ No newline at end of file diff --git 
a/docs/snippets/map.nf b/docs/snippets/map.nf index a4df86f706..3a6b302f7b 100644 --- a/docs/snippets/map.nf +++ b/docs/snippets/map.nf @@ -1,3 +1,3 @@ Channel.of( 1, 2, 3, 4, 5 ) - .map { it * it } + .map { v -> v * v } .view() \ No newline at end of file diff --git a/docs/snippets/max-with-comparator.nf b/docs/snippets/max-with-comparator.nf index 9be747b11c..26bb2df174 100644 --- a/docs/snippets/max-with-comparator.nf +++ b/docs/snippets/max-with-comparator.nf @@ -1,4 +1,4 @@ // comparator function Channel.of( "hello", "hi", "hey" ) - .max { a, b -> a.size() <=> b.size() } + .max { a, b -> a.length() <=> b.length() } .view() \ No newline at end of file diff --git a/docs/snippets/max-with-mapper.nf b/docs/snippets/max-with-mapper.nf index e047f1a4ea..c428a022e3 100644 --- a/docs/snippets/max-with-mapper.nf +++ b/docs/snippets/max-with-mapper.nf @@ -1,4 +1,4 @@ // mapping function Channel.of( "hello", "hi", "hey" ) - .max { it.size() } + .max { v -> v.length() } .view() \ No newline at end of file diff --git a/docs/snippets/max.nf b/docs/snippets/max.nf index d2b321416a..79e221b6cf 100644 --- a/docs/snippets/max.nf +++ b/docs/snippets/max.nf @@ -1,3 +1,3 @@ Channel.of( 8, 6, 2, 5 ) .max() - .view { "Max value is $it" } \ No newline at end of file + .view { v -> "Max value is $v" } \ No newline at end of file diff --git a/docs/snippets/min-with-comparator.nf b/docs/snippets/min-with-comparator.nf index 3392c95a0e..85fe62a26e 100644 --- a/docs/snippets/min-with-comparator.nf +++ b/docs/snippets/min-with-comparator.nf @@ -1,4 +1,4 @@ // comparator function Channel.of( "hello", "hi", "hey" ) - .min { a, b -> a.size() <=> b.size() } + .min { a, b -> a.length() <=> b.length() } .view() \ No newline at end of file diff --git a/docs/snippets/min-with-mapper.nf b/docs/snippets/min-with-mapper.nf index 32ab5287f3..957d993426 100644 --- a/docs/snippets/min-with-mapper.nf +++ b/docs/snippets/min-with-mapper.nf @@ -1,4 +1,4 @@ // mapping function Channel.of( "hello", "hi", 
"hey" ) - .min { it.size() } + .min { v -> v.length() } .view() \ No newline at end of file diff --git a/docs/snippets/min.nf b/docs/snippets/min.nf index 78ab2571fe..9cedee3a05 100644 --- a/docs/snippets/min.nf +++ b/docs/snippets/min.nf @@ -1,3 +1,3 @@ Channel.of( 8, 6, 2, 5 ) .min() - .view { "Min value is $it" } \ No newline at end of file + .view { v -> "Min value is $v" } \ No newline at end of file diff --git a/docs/snippets/multimap-criteria.nf b/docs/snippets/multimap-criteria.nf index 7b15740d97..4a00af187d 100644 --- a/docs/snippets/multimap-criteria.nf +++ b/docs/snippets/multimap-criteria.nf @@ -1,6 +1,6 @@ -def criteria = multiMapCriteria { - small: [it, it < 10] - large: [it, it > 10] +def criteria = multiMapCriteria { v -> + small: [v, v < 10] + large: [v, v > 10] } Channel.of(1, 2, 30).multiMap(criteria).set { ch1 } diff --git a/docs/snippets/multimap-shared.nf b/docs/snippets/multimap-shared.nf index 0573f53199..8d8831cbae 100644 --- a/docs/snippets/multimap-shared.nf +++ b/docs/snippets/multimap-shared.nf @@ -1,6 +1,6 @@ Channel.of( 1, 2, 3 ) - .multiMap { it -> foo: bar: it } + .multiMap { v -> foo: bar: v } .set { result } -result.foo.view { "foo $it" } -result.bar.view { "bar $it" } \ No newline at end of file +result.foo.view { v -> "foo $v" } +result.bar.view { v -> "bar $v" } \ No newline at end of file diff --git a/docs/snippets/multimap.nf b/docs/snippets/multimap.nf index 52ab934aca..67ce2a9ab5 100644 --- a/docs/snippets/multimap.nf +++ b/docs/snippets/multimap.nf @@ -1,9 +1,9 @@ Channel.of( 1, 2, 3, 4 ) - .multiMap { it -> - foo: it + 1 - bar: it * it + .multiMap { v -> + foo: v + 1 + bar: v * v } .set { result } -result.foo.view { "foo $it" } -result.bar.view { "bar $it" } \ No newline at end of file +result.foo.view { v -> "foo $v" } +result.bar.view { v -> "bar $v" } \ No newline at end of file diff --git a/docs/snippets/process-out-env.nf b/docs/snippets/process-out-env.nf index 848d2f9f82..8ac03a1880 100644 --- 
a/docs/snippets/process-out-env.nf +++ b/docs/snippets/process-out-env.nf @@ -1,6 +1,6 @@ process myTask { output: - env FOO + env 'FOO' script: ''' diff --git a/docs/snippets/process-stdout.nf b/docs/snippets/process-stdout.nf index 9e2e719896..803957c778 100644 --- a/docs/snippets/process-stdout.nf +++ b/docs/snippets/process-stdout.nf @@ -8,5 +8,5 @@ process sayHello { } workflow { - sayHello | view { "I say... $it" } + sayHello | view { message -> "I say... $message" } } \ No newline at end of file diff --git a/docs/snippets/reduce-with-initial-value.nf b/docs/snippets/reduce-with-initial-value.nf index 28427b32a8..afe25d7b2f 100644 --- a/docs/snippets/reduce-with-initial-value.nf +++ b/docs/snippets/reduce-with-initial-value.nf @@ -1,6 +1,6 @@ Channel.of( 1, 2, 3, 4, 5 ) - .reduce( 'result:' ) { accum, v -> - println accum - accum + ' ' + v + .reduce( 'result:' ) { acc, v -> + println acc + acc + ' ' + v } - .view { "final $it" } \ No newline at end of file + .view { result -> "final $result" } \ No newline at end of file diff --git a/docs/snippets/reduce.nf b/docs/snippets/reduce.nf index f4c0402269..34206f7562 100644 --- a/docs/snippets/reduce.nf +++ b/docs/snippets/reduce.nf @@ -3,4 +3,4 @@ Channel.of( 1, 2, 3, 4, 5 ) println "a: $a b: $b" a + b } - .view { "result = $it" } \ No newline at end of file + .view { result -> "result = $result" } \ No newline at end of file diff --git a/docs/snippets/splitjson-array.nf b/docs/snippets/splitjson-array.nf index 99b81ab1b8..b3b76b15a0 100644 --- a/docs/snippets/splitjson-array.nf +++ b/docs/snippets/splitjson-array.nf @@ -1,4 +1,4 @@ // Example with a JSON array Channel.of('[1, null, ["A", {}], true]') .splitJson() - .view{"Item: ${it}"} \ No newline at end of file + .view { v -> "Item: ${v}" } \ No newline at end of file diff --git a/docs/snippets/splitjson-object.nf b/docs/snippets/splitjson-object.nf index 8f403576f6..7958d4030c 100644 --- a/docs/snippets/splitjson-object.nf +++ 
b/docs/snippets/splitjson-object.nf @@ -1,4 +1,4 @@ // Example with a JSON object Channel.of('{"A": 1, "B": [1, 2, 3], "C": {"D": null}}') .splitJson() - .view{"Item: ${it}"} \ No newline at end of file + .view { v -> "Item: ${v}" } \ No newline at end of file diff --git a/docs/snippets/splitjson-with-path.nf b/docs/snippets/splitjson-with-path.nf index 15a652d53b..883eca07fc 100644 --- a/docs/snippets/splitjson-with-path.nf +++ b/docs/snippets/splitjson-with-path.nf @@ -1,3 +1,3 @@ Channel.of('{"A": 1, "B": [2, 3, {"C": {"D": null, "E": 4, "F": 5}}]}') .splitJson(path: 'B[2].C') - .view{"Item: ${it}"} \ No newline at end of file + .view { v -> "Item: ${v}" } \ No newline at end of file diff --git a/docs/snippets/subscribe-with-on-complete.nf b/docs/snippets/subscribe-with-on-complete.nf index fa44724378..4f2b73fb03 100644 --- a/docs/snippets/subscribe-with-on-complete.nf +++ b/docs/snippets/subscribe-with-on-complete.nf @@ -1,2 +1,2 @@ Channel.of( 1, 2, 3 ) - .subscribe onNext: { println it }, onComplete: { println 'Done' } \ No newline at end of file + .subscribe onNext: { v -> println v }, onComplete: { println 'Done' } \ No newline at end of file diff --git a/docs/snippets/subscribe-with-param.nf b/docs/snippets/subscribe-with-param.nf index 273e519ce3..26a82d0c18 100644 --- a/docs/snippets/subscribe-with-param.nf +++ b/docs/snippets/subscribe-with-param.nf @@ -1,5 +1,5 @@ Channel .of( 'alpha', 'beta', 'lambda' ) - .subscribe { String str -> - println "Got: ${str}; len: ${str.size()}" + .subscribe { str -> + println "Got: ${str}; len: ${str.length()}" } \ No newline at end of file diff --git a/docs/snippets/subscribe.nf b/docs/snippets/subscribe.nf index e5a4a3b8f0..4d436d7df3 100644 --- a/docs/snippets/subscribe.nf +++ b/docs/snippets/subscribe.nf @@ -2,4 +2,4 @@ source = Channel.of( 'alpha', 'beta', 'delta' ) // subscribe to the channel with a function that prints each value -source.subscribe { println "Got: $it" } \ No newline at end of file 
+source.subscribe { v -> println "Got: $v" } \ No newline at end of file diff --git a/docs/snippets/sum-with-mapper.nf b/docs/snippets/sum-with-mapper.nf index 9824e9bade..21c34fe47e 100644 --- a/docs/snippets/sum-with-mapper.nf +++ b/docs/snippets/sum-with-mapper.nf @@ -1,3 +1,3 @@ Channel.of( 4, 1, 7, 5 ) - .sum { it * it } - .view { "Square: $it" } \ No newline at end of file + .sum { v -> v * v } + .view { result -> "Square: $result" } \ No newline at end of file diff --git a/docs/snippets/sum.nf b/docs/snippets/sum.nf index add3d63e1f..adc740fc22 100644 --- a/docs/snippets/sum.nf +++ b/docs/snippets/sum.nf @@ -1,3 +1,3 @@ Channel.of( 8, 6, 2, 5 ) .sum() - .view { "The sum is $it" } \ No newline at end of file + .view { result -> "The sum is $result" } \ No newline at end of file diff --git a/docs/snippets/tap.nf b/docs/snippets/tap.nf index 41278f9711..d244d7f141 100644 --- a/docs/snippets/tap.nf +++ b/docs/snippets/tap.nf @@ -1,9 +1,9 @@ Channel.of( 'a', 'b', 'c' ) .tap { log1 } - .map { it * 2 } + .map { v -> v * 2 } .tap { log2 } - .map { it.toUpperCase() } - .view { "Result: $it" } + .map { v -> v.toUpperCase() } + .view { result -> "Result: $result" } -log1.view { "Log 1: $it" } -log2.view { "Log 2: $it" } \ No newline at end of file +log1.view { v -> "Log 1: $v" } +log2.view { v -> "Log 2: $v" } \ No newline at end of file diff --git a/docs/snippets/unique-with-mapper.nf b/docs/snippets/unique-with-mapper.nf index 3eee3005f0..c2ba1b4640 100644 --- a/docs/snippets/unique-with-mapper.nf +++ b/docs/snippets/unique-with-mapper.nf @@ -1,3 +1,3 @@ Channel.of( 1, 1, 2, 2, 2, 3, 1, 1, 2, 4, 6 ) - .unique { it % 2 } + .unique { v -> v % 2 } .view() \ No newline at end of file diff --git a/docs/snippets/until.nf b/docs/snippets/until.nf index 9fe22e26fa..528b2aa136 100644 --- a/docs/snippets/until.nf +++ b/docs/snippets/until.nf @@ -1,3 +1,3 @@ Channel.of( 3, 2, 1, 5, 1, 5 ) - .until { it == 5 } + .until { v -> v == 5 } .view() \ No newline at end of file diff 
--git a/docs/snippets/view-with-mapper.nf b/docs/snippets/view-with-mapper.nf index 643cfd4394..60344c4de3 100644 --- a/docs/snippets/view-with-mapper.nf +++ b/docs/snippets/view-with-mapper.nf @@ -1,3 +1,3 @@ Channel.of(1, 2, 3) - .map { it -> [it, it*it] } + .map { v -> [v, v*v] } .view { num, sqr -> "The square of $num is $sqr" } \ No newline at end of file diff --git a/docs/snippets/your-first-script.nf b/docs/snippets/your-first-script.nf index 6fe3710e9b..0217cae8fc 100644 --- a/docs/snippets/your-first-script.nf +++ b/docs/snippets/your-first-script.nf @@ -22,5 +22,5 @@ process convertToUpper { } workflow { - splitLetters | flatten | convertToUpper | view { it.trim() } + splitLetters | flatten | convertToUpper | view { v -> v.trim() } } diff --git a/docs/workflow.md b/docs/workflow.md index 36d9b09e53..ccfef17043 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -336,7 +336,7 @@ process foo { } workflow { - channel.from('Hello','Hola','Ciao') | foo | map { it.toUpperCase() } | view + channel.from('Hello','Hola','Ciao') | foo | map { v -> v.toUpperCase() } | view } ``` @@ -349,7 +349,7 @@ Statements can also be split across multiple lines for better readability: workflow { channel.from('Hello','Hola','Ciao') | foo - | map { it.toUpperCase() } + | map { v -> v.toUpperCase() } | view } ``` @@ -384,7 +384,7 @@ process bar { workflow { channel.from('Hello') - | map { it.reverse() } + | map { v -> v.reverse() } | (foo & bar) | mix | view diff --git a/tests/blast-dsl2.nf b/tests/blast-dsl2.nf index 7a8d96e25b..3b4f0c3063 100644 --- a/tests/blast-dsl2.nf +++ b/tests/blast-dsl2.nf @@ -4,17 +4,16 @@ params.db = "$baseDir/blast-db/tiny" params.query = "$baseDir/data/sample.fa" params.chunkSize = 1 -DB = file(params.db) - process blast { input: path 'seq.fa' + path db output: path 'out' """ - blastp -db $DB -query seq.fa -outfmt 6 > out + blastp -db $db -query seq.fa -outfmt 6 > out """ } @@ -32,10 +31,11 @@ process sort { workflow { - 
Channel.fromPath(params.query) | - splitFasta( by: params.chunkSize, file:true ) | - blast | - collect | - sort | - subscribe { println it } + ch_fasta = Channel.fromPath(params.query) + | splitFasta( by: params.chunkSize, file:true ) + + blast(ch_fasta, file(params.db)) + | collect + | sort + | subscribe { hits -> println hits } } diff --git a/tests/blast-parallel-dsl2.nf b/tests/blast-parallel-dsl2.nf index a2b3addbd7..2b44327651 100644 --- a/tests/blast-parallel-dsl2.nf +++ b/tests/blast-parallel-dsl2.nf @@ -4,17 +4,16 @@ params.db = "$baseDir/blast-db/tiny" params.query = "$baseDir/data/sample.fa" params.chunk = 1 -db = file(params.db) - /* * Extends a BLAST query for each entry in the 'chunks' channel */ process blast { input: path 'query.fa' + path db output: - path top_hits + path 'top_hits' """ blastp -db ${db} -query query.fa -outfmt 6 > blast_result @@ -28,11 +27,15 @@ process blast { process extract { input: path top_hits + path db output: path 'sequences' - "blastdbcmd -db ${db} -entry_batch top_hits | head -n 10 > sequences" + script: + """ + blastdbcmd -db ${db} -entry_batch top_hits | head -n 10 > sequences + """ } @@ -45,18 +48,24 @@ process align { input: path all_seq - "t_coffee $all_seq 2>/dev/null | tee align_result" + script: + """ + t_coffee $all_seq 2>/dev/null | tee align_result + """ } /* * main flow */ workflow { - Channel.fromPath(params.query) | - splitFasta(by: params.chunk, file:true) | - blast | - extract | - collectFile(name:'all_seq') | // Collect all hits to a single file called 'all_seq' - align + db = file(params.db) + + ch_fasta = Channel.fromPath(params.query) + | splitFasta(by: params.chunk, file:true) + + ch_sequences = blast(ch_fasta, db) + extract(ch_sequences, db) + | collectFile(name:'all_seq') // Collect all hits to a single file called 'all_seq' + | align } diff --git a/tests/collect_and_merge.nf b/tests/collect_and_merge.nf index b77a742373..853afc85fb 100644 --- a/tests/collect_and_merge.nf +++ 
b/tests/collect_and_merge.nf @@ -44,7 +44,7 @@ process merge { debug true input: - tuple val(barcode), val(seq_id), file(bam: 'bam?'), file(bai: 'bai?') + tuple val(barcode), val(seq_id), path(bam), path(bai) """ echo barcode: $barcode diff --git a/tests/complex-names-dsl2.nf b/tests/complex-names-dsl2.nf index 90e6cb0fa5..bd44448768 100644 --- a/tests/complex-names-dsl2.nf +++ b/tests/complex-names-dsl2.nf @@ -12,7 +12,7 @@ process foo { path '.alpha' script: - $/ + """ echo A > hello.txt echo B > sample.zip echo C > sample.html @@ -24,7 +24,7 @@ process foo { echo 3 > f3.fa mkdir .alpha echo "Hello world!" > .alpha/hello.txt - /$ + """ } process bar { @@ -34,10 +34,10 @@ process bar { path '*' script: - $/ + """ cat .alpha/hello.txt [ `cat * | grep -c ''` == 9 ] || false - /$ + """ } /* diff --git a/tests/config-labels.included b/tests/config-labels-included.config similarity index 100% rename from tests/config-labels.included rename to tests/config-labels-included.config diff --git a/tests/config-labels.config b/tests/config-labels.config index 294fa3900a..51af5f4f80 100644 --- a/tests/config-labels.config +++ b/tests/config-labels.config @@ -32,6 +32,6 @@ profiles { } test3 { - includeConfig 'config-labels.included' + includeConfig 'config-labels-included.config' } } diff --git a/tests/config-labels.nf b/tests/config-labels.nf index c152e9d76c..7f261177bf 100644 --- a/tests/config-labels.nf +++ b/tests/config-labels.nf @@ -23,30 +23,30 @@ workflow { process alpha { debug true - / + """ echo alpha memry: ${task.memory} echo alpha queue: ${task.queue} - / + """ } process beta { debug true label 'small' - / + """ echo beta memry: ${task.memory} echo beta queue: ${task.queue} - / + """ } process delta { debug true label 'big' - / + """ echo delta memry: ${task.memory} echo delta queue: ${task.queue} - / + """ } process gamma { @@ -55,8 +55,8 @@ process gamma { memory 40.MB queue 'foo' - / + """ echo gamma memry: ${task.memory} echo gamma queue: ${task.queue} - / + 
""" } diff --git a/tests/dynamic-filename.nf b/tests/dynamic-filename.nf index 7a3b1c1346..ff0e849dd5 100644 --- a/tests/dynamic-filename.nf +++ b/tests/dynamic-filename.nf @@ -17,9 +17,6 @@ params.prefix = 'my' -data = 'Hello\n' -list = ['alpha', 'delta', 'gamma', 'omega'] - process foo { input: @@ -36,5 +33,7 @@ process foo { } workflow { + data = 'Hello\n' + list = ['alpha', 'delta', 'gamma', 'omega'] foo(list, data) | subscribe { println "~ Saving ${it.name}"; it.copyTo('.') } } diff --git a/tests/env-out.nf b/tests/env-out.nf index e33b6e297d..749d919081 100644 --- a/tests/env-out.nf +++ b/tests/env-out.nf @@ -17,14 +17,14 @@ process foo { output: - env FOO + env 'FOO' /FOO=Hello/ } process bar { debug true input: - env FOO + env 'FOO' 'echo "bar says $FOO"' } diff --git a/tests/env2.nf b/tests/env2.nf index 9f4cc8dfb4..23df6bfca9 100644 --- a/tests/env2.nf +++ b/tests/env2.nf @@ -19,7 +19,7 @@ process printEnv { debug true input: - env HELLO + env 'HELLO' ''' echo $HELLO world! diff --git a/tests/error-finish.nf b/tests/error-finish.nf index 8f70d60605..88d2832fa1 100644 --- a/tests/error-finish.nf +++ b/tests/error-finish.nf @@ -42,12 +42,11 @@ process bar { ''' } - -workflow.onError { - println "success: $workflow.success" - println "exitStatus: $workflow.exitStatus" -} - workflow { foo([1,2,3]) | bar + + workflow.onError { + println "success: $workflow.success" + println "exitStatus: $workflow.exitStatus" + } } diff --git a/tests/files.nf b/tests/files.nf index bda693dcd5..8e6c09bf27 100644 --- a/tests/files.nf +++ b/tests/files.nf @@ -16,7 +16,6 @@ */ params.in = "$baseDir/data/sample.fa" -SPLIT = (System.properties['os.name'] == 'Mac OS X' ? 'gcsplit' : 'csplit') process split { input: @@ -25,6 +24,8 @@ process split { output: path 'seq_*' + script: + SPLIT = (System.properties['os.name'] == 'Mac OS X' ? 
'gcsplit' : 'csplit') """ $SPLIT query.fa '%^>%' '/^>/' '{*}' -f seq_ """ diff --git a/tests/output-dsl.nf b/tests/output-dsl.nf index 22d9cea365..908650405f 100644 --- a/tests/output-dsl.nf +++ b/tests/output-dsl.nf @@ -26,6 +26,7 @@ process align { path("*.bam") path("${x}.bai") + script: """ echo ${x} > ${x}.bam echo ${x} | rev > ${x}.bai @@ -40,6 +41,7 @@ process my_combine { output: path 'result.txt' + script: """ cat $bamfile > result.txt cat $baifile >> result.txt @@ -50,6 +52,7 @@ process foo { output: path 'xxx' + script: ''' mkdir xxx touch xxx/A @@ -59,6 +62,7 @@ process foo { } workflow { + main: def input = Channel.of('alpha','beta','delta') align(input) diff --git a/tests/output-globs.nf b/tests/output-globs.nf index 836d3323a8..4a3da72312 100644 --- a/tests/output-globs.nf +++ b/tests/output-globs.nf @@ -1,19 +1,21 @@ -def CMD = """ - mkdir -p a/a b/b c/c - touch a/1.txt - touch b/1.txt - touch c/1.txt - touch a/a/2.txt - touch b/b/2.txt - touch c/c/2.txt - """ +def getCmd() { + """ + mkdir -p a/a b/b c/c + touch a/1.txt + touch b/1.txt + touch c/1.txt + touch a/a/2.txt + touch b/b/2.txt + touch c/c/2.txt + """ +} process foo { output: file("a/*/*.txt") script: - CMD + getCmd() } process bar { @@ -21,7 +23,7 @@ process bar { output: file("a/*/*.txt") script: - CMD + getCmd() } workflow { diff --git a/tests/output-val-dsl2.nf b/tests/output-val-dsl2.nf index e769f9e4d1..3bb587b5d3 100644 --- a/tests/output-val-dsl2.nf +++ b/tests/output-val-dsl2.nf @@ -1,8 +1,5 @@ #!/usr/bin/env nextflow -x = 100 -y = 200 - process foo { input: path fastq @@ -14,6 +11,7 @@ process foo { val y script: + x = 100 y = 'two hundred' """ echo bar diff --git a/tests/profiles.config b/tests/profiles.config index b74a3f87ef..95f0ac4ee9 100644 --- a/tests/profiles.config +++ b/tests/profiles.config @@ -16,8 +16,7 @@ echo = true -def x = 'delta' -includeConfig "${x}.config" +includeConfig "${'delta'}.config" profiles { diff --git a/tests/publish-saveas.nf 
b/tests/publish-saveas.nf index 7b760db1cb..047550f527 100644 --- a/tests/publish-saveas.nf +++ b/tests/publish-saveas.nf @@ -23,12 +23,12 @@ def rule( file ) { return null if( file == 'file_3.txt' ) - return "$PWD/results/gamma/$file" + return "${System.getenv('PWD')}/results/gamma/$file" } process foo { - publishDir path: 'results', saveAs: this.&rule + publishDir path: 'results', saveAs: { file -> rule(file) } input: each x output: path '*.txt' diff --git a/tests/race.nf b/tests/race.nf index 090a90c842..b006e76625 100644 --- a/tests/race.nf +++ b/tests/race.nf @@ -15,9 +15,9 @@ * limitations under the License. */ -seqs = channel.fromList(file("$baseDir/data/seqs/*.fastq")) - workflow { + seqs = channel.fromList(file("$baseDir/data/seqs/*.fastq")) + seqs | proc1 seqs | proc2 seqs | proc3 diff --git a/tests/rnaseq-toy-dsl2.nf b/tests/rnaseq-toy-dsl2.nf index 94e76aae20..c10673205b 100644 --- a/tests/rnaseq-toy-dsl2.nf +++ b/tests/rnaseq-toy-dsl2.nf @@ -58,15 +58,12 @@ process makeTranscript { """ } -/* - * main flow - */ -read_pairs = Channel.fromFilePairs( params.reads, checkIfExists: true ) - /* * main flow */ workflow { + read_pairs = Channel.fromFilePairs( params.reads, checkIfExists: true ) + buildIndex(params.genome) mapping(params.genome, buildIndex.out, read_pairs) makeTranscript(mapping.out) diff --git a/tests/sets.nf b/tests/sets.nf index 75bdeed69d..1b56ac1c50 100644 --- a/tests/sets.nf +++ b/tests/sets.nf @@ -21,10 +21,10 @@ process touch { output: tuple val(id), path('file*') - / + """ echo Creating $id touch $fileName - / + """ } process makeFiles { @@ -34,10 +34,10 @@ process makeFiles { output: tuple val(id), path('*') - / - cp file_x copy_$id - touch beta_$id - / + """ + cp file_x copy_$id + touch beta_$id + """ } diff --git a/tests/singleton.nf b/tests/singleton.nf index 70f4bde70a..7fd4418b13 100644 --- a/tests/singleton.nf +++ b/tests/singleton.nf @@ -17,7 +17,7 @@ process foo { output: - file x + file 'x' ''' echo -n Hello > x diff --git 
a/tests/subworkflow-dsl2.nf b/tests/subworkflow-dsl2.nf index 27d70ccc09..9ce483f8b3 100644 --- a/tests/subworkflow-dsl2.nf +++ b/tests/subworkflow-dsl2.nf @@ -33,6 +33,7 @@ workflow flow2 { } workflow test1 { + main: flow1() flow2() ch1 = flow1.out.result @@ -41,7 +42,9 @@ workflow test1 { } workflow test2 { - emit: ( flow1 & flow2 ) | mix | collectFile(name:"$PWD/test2.txt") + main: + result = ( flow1 & flow2 ) | mix | collectFile(name:"${System.getenv('PWD')}/test2.txt") + emit: result } workflow { diff --git a/tests/task-escape-path-dsl2.nf b/tests/task-escape-path-dsl2.nf index 29c89a7495..af298f1561 100644 --- a/tests/task-escape-path-dsl2.nf +++ b/tests/task-escape-path-dsl2.nf @@ -1,8 +1,9 @@ process foo1 { debug true - input: path x - input: path y + input: + path x + path y """ echo "FOO1: ${x}; ${y}" """ @@ -10,8 +11,9 @@ process foo1 { process foo2 { debug true - input: path x - input: path y + input: + path x + path y script: """ echo "FOO2: ${x}; ${y}" @@ -20,8 +22,9 @@ process foo2 { process foo3 { debug true - input: path x - input: path y + input: + path x + path y shell: ''' echo "FOO3: !{x}; !{y}" @@ -30,8 +33,9 @@ process foo3 { process foo4 { debug true - input: path x - input: path y + input: + path x + path y script: template("$baseDir/task-escape-path-dsl2.sh") } diff --git a/tests/task-retry.nf b/tests/task-retry.nf index 174db33737..16ca73b369 100644 --- a/tests/task-retry.nf +++ b/tests/task-retry.nf @@ -29,13 +29,13 @@ process foo { script: """ - if [[ -f $PWD/marker ]]; then + if [[ -f marker ]]; then echo DONE - mem: $task.memory - time: $task.time exit 0 else echo FAIL - touch $PWD/marker - exit 5; + touch marker + exit 5 fi """ diff --git a/tests/template-dyn.nf b/tests/template-dyn.nf index fb738ab010..a3c0b053b0 100644 --- a/tests/template-dyn.nf +++ b/tests/template-dyn.nf @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -list = 'alpha,delta,gamma'.tokenize(',') - workflow { + list = 'alpha,delta,gamma'.tokenize(',') + foo(list) bar(list) } diff --git a/tests/tuples-dsl2.nf b/tests/tuples-dsl2.nf index ceeb5e1d24..ed3e2e25c4 100644 --- a/tests/tuples-dsl2.nf +++ b/tests/tuples-dsl2.nf @@ -8,10 +8,10 @@ process touch { tuple val(id), path('file*') - / + """ echo Creating $id touch $fileName - / + """ } process makeFiles { @@ -21,18 +21,16 @@ process makeFiles { output: tuple val(id), path('*') - / - cp file_x copy_$id - touch beta_$id - / + """ + cp file_x copy_$id + touch beta_$id + """ } workflow { - - Channel - .from( ['a', 'file1'], ['b','file2'] ) \ - | touch \ - | makeFiles \ - | flatten \ - | subscribe { println it } + Channel.from( ['a', 'file1'], ['b','file2'] ) + | touch + | makeFiles + | flatten + | subscribe { println it } } diff --git a/tests/watch-dsl2.nf b/tests/watch-dsl2.nf index 4718f9c85d..92d5cb5ed7 100644 --- a/tests/watch-dsl2.nf +++ b/tests/watch-dsl2.nf @@ -9,7 +9,7 @@ process align { path fasta output: - path aln + path 'aln' """ t_coffee -in $fasta 1> aln @@ -22,12 +22,10 @@ process align { workflow { - Channel - .watchPath(params.files, params.events) \ - | align \ - | subscribe { - println '------' - println it.text - } - + Channel.watchPath(params.files, params.events) + | align + | subscribe { + println '------' + println it.text + } } diff --git a/tests/when-block.nf b/tests/when-block.nf index 3b6e685b7f..edda552ec2 100644 --- a/tests/when-block.nf +++ b/tests/when-block.nf @@ -15,17 +15,20 @@ * limitations under the License. 
*/ -items = [0,1,2,3,4] -decode = ['zero','one','two','three','fourth'] +def decode(i) { + ['zero','one','two','three','fourth'][i] +} workflow { + items = [0,1,2,3,4] + channel.fromList(items) | foo channel.fromList(items) | bar } process foo { debug true - tag "${decode[x]}" + tag "${decode(x)}" input: val x @@ -41,7 +44,7 @@ process foo { process bar { debug true - tag "${decode[x]}" + tag "${decode(x)}" input: val x diff --git a/tests/workdir-with-blank.nf b/tests/workdir-with-blank.nf index bfb6b99121..d9105b2d38 100644 --- a/tests/workdir-with-blank.nf +++ b/tests/workdir-with-blank.nf @@ -20,7 +20,7 @@ process foo { each x output: - file result_data + file 'result_data' """ echo Hello $x > result_data diff --git a/validation/test-complexpaths.nf b/validation/test-complexpaths.nf index 2050313196..f3b66b2abd 100644 --- a/validation/test-complexpaths.nf +++ b/validation/test-complexpaths.nf @@ -1,5 +1,5 @@ workflow { - foo | mix | collect | bar + foo | mix | collect | bar } process foo { @@ -14,7 +14,7 @@ process foo { file '.alpha' script: - $/ + """ echo A > hello.txt echo B > sample.zip echo C > sample.html @@ -26,7 +26,7 @@ process foo { echo 3 > f3.fa mkdir .alpha echo "Hello world!" 
> .alpha/hello.txt - /$ + """ } process bar { @@ -36,8 +36,8 @@ process bar { file '*' script: - $/ + """ cat .alpha/hello.txt [ `cat * | grep -c ''` == 9 ] || false - /$ + """ } diff --git a/validation/test-overwrite.nf b/validation/test-overwrite.nf index db4ab02b00..aa9adfb326 100644 --- a/validation/test-overwrite.nf +++ b/validation/test-overwrite.nf @@ -3,7 +3,7 @@ workflow { } process foo { - container = 'quay.io/nextflow/bash' + container 'quay.io/nextflow/bash' publishDir "gs://rnaseq-nf/scratch/tests", overwrite: true output: path 'hello.txt' From 189b560edc519ab78712c4e8c995daff59de13e7 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 26 Sep 2024 05:44:38 -0500 Subject: [PATCH 02/28] Remove some references to Groovy Signed-off-by: Ben Sherman --- docs/config.md | 10 +++++++- docs/developer/plugins.md | 2 +- docs/dsl1.md | 2 +- docs/overview.md | 6 ++--- docs/process.md | 12 +++++---- docs/script.md | 51 +++++++++++--------------------------- docs/sharing.md | 2 +- docs/working-with-files.md | 2 +- 8 files changed, 36 insertions(+), 51 deletions(-) diff --git a/docs/config.md b/docs/config.md index b83b4323d2..9b5388d857 100644 --- a/docs/config.md +++ b/docs/config.md @@ -48,7 +48,15 @@ The same mechanism allows you to access environment variables defined in the hos ### Comments -Configuration files use the same conventions for comments used by the Groovy or Java programming languages. Thus, use `//` to comment a single line, or `/*` .. `*/` to comment a block on multiple lines. +You can use `//` to comment a single line, or `/* ... 
*/` to comment a block on multiple lines: + +```groovy +// single line comment + +/* + * multi-line comment + */ +``` ### Includes diff --git a/docs/developer/plugins.md b/docs/developer/plugins.md index 0b6c77888a..47e551a565 100644 --- a/docs/developer/plugins.md +++ b/docs/developer/plugins.md @@ -151,7 +151,7 @@ Refer to the source code of Nextflow's built-in executors to see how to implemen :::{versionadded} 22.09.0-edge ::: -Plugins can define custom Groovy functions, which can then be included into Nextflow pipelines. +Plugins can define custom functions, which can then be included into Nextflow pipelines. To implement a custom function, create a class in your plugin that extends the `PluginExtensionPoint` class, and implement your function with the `Function` annotation: diff --git a/docs/dsl1.md b/docs/dsl1.md index 50cea72d4e..8ec246c970 100644 --- a/docs/dsl1.md +++ b/docs/dsl1.md @@ -88,7 +88,7 @@ In DSL1, the entire Nextflow pipeline must be defined in a single file (e.g. `ma DSL2 introduces the concept of "module scripts" (or "modules" for short), which are Nextflow scripts that can be "included" by other scripts. While modules are not essential to migrating to DSL2, nor are they mandatory in DSL2 by any means, modules can help you organize a large pipeline into multiple smaller files, and take advantage of modules created by others. Check out the {ref}`module-page` to get started. :::{note} -With DSL2, the Groovy shell used by Nextflow also imposes a 64KB size limit on pipeline scripts, so if your DSL1 script is very large, you may need to split your script into modules anyway to avoid this limit. +With DSL2, Nextflow scripts cannot exceed 64KB in size, so if your DSL1 script is very large, you may need to split your script into modules anyway to avoid this limit. 
::: ## Deprecations diff --git a/docs/overview.md b/docs/overview.md index f848facde4..9a9a6f4da2 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -95,11 +95,9 @@ Read the {ref}`executor-page` to learn more about the Nextflow executors. ## Scripting language -Nextflow is designed to have a minimal learning curve, without having to pick up a new programming language. In most cases, users can utilise their current skills to develop Nextflow workflows. However, it also provides a powerful scripting DSL. +Nextflow is a workflow language, based on [Java](https://en.wikipedia.org/wiki/Java_(programming_language)) and [Groovy](https://groovy-lang.org/), which is designed to make it as simple as possible to write scalable and reproducible pipelines. In most cases, users can leverage their existing programming skills to develop Nextflow pipelines, without the steep learning curve that usually comes with a new programming language. -Nextflow scripting is an extension of the [Groovy programming language](), which in turn is a super-set of the Java programming language. Groovy can be considered as Python for Java in that it simplifies the writing of code and is more approachable. - -Read the {ref}`script-page` section to learn about the Nextflow scripting language. +Read the {ref}`script-page` page to learn about the Nextflow scripting language. ## Configuration options diff --git a/docs/process.md b/docs/process.md index de5f1986d3..1a7736b81f 100644 --- a/docs/process.md +++ b/docs/process.md @@ -139,9 +139,9 @@ Since the actual location of the interpreter binary file can differ across platf ### Conditional scripts -So far, our `script` block has always been a simple string expression, but in reality, the `script` block is just Groovy code that returns a string. This means that you can write arbitrary Groovy code to determine the script to execute, as long as the final statement is a string (remember that the `return` keyword is optional in Groovy). 
+So far, the `script` block has just been a string, but in reality, the `script` block is like a function that returns a string. This means that you can write arbitrary code to determine the script, as long as the final statement is a string (remember that the `return` keyword is optional). -For example, you can use flow control statements (`if`, `switch`, etc) to execute a different script based on the process inputs. The only difference here is that you must explicitly declare the `script` guard, whereas before it was not required. Here is an example: +For example, you can use if-else statements to produce a different script based on the task inputs. The only difference here is that you must explicitly declare the `script` guard, whereas before it was not required. Here is an example: ```groovy mode = 'tcoffee' @@ -171,7 +171,7 @@ process align { } ``` -In the above example, the process will execute one of the script fragments depending on the value of the `mode` parameter. By default it will execute the `tcoffee` command, but changing the `mode` variable will cause a different branch to be executed. +In the above example, the process will execute one of several scripts depending on the value of the `mode` parameter. By default it will execute the `tcoffee` command. (process-template)= @@ -250,7 +250,7 @@ In the above example, `$USER` is treated as a Bash variable, while `!{str}` is t ### Native execution -Nextflow processes can also execute native Groovy code as the task itself, using the `exec` block. Whereas the `script` block defines a script to be executed, the `exec` block defines Groovy code to be executed directly. +Whereas the `script` block defines a script that is executed as a separate job, the `exec` block simply executes the code that it is given. For example: @@ -276,6 +276,8 @@ Hello Mr. a Hello Mr. 
c ``` +A native process is very similar to a {ref}`function <script-functions>`, but provides additional capabilities such as parallelism, caching, and progress logging. + (process-stub)= ## Stub @@ -492,7 +494,7 @@ In this case, `x.name` returns the file name with the parent directory (e.g. `my ### Multiple input files -A `path` input can also accept a collection of files instead of a single value. In this case, the input variable will be a Groovy list, and you can use it as such. +A `path` input can also accept a collection of files instead of a single value. In this case, the input variable will be a list, and you can use it as such. When the input has a fixed file name and a collection of files is received by the process, the file name will be appended with a numerical suffix representing its ordinal position in the list. For example: diff --git a/docs/script.md b/docs/script.md index fe3b2f2801..0dc404fe9f 100644 --- a/docs/script.md +++ b/docs/script.md @@ -2,13 +2,9 @@ # Scripts -Nextflow is a domain-specific language (DSL) based on Groovy, a general-purpose programming language for the Java virtual machine. Nextflow extends the Groovy syntax with features that ease the writing of computational pipelines in a declarative manner. +Nextflow is a workflow language that runs on the Java virtual machine (JVM). Nextflow's syntax is very similar to [Groovy](https://groovy-lang.org/), a scripting language for the JVM, but Nextflow is specialized for writing computational pipelines in a declarative manner. -For more background on Groovy, refer to these resources: - -- [Groovy User Guide](http://groovy-lang.org/documentation.html) -- [Groovy Cheat sheet](http://www.cheat-sheets.org/saved-copy/rc015-groovy_online.pdf) -- [Groovy in Action](http://www.manning.com/koenig2/) +Nextflow scripts can also make full use of the Java and Groovy standard libraries; see the {ref}`stdlib-page` page for more information. 
:::{warning} Nextflow uses UTF-8 as the default character encoding for source files. Make sure to use UTF-8 encoding when editing Nextflow scripts with your preferred text editor. @@ -218,7 +214,7 @@ result = myLongCmdline.execute().text In the preceding example, `blastp` and its `-in`, `-out`, `-db` and `-html` switches and their arguments are effectively a single line. :::{warning} -When using backslashes to continue a multi-line command, make sure to not put any spaces after the backslash, otherwise it will be interpreted by the Groovy lexer as an escaped space instead of a backslash, which will make your script incorrect. It will also print this warning: +When using backslashes to continue a multi-line command, make sure to not put any spaces after the backslash, otherwise it will be interpreted as an escaped space instead of a backslash, which will make your script incorrect. It will also print this warning: ``` unknown recognition error type: groovyjarjarantlr4.v4.runtime.LexerNoViableAltException @@ -277,6 +273,14 @@ println y // prints: nicenice ``` +To remove part of a string, simply replace it with a blank string: + +```groovy +z = 'Hello World!'.replaceFirst(/(?i)\s+Wo\w+/, '') +println z +// prints: Hello! +``` + ### Capturing groups You can match a pattern that includes groups. First create a matcher object with the `=~` operator. Then, you can index the matcher object to find the matches: `matcher[0]` returns a list representing the first match of the regular expression in the string. The first list element is the string that matches the entire regular expression, and the remaining elements are the strings that match each group. @@ -307,30 +311,7 @@ println patch // 3 println flavor // beta ``` -### Removing part of a string - -You can remove part of a `String` value using a regular expression pattern. 
The first match found is replaced with an empty String: - -```groovy -// define the regexp pattern -wordStartsWithGr = ~/(?i)\s+Gr\w+/ - -// apply and verify the result -('Hello Groovy world!' - wordStartsWithGr) == 'Hello world!' -('Hi Grails users' - wordStartsWithGr) == 'Hi users' -``` - -Remove the first 5-character word from a string: - -```groovy -assert ('Remove first match of 5 letter word' - ~/\b\w{5}\b/) == 'Remove match of 5 letter word' -``` - -Remove the first number with its trailing whitespace from a string: - -```groovy -assert ('Line contains 20 characters' - ~/\d+\s+/) == 'Line contains characters' -``` +(script-functions)= ## Functions @@ -462,9 +443,9 @@ Learn more about closures in the [Groovy documentation](http://groovy-lang.org/c ## Syntax sugar -Groovy provides several forms of "syntax sugar", or shorthands that can make your code easier to read. +Nextflow provides several forms of "syntax sugar", or shorthands that can make your code easier to read. -Some programming languages require every statement to be terminated by a semi-colon. In Groovy, semi-colons are optional, but they can still be used to write multiple statements on the same line: +Some programming languages require every statement to be terminated by a semi-colon. In Nextflow, semi-colons are optional, but they can still be used to write multiple statements on the same line: ```groovy println 'Hello!' ; println 'Hello again!' @@ -499,7 +480,3 @@ If the last argument is a closure, the closure can be written outside of the par // shorthand [1, 2, 3].inject('result:') { acc, v -> acc + ' ' + v } ``` - -:::{note} -In some cases, you might not be able to omit the parentheses because it would be syntactically ambiguous. You can use the `groovysh` REPL console to play around with Groovy and figure out what works. 
-::: diff --git a/docs/sharing.md b/docs/sharing.md index 021d0d768c..9e8cc38d98 100644 --- a/docs/sharing.md +++ b/docs/sharing.md @@ -117,7 +117,7 @@ For example, shebang definitions `#!/usr/bin/python` and `#!/usr/local/bin/pytho #### The `lib` directory -Any Groovy scripts or JAR files in the `lib` directory will be automatically loaded and made available to your pipeline scripts. The `lib` directory is a useful way to provide utility code or external libraries without cluttering the pipeline scripts. +Any Groovy scripts or Java libraries (JARs) in the `lib` directory will be automatically loaded and made available to your pipeline scripts. The `lib` directory is a useful way to provide utility code or external libraries without cluttering the pipeline scripts. ### Data diff --git a/docs/working-with-files.md b/docs/working-with-files.md index a52262ca98..dd7f278327 100644 --- a/docs/working-with-files.md +++ b/docs/working-with-files.md @@ -58,7 +58,7 @@ assert path.parent == '/some/path' ``` :::{tip} -In Groovy, any method that looks like `get*()` can also be accessed as a field. For example, `myFile.getName()` is equivalent to `myFile.name`, `myFile.getBaseName()` is equivalent to `myFile.baseName`, and so on. +When accessing an object property, any method that looks like `get*()` can also be accessed as a field. For example, `path.getName()` is equivalent to `path.name`, `path.getBaseName()` is equivalent to `path.baseName`, and so on. ::: See the {ref}`stdlib-types-path` reference for the list of available methods. 
From 24bb4031e5a72da2ee585f7d97fc00ec99227d0e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 26 Sep 2024 05:45:09 -0500 Subject: [PATCH 03/28] Rename "implicit workflow" -> "entry workflow" Signed-off-by: Ben Sherman --- docs/workflow.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/workflow.md b/docs/workflow.md index ccfef17043..4122bbc383 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -41,12 +41,12 @@ The `main:` label can be omitted if there are no `take:` or `emit:` blocks. Workflows were introduced in DSL2. If you are still using DSL1, see the {ref}`dsl1-page` page to learn how to migrate your Nextflow pipelines to DSL2. ::: -## Implicit workflow +## Entry workflow -A script can define a single workflow without a name (also known as the *implicit workflow*), which is the default entrypoint of the script. The `-entry` command line option can be used to execute a different workflow as the entrypoint at runtime. +A script can define a single workflow without a name (also known as the *entry workflow*), which is the default entrypoint of the script. The `-entry` command line option can be used to execute a different workflow as the entrypoint at runtime. :::{note} -Implicit workflow definitions are ignored when a script is included as a module. This way, a script can be written such that it can be either imported as a module or executed as a pipeline. +Entry workflow definitions are ignored when a script is included as a module. This way, a script can be written such that it can be either imported as a module or executed as a pipeline. ::: ## Named workflows @@ -82,7 +82,7 @@ workflow { ``` :::{tip} -The use of global variables and params in named workflows is discouraged because it breaks the modularity of the workflow. As a best practice, every workflow input should be explicitly defined as such in the `take:` block, and params should only be used in the implicit workflow. 
+The use of global variables and params in named workflows is discouraged because it breaks the modularity of the workflow. As a best practice, every workflow input should be explicitly defined as such in the `take:` block, and params should only be used in the entry workflow. ::: ## Workflow inputs (`take`) @@ -404,7 +404,7 @@ In the above snippet, the initial channel is piped to the {ref}`operator-map` op This feature requires the `nextflow.preview.output` feature flag to be enabled. ::: -A script may define the set of outputs that should be published by the implicit workflow, known as the workflow output definition: +A script may define the set of outputs that should be published by the entry workflow, known as the workflow output definition: ```groovy workflow { @@ -416,7 +416,7 @@ output { } ``` -The output definition must be defined after the implicit workflow. +The output definition must be defined after the entry workflow. ### Publishing channels From 5c0ec1d1e27a5c8b88635d7a2857c1d06f352a6c Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 27 Sep 2024 23:44:50 -0500 Subject: [PATCH 04/28] Initial syntax reference page Signed-off-by: Ben Sherman --- docs/index.md | 1 + docs/module.md | 2 + docs/process.md | 60 +-- docs/reference/cli.md | 3 + docs/reference/feature-flags.md | 2 +- docs/reference/operator.md | 4 +- docs/reference/stdlib.md | 27 ++ docs/reference/syntax.md | 831 ++++++++++++++++++++++++++++++++ docs/script.md | 81 +--- docs/workflow.md | 303 ++++++------ 10 files changed, 1020 insertions(+), 294 deletions(-) create mode 100644 docs/reference/syntax.md diff --git a/docs/index.md b/docs/index.md index 50d503411f..4b35977527 100644 --- a/docs/index.md +++ b/docs/index.md @@ -111,6 +111,7 @@ fusion :caption: Reference :maxdepth: 1 +reference/syntax reference/cli reference/config reference/env-vars diff --git a/docs/module.md b/docs/module.md index abf07a350b..66e577985c 100644 --- a/docs/module.md +++ b/docs/module.md @@ -96,6 +96,8 @@ 
workflow { } ``` +(module-params)= + ## Module parameters :::{deprecated} 24.07.0-edge diff --git a/docs/process.md b/docs/process.md index 1a7736b81f..ee707c1049 100644 --- a/docs/process.md +++ b/docs/process.md @@ -2,39 +2,23 @@ # Processes -In Nextflow, a **process** is the basic processing primitive to execute a user script. +In Nextflow, a **process** is a function that is specialized for executing scripts in a scalable and portable manner. -The process definition starts with the keyword `process`, followed by process name and finally the process body delimited by curly braces. The process body must contain a string which represents the command or, more generally, a script that is executed by it. A basic process looks like the following example: +Here is an example process definition: ```groovy process sayHello { + output: + path 'hello.txt' + + script: """ - echo 'Hello world!' > file + echo 'Hello world!' > hello.txt """ } ``` -A process may contain any of the following definition blocks: directives, inputs, outputs, when clause, and the process script. The syntax is defined as follows: - -``` -process < name > { - - [ directives ] - - input: - < process inputs > - - output: - < process outputs > - - when: - < condition > - - [script|shell|exec]: - < user script to be executed > - -} -``` +Refer to {ref}`syntax-process` in the syntax reference for a full description of the process syntax. (process-script)= @@ -276,7 +260,7 @@ Hello Mr. a Hello Mr. c ``` -A native process is very similar to a {ref}`function `, but provides additional capabilities such as parallelism, caching, and progress logging. +A native process is very similar to a {ref}`function `, but provides additional capabilities such as parallelism, caching, and progress logging. 
(process-stub)= @@ -1134,8 +1118,14 @@ In this example, the process is normally expected to produce an `output.txt` fil While this option can be used with any process output, it cannot be applied to individual elements of a [tuple](#output-tuples-tuple) output. The entire tuple must be optional or not optional. ::: +(process-when)= + ## When +:::{deprecated} 24.10.0 +Use conditional logic (e.g. `if` statement, {ref}`operator-filter` operator) in the calling workflow instead. +::: + The `when` block allows you to define a condition that must be satisfied in order to execute the process. The condition can be any expression that returns a boolean value. It can be useful to enable/disable the process execution depending on the state of various inputs and parameters. For example: @@ -1156,32 +1146,12 @@ process find { } ``` -:::{tip} -As a best practice, it is better to define such control flow logic in the workflow block, i.e. with an `if` statement or with channel operators, to make the process more portable. -::: - (process-directives)= ## Directives Directives are optional settings that affect the execution of the current process. -They must be entered at the top of the process body, before any other declaration blocks (`input`, `output`, etc), and have the following syntax: - -```groovy -// directive with simple value -name value - -// directive with list value -name arg1, arg2, arg3 - -// directive with map value -name key1: val1, key2: val2 - -// directive with value and options -name arg, opt1: val1, opt2: val2 -``` - By default, directives are evaluated when the process is defined. However, if the value is a dynamic string or closure, it will be evaluated separately for each task, which allows task-specific variables like `task` and `val` inputs to be used. Some directives are only supported by specific executors. Refer to the {ref}`executor-page` page for more information about each executor. 
diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 3d4ef1b8fd..92a356fb74 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -987,6 +987,9 @@ The `run` command is used to execute a local pipeline script or remote pipeline : Add the specified variable to execution environment. `-entry` +: :::{deprecated} 24.10.0 + Use params in the entry workflow to call different workflows from the command line. + ::: : Entry workflow to be executed. `-h, -help` diff --git a/docs/reference/feature-flags.md b/docs/reference/feature-flags.md index 616f1adfef..8af49f7d04 100644 --- a/docs/reference/feature-flags.md +++ b/docs/reference/feature-flags.md @@ -1,4 +1,4 @@ -(config-feature-flags)= +(feature-flags)= # Feature flags diff --git a/docs/reference/operator.md b/docs/reference/operator.md index 36f8f0d794..591e858d40 100644 --- a/docs/reference/operator.md +++ b/docs/reference/operator.md @@ -748,10 +748,10 @@ Available options: : The zero-based index of each item to use as the matching key. Can also be a list of indices, e.g. `by: [0, 2]` (default: `[0]`). `failOnDuplicate` -: When `true`, an error is reported when the operator receives multiple items from the same channel with the same key (default: `false`). Value is set to `true` if {ref}`strict mode ` is enabled. +: When `true`, an error is reported when the operator receives multiple items from the same channel with the same key (default: `false`). Value is set to `true` if {ref}`strict mode ` is enabled. `failOnMismatch` -: When `true`, an error is reported when the operator receives an item from one channel for which there no matching item from the other channel (default: `false`). Value is set to `true` if {ref}`strict mode ` is enabled. This option cannot be used with `remainder`. +: When `true`, an error is reported when the operator receives an item from one channel for which there is no matching item from the other channel (default: `false`).
Value is set to `true` if {ref}`strict mode ` is enabled. This option cannot be used with `remainder`. `remainder` : When `true`, unmatched items are emitted at the end, otherwise they are discarded (default: `false`). diff --git a/docs/reference/stdlib.md b/docs/reference/stdlib.md index febee3de55..977dbf2387 100644 --- a/docs/reference/stdlib.md +++ b/docs/reference/stdlib.md @@ -56,6 +56,9 @@ The following constants are globally available in a Nextflow script: `params` : Map of workflow parameters specified in the config file or as command line options. +: :::{note} + As a best practice, parameters should only be used in the entry workflow. + ::: `projectDir` : Alias of `workflow.projectDir`. @@ -327,6 +330,18 @@ The following methods are available for a `Duration` object: `getSeconds()`, `toSeconds()` : Get the duration value in seconds (rounded down). +(stdlib-list)= + +## List + +TODO + +(stdlib-map)= + +## Map + +TODO + (stdlib-types-memoryunit)= ## MemoryUnit @@ -685,3 +700,15 @@ The following methods are available for splitting and counting the records in fi `splitText()` : Splits a text file into a list of lines. See the {ref}`operator-splittext` operator for available options. + +(stdlib-set)= + +## Set + +TODO + +(stdlib-string)= + +## String + +TODO diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md new file mode 100644 index 0000000000..15eab4970e --- /dev/null +++ b/docs/reference/syntax.md @@ -0,0 +1,831 @@ +(syntax-page)= + +# Syntax + +This page provides a comprehensive description of the Nextflow language. + +## Comments + +Nextflow uses Java-style comments: `//` for a line comment, and `/* ... */` for a block comment: + +```groovy +println 'Hello world!' // line comment + +/* + * block comment + */ +println 'Hello again!' 
+``` + +## Top-level declarations + +A Nextflow script may contain the following top-level declarations: + +- Shebang +- Feature flags +- Includes +- Parameter definitions +- Workflow definitions +- Process definitions +- Function definitions +- Enum types +- Output block + +These declarations are in turn composed of statements and expressions. + +Alternatively, a script may contain one or more [statements](#statements), as long as there are no top-level declarations. In this case, the entire script will be treated as an entry workflow. + +For example, the following script: + +```groovy +println 'Hello world!' +``` + +Is equivalent to: + +```groovy +workflow { + println 'Hello world!' +} +``` + +:::{warning} +Top-level declarations and statements cannot be mixed at the same level. If your script has top-level declarations, all statements must be contained within top-level declarations such as the entry workflow. +::: + +### Shebang + +The first line of a script can be a [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)): + +```sh +#!/usr/bin/env nextflow +``` + +### Feature flag + +A feature flag declaration is an assignment, where the target should be a valid {ref}`feature flag <feature-flags>` and the source should be a literal (e.g. number, string, boolean): + +```groovy +nextflow.preview.topic = true +``` + +### Include + +An include declaration consists of an *include source* and one or more *include clauses*: + +```groovy +include { foo ; bar as baz } from './some/module' +``` + +The include source should be a string literal and should refer to either a local path (e.g. `./module.nf`) or a plugin (e.g. `plugin/nf-hello`). + +Each include clause should specify a name, and may also specify an *alias*. In the example above, `bar` is included under the alias `baz`.
+ +Include clauses can be separated by newlines or semi-colons, or they can be specified as separate includes: + +```groovy +// newlines +include { + foo + bar as baz +} from './some/module' + +// separate includes +include { foo } from './some/module' +include { bar as baz } from './some/module' +``` + +The following definitions can be included: + +- Functions +- Processes +- Named workflows + +### Parameter + +A parameter declaration is an assignment, where the target should be a pipeline parameter and the source should be an expression: + +```groovy +params.message = 'Hello world!' +``` + +Parameters supplied via command line options, params files, and config files take precedence over parameter definitions in a script. + +(syntax-workflow)= + +### Workflow + +A workflow consists of a name and a body. The workflow body consists of a *main* section, with additional sections for *takes*, *emits*, and *publishers* (shown later): + +```groovy +workflow greet { + take: + greetings + + main: + messages = greetings.map { v -> "$v world!" } + + emit: + messages +} +``` + +- The take, emit, and publish sections are optional. If they are not specified, the `main:` section label can be omitted. + +- The take section consists of one or more parameters. + +- The main section consists of one or more [statements](#statements). + +- The emit section consists of one or more *emit statements*. An emit statement can be a [variable name](#variable), an [assignment](#assignment), or an [expression statement](#expression-statement). If an emit statement is an expression statement, it must be the only emit. + +An alternative workflow form, known as an *entry workflow*, has no name and may only define a main and publish section: + +```groovy +workflow { + main: + greetings = Channel.of('Bonjour', 'Ciao', 'Hello', 'Hola') + messages = greetings.map { v -> "$v world!" } + greetings.view { it -> "$it world!"
} + + publish: + messages >> 'messages' +} +``` + +- Only one entry workflow may be defined in a script. + +- The `main:` section label can be omitted if the publish section is not specified. + +- The publish section consists of one or more *publish statements*. A publish statement is a [right-shift expression](#binary-expressions), where the left-hand side is an expression that refers to a value in the workflow body, and the right-hand side is an expression that returns a string. + +- The publish section can also be specified in named workflows as a convenience, but is intended mainly to be used in the entry workflow. + +In order for a script to be executable, it must either define an entry workflow or use the implicit workflow syntax described [above](#top-level-declarations). + +Entry workflow definitions are ignored when a script is included as a module. This way, the same script can be included as a module or executed as a pipeline. + +(syntax-process)= + +### Process + +A process consists of a name and a body. The process body consists of one or more [statements](#statements). A minimal process definition must return a string: + +```groovy +process sayHello { + """ + echo 'Hello world!' + """ +} +``` + +A process may define additional sections for *directives*, *inputs*, *outputs*, *script*, *shell*, *exec*, and *stub*: + +```groovy +process greet { + // directives + errorStrategy 'retry' + tag { "${greeting}/${name}" } + + input: + val greeting + val name + + output: + stdout + + script: // or shell: or exec: + """ + echo '${greeting}, ${name}!' + """ + + stub: + """ + # do nothing + """ +} +``` + +- Each of these additional sections is optional. Directives do not have an explicit section label, but are simply defined first. + +- The `script:` section label can be omitted only when there are no other sections in the body.
+ +- Sections must be defined in the order shown above, with the exception of the output section, which can alternatively be specified after the script and stub. + +Each section may contain one or more statements. For directives, inputs, and outputs, these statements must be [function calls](#function-call). Refer to {ref}`process-reference` for the set of available input qualifiers, output qualifiers, and directives. + +The script section can be substituted with a shell or exec section: + +```groovy +process greetShell { + input: + val greeting + + shell: + ''' + echo '!{greeting}, ${USER}!' + ''' +} + +process greetExec { + input: + val greeting + val name + + exec: + message = "${greeting}, ${name}!" + + output: + val message +} +``` + +The script, shell, and stub sections must return a string in the same manner as a [function](#function). + +Refer to {ref}`process-page` for more information on the semantics of each process section. + +(syntax-function)= + +### Function + +A function consists of a name, parameter list, and a body: + +```groovy +def greet(greeting, name) { + println "${greeting}, ${name}!" +} +``` + +The function body consists of one or more [statements](#statements). The last statement is implicitly treated as a return statement if it is an [expression statement](#expression-statement) that returns a value. + +The [return statement](#return) can be used to explicitly return from a function: + +```groovy +// return with no value +def greet(greeting, name) { + if( !greeting || !name ) + return + println "${greeting}, ${name}!" +} + +// return a value +def fib(x) { + if( x <= 1 ) + return x + fib(x - 1) + fib(x - 2) +} +``` + +### Enum type + +An enum type declaration consists of a name and a body, which consists of a comma-separated list of identifiers: + +```groovy +enum Day { + MONDAY, + TUESDAY, + WEDNESDAY, + THURSDAY, + FRIDAY, + SATURDAY, + SUNDAY +} +``` + +Enum values can be accessed as `Day.MONDAY`, `Day.TUESDAY`, and so on. 
+ +:::{note} +Enum types cannot be included across modules at this time. +::: + +### Output block + +The output block consists of one or more *target blocks*. A target block consists of a *target name* and one or more *target directives* for configuring the corresponding publish target: + +```groovy +output { + 'fastq' { + path 'samples' + index { + path 'index.csv' + } + } +} +``` + +Only one output block may be defined in a script. Refer to {ref}`workflow-output-def` for the set of available target directives. + +## Statements + +Statements should be separated by a newline or semi-colon: + +```groovy +// newline +println 'Hello!' +println 'Hello again!' + +// semi-colon +println 'Hello!' ; println 'Hello again!' +``` + +### Variable declaration + +Variables can be declared with the `def` keyword: + +```groovy +def x = 42 +``` + +Multiple variables can be declared in a single statement as long as the initializer is a [list literal](#list) with as many elements as declared variables: + +```groovy +def (x, y) = [ 1, 2 ] +``` + +Every variable has a *scope*, which determines the region of code in which the variable is defined. + +Variables declared in a function, as well as the parameters of that function, exist for the duration of that function call. The same applies to closures. + +Workflow inputs exist for the entire workflow body. Variables declared in the main section exist for the main, emit, and publish sections. Named outputs are not considered variable declarations and therefore do not have any scope. + +Process input variables exist for the entire process body. Variables declared in the process script, shell, exec, and stub sections exist only in their respective section, with one exception -- in these sections, a variable can be declared without the `def` keyword, in which case it will also exist in the output section.
+ +Variables declared in an if or else branch exist only within that branch: + +```groovy +if( true ) + def x = 'foo' +println x // error: `x` is undefined + +// solution: declare `x` outside of if branch +def x +if( true ) + x = 'foo' +println x +``` + +A variable cannot be declared with the same name as another variable in the same scope or any enclosing scope: + +```groovy +def clash(x) { + def x // error: `x` is already declared + if( true ) + def x // error: `x` is already declared +} +``` + +### Assignment + +An assignment statement consists of a *target* expression and a *source* expression separated by an equals sign: + +```groovy +v = 42 +list[0] = 'first' +map.key = 'value' +``` + +The target expression must be a [variable](#variable), [index](#binary-expressions), or [property](#binary-expressions) expression. The source expression can be any expression. + +Multiple variables can be assigned in a single statement as long as the source expression is a [list literal](#list) with as many elements as assigned variables: + +```groovy +(x, y) = [ 1, 2 ] +``` + +### Expression statement + +Any [expression](#expressions) can also be a statement. + +In general, the only expressions that can have any effect as expression statements are function calls that have side effects (e.g. `println`) or an implicit return statement (e.g. in a function or closure). + +### assert + +An assert statement consists of the `assert` keyword followed by a boolean expression, with an optional error message separated by a colon: + +```groovy +assert 2 + 2 == 4 : 'The math broke!' +``` + +If the condition is false, an error will be raised with the given error message. + +### if / else + +An if/else statement consists of an *if branch* and an optional *else branch*. Each branch consists of a boolean expression in parentheses, followed by either a single statement or a *block statement* (one or more statements in curly braces). 
+ +```groovy +def x = Math.random() +if( x < 0.5 ) { + println 'You lost.' +} +else { + println 'You won!' +} +``` + +If the condition is true, the if branch will be executed, otherwise the else branch will be executed. + +If / else statements can be chained any number of times by making the else branch another if / else statement: + +```groovy +def grade = 89 +if( grade >= 90 ) + println 'You get an A!' +else if( grade >= 80 ) + println 'You get a B!' +else if( grade >= 70 ) + println 'You get a C!' +else if( grade >= 60 ) + println 'You get a D!' +else + println 'You failed.' +``` + +A more verbose way to write the same code would be: + +```groovy +def grade = 89 +if( grade >= 90 ) { + println 'You get an A!' +} +else { + if( grade >= 80 ) { + println 'You get a B!' + } + else { + if( grade >= 70 ) { + println 'You get a C!' + } + else { + if( grade >= 60 ) { + println 'You get a D!' + } + else { + println 'You failed.' + } + } + } +} +``` + +### return + +A return statement consists of the `return` keyword with an optional expression: + +```groovy +def add(a, b) { + return a + b +} + +def sayHello(name) { + if( !name ) + return + println "Hello, ${name}!" +} +``` + +Return statements can only be used in functions and closures. In the case of a nested closure, the return statement will return from the nearest enclosing closure. + +If a function or closure has multiple return statements (including implicit returns), all of the return statements should either return a value or return nothing. If a function or closure does return a value, it should do so for every conditional branch. + +```groovy +def isEven1(n) { + if( n % 2 == 1 ) + return // error: return value is required here + return true +} + +def isEven2(n) { + if( n % 2 == 0 ) + return true + // error: return value is required here +} +``` + +Note that if the last statement is not a return or expression statement (implicit return), it is equivalent to appending an empty return. 
+ +### throw + +A throw statement consists of the `throw` keyword followed by an expression that returns an error type: + +```groovy +throw new Exception('something failed!') +``` + +:::{note} +In general, the appropriate way to raise an error is to use the {ref}`error ` function: +```groovy +error 'something failed!' +``` +::: + +### try / catch + +A try / catch statement consists of a *try block* followed by any number of *catch clauses*: + +```groovy +def text = null +try { + text = file('foo.txt').text +} +catch( IOException e ) { + log.warn "Could not load foo.txt" +} +``` + +The try block will be executed, and if an error is raised and matches the expected error type of a catch clause, the code in that catch clause will be executed. If no catch clause is matched, the error will be raised to the next enclosing try / catch statement, or to the Nextflow runtime. + +## Expressions + +### Variable + +A variable expression is a reference to a variable or other defined name: + +```groovy +def x = 42 + +x +// -> 42 +``` + +### Number + +A number literal can be an integer or real (i.e. floating-point) number. Integers can be specified in binary with `0b`, octal with `0`, or hexadecimal with `0x`. Real numbers can use scientific notation with an `e` or `E` exponent. Underscores can be used as thousands separators to make long numbers more readable. + +```groovy +// integer +42 +-1 +0b1001 // -> 9 +031 // -> 25 +0xabcd // -> 43981 + +// real +3.14 +-0.1 +1.59e7 // -> 15_900_000 +1.59e-7 // -> 0.000000159 +``` + +### Boolean + +A boolean literal can be `true` or `false`: + +```groovy +assert true != false +assert !true == false +assert true == !false +``` + +### Null + +The null literal is specified as `null`. It can be used to represent an "empty" value: + +```groovy +def x = null +x = 42 +``` + +:::{note} +Attempting to use a null value (e.g. index or property access) will cause a "null reference" error. It is best to avoid the use of `null` where possible.
+::: + +### String + +A string literal consists of arbitrary text enclosed by single or double quotes: + +```groovy +println "I said 'hello'" +println 'I said "hello" again!' +``` + +A triple-quoted string can span multiple lines: + +```groovy +println ''' + Hello, + How are you today? + ''' + +println """ + We don't have to escape quotes anymore! + Even "double" quotes! + """ +``` + +A *slashy string* is enclosed by slashes instead of quotes: + +```groovy +/no escape!/ +``` + +Slashy strings can also span multiple lines: + +```groovy +/ +Patterns in the code, +Symbols dance to match and find, +Logic unconfined. +/ +``` + +Note that a slashy string cannot be empty because it would become a line comment. + +Refer to {ref}`stdlib-string` for the set of available string operations. + +### Dynamic string + +Double-quoted strings can be interpolated using the `${}` placeholder, which can contain any expression: + +```groovy +def names = ['Thing 1', 'Thing 2'] +println "Hello, ${names.join(' and ')}!" +// -> Hello, Thing 1 and Thing 2! +``` + +If the expression is a name or simple property expression (one or more identifiers separated by dots), the curly braces can be omitted: + +```groovy +def name = [first: '<FIRST_NAME>', last: '<LAST_NAME>'] +println "Hello, $name.first $name.last!" +// -> Hello, <FIRST_NAME> <LAST_NAME>! +``` + +Multi-line double-quoted strings can also be interpolated: + +```groovy +""" +blastp \ + -in $input \ + -out $output \ + -db $blast_db \ + -html +""" +``` + +### List + +A list literal consists of square brackets with a comma-separated list of zero or more expressions: + +```groovy +[1, 2, 3] +``` + +Refer to {ref}`stdlib-list` for the set of available list operations.
+ +### Map + +A map literal consists of square brackets with a comma-separated list of one or more key-value pairs, with the key and value separated by a colon: + +```groovy +[foo: 1, bar: 2, baz: 3] +``` + +The empty map contains a single colon to distinguish it from an empty list: + +```groovy +[:] +``` + +Both the key and value can be any expression. Identifier keys are treated as string literals (i.e. the quotes can be omitted). To reference a variable as a key, simply wrap it in parentheses: + +```groovy +def x = 'foo' +[(x): 1] +// -> ['foo': 1] +``` + +Refer to {ref}`stdlib-map` for the set of available map operations. + +### Closure + +A closure, also known as an anonymous function, consists of a parameter list followed by zero or more statements, wrapped in curly braces: + +```groovy +{ a, b -> a + b } +``` + +The above closure takes two arguments and returns their sum. + +The closure body is identical to that of a [function](#function). Statements should be separated by newlines or semi-colons, and the last statement is implicitly treated as a [return statement](#return): + +```groovy +{ v -> + println 'Hello!' + println "We're in a closure!" + println 'Goodbye...' + v * v +} +``` + +Closures can access variables outside of their scope: + +```groovy +def factor = 2 +println [1, 2, 3].collect { v -> factor * v } +// -> [2, 4, 6] +``` + +And they can declare local variables that exist only for the lifetime of each closure invocation: + +```groovy +def result = 0 +[1, 2, 3].each { v -> + def squared = v * v + result += squared +} + +println result +// -> 14 +``` + +Refer to the {ref}`standard library ` and {ref}`operator ` reference pages for examples of closures being used in practice. 
+ +### Function call + +A function call consists of a name and argument list: + +```groovy +printf('Hello %s!\n', 'World') +``` + +TODO: object expression, named args + +When the function call is also an [expression statement](#expression-statement) and there is at least one argument, the parentheses can be omitted: + +```groovy +printf 'Hello %s!\n', 'World' +``` + +If the last argument is a closure, it can be specified outside of the parentheses: + +```groovy +// closure arg with additional args +[1, 2, 3].inject('result:') { acc, v -> acc + ' ' + v } + +// single closure arg +[1, 2, 3].each() { v -> println v } + +// single closure arg without parentheses +[1, 2, 3].each { v -> println v } +``` + +### Constructor call + +TODO + +### Unary expressions + +TODO + +### Binary expressions + +TODO + +**Regex finder** + +The `=~` operator checks whether a string contains a pattern: + +```groovy +assert 'foo' =~ /foo/ // true +assert 'foobar' =~ /foo/ // true +``` + +**Regex matcher** + +The `==~` operator checks whether a string matches a pattern exactly: + +```groovy +assert 'foo' ==~ /foo/ // true +assert 'foobar' ==~ /foo/ // false +``` + +**Index expression** + +TODO + +**Property expression** + +TODO + +### Ternary expression + +TODO + +### Parentheses + +Any expression can be wrapped in a set of parentheses to enforce a particular order of operations: + +```groovy +1 + 2 * 3 +// -> 1 + 6 -> 7 + +(1 + 2) * 3 +// -> 3 * 3 -> 9 +``` + +## Deprecations + +The following legacy features were excluded from this page because they are deprecated: + +- The `addParams` and `params` clauses of include declarations (see {ref}`module-params`) +- The `when:` section of a process definition (see {ref}`process-when`) +- The implicit `it` closure parameter diff --git a/docs/script.md b/docs/script.md index 0dc404fe9f..b2b1a575a1 100644 --- a/docs/script.md +++ b/docs/script.md @@ -2,7 +2,7 @@ # Scripts -Nextflow is a workflow language that runs on the Java virtual machine (JVM). 
Nextflow's syntax is very similar to [Groovy](https://groovy-lang.org/), a scripting language for the JVM, but Nextflow is specialized for writing computational pipelines in a declarative manner. +Nextflow is a workflow language that runs on the Java virtual machine (JVM). Nextflow's syntax is very similar to [Groovy](https://groovy-lang.org/), a scripting language for the JVM, but Nextflow is specialized for writing computational pipelines in a declarative manner. Refer to the {ref}`syntax-page` page for a full description of the Nextflow language. Nextflow scripts can also make full use of the Java and Groovy standard libraries; see the {ref}`stdlib-page` page for more information. @@ -311,43 +311,6 @@ println patch // 3 println flavor // beta ``` -(script-functions)= - -## Functions - -Functions can be defined using the following syntax: - -```groovy -def ( arg1, arg, .. ) { - -} -``` - -For example: - -```groovy -def foo() { - 'Hello world' -} - -def bar(alpha, omega) { - alpha + omega -} -``` - -The above snippet defines two simple functions, that can be invoked in the workflow script as `foo()`, which returns `'Hello world'`, and `bar(10, 20)`, which returns the sum of two parameters (`30` in this case). - -Functions implicitly return the result of the last statement. Additionally, the `return` keyword can be used to explicitly exit from a function and return the specified value. For example: - -```groovy -def fib( x ) { - if( x <= 1 ) - return x - - fib(x-1) + fib(x-2) -} -``` - (script-closure)= ## Closures @@ -438,45 +401,3 @@ result = counts.values().inject { sum, v -> sum + v } ``` This way, the closure is fully "self-contained" because it doesn't access or mutate any variables outside of its scope. - -Learn more about closures in the [Groovy documentation](http://groovy-lang.org/closures.html) - -## Syntax sugar - -Nextflow provides several forms of "syntax sugar", or shorthands that can make your code easier to read. 
- -Some programming languages require every statement to be terminated by a semi-colon. In Nextflow, semi-colons are optional, but they can still be used to write multiple statements on the same line: - -```groovy -println 'Hello!' ; println 'Hello again!' -``` - -When calling a function, the parentheses around the function arguments are optional: - -```groovy -// full syntax -printf('Hello %s!\n', 'World') - -// shorthand -printf 'Hello %s!\n', 'World' -``` - -It is especially useful when calling a function with a closure parameter: - -```groovy -// full syntax -[1, 2, 3].each({ v -> println v }) - -// shorthand -[1, 2, 3].each { v -> println v } -``` - -If the last argument is a closure, the closure can be written outside of the parentheses: - -```groovy -// full syntax -[1, 2, 3].inject('result:', { acc, v -> acc + ' ' + v }) - -// shorthand -[1, 2, 3].inject('result:') { acc, v -> acc + ' ' + v } -``` diff --git a/docs/workflow.md b/docs/workflow.md index 4122bbc383..97b13f3da7 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -2,73 +2,42 @@ # Workflows -In Nextflow, a **workflow** is a composition of processes and dataflow logic (i.e. channels and operators). +In Nextflow, a **workflow** is a function that is specialized for composing processes and dataflow logic (i.e. channels and operators). -The workflow definition starts with the keyword `workflow`, followed by an optional name, and finally the workflow body delimited by curly braces. A basic workflow looks like the following example: +A script can define a workflow without a name, known as the *entry workflow*, which is the entrypoint of the script: ```groovy workflow { - foo() + Channel.of('Bonjour', 'Ciao', 'Hello', 'Hola') + | map { v -> "$v world!" } + | view } ``` -Where `foo` could be a function, a process, or another workflow. - -Workflows are *lazily executed*, which means that Nextflow parses the entire workflow structure first, and then executes the entire workflow at once. 
The order in which a task is executed is determined only by its dependencies, so a task will be executed as soon as all of its required inputs are available. - -The syntax of a workflow is defined as follows: +A *named workflow*, on the other hand, is a workflow that can be called from other workflows: ```groovy -workflow [ name ] { - - take: - < workflow inputs > - - main: - < dataflow statements > - - emit: - < workflow outputs > - -} -``` - -:::{tip} -The `main:` label can be omitted if there are no `take:` or `emit:` blocks. -::: - -:::{note} -Workflows were introduced in DSL2. If you are still using DSL1, see the {ref}`dsl1-page` page to learn how to migrate your Nextflow pipelines to DSL2. -::: - -## Entry workflow - -A script can define a single workflow without a name (also known as the *entry workflow*), which is the default entrypoint of the script. The `-entry` command line option can be used to execute a different workflow as the entrypoint at runtime. - -:::{note} -Entry workflow definitions are ignored when a script is included as a module. This way, a script can be written such that it can be either imported as a module or executed as a pipeline. -::: - -## Named workflows - -A named workflow is a workflow that can be invoked from other workflows. For example: - -```groovy -workflow my_pipeline { +workflow my_workflow { foo() bar( foo.out.collect() ) } workflow { - my_pipeline() + my_workflow() } ``` -The above snippet defines a workflow named `my_pipeline`, that can be invoked from another workflow as `my_pipeline()`, just like any other function or process. +The above example defines a workflow named `my_workflow`, that can be called from another workflow as `my_workflow()`, just like any other function or process. Both `foo` and `bar` could be any other process or workflow. + +Refer to {ref}`syntax-workflow` in the syntax reference for a full description of the workflow syntax. 
-## Using variables and params +:::{note} +Workflows were introduced in DSL2. If you are still using DSL1, see the {ref}`dsl1-page` page to learn how to migrate your Nextflow pipelines to DSL2. +::: + +## Using parameters -A workflow can access any variable or parameter defined in the global scope: +Parameters can be defined in the script with a default value, which can be overridden by params from the CLI, params file, or config file. They can then be used by the entry workflow: ```groovy params.data = '/some/data/file' @@ -81,16 +50,16 @@ workflow { } ``` -:::{tip} -The use of global variables and params in named workflows is discouraged because it breaks the modularity of the workflow. As a best practice, every workflow input should be explicitly defined as such in the `take:` block, and params should only be used in the entry workflow. +:::{note} +While params can also be used by named workflows, this practice is deprecated and will not be supported in the future. Named workflows should receive their inputs explicitly through the `take:` section. ::: ## Workflow inputs (`take`) -A workflow can declare one or more input channels using the `take` keyword. For example: +The `take:` section is used to declare workflow inputs: ```groovy -workflow my_pipeline { +workflow my_workflow { take: data1 data2 @@ -101,24 +70,20 @@ workflow my_pipeline { } ``` -:::{warning} -When the `take` keyword is used, the beginning of the workflow body must be defined with the `main` keyword. -::: - -Inputs can be specified like arguments when invoking the workflow: +Inputs can be specified like arguments when calling the workflow: ```groovy workflow { - my_pipeline( channel.from('/some/data') ) + my_workflow( Channel.of('/some/data') ) } ``` ## Workflow outputs (`emit`) -A workflow can declare one or more output channels using the `emit` keyword. 
For example: +The `emit:` section is used to declare workflow outputs: ```groovy -workflow my_pipeline { +workflow my_workflow { main: foo(data) bar(foo.out) @@ -128,14 +93,12 @@ workflow my_pipeline { } ``` -When invoking the workflow, the output channel(s) can be accessed using the `out` property, i.e. `my_pipeline.out`. When multiple output channels are declared, use the array bracket notation or the assignment syntax to access each output channel as described for [process outputs](#process-outputs). - -### Named outputs +When calling the workflow, the output can be accessed using the `out` property, i.e. `my_workflow.out`. -If an output channel is assigned to an identifier in the `emit` block, the identifier can be used to reference the channel from the calling workflow. For example: +If an output is assigned to a name, the name can be used to reference the output from the calling workflow. For example: ```groovy -workflow my_pipeline { +workflow my_workflow { main: foo(data) bar(foo.out) @@ -145,18 +108,22 @@ workflow my_pipeline { } ``` -The result of the above workflow can be accessed using `my_pipeline.out.my_data`. +The result of the above workflow can be accessed using `my_workflow.out.my_data`. + +:::{note} +When multiple output channels are declared, every output must be assigned to a name. +::: (workflow-process-invocation)= -## Invoking processes +## Calling processes and workflows -A process can be invoked like a function in a workflow definition, passing the expected input channels like function arguments. 
For example: +Processes and workflows are called like functions, passing their inputs as arguments: ```groovy process foo { output: - path 'foo.txt' + path 'foo.txt', emit: txt script: """ @@ -169,7 +136,7 @@ process bar { path x output: - path 'bar.txt' + path 'bar.txt', emit: txt script: """ @@ -177,140 +144,135 @@ process bar { """ } -workflow { - data = channel.fromPath('/some/path/*.txt') +workflow flow { + take: + data + + main: foo() bar(data) } -``` - -:::{warning} -A process can be only be invoked once in a single workflow, however you can get around this restriction by using {ref}`module-aliases`. -::: - -### Process composition -Processes with matching input/output declarations can be composed so that the output of the first process is passed as input to the second process. The previous example can be rewritten as follows: - -```groovy workflow { - bar(foo()) + data = Channel.fromPath('/some/path/*.txt') + flow(data) } ``` -### Process outputs +Processes and workflows have a few extra rules around how they are called: + +- Processes and workflows can only be called by workflows + +- A given process or workflow can only be called once in a given workflow. To use a process or workflow multiple times in the same workflow, use {ref}`module-aliases`. -A process output can be accessed using the `out` attribute on the corresponding process object. For example: +The "return value" of a process or workflow call is the process outputs or workflow emits, respectively. The return value can be assigned to a variable or passed into another call: ```groovy -workflow { - foo() - bar(foo.out) - bar.out.view() -} -``` +workflow flow { + take: + data -When a process defines multiple output channels, each output can be accessed by index (`out[0]`, `out[1]`, etc.) or by name (see below). 
+ main: + bar_out = bar(foo(data)) -The process output(s) can also be accessed like the return value of a function: + emit: + bar_out +} -```groovy workflow { - f_out = foo() - (b1, b2) = bar(f_out) - b1.view() + data = Channel.fromPath('/some/path/*.txt') + flow_out = flow(data) } ``` -#### Named outputs - -The `emit` option can be added to the process output definition to assign a name identifier. This name can be used to reference the channel from the calling workflow. For example: +Named outputs can be accessed as properties of the return value: ```groovy -process foo { - output: - path '*.bam', emit: samples_bam +workflow flow { + take: + data - ''' - your_command --here - ''' -} + main: + foo_out = foo(data) + bar_out = bar(foo_out.txt) -workflow { - foo() - foo.out.samples_bam.view() + emit: + bar = bar_out.txt } -``` -When referencing a named output directly from the process invocation, you can use a more concise syntax: - -```groovy workflow { - ch_samples = foo().samples_bam + data = Channel.fromPath('/some/path/*.txt') + flow_out = flow(data) + bar_out = flow_out.bar } ``` -See {ref}`naming process outputs ` for more details. - -#### Named stdout - -The `emit` option can also be used to name a `stdout` output. However, while process output options are usually prefixed with a comma, this is not the case for `stdout`. This is because `stdout` does not have an argument like other types. 
- +As a convenience, process and workflow outputs can also be accessed without first assigning to a variable, by using the `.out` property of the process or workflow name: ```groovy -process sayHello { - input: - val cheers +workflow flow { + take: + data - output: - stdout emit: verbiage + main: + foo(data) + bar(foo.out) - script: - """ - echo -n $cheers - """ + emit: + bar = bar.out } workflow { - things = channel.of('Hello world!', 'Yo, dude!', 'Duck!') - sayHello(things) - sayHello.out.verbiage.view() + data = Channel.fromPath('/some/path/*.txt') + flow(data) + flow.out.bar.view() } ``` -## Invoking workflows +:::{note} +Process named outputs are defined using the `emit` option on a process output. See {ref}`naming process outputs <process-naming-outputs>` for more details. +::: + +:::{note} +Process and workflow outputs can also be accessed by index (e.g. `foo.out[0]`, `foo.out[1]`, etc), but this practice is deprecated and will not be supported in the future. Access multiple outputs by name instead. +::: -Named workflows can be invoked and composed just like any other process or function. +Workflows can be composed in the same way: ```groovy workflow flow1 { - take: data + take: + data + main: - foo(data) - bar(foo.out) + foo(data) + bar(foo.out) + emit: - bar.out + bar.out } workflow flow2 { - take: data + take: + data + main: - foo(data) - baz(foo.out) + foo(data) + baz(foo.out) + emit: - baz.out + baz.out } workflow { - take: data - main: - flow1(data) - flow2(flow1.out) + data = Channel.fromPath('/some/path/*.txt') + flow1(data) + flow2(flow1.out) } ``` :::{note} -Each workflow invocation has its own scope. As a result, the same process can be invoked in two different workflow scopes, like `foo` in the above snippet, which is used in both `flow1` and `flow2`. The workflow execution path, along with the process names, determines the *fully qualified process name* that is used to distinguish the different process invocations, i.e. `flow1:foo` and `flow2:foo` in the above example.
+The same process can be called in different workflows without using an alias, like `foo` in the above example, which is used in both `flow1` and `flow2`. The workflow call stack determines the *fully qualified process name*, which is used to distinguish the different process calls, i.e. `flow1:foo` and `flow2:foo` in the above example. ::: :::{tip} @@ -319,9 +281,11 @@ The fully qualified process name can be used as a {ref}`process selector v.toUpperCase() } | view + Channel.of('Hello','Hola','Ciao') + | foo + | map { v -> v.toUpperCase() } + | view } ``` -The above snippet defines a process named `foo` and invokes it with the `data` channel. The result is then piped to the {ref}`operator-map` operator, which converts each string to uppercase, and finally to the {ref}`operator-view` operator which prints it. - -:::{tip} -Statements can also be split across multiple lines for better readability: +The pipe chain shown above is equivalent to the following: ```groovy workflow { - channel.from('Hello','Hola','Ciao') - | foo - | map { v -> v.toUpperCase() } - | view + foo( Channel.of('Hello','Hola','Ciao') ) + .map { v -> v.toUpperCase() } + .view() } ``` -::: ### And `&` -The `&` *and* operator can be used to feed multiple processes with the same channel(s). For example: +The `&` *and* operator can be used to call multiple processes in parallel with the same channel(s): ```groovy process foo { @@ -383,15 +345,24 @@ process bar { } workflow { - channel.from('Hello') - | map { v -> v.reverse() } - | (foo & bar) - | mix - | view + Channel.of('Hello') + | map { v -> v.reverse() } + | (foo & bar) + | mix + | view } ``` -In the above snippet, the initial channel is piped to the {ref}`operator-map` operator, which reverses the string value. Then, the result is passed to the processes `foo` and `bar`, which are executed in parallel. Each process outputs a channel, and the two channels are combined using the {ref}`operator-mix` operator. 
Finally, the result is printed using the {ref}`operator-view` operator. +The above example is equivalent to the following: + +```groovy +workflow { + ch = Channel.of('Hello').map { v -> v.reverse() } + ch_foo = foo(ch) + ch_bar = bar(ch) + ch_foo.mix(ch_bar).view() +} +``` (workflow-output-def)= From 39ee1c9140df58a442e764d28667c898761b337f Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 28 Sep 2024 11:56:12 -0500 Subject: [PATCH 05/28] Revert non-docs changes Signed-off-by: Ben Sherman --- tests/blast-dsl2.nf | 18 +++++------ tests/blast-parallel-dsl2.nf | 31 +++++++------------ tests/collect_and_merge.nf | 2 +- tests/complex-names-dsl2.nf | 8 ++--- tests/config-labels.config | 2 +- ...included.config => config-labels.included} | 0 tests/config-labels.nf | 16 +++++----- tests/dynamic-filename.nf | 5 +-- tests/env-out.nf | 4 +-- tests/env2.nf | 2 +- tests/error-finish.nf | 11 ++++--- tests/files.nf | 3 +- tests/output-dsl.nf | 4 --- tests/output-globs.nf | 24 +++++++------- tests/output-val-dsl2.nf | 4 ++- tests/profiles.config | 3 +- tests/publish-saveas.nf | 4 +-- tests/race.nf | 4 +-- tests/rnaseq-toy-dsl2.nf | 7 +++-- tests/sets.nf | 12 +++---- tests/singleton.nf | 2 +- tests/subworkflow-dsl2.nf | 5 +-- tests/task-escape-path-dsl2.nf | 20 +++++------- tests/task-retry.nf | 6 ++-- tests/template-dyn.nf | 4 +-- tests/tuples-dsl2.nf | 24 +++++++------- tests/watch-dsl2.nf | 16 +++++----- tests/when-block.nf | 11 +++---- tests/workdir-with-blank.nf | 2 +- validation/test-complexpaths.nf | 10 +++--- validation/test-overwrite.nf | 2 +- 31 files changed, 126 insertions(+), 140 deletions(-) rename tests/{config-labels-included.config => config-labels.included} (100%) diff --git a/tests/blast-dsl2.nf b/tests/blast-dsl2.nf index 3b4f0c3063..7a8d96e25b 100644 --- a/tests/blast-dsl2.nf +++ b/tests/blast-dsl2.nf @@ -4,16 +4,17 @@ params.db = "$baseDir/blast-db/tiny" params.query = "$baseDir/data/sample.fa" params.chunkSize = 1 +DB = file(params.db) + process blast { 
input: path 'seq.fa' - path db output: path 'out' """ - blastp -db $db -query seq.fa -outfmt 6 > out + blastp -db $DB -query seq.fa -outfmt 6 > out """ } @@ -31,11 +32,10 @@ process sort { workflow { - ch_fasta = Channel.fromPath(params.query) - | splitFasta( by: params.chunkSize, file:true ) - - blast(ch_fasta, file(params.db)) - | collect - | sort - | subscribe { hits -> println hits } + Channel.fromPath(params.query) | + splitFasta( by: params.chunkSize, file:true ) | + blast | + collect | + sort | + subscribe { println it } } diff --git a/tests/blast-parallel-dsl2.nf b/tests/blast-parallel-dsl2.nf index 2b44327651..a2b3addbd7 100644 --- a/tests/blast-parallel-dsl2.nf +++ b/tests/blast-parallel-dsl2.nf @@ -4,16 +4,17 @@ params.db = "$baseDir/blast-db/tiny" params.query = "$baseDir/data/sample.fa" params.chunk = 1 +db = file(params.db) + /* * Extends a BLAST query for each entry in the 'chunks' channel */ process blast { input: path 'query.fa' - path db output: - path 'top_hits' + path top_hits """ blastp -db ${db} -query query.fa -outfmt 6 > blast_result @@ -27,15 +28,11 @@ process blast { process extract { input: path top_hits - path db output: path 'sequences' - script: - """ - blastdbcmd -db ${db} -entry_batch top_hits | head -n 10 > sequences - """ + "blastdbcmd -db ${db} -entry_batch top_hits | head -n 10 > sequences" } @@ -48,24 +45,18 @@ process align { input: path all_seq - script: - """ - t_coffee $all_seq 2>/dev/null | tee align_result - """ + "t_coffee $all_seq 2>/dev/null | tee align_result" } /* * main flow */ workflow { - db = file(params.db) - - ch_fasta = Channel.fromPath(params.query) - | splitFasta(by: params.chunk, file:true) - - ch_sequences = blast(ch_fasta, db) + Channel.fromPath(params.query) | + splitFasta(by: params.chunk, file:true) | + blast | + extract | + collectFile(name:'all_seq') | // Collect all hits to a single file called 'all_seq' + align - extract(ch_sequences, db) - | collectFile(name:'all_seq') // Collect all hits to a 
single file called 'all_seq' - | align } diff --git a/tests/collect_and_merge.nf b/tests/collect_and_merge.nf index 853afc85fb..b77a742373 100644 --- a/tests/collect_and_merge.nf +++ b/tests/collect_and_merge.nf @@ -44,7 +44,7 @@ process merge { debug true input: - tuple val(barcode), val(seq_id), path(bam), path(bai) + tuple val(barcode), val(seq_id), file(bam: 'bam?'), file(bai: 'bai?') """ echo barcode: $barcode diff --git a/tests/complex-names-dsl2.nf b/tests/complex-names-dsl2.nf index bd44448768..90e6cb0fa5 100644 --- a/tests/complex-names-dsl2.nf +++ b/tests/complex-names-dsl2.nf @@ -12,7 +12,7 @@ process foo { path '.alpha' script: - """ + $/ echo A > hello.txt echo B > sample.zip echo C > sample.html @@ -24,7 +24,7 @@ process foo { echo 3 > f3.fa mkdir .alpha echo "Hello world!" > .alpha/hello.txt - """ + /$ } process bar { @@ -34,10 +34,10 @@ process bar { path '*' script: - """ + $/ cat .alpha/hello.txt [ `cat * | grep -c ''` == 9 ] || false - """ + /$ } /* diff --git a/tests/config-labels.config b/tests/config-labels.config index 51af5f4f80..294fa3900a 100644 --- a/tests/config-labels.config +++ b/tests/config-labels.config @@ -32,6 +32,6 @@ profiles { } test3 { - includeConfig 'config-labels-included.config' + includeConfig 'config-labels.included' } } diff --git a/tests/config-labels-included.config b/tests/config-labels.included similarity index 100% rename from tests/config-labels-included.config rename to tests/config-labels.included diff --git a/tests/config-labels.nf b/tests/config-labels.nf index 7f261177bf..c152e9d76c 100644 --- a/tests/config-labels.nf +++ b/tests/config-labels.nf @@ -23,30 +23,30 @@ workflow { process alpha { debug true - """ + / echo alpha memry: ${task.memory} echo alpha queue: ${task.queue} - """ + / } process beta { debug true label 'small' - """ + / echo beta memry: ${task.memory} echo beta queue: ${task.queue} - """ + / } process delta { debug true label 'big' - """ + / echo delta memry: ${task.memory} echo delta queue: 
${task.queue} - """ + / } process gamma { @@ -55,8 +55,8 @@ process gamma { memory 40.MB queue 'foo' - """ + / echo gamma memry: ${task.memory} echo gamma queue: ${task.queue} - """ + / } diff --git a/tests/dynamic-filename.nf b/tests/dynamic-filename.nf index ff0e849dd5..7a3b1c1346 100644 --- a/tests/dynamic-filename.nf +++ b/tests/dynamic-filename.nf @@ -17,6 +17,9 @@ params.prefix = 'my' +data = 'Hello\n' +list = ['alpha', 'delta', 'gamma', 'omega'] + process foo { input: @@ -33,7 +36,5 @@ process foo { } workflow { - data = 'Hello\n' - list = ['alpha', 'delta', 'gamma', 'omega'] foo(list, data) | subscribe { println "~ Saving ${it.name}"; it.copyTo('.') } } diff --git a/tests/env-out.nf b/tests/env-out.nf index 749d919081..e33b6e297d 100644 --- a/tests/env-out.nf +++ b/tests/env-out.nf @@ -17,14 +17,14 @@ process foo { output: - env 'FOO' + env FOO /FOO=Hello/ } process bar { debug true input: - env 'FOO' + env FOO 'echo "bar says $FOO"' } diff --git a/tests/env2.nf b/tests/env2.nf index 23df6bfca9..9f4cc8dfb4 100644 --- a/tests/env2.nf +++ b/tests/env2.nf @@ -19,7 +19,7 @@ process printEnv { debug true input: - env 'HELLO' + env HELLO ''' echo $HELLO world! diff --git a/tests/error-finish.nf b/tests/error-finish.nf index 88d2832fa1..8f70d60605 100644 --- a/tests/error-finish.nf +++ b/tests/error-finish.nf @@ -42,11 +42,12 @@ process bar { ''' } + +workflow.onError { + println "success: $workflow.success" + println "exitStatus: $workflow.exitStatus" +} + workflow { foo([1,2,3]) | bar - - workflow.onError { - println "success: $workflow.success" - println "exitStatus: $workflow.exitStatus" - } } diff --git a/tests/files.nf b/tests/files.nf index 8e6c09bf27..bda693dcd5 100644 --- a/tests/files.nf +++ b/tests/files.nf @@ -16,6 +16,7 @@ */ params.in = "$baseDir/data/sample.fa" +SPLIT = (System.properties['os.name'] == 'Mac OS X' ? 
'gcsplit' : 'csplit') process split { input: @@ -24,8 +25,6 @@ process split { output: path 'seq_*' - script: - SPLIT = (System.properties['os.name'] == 'Mac OS X' ? 'gcsplit' : 'csplit') """ $SPLIT query.fa '%^>%' '/^>/' '{*}' -f seq_ """ diff --git a/tests/output-dsl.nf b/tests/output-dsl.nf index 908650405f..22d9cea365 100644 --- a/tests/output-dsl.nf +++ b/tests/output-dsl.nf @@ -26,7 +26,6 @@ process align { path("*.bam") path("${x}.bai") - script: """ echo ${x} > ${x}.bam echo ${x} | rev > ${x}.bai @@ -41,7 +40,6 @@ process my_combine { output: path 'result.txt' - script: """ cat $bamfile > result.txt cat $baifile >> result.txt @@ -52,7 +50,6 @@ process foo { output: path 'xxx' - script: ''' mkdir xxx touch xxx/A @@ -62,7 +59,6 @@ process foo { } workflow { - main: def input = Channel.of('alpha','beta','delta') align(input) diff --git a/tests/output-globs.nf b/tests/output-globs.nf index 4a3da72312..836d3323a8 100644 --- a/tests/output-globs.nf +++ b/tests/output-globs.nf @@ -1,21 +1,19 @@ -def getCmd() { - """ - mkdir -p a/a b/b c/c - touch a/1.txt - touch b/1.txt - touch c/1.txt - touch a/a/2.txt - touch b/b/2.txt - touch c/c/2.txt - """ -} +def CMD = """ + mkdir -p a/a b/b c/c + touch a/1.txt + touch b/1.txt + touch c/1.txt + touch a/a/2.txt + touch b/b/2.txt + touch c/c/2.txt + """ process foo { output: file("a/*/*.txt") script: - getCmd() + CMD } process bar { @@ -23,7 +21,7 @@ process bar { output: file("a/*/*.txt") script: - getCmd() + CMD } workflow { diff --git a/tests/output-val-dsl2.nf b/tests/output-val-dsl2.nf index 3bb587b5d3..e769f9e4d1 100644 --- a/tests/output-val-dsl2.nf +++ b/tests/output-val-dsl2.nf @@ -1,5 +1,8 @@ #!/usr/bin/env nextflow +x = 100 +y = 200 + process foo { input: path fastq @@ -11,7 +14,6 @@ process foo { val y script: - x = 100 y = 'two hundred' """ echo bar diff --git a/tests/profiles.config b/tests/profiles.config index 95f0ac4ee9..b74a3f87ef 100644 --- a/tests/profiles.config +++ b/tests/profiles.config @@ -16,7 +16,8 
@@ echo = true -includeConfig "${'delta'}.config" +def x = 'delta' +includeConfig "${x}.config" profiles { diff --git a/tests/publish-saveas.nf b/tests/publish-saveas.nf index 047550f527..7b760db1cb 100644 --- a/tests/publish-saveas.nf +++ b/tests/publish-saveas.nf @@ -23,12 +23,12 @@ def rule( file ) { return null if( file == 'file_3.txt' ) - return "${System.getenv('PWD')}/results/gamma/$file" + return "$PWD/results/gamma/$file" } process foo { - publishDir path: 'results', saveAs: { file -> rule(file) } + publishDir path: 'results', saveAs: this.&rule input: each x output: path '*.txt' diff --git a/tests/race.nf b/tests/race.nf index b006e76625..090a90c842 100644 --- a/tests/race.nf +++ b/tests/race.nf @@ -15,9 +15,9 @@ * limitations under the License. */ -workflow { - seqs = channel.fromList(file("$baseDir/data/seqs/*.fastq")) +seqs = channel.fromList(file("$baseDir/data/seqs/*.fastq")) +workflow { seqs | proc1 seqs | proc2 seqs | proc3 diff --git a/tests/rnaseq-toy-dsl2.nf b/tests/rnaseq-toy-dsl2.nf index c10673205b..94e76aae20 100644 --- a/tests/rnaseq-toy-dsl2.nf +++ b/tests/rnaseq-toy-dsl2.nf @@ -61,9 +61,12 @@ process makeTranscript { /* * main flow */ -workflow { - read_pairs = Channel.fromFilePairs( params.reads, checkIfExists: true ) +read_pairs = Channel.fromFilePairs( params.reads, checkIfExists: true ) +/* + * main flow + */ +workflow { buildIndex(params.genome) mapping(params.genome, buildIndex.out, read_pairs) makeTranscript(mapping.out) diff --git a/tests/sets.nf b/tests/sets.nf index 1b56ac1c50..75bdeed69d 100644 --- a/tests/sets.nf +++ b/tests/sets.nf @@ -21,10 +21,10 @@ process touch { output: tuple val(id), path('file*') - """ + / echo Creating $id touch $fileName - """ + / } process makeFiles { @@ -34,10 +34,10 @@ process makeFiles { output: tuple val(id), path('*') - """ - cp file_x copy_$id - touch beta_$id - """ + / + cp file_x copy_$id + touch beta_$id + / } diff --git a/tests/singleton.nf b/tests/singleton.nf index 7fd4418b13..70f4bde70a 
100644 --- a/tests/singleton.nf +++ b/tests/singleton.nf @@ -17,7 +17,7 @@ process foo { output: - file 'x' + file x ''' echo -n Hello > x diff --git a/tests/subworkflow-dsl2.nf b/tests/subworkflow-dsl2.nf index 9ce483f8b3..27d70ccc09 100644 --- a/tests/subworkflow-dsl2.nf +++ b/tests/subworkflow-dsl2.nf @@ -33,7 +33,6 @@ workflow flow2 { } workflow test1 { - main: flow1() flow2() ch1 = flow1.out.result @@ -42,9 +41,7 @@ workflow test1 { } workflow test2 { - main: - result = ( flow1 & flow2 ) | mix | collectFile(name:"${System.getenv('PWD')}/test2.txt") - emit: result + emit: ( flow1 & flow2 ) | mix | collectFile(name:"$PWD/test2.txt") } workflow { diff --git a/tests/task-escape-path-dsl2.nf b/tests/task-escape-path-dsl2.nf index af298f1561..29c89a7495 100644 --- a/tests/task-escape-path-dsl2.nf +++ b/tests/task-escape-path-dsl2.nf @@ -1,9 +1,8 @@ process foo1 { debug true - input: - path x - path y + input: path x + input: path y """ echo "FOO1: ${x}; ${y}" """ @@ -11,9 +10,8 @@ process foo1 { process foo2 { debug true - input: - path x - path y + input: path x + input: path y script: """ echo "FOO2: ${x}; ${y}" @@ -22,9 +20,8 @@ process foo2 { process foo3 { debug true - input: - path x - path y + input: path x + input: path y shell: ''' echo "FOO3: !{x}; !{y}" @@ -33,9 +30,8 @@ process foo3 { process foo4 { debug true - input: - path x - path y + input: path x + input: path y script: template("$baseDir/task-escape-path-dsl2.sh") } diff --git a/tests/task-retry.nf b/tests/task-retry.nf index 16ca73b369..174db33737 100644 --- a/tests/task-retry.nf +++ b/tests/task-retry.nf @@ -29,13 +29,13 @@ process foo { script: """ - if [[ -f marker ]]; then + if [[ -f $PWD/marker ]]; then echo DONE - mem: $task.memory - time: $task.time exit 0 else echo FAIL - touch marker - exit 5 + touch $PWD/marker + exit 5; fi """ diff --git a/tests/template-dyn.nf b/tests/template-dyn.nf index a3c0b053b0..fb738ab010 100644 --- a/tests/template-dyn.nf +++ b/tests/template-dyn.nf @@ -15,9 
+15,9 @@ * limitations under the License. */ -workflow { - list = 'alpha,delta,gamma'.tokenize(',') +list = 'alpha,delta,gamma'.tokenize(',') +workflow { foo(list) bar(list) } diff --git a/tests/tuples-dsl2.nf b/tests/tuples-dsl2.nf index ed3e2e25c4..ceeb5e1d24 100644 --- a/tests/tuples-dsl2.nf +++ b/tests/tuples-dsl2.nf @@ -8,10 +8,10 @@ process touch { tuple val(id), path('file*') - """ + / echo Creating $id touch $fileName - """ + / } process makeFiles { @@ -21,16 +21,18 @@ process makeFiles { output: tuple val(id), path('*') - """ - cp file_x copy_$id - touch beta_$id - """ + / + cp file_x copy_$id + touch beta_$id + / } workflow { - Channel.from( ['a', 'file1'], ['b','file2'] ) - | touch - | makeFiles - | flatten - | subscribe { println it } + + Channel + .from( ['a', 'file1'], ['b','file2'] ) \ + | touch \ + | makeFiles \ + | flatten \ + | subscribe { println it } } diff --git a/tests/watch-dsl2.nf b/tests/watch-dsl2.nf index 92d5cb5ed7..4718f9c85d 100644 --- a/tests/watch-dsl2.nf +++ b/tests/watch-dsl2.nf @@ -9,7 +9,7 @@ process align { path fasta output: - path 'aln' + path aln """ t_coffee -in $fasta 1> aln @@ -22,10 +22,12 @@ process align { workflow { - Channel.watchPath(params.files, params.events) - | align - | subscribe { - println '------' - println it.text - } + Channel + .watchPath(params.files, params.events) \ + | align \ + | subscribe { + println '------' + println it.text + } + } diff --git a/tests/when-block.nf b/tests/when-block.nf index edda552ec2..3b6e685b7f 100644 --- a/tests/when-block.nf +++ b/tests/when-block.nf @@ -15,20 +15,17 @@ * limitations under the License. 
*/ -def decode(i) { - ['zero','one','two','three','fourth'][i] -} +items = [0,1,2,3,4] +decode = ['zero','one','two','three','fourth'] workflow { - items = [0,1,2,3,4] - channel.fromList(items) | foo channel.fromList(items) | bar } process foo { debug true - tag "${decode(x)}" + tag "${decode[x]}" input: val x @@ -44,7 +41,7 @@ process foo { process bar { debug true - tag "${decode(x)}" + tag "${decode[x]}" input: val x diff --git a/tests/workdir-with-blank.nf b/tests/workdir-with-blank.nf index d9105b2d38..bfb6b99121 100644 --- a/tests/workdir-with-blank.nf +++ b/tests/workdir-with-blank.nf @@ -20,7 +20,7 @@ process foo { each x output: - file 'result_data' + file result_data """ echo Hello $x > result_data diff --git a/validation/test-complexpaths.nf b/validation/test-complexpaths.nf index f3b66b2abd..2050313196 100644 --- a/validation/test-complexpaths.nf +++ b/validation/test-complexpaths.nf @@ -1,5 +1,5 @@ workflow { - foo | mix | collect | bar + foo | mix | collect | bar } process foo { @@ -14,7 +14,7 @@ process foo { file '.alpha' script: - """ + $/ echo A > hello.txt echo B > sample.zip echo C > sample.html @@ -26,7 +26,7 @@ process foo { echo 3 > f3.fa mkdir .alpha echo "Hello world!" 
> .alpha/hello.txt - """ + /$ } process bar { @@ -36,8 +36,8 @@ process bar { file '*' script: - """ + $/ cat .alpha/hello.txt [ `cat * | grep -c ''` == 9 ] || false - """ + /$ } diff --git a/validation/test-overwrite.nf b/validation/test-overwrite.nf index aa9adfb326..db4ab02b00 100644 --- a/validation/test-overwrite.nf +++ b/validation/test-overwrite.nf @@ -3,7 +3,7 @@ workflow { } process foo { - container 'quay.io/nextflow/bash' + container = 'quay.io/nextflow/bash' publishDir "gs://rnaseq-nf/scratch/tests", overwrite: true output: path 'hello.txt' From 00ba5d15eafe494b8f74043ff21bafbb9f4bf802 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 28 Sep 2024 13:48:37 -0500 Subject: [PATCH 06/28] Cleanup Signed-off-by: Ben Sherman --- docs/channel.md | 2 +- docs/developer/plugins.md | 2 + docs/process.md | 4 +- docs/reference/feature-flags.md | 2 +- docs/reference/operator.md | 6 +- docs/reference/stdlib.md | 27 -------- docs/reference/syntax.md | 8 +-- docs/script.md | 111 +++++++++++++------------------- docs/sharing.md | 2 +- docs/workflow.md | 14 ++-- docs/working-with-files.md | 2 +- 11 files changed, 64 insertions(+), 116 deletions(-) diff --git a/docs/channel.md b/docs/channel.md index f63d123abc..11177015db 100644 --- a/docs/channel.md +++ b/docs/channel.md @@ -52,7 +52,7 @@ process foo { workflow { result = foo(1) - result.view { txt -> "Result: ${txt}" } + result.view { file -> "Result: ${file}" } } ``` diff --git a/docs/developer/plugins.md b/docs/developer/plugins.md index 47e551a565..bac0ccfd9d 100644 --- a/docs/developer/plugins.md +++ b/docs/developer/plugins.md @@ -7,6 +7,8 @@ This page describes how to create, test, and publish third-party plugins. The best way to get started with your own plugin is to refer to the [nf-hello](https://github.com/nextflow-io/nf-hello) repository. This repository provides a minimal plugin implementation with several examples of different extension points and instructions for building, testing, and publishing. 
+Plugins can be written in Java or Groovy. + The minimal dependencies are as follows: ```groovy diff --git a/docs/process.md b/docs/process.md index ee707c1049..ea98da9271 100644 --- a/docs/process.md +++ b/docs/process.md @@ -234,7 +234,7 @@ In the above example, `$USER` is treated as a Bash variable, while `!{str}` is t ### Native execution -Whereas the `script` block defines a script that is executed as a separate job, the `exec` block simply executes the code that it is given. +Whereas the `script` block defines a script that is executed as a separate job, the `exec` block simply executes the code that it is given, without launching a job. For example: @@ -876,7 +876,7 @@ process randomNum { workflow { numbers = randomNum() - numbers.view { v -> "Received: ${v.text}" } + numbers.view { file -> "Received: ${file.text}" } } ``` diff --git a/docs/reference/feature-flags.md b/docs/reference/feature-flags.md index 8af49f7d04..616f1adfef 100644 --- a/docs/reference/feature-flags.md +++ b/docs/reference/feature-flags.md @@ -1,4 +1,4 @@ -(feature-flags)= +(config-feature-flags)= # Feature flags diff --git a/docs/reference/operator.md b/docs/reference/operator.md index 591e858d40..0310224889 100644 --- a/docs/reference/operator.md +++ b/docs/reference/operator.md @@ -256,7 +256,7 @@ Channel .collectFile( name: 'result.fa', sort: { v -> v.size() } ) { v -> v.sequence } - .view { v -> v.text } + .view { fa -> fa.text } ``` :::{warning} @@ -748,10 +748,10 @@ Available options: : The zero-based index of each item to use as the matching key. Can also be a list of indices, e.g. `by: [0, 2]` (default: `[0]`). `failOnDuplicate` -: When `true`, an error is reported when the operator receives multiple items from the same channel with the same key (default: `false`). Value is set to `true` if {ref}`strict mode <feature-flags>` is enabled. +: When `true`, an error is reported when the operator receives multiple items from the same channel with the same key (default: `false`).
Value is set to `true` if {ref}`strict mode <config-feature-flags>` is enabled. `failOnMismatch` -: When `true`, an error is reported when the operator receives an item from one channel for which there no matching item from the other channel (default: `false`). Value is set to `true` if {ref}`strict mode <feature-flags>` is enabled. This option cannot be used with `remainder`. +: When `true`, an error is reported when the operator receives an item from one channel for which there no matching item from the other channel (default: `false`). Value is set to `true` if {ref}`strict mode <config-feature-flags>` is enabled. This option cannot be used with `remainder`. `remainder` : When `true`, unmatched items are emitted at the end, otherwise they are discarded (default: `false`). diff --git a/docs/reference/stdlib.md b/docs/reference/stdlib.md index 977dbf2387..febee3de55 100644 --- a/docs/reference/stdlib.md +++ b/docs/reference/stdlib.md @@ -56,9 +56,6 @@ The following constants are globally available in a Nextflow script: `params` : Map of workflow parameters specified in the config file or as command line options. -: :::{note} - As a best practice, parameters should only be used in the entry workflow. - ::: `projectDir` : Alias of `workflow.projectDir`. @@ -330,18 +327,6 @@ The following methods are available for a `Duration` object: `getSeconds()`, `toSeconds()` : Get the duration value in seconds (rounded down). -(stdlib-list)= - -## List - -TODO - -(stdlib-map)= - -## Map - -TODO - (stdlib-types-memoryunit)= ## MemoryUnit @@ -700,15 +685,3 @@ The following methods are available for splitting and counting the records in fi `splitText()` : Splits a text file into a list of lines. See the {ref}`operator-splittext` operator for available options.
- -(stdlib-set)= - -## Set - -TODO - -(stdlib-string)= - -## String - -TODO diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 15eab4970e..105f14ba20 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -63,7 +63,7 @@ The first line of a script can be a [shebang](https://en.wikipedia.org/wiki/Sheb ### Feature flag -A feature flag declaration is an assignment, where the target should be a valid {ref}`feature flag <feature-flags>` and the source should be a literal (e.g. number, string, boolean): +A feature flag declaration is an assignment, where the target should be a valid {ref}`feature flag <config-feature-flags>` and the source should be a literal (e.g. number, string, boolean): ```groovy nextflow.preview.topic = true @@ -629,8 +629,6 @@ Logic unconfined. Note that a slashy string cannot be empty because it would become a line comment. -Refer to {ref}`stdlib-string` for the set of available string operations. - ### Dynamic string Double-quoted strings can be interpolated using the `${}` placeholder, which can contain any expression: @@ -669,8 +667,6 @@ A list literal consists of square brackets with a comma-separated list of zero o [1, 2, 3] ``` -Refer to {ref}`stdlib-list` for the set of available list operations. - ### Map A map literal consists of square brackets with a comma-separated list of one or more key-value pairs, with the key and value separated by a colon: @@ -693,8 +689,6 @@ def x = 'foo' // -> ['foo': 1] ``` -Refer to {ref}`stdlib-map` for the set of available map operations. - ### Closure A closure, also known as an anonymous function, consists of a parameter list followed by zero or more statements, wrapped in curly braces: diff --git a/docs/script.md b/docs/script.md index b2b1a575a1..af4f721f07 100644 --- a/docs/script.md +++ b/docs/script.md @@ -19,38 +19,42 @@ Nextflow scripts have a maximum size of 64 KiB. To avoid this limit for large pi To print something is as easy as using one of the `print` or `println` methods.
```groovy -println "Hello, World!" +println 'Hello, World!' ``` The only difference between the two is that the `println` method implicitly appends a newline character to the printed string. ## Variables -To define a variable, simply assign a value to it: +Variables are declared using the `def` keyword: ```groovy -x = 1 -println x +def num = 1 +println num -x = new java.util.Date() -println x +def date = new java.util.Date() +println date -x = -3.1499392 +def x = -3.1499392 println x -x = false -println x +def flag = false +println flag -x = "Hi" -println x +def str = "Hi" +println str ``` +:::{warning} +In some cases, variables can be declared without `def`, but this practice is discouraged because it can lead to a {ref}`race condition `. +::: + ## Lists -A List object can be defined by placing the list items in square brackets: +Lists are defined using square brackets: ```groovy -myList = [1776, -1, 33, 99, 0, 928734928763] +def myList = [1776, -1, 33, 99, 0, 928734928763] ``` You can access a given item in the list with square-bracket notation (indexes start at 0): @@ -65,18 +69,14 @@ In order to get the length of the list use the `size` method: println myList.size() ``` -Learn more about lists: - -- [Groovy Lists tutorial](http://groovy-lang.org/groovy-dev-kit.html#Collections-Lists) -- [Groovy List API](http://docs.groovy-lang.org/latest/html/groovy-jdk/java/util/List.html) -- [Java List API](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/List.html) +Refer to the [Java](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/List.html) and [Groovy](http://docs.groovy-lang.org/latest/html/groovy-jdk/java/util/List.html) standard libraries for the set of available list operations. ## Maps Maps are used to store *associative arrays* (also known as *dictionaries*). 
They are unordered collections of heterogeneous, named data: ```groovy -scores = ["Brett": 100, "Pete": "Did not finish", "Andrew": 86.87934] +def scores = ["Brett": 100, "Pete": "Did not finish", "Andrew": 86.87934] ``` Note that each of the values stored in the map can be of a different type. `Brett` is an integer, `Pete` is a string, and `Andrew` is a floating-point number. @@ -98,7 +98,7 @@ scores["Cedric"] = 120 You can also use the `+` operator to add two maps together: ```groovy -new_scores = scores + ["Pete": 3, "Cedric": 120] +def new_scores = scores + ["Pete": 3, "Cedric": 120] ``` When adding two maps, the first map is copied and then appended with the keys from the second map. Any conflicting keys are overwritten by the second map. @@ -107,33 +107,14 @@ When adding two maps, the first map is copied and then appended with the keys fr Copying a map with the `+` operator is a safer way to modify maps in Nextflow, specifically when passing maps through channels. This way, a new instance of the map will be created, and any references to the original map won't be affected. ::: -Learn more about maps: - -- [Groovy Maps tutorial](http://groovy-lang.org/groovy-dev-kit.html#Collections-Maps) -- [Groovy Map API](http://docs.groovy-lang.org/latest/html/groovy-jdk/java/util/Map.html) -- [Java Map API](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Map.html) - -(script-multiple-assignment)= - -## Multiple assignment - -An array or a list object can used to assign to multiple variables at once: - -```groovy -(a, b, c) = [10, 20, 'foo'] -assert a == 10 && b == 20 && c == 'foo' -``` - -The three variables on the left of the assignment operator are initialized by the corresponding item in the list. - -Read more about [Multiple assignment](http://www.groovy-lang.org/semantics.html#_multiple_assignment) in the Groovy documentation. 
+Refer to the [Java](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Map.html) and [Groovy](http://docs.groovy-lang.org/latest/html/groovy-jdk/java/util/Map.html) standard libraries for the set of available map operations. ## Conditional execution One of the most important features of any programming language is the ability to execute different code under different conditions. The simplest way to do this is to use the `if` construct: ```groovy -x = Math.random() +def x = Math.random() if( x < 0.5 ) { println "You lost." } @@ -154,7 +135,7 @@ println 'he said "cheese!" again' Strings can be concatenated with `+`: ```groovy -a = "world" +def a = "world" print "hello " + a + "\n" ``` @@ -167,11 +148,11 @@ There is an important difference between single-quoted and double-quoted strings In practice, double-quoted strings can contain the value of an arbitrary variable by prefixing its name with the `$` character, or the value of any expression by using the `${expression}` syntax, similar to Bash/shell scripts: ```groovy -foxtype = 'quick' -foxcolor = ['b', 'r', 'o', 'w', 'n'] +def foxtype = 'quick' +def foxcolor = ['b', 'r', 'o', 'w', 'n'] println "The $foxtype ${foxcolor.join()} fox" -x = 'Hello' +def x = 'Hello' println '$x + $y' ``` @@ -187,7 +168,7 @@ $x + $y A block of text that span multiple lines can be defined by delimiting it with triple single or double quotes: ```groovy -text = """ +def text = """ hello there James how are you today? 
""" @@ -200,7 +181,7 @@ Like before, multi-line strings inside double quotes support variable interpolat As in Bash/shell scripts, terminating a line in a multi-line string with a `\` character prevents a newline character from separating that line from the one that follows: ```groovy -myLongCmdline = """ +def myLongCmdline = """ blastp \ -in $input_query \ -out $output_file \ @@ -208,7 +189,7 @@ myLongCmdline = """ -html """ -result = myLongCmdline.execute().text +def result = myLongCmdline.execute().text ``` In the preceding example, `blastp` and its `-in`, `-out`, `-db` and `-html` switches and their arguments are effectively a single line. @@ -243,7 +224,7 @@ assert 'foo' ==~ /foo/ // return TRUE assert 'foobar' ==~ /foo/ // return FALSE ``` -It is worth noting that the `~` operator creates a Java `Pattern` object from the given string, while the `=~` operator creates a Java `Matcher` object. +The `~` operator creates a [Pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html) from the given string, while the `=~` operator creates a [Matcher](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Matcher.html): ```groovy x = ~/abc/ @@ -255,20 +236,18 @@ println y.class // prints java.util.regex.Matcher ``` -Regular expression support is imported from Java. Java's regular expression language and API is documented in the [Pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html) class. - -You may also be interested in this post: [Groovy: Don't Fear the RegExp](https://web.archive.org/web/20170621185113/http://www.naleid.com/blog/2008/05/19/dont-fear-the-regexp). +Refer to the linked Java documentation for the available operations for these classes. 
### String replacement To replace pattern occurrences in a given string, use the `replaceFirst` and `replaceAll` methods: ```groovy -x = "colour".replaceFirst(/ou/, "o") +def x = "colour".replaceFirst(/ou/, "o") println x // prints: color -y = "cheesecheese".replaceAll(/cheese/, "nice") +def y = "cheesecheese".replaceAll(/cheese/, "nice") println y // prints: nicenice ``` @@ -276,7 +255,7 @@ println y To remove part of a string, simply replace it with a blank string: ```groovy -z = 'Hello World!'.replaceFirst(/(?i)\s+Wo\w+/, '') +def z = 'Hello World!'.replaceFirst(/(?i)\s+Wo\w+/, '') println z // prints: Hello! ``` @@ -288,8 +267,8 @@ You can match a pattern that includes groups. First create a matcher object with Here's how it works: ```groovy -programVersion = '2.7.3-beta' -m = programVersion =~ /(\d+)\.(\d+)\.(\d+)-?(.+)/ +def programVersion = '2.7.3-beta' +def m = programVersion =~ /(\d+)\.(\d+)\.(\d+)-?(.+)/ assert m[0] == ['2.7.3-beta', '2', '7', '3', 'beta'] assert m[0][1] == '2' @@ -301,8 +280,8 @@ assert m[0][4] == 'beta' Applying some syntactic sugar, you can do the same in just one line of code: ```groovy -programVersion = '2.7.3-beta' -(full, major, minor, patch, flavor) = (programVersion =~ /(\d+)\.(\d+)\.(\d+)-?(.+)/)[0] +def programVersion = '2.7.3-beta' +def (full, major, minor, patch, flavor) = (programVersion =~ /(\d+)\.(\d+)\.(\d+)-?(.+)/)[0] println full // 2.7.3-beta println major // 2 @@ -320,7 +299,7 @@ A closure is a function that can be used like a regular value. Typically, closur For example: ```groovy -square = { v -> v * v } +def square = { v -> v * v } ``` The above example defines a closure, which takes one parameter named `v` and returns the "square" of `v` (`v * v`), and assigns the closure to the variable `square`. 
@@ -370,9 +349,9 @@ Sudha = Kumari Closures can access variables outside of their scope: ```groovy -counts = ["China": 1, "India": 2, "USA": 3] +def counts = ["China": 1, "India": 2, "USA": 3] -result = 0 +def result = 0 counts.keySet().each { v -> result += counts[v] } @@ -383,21 +362,17 @@ println result A closure can also declare local variables that exist only for the lifetime of each closure invocation: ```groovy -result = 0 +def result = 0 myMap.keySet().each { v -> def count = myMap[v] result += count } ``` -:::{warning} -Local variables should be declared using `def`, otherwise they will be interpreted as global variables, which could lead to a {ref}`race condition `. -::: - While the `each` method is a convenient way to iterate through a collection and build up some result, a more idiomatic way to do this is to use the `inject` method: ```groovy -result = counts.values().inject { sum, v -> sum + v } +def result = counts.values().inject { sum, v -> sum + v } ``` This way, the closure is fully "self-contained" because it doesn't access or mutate any variables outside of its scope. diff --git a/docs/sharing.md b/docs/sharing.md index 9e8cc38d98..021d0d768c 100644 --- a/docs/sharing.md +++ b/docs/sharing.md @@ -117,7 +117,7 @@ For example, shebang definitions `#!/usr/bin/python` and `#!/usr/local/bin/pytho #### The `lib` directory -Any Groovy scripts or Java libraries (JARs) in the `lib` directory will be automatically loaded and made available to your pipeline scripts. The `lib` directory is a useful way to provide utility code or external libraries without cluttering the pipeline scripts. +Any Groovy scripts or JAR files in the `lib` directory will be automatically loaded and made available to your pipeline scripts. The `lib` directory is a useful way to provide utility code or external libraries without cluttering the pipeline scripts. 
### Data diff --git a/docs/workflow.md b/docs/workflow.md index 97b13f3da7..300ef45637 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -307,13 +307,15 @@ workflow { } ``` -The pipe chain shown above is equivalent to the following: +The above snippet defines a process named `foo` and invokes it with the input channel. The result is then piped to the {ref}`operator-map` operator, which converts each string to uppercase, and finally to the {ref}`operator-view` operator which prints it. + +The same code can also be written as: ```groovy workflow { - foo( Channel.of('Hello','Hola','Ciao') ) - .map { v -> v.toUpperCase() } - .view() + ch1 = Channel.of('Hello','Hola','Ciao') + ch2 = foo( ch1 ) + ch2.map { v -> v.toUpperCase() }.view() } ``` @@ -353,7 +355,9 @@ workflow { } ``` -The above example is equivalent to the following: +In the above snippet, the initial channel is piped to the {ref}`operator-map` operator, which reverses the string value. Then, the result is passed to the processes `foo` and `bar`, which are executed in parallel. Each process outputs a channel, and the two channels are combined using the {ref}`operator-mix` operator. Finally, the result is printed using the {ref}`operator-view` operator. + +The same code can also be written as: ```groovy workflow { diff --git a/docs/working-with-files.md b/docs/working-with-files.md index dd7f278327..09254ede7a 100644 --- a/docs/working-with-files.md +++ b/docs/working-with-files.md @@ -58,7 +58,7 @@ assert path.parent == '/some/path' ``` :::{tip} -When accessing an object property, any method that looks like `get*()` can also be accessed as a field. For example, `path.getName()` is equivalent to `path.name`, `path.getBaseName()` is equivalent to `path.baseName`, and so on. +When calling an object method, any method that looks like `get*()` can also be accessed as a field. For example, `path.getName()` is equivalent to `path.name`, `path.getBaseName()` is equivalent to `path.baseName`, and so on. 
::: See the {ref}`stdlib-types-path` reference for the list of available methods. From 69772bf79f6c7463acbe570843f9d876006e6c52 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 28 Sep 2024 14:29:11 -0500 Subject: [PATCH 07/28] Add section on script definitions Signed-off-by: Ben Sherman --- docs/module.md | 30 +++++++++++++++--------------- docs/script.md | 45 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/docs/module.md b/docs/module.md index 66e577985c..d28468d6c7 100644 --- a/docs/module.md +++ b/docs/module.md @@ -2,7 +2,7 @@ # Modules -In Nextflow, a **module** is a script that may contain functions, processes, and workflows (collectively referred to as *components*). A module can be included in other modules or pipeline scripts and even shared across workflows. +Nextflow scripts can include **definitions** (workflows, processes, and functions) from other scripts. When a script is included in this way, it is referred to as a **module**. Modules can be included by other modules or pipeline scripts and can even be shared across workflows. :::{note} Modules were introduced in DSL2. If you are still using DSL1, see the {ref}`dsl1-page` page to learn how to migrate your Nextflow pipelines to DSL2. @@ -10,7 +10,7 @@ Modules were introduced in DSL2. If you are still using DSL1, see the {ref}`dsl1 ## Module inclusion -A component defined in a module script can be imported into another Nextflow script using the `include` keyword. +Any definition in a module can be included into another Nextflow script using the `include` keyword. For example: @@ -23,7 +23,7 @@ workflow { } ``` -The above snippet imports a process named `foo`, defined in the module script, into the main execution context. This way, `foo` can be invoked in the `workflow` scope. +The above snippet imports a process named `foo`, defined in the module, into the main execution context. This way, `foo` can be invoked in the `workflow` scope. 
Nextflow implicitly looks for the script file `./some/module.nf`, resolving the path against the *including* script location. @@ -57,7 +57,7 @@ Module directories allow the use of module scoped binaries scripts. See [Module ## Multiple inclusions -A Nextflow script can include any number of modules, and an `include` statement can import any number of components from a module. Multiple components can be included from the same module by using the syntax shown below: +A Nextflow script can include any number of modules, and an `include` statement can import any number of definitions from a module. Multiple definitions can be included from the same module by using the syntax shown below: ```groovy include { foo; bar } from './some/module' @@ -73,7 +73,7 @@ workflow { ## Module aliases -When including a module component, it's possible to specify an *alias* with the `as` keyword. Aliasing allows you to avoid module name clashes, by assigning them different names in the including context. For example: +When including a definition from a module, it's possible to specify an *alias* with the `as` keyword. Aliasing allows you to avoid module name clashes, by assigning them different names in the including context. For example: ```groovy include { foo } from './some/module' @@ -85,7 +85,7 @@ workflow { } ``` -You can even include the same component multiple times under different names: +You can even include the same definition multiple times under different names: ```groovy include { foo; foo as bar } from './some/module' @@ -101,10 +101,10 @@ workflow { ## Module parameters :::{deprecated} 24.07.0-edge -As a best practice, parameters should be used in the entry workflow and passed to functions / processes / workflows as explicit inputs. +As a best practice, parameters should be used in the entry workflow and passed to workflows, processes, and functions as explicit inputs.
::: -A module script can define parameters using the same syntax as a Nextflow workflow script: +A module can define parameters using the same syntax as a Nextflow workflow script: ```groovy params.foo = 'Hello' @@ -184,9 +184,9 @@ Ciao world! ## Module templates -The module script can be defined in an external {ref}`template ` file. The template file can be placed in the `templates` directory where the module script is located. +Process script {ref}`templates ` can be included alongside a module in the `templates` directory. -For example, suppose we have a project L with a module script that defines two processes, P1 and P2, both of which use templates. The template files can be made available in the local `templates` directory: +For example, suppose we have a project L with a module that defines two processes, P1 and P2, both of which use templates. The template files can be made available in the local `templates` directory: ``` Project L @@ -210,15 +210,15 @@ Pipeline B └── main.nf ``` -With the possibility to keep the template files inside the project L, A and B can use the modules defined in L without any changes. A future project C would do the same, just cloning L (if not available on the system) and including its module script. +With the possibility to keep the template files inside the project L, A and B can use the modules defined in L without any changes. A future project C would do the same, just cloning L (if not available on the system) and including its module. Beside promoting the sharing of modules across pipelines, there are several advantages to keeping the module template under the script path: -1. module components are *self-contained*, -2. module components can be tested independently from the pipeline(s) that import them, -3. it is possible to create libraries of module components. +1. modules are *self-contained*, +2. modules can be tested independently from the pipeline(s) that import them, +3. it is possible to create libraries of modules. 
-Ultimately, having multiple template locations allows a more structured organization within the same project. If a project has several module components, and all of them use templates, the project could group module scripts and their templates as needed. For example: +Ultimately, having multiple template locations allows a more structured organization within the same project. If a project has several modules, and all of them use templates, the project could group module scripts and their templates as needed. For example: ``` baseDir diff --git a/docs/script.md b/docs/script.md index af4f721f07..31736e1e71 100644 --- a/docs/script.md +++ b/docs/script.md @@ -16,13 +16,13 @@ Nextflow scripts have a maximum size of 64 KiB. To avoid this limit for large pi ## Hello world -To print something is as easy as using one of the `print` or `println` methods. +To print something is as easy as using the `print` or `println` method: ```groovy println 'Hello, World!' ``` -The only difference between the two is that the `println` method implicitly appends a newline character to the printed string. +The only difference between the two is that `println` prints an extra newline. ## Variables @@ -46,7 +46,7 @@ println str ``` :::{warning} -In some cases, variables can be declared without `def`, but this practice is discouraged because it can lead to a {ref}`race condition `. +Variables can also be declared without `def` in many cases, but this practice is discouraged outside of simple code snippets because it can lead to a {ref}`race condition `. ::: ## Lists @@ -141,7 +141,7 @@ print "hello " + a + "\n" (string-interpolation)= -## String interpolation +### String interpolation There is an important difference between single-quoted and double-quoted strings: Double-quoted strings support variable interpolations, while single-quoted strings do not. 
@@ -163,7 +163,7 @@ The quick brown fox $x + $y ``` -## Multi-line strings +### Multi-line strings A block of text that span multiple lines can be defined by delimiting it with triple single or double quotes: @@ -376,3 +376,38 @@ def result = counts.values().inject { sum, v -> sum + v } ``` This way, the closure is fully "self-contained" because it doesn't access or mutate any variables outside of its scope. + +## Script definitions + +So far, we have been focusing on the basic building blocks of Nextflow code, like variables, lists, strings, and closures. + +In practice, however, Nextflow scripts are composed of *workflows*, *processes*, and *functions* (collectively known as *definitions*), and they can *include* definitions from other scripts. + +To transition a code snippet into a proper workflow script, simply wrap it in a `workflow` block: + +```groovy +workflow { + println 'Hello!' +} +``` + +This block is called the *entry workflow*. A script can only have one entry workflow, and it serves as the entrypoint when the script is executed. In fact, whenever a script contains only simple statements like `println 'Hello!'`, Nextflow simply treats it as an entry workflow! + +You can also break up code into functions, for example: + +```groovy +def sayHello() { + println 'Hello!' +} + +def add(a, b) { + a + b +} + +workflow { + sayHello() + println "2 + 2 = ${add(2, 2)}!" +} +``` + +Refer to {ref}`workflow-page`, {ref}`process-page`, and {ref}`module-page` to learn how to use these features in your Nextflow scripts. 
From 24e65e0b7b494be43a8bd51533a0aa7633a02a13 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 28 Sep 2024 16:02:25 -0500 Subject: [PATCH 08/28] Update config syntax docs Signed-off-by: Ben Sherman --- docs/config.md | 80 +++++++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/docs/config.md b/docs/config.md index 9b5388d857..c2a0552033 100644 --- a/docs/config.md +++ b/docs/config.md @@ -22,45 +22,63 @@ You can use the `-C ` option to use a single configuration file and ## Syntax -A Nextflow configuration file is a simple text file containing a set of properties defined using the syntax: +The Nextflow configuration syntax is based on the Nextflow script syntax. It is designed for setting configuration options in a declarative manner while also allowing for dynamic expressions where appropriate. Refer to {ref}`syntax-page`, particularly the sections on comments and expressions, for a full description of the available syntax. -```groovy -name = value -``` +A Nextflow config file consists of any number of *assignments*, *blocks*, and *includes*. Config files may also contain comments in the same manner as scripts. + +### Assignments -Please note, string values need to be wrapped in quotation characters while numbers and boolean values (`true`, `false`) do not. Also note that values are typed. This means that, for example, `1` is different from `'1'` — the former is interpreted as the number one, while the latter is interpreted as a string value. +A config assignment consists of a config option and an expression separated by an equals sign: -### Variables +```groovy +workDir = 'work' +docker.enabled = true +process.maxErrors = 10 +``` -Configuration properties can be used as variables in the configuration file by using the usual `$propertyName` or `${expression}` syntax. +A config option consists of one or more names separated by dots. The names other than the last one are known as *config scopes*. 
See {ref}`config-options` for the full set of config options organized by scope. -For example: +The right-hand side is typically a literal value such as a number, boolean, or string, but can be any expression, such as a dynamic string: ```groovy -propertyOne = 'world' -anotherProp = "Hello $propertyOne" -customPath = "$PATH:/my/app/folder" +params.helper_file = "${projectDir}/assets/helper.txt" ``` -Please note, the usual rules for {ref}`string-interpolation` are applied, thus a string containing a variable reference must be wrapped in double-quote chars instead of single-quote chars. +### Blocks -The same mechanism allows you to access environment variables defined in the hosting system. Any variable name not defined in the Nextflow configuration file(s) is interpreted to be a reference to an environment variable with that name. So, in the above example, the property `customPath` is defined as the current system `PATH` to which the string `/my/app/folder` is appended. +A config scope can also be specified as a block, in which case any number of config options within that scope can be assigned: -### Comments +```groovy +// dot syntax +docker.enabled = true +docker.runOptions = '-u $(id -u):$(id -g)' + +// block syntax +docker { + enabled = true + runOptions = '-u $(id -u):$(id -g)' +} +``` -You can use `//` to comment a single line, or `/* ... */` to comment a block on multiple lines: +As a result, deeply nested config options can be assigned in a variety of ways. For example, the following three assignments are equivalent: ```groovy -// single line comment +executor.retry.maxAttempt = 5 -/* - * multi-line comment - */ +executor { + retry.maxAttempt = 5 +} + +executor { + retry { + maxAttempt = 5 + } +} ``` ### Includes -A configuration file can include one or more configuration files using the keyword `includeConfig`. 
For example: +A config file can include any number of other config files using the `includeConfig` keyword: ```groovy process.executor = 'sge' @@ -70,7 +88,11 @@ process.memory = '10G' includeConfig 'path/foo.config' ``` -When a relative path is used, it is resolved against the actual location of the including file. +When a relative path is used, it is resolved against the location of the including file. + +:::{note} +Config includes can also be specified within config blocks. However, config files should only be included either at the top-level or in a [profile](#config-profiles), so that the included config file is valid both on its own and in the context in which it is included. +::: ## Constants @@ -87,22 +109,6 @@ The following constants are globally available in a Nextflow configuration file: `projectDir` : The directory where the main script is located. -## Config scopes - -Configuration settings can be organized in different scopes by dot prefixing the property names with a scope identifier, or grouping the properties in the same scope using the curly brackets notation. For example: - -```groovy -alpha.x = 1 -alpha.y = 'string value..' - -beta { - p = 2 - q = 'another string ..' -} -``` - -See {ref}`config-options` for the full list of config settings. - (config-params)= ## Parameters From e9faf58e7588b3a6ffaeaf4c8e285844d0142c3e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 30 Sep 2024 19:18:50 -0500 Subject: [PATCH 09/28] Apply suggestions from code review Co-authored-by: Christopher Hakkaart Signed-off-by: Ben Sherman --- docs/module.md | 10 +++++----- docs/process.md | 2 +- docs/reference/syntax.md | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/module.md b/docs/module.md index d28468d6c7..9d6fb8981c 100644 --- a/docs/module.md +++ b/docs/module.md @@ -10,7 +10,7 @@ Modules were introduced in DSL2. 
If you are still using DSL1, see the {ref}`dsl1 ## Module inclusion -Any definition in a module can be included into another Nextflow script using the `include` keyword. +You can include any definition from a module into a Nextflow script using the `include` keyword. For example: @@ -214,11 +214,11 @@ With the possibility to keep the template files inside the project L, A and B ca Beside promoting the sharing of modules across pipelines, there are several advantages to keeping the module template under the script path: -1. modules are *self-contained*, -2. modules can be tested independently from the pipeline(s) that import them, -3. it is possible to create libraries of modules. +1. Modules are self-contained +2. Modules can be tested independently from the pipeline(s) that import them +3. Modules can be made into libraries -Ultimately, having multiple template locations allows a more structured organization within the same project. If a project has several modules, and all of them use templates, the project could group module scripts and their templates as needed. For example: +Having multiple template locations enables a structured project organization. If a project has several modules, and they all use templates, the project could group module scripts and their templates as needed. For example: ``` baseDir diff --git a/docs/process.md b/docs/process.md index ea98da9271..d0b9f7b492 100644 --- a/docs/process.md +++ b/docs/process.md @@ -234,7 +234,7 @@ In the above example, `$USER` is treated as a Bash variable, while `!{str}` is t ### Native execution -Whereas the `script` block defines a script that is executed as a separate job, the `exec` block simply executes the code that it is given, without launching a job. +The `exec` block executes the given code without launching a job. 
For example: diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 105f14ba20..3d71322615 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -63,7 +63,7 @@ The first line of a script can be a [shebang](https://en.wikipedia.org/wiki/Sheb ### Feature flag -A feature flag declaration is an assignment, where the target should be a valid {ref}`feature flag ` and the source should be a literal (e.g. number, string, boolean): +A feature flag declaration is an assignment, where the target should be a valid {ref}`feature flag ` and the source should be a literal (i.e. number, string, boolean): ```groovy nextflow.preview.topic = true From 23e05b75750c75a80a1dcd03bfdaa77141e0e8c2 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 30 Sep 2024 19:20:01 -0500 Subject: [PATCH 10/28] Update docs/module.md Co-authored-by: Christopher Hakkaart Signed-off-by: Ben Sherman --- docs/module.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/module.md b/docs/module.md index 9d6fb8981c..bfdfbd6dc2 100644 --- a/docs/module.md +++ b/docs/module.md @@ -85,7 +85,7 @@ workflow { } ``` -You can even include the same definition multiple times under different names: +You can also include the same definition multiple times under different names: ```groovy include { foo; foo as bar } from './some/module' From 1a173a0aac0c3a3cefa2ec18a7dd2d4e67a9bea1 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 30 Sep 2024 19:21:37 -0500 Subject: [PATCH 11/28] Apply suggestions from code review Co-authored-by: Christopher Hakkaart Signed-off-by: Ben Sherman --- docs/script.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/script.md b/docs/script.md index 31736e1e71..954ab28543 100644 --- a/docs/script.md +++ b/docs/script.md @@ -16,13 +16,12 @@ Nextflow scripts have a maximum size of 64 KiB. 
To avoid this limit for large pi ## Hello world -To print something is as easy as using the `print` or `println` method: +You can use the `println` function to print to the console: ```groovy println 'Hello, World!' ``` -The only difference between the two is that `println` prints an extra newline. ## Variables From 74480fcadb2ee6f55697485f135acd191529d128 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 30 Sep 2024 18:51:23 -0500 Subject: [PATCH 12/28] Add remaining sections to syntax page Signed-off-by: Ben Sherman --- docs/reference/stdlib.md | 2 + docs/reference/syntax.md | 176 +++++++++++++++++++++++++++++++-------- 2 files changed, 145 insertions(+), 33 deletions(-) diff --git a/docs/reference/stdlib.md b/docs/reference/stdlib.md index febee3de55..36cb74d6e3 100644 --- a/docs/reference/stdlib.md +++ b/docs/reference/stdlib.md @@ -250,6 +250,8 @@ The following functions are available in Nextflow scripts: `workflow.onError( closure )` : Define an action to take if the workflow is terminated due to a runtime error or task failure. Refer to the `workflow` implicit variable to see which additional properties are available in the error handler. +(stdlib-default-imports)= + ## Default imports The following classes are imported by default in Nextflow scripts: diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 3d71322615..afc5df4e8e 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -535,9 +535,11 @@ The try block will be executed, and if an error is raised and matches the expect ## Expressions +An expression is a syntactic entity that produces a value. + ### Variable -A variable expression is a reference to a variable or other defined name: +A variable expression is a reference to a variable or other named value: ```groovy def x = 42 @@ -548,7 +550,7 @@ x ### Number -A number literal can be an integer or real (i.e. floating-point) number. 
Integers can specified in binary with `0b`, octal with `0`, or hexadecimal with `0x`. Real numbers can use scientific notation with the `e` or `E` prefix. Underscores can be used as thousands separators to make long numbers more readable. +A number literal can be an integer or floating-point number, and can be positive or negative. Integers can be specified in binary with `0b`, octal with `0`, or hexadecimal with `0x`. Floating-point numbers can use scientific notation with the `e` or `E` prefix. Underscores can be used as thousands separators to make long numbers more readable. ```groovy // integer @@ -585,7 +587,7 @@ x = 42 ``` :::{note} -Attempting to use a null value (e.g. index or property access) will cause a "null reference" error. It is best to avoid the use of `null` where possible. +Using a null value in certain expressions (e.g. the object of a property expression or method call) will lead to a "null reference" error. It is best to avoid the use of `null` where possible. ::: ### String @@ -661,7 +663,7 @@ blastp \ ### List -A list literal consists of square brackets with a comma-separated list of zero or more expressions: +A list literal consists of a comma-separated list of zero or more expressions, enclosed in square brackets: ```groovy [1, 2, 3] @@ -669,19 +671,19 @@ A list literal consists of square brackets with a comma-separated list of zero o ### Map -A map literal consists of square brackets with a comma-separated list of one or more key-value pairs, with the key and value separated by a colon: +A map literal consists of a comma-separated list of one or more *map entries*, where each map entry consists of a *key expression* and *value expression* separated by a colon, enclosed in square brackets: ```groovy [foo: 1, bar: 2, baz: 3] ``` -The empty map contains a single colon to distinguish it from an empty list: +An empty map is specified with a single colon to distinguish it from an empty list: ```groovy [:] ``` -Both the key and value can be any 
expression. Identifier keys are treated as string literals (i.e. the quotes can be omitted). To reference a variable as a key, simply wrap it in parentheses: +Both the key and value can be any expression. Identifier keys are treated as string literals (i.e. the quotes can be omitted). A variable can be used as a key by enclosing it in parentheses: ```groovy def x = 'foo' @@ -691,7 +693,7 @@ def x = 'foo' ### Closure -A closure, also known as an anonymous function, consists of a parameter list followed by zero or more statements, wrapped in curly braces: +A closure, also known as an anonymous function, consists of a parameter list followed by zero or more statements, enclosed in curly braces: ```groovy { a, b -> a + b } @@ -731,7 +733,25 @@ println result // -> 14 ``` -Refer to the {ref}`standard library ` and {ref}`operator ` reference pages for examples of closures being used in practice. +Refer to the {ref}`standard library ` and {ref}`operator ` reference pages for examples of closures being used in practice. + +### Index expression + +An index expression consists of a *left expression* and a *right expression*, with the right expression enclosed in square brackets: + +```groovy +myList[0] +``` + +### Property expression + +A property expression consists of an *object expression* and a *property*, separated by a dot: + +```groovy +file.text +``` + +The property must be an identifier or string literal. ### Function call @@ -741,12 +761,34 @@ A function call consists of a name and argument list: printf('Hello %s!\n', 'World') ``` -TODO: object expression, named args +A *method call* consists of an *object expression* and a function call separated by a dot: + +```groovy +myList.size() +``` + +The argument list may contain any number of *positional arguments* and *named arguments*: + +```groovy +file('hello.txt', checkIfExists: true) +``` + +The named arguments are collected into a map and provided as the first positional argument to the function. 
Thus the above function call can be rewritten as: + +```groovy +file([checkIfExists: true], 'hello.txt') +``` + +The argument name must be an identifier or string literal. When the function call is also an [expression statement](#expression-statement) and there is at least one argument, the parentheses can be omitted: ```groovy +// positional args printf 'Hello %s!\n', 'World' + +// positional and named args +file 'hello.txt', checkIfExists: true ``` If the last argument is a closure, it can be specified outside of the parentheses: @@ -764,49 +806,90 @@ If the last argument is a closure, it can be specified outside of the parenthese ### Constructor call -TODO - -### Unary expressions - -TODO - -### Binary expressions - -TODO +A constructor call consists of the `new` keyword followed by a *type name* and an argument list enclosed in parentheses: -**Regex finder** +```groovy +new java.util.Date() +``` -The `=~` operator checks whether a string contains a pattern: +If the type is implicitly available in the script, the *fully-qualified type name* can be elided to the *simple type name*: ```groovy -assert 'foo' =~ /foo/ // true -assert 'foobar' =~ /foo/ // true +new Date() ``` -**Regex matcher** +Refer to {ref}`stdlib-default-imports` for the set of types which are implicitly available in Nextflow scripts. 
-The `==~` operator checks whether a string matches a pattern exactly: +### Unary expressions + +A unary expression consists of a *unary operator* followed by an expression: ```groovy -assert 'foo' ==~ /foo/ // true -assert 'foobar' ==~ /foo/ // false +!(2 + 2 == 4) ``` -**Index expression** +The following unary operators are available: -TODO +- `~`: bitwise NOT +- `!`: logical NOT +- `+`: unary plus +- `-`: unary minus -**Property expression** +### Binary expressions -TODO +A binary expression consists of a *left expression* and a *right expression* separated by a *binary operator*: + +```groovy +2 + 2 +``` + +The following binary operators are available: + +- `**`: power (i.e. exponentiation) +- `*`: multiplication +- `/`: division +- `%`: remainder (i.e. modulo) +- `+`: addition +- `-`: subtraction +- `<<`: left shift +- `>>`: right shift +- `>>>`: unsigned right shift +- `..`: inclusive range +- `..<`: right-exclusive range +- `as`: type cast +- `instanceof`: type relation +- `!instanceof`: negated type relation +- `<`: less than +- `>`: greater than +- `<=`: less than or equals +- `>=`: greater than or equals +- `in`: membership +- `!in`: negated membership +- `==`: equals +- `!=`: negated equals +- `<=>`: spaceship (i.e. three-way comparison) +- `=~`: regex find +- `==~`: regex match +- `&`: bitwise AND +- `^`: bitwise XOR (exclusive or) +- `|`: bitwise OR +- `&&`: logical AND +- `||`: logical OR +- `?:`: elvis (i.e. short ternary) ### Ternary expression -TODO +A ternary expression consists of a *test expression*, a *true expression*, and a *false expression*, separated by a question mark and a colon: + +```groovy +x % 2 == 0 ? 'x is even!' : 'x is odd!' +``` + +If the test expression is true, the true expression is evaluated, otherwise the false expression is evaluated. 
### Parentheses -Any expression can be wrapped in a set of parentheses to enforce a particular order of operations: +Any expression can be enclosed in parentheses: ```groovy 1 + 2 * 3 @@ -816,6 +899,33 @@ Any expression can be wrapped in a set of parentheses to enforce a particular or // -> 3 * 3 -> 9 ``` +### Precedence + +Compound expressions are evaluated in the following order: + +- parentheses +- property expressions +- function calls +- index expressions +- `~`, `!` +- `**` +- `+`, `-` (unary) +- `*`, `/`, `%` +- `+`, `-` (binary) +- `<<`, `>>>`, `>>`, `..`, `..<` +- `as` +- `instanceof`, `!instanceof` +- `<`, `>`, `<=`, `>=`, `in`, `!in` +- `==`, `!=`, `<=>` +- `=~`, `==~` +- `&` +- `^` +- `|` +- `&&` +- `||` +- `?:` (ternary) +- `?:` (elvis) + ## Deprecations The following legacy features were excluded from this page because they are deprecated: From 4aee05af545db334f80b07a625468a04c208cb57 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 1 Oct 2024 08:59:31 -0500 Subject: [PATCH 13/28] Apply suggestions from code review Co-authored-by: Christopher Hakkaart Signed-off-by: Ben Sherman --- docs/config.md | 12 +++++++----- docs/developer/plugins.md | 2 +- docs/dsl1.md | 2 +- docs/overview.md | 4 ++-- docs/process.md | 8 ++++---- docs/script.md | 18 +++++++++--------- docs/workflow.md | 16 ++++++++-------- 7 files changed, 32 insertions(+), 30 deletions(-) diff --git a/docs/config.md b/docs/config.md index c2a0552033..f1d6e2ffd1 100644 --- a/docs/config.md +++ b/docs/config.md @@ -22,9 +22,11 @@ You can use the `-C ` option to use a single configuration file and ## Syntax -The Nextflow configuration syntax is based on the Nextflow script syntax. It is designed for setting configuration options in a declarative manner while also allowing for dynamic expressions where appropriate. Refer to {ref}`syntax-page`, particularly the sections on comments and expressions, for a full description of the available syntax. 
+The Nextflow configuration syntax is based on the Nextflow script syntax. It is designed for setting configuration options in a declarative manner while also allowing for dynamic expressions where appropriate. -A Nextflow config file consists of any number of *assignments*, *blocks*, and *includes*. Config files may also contain comments in the same manner as scripts. +A Nextflow config file may consist of any number of *assignments*, *blocks*, and *includes*. Config files may also contain comments in the same manner as scripts. + +See {ref}`syntax-page` for more information about the Nextflow script syntax. ### Assignments @@ -46,7 +48,7 @@ params.helper_file = "${projectDir}/assets/helper.txt" ### Blocks -A config scope can also be specified as a block, in which case any number of config options within that scope can be assigned: +A config scope can also be specified as a block, allowing multiple configuration options to be set within that block. For example: ```groovy // dot syntax @@ -60,7 +62,7 @@ docker { } ``` -As a result, deeply nested config options can be assigned in a variety of ways. For example, the following three assignments are equivalent: +As a result, deeply nested config options can be assigned in various ways. For example, the following three assignments are equivalent: ```groovy executor.retry.maxAttempt = 5 @@ -91,7 +93,7 @@ includeConfig 'path/foo.config' When a relative path is used, it is resolved against the location of the including file. :::{note} -Config includes can also be specified within config blocks. However, config files should only be included either at the top-level or in a [profile](#config-profiles), so that the included config file is valid both on its own and in the context in which it is included. +Config includes can also be specified within config blocks. 
However, config files should only be included at the top level or in a [profile](#config-profiles) so that the included config file is valid on its own and in the context in which it is included. ::: ## Constants diff --git a/docs/developer/plugins.md b/docs/developer/plugins.md index bac0ccfd9d..336fd449ae 100644 --- a/docs/developer/plugins.md +++ b/docs/developer/plugins.md @@ -153,7 +153,7 @@ Refer to the source code of Nextflow's built-in executors to see how to implemen :::{versionadded} 22.09.0-edge ::: -Plugins can define custom functions, which can then be included into Nextflow pipelines. +Plugins can define custom functions, which can then be included in Nextflow pipelines. To implement a custom function, create a class in your plugin that extends the `PluginExtensionPoint` class, and implement your function with the `Function` annotation: diff --git a/docs/dsl1.md b/docs/dsl1.md index 8ec246c970..17e7f68f73 100644 --- a/docs/dsl1.md +++ b/docs/dsl1.md @@ -88,7 +88,7 @@ In DSL1, the entire Nextflow pipeline must be defined in a single file (e.g. `ma DSL2 introduces the concept of "module scripts" (or "modules" for short), which are Nextflow scripts that can be "included" by other scripts. While modules are not essential to migrating to DSL2, nor are they mandatory in DSL2 by any means, modules can help you organize a large pipeline into multiple smaller files, and take advantage of modules created by others. Check out the {ref}`module-page` to get started. :::{note} -With DSL2, Nextflow scripts cannot exceed 64KB in size, so if your DSL1 script is very large, you may need to split your script into modules anyway to avoid this limit. +DSL2 scripts cannot exceed 64 KB in size. Large DSL1 scripts may need to be split into modules to avoid this limit. 
::: ## Deprecations diff --git a/docs/overview.md b/docs/overview.md index 9a9a6f4da2..2bfb585202 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -95,9 +95,9 @@ Read the {ref}`executor-page` to learn more about the Nextflow executors. ## Scripting language -Nextflow is a workflow language, based on [Java](https://en.wikipedia.org/wiki/Java_(programming_language)) and [Groovy](https://groovy-lang.org/), which is designed to make it as simple as possible to write scalable and reproducible pipelines. In most cases, users can leverage their existing programming skills to develop Nextflow pipelines, without the steep learning curve that usually comes with a new programming language. +Nextflow is a workflow language based on [Java](https://en.wikipedia.org/wiki/Java_(programming_language)) and [Groovy](https://groovy-lang.org/). It is designed to simplify writing scalable and reproducible pipelines. In most cases, users can leverage their existing programming skills to develop Nextflow pipelines without the steep learning curve that usually comes with a new programming language. -Read the {ref}`script-page` page to learn about the Nextflow scripting language. +See {ref}`script-page` for more information about the Nextflow scripting language. ## Configuration options diff --git a/docs/process.md b/docs/process.md index d0b9f7b492..9554575b9d 100644 --- a/docs/process.md +++ b/docs/process.md @@ -2,7 +2,7 @@ # Processes -In Nextflow, a **process** is a function that is specialized for executing scripts in a scalable and portable manner. +In Nextflow, a **process** is a specialized function for executing scripts in a scalable and portable manner. Here is an example process definition: @@ -18,7 +18,7 @@ process sayHello { } ``` -Refer to {ref}`syntax-process` in the syntax reference for a full description of the process syntax. +See {ref}`syntax-process` for a full description of the process syntax. 
(process-script)= @@ -123,9 +123,9 @@ Since the actual location of the interpreter binary file can differ across platf ### Conditional scripts -So far, the `script` block has just been a string, but in reality, the `script` block is like a function that returns a string. This means that you can write arbitrary code to determine the script, as long as the final statement is a string (remember that the `return` keyword is optional). +The `script` block is like a function that returns a string. This means that you can write arbitrary code to determine the script, as long as the final statement is a string. -For example, you can use if-else statements to produce a different script based on the task inputs. The only difference here is that you must explicitly declare the `script` guard, whereas before it was not required. Here is an example: +If-else statements based on task inputs can be used to produce a different script. For example: ```groovy mode = 'tcoffee' diff --git a/docs/script.md b/docs/script.md index 954ab28543..850310096b 100644 --- a/docs/script.md +++ b/docs/script.md @@ -2,9 +2,9 @@ # Scripts -Nextflow is a workflow language that runs on the Java virtual machine (JVM). Nextflow's syntax is very similar to [Groovy](https://groovy-lang.org/), a scripting language for the JVM, but Nextflow is specialized for writing computational pipelines in a declarative manner. Refer to the {ref}`syntax-page` page for a full description of the Nextflow language. +Nextflow is a workflow language that runs on the Java virtual machine (JVM). Nextflow's syntax is very similar to [Groovy](https://groovy-lang.org/), a scripting language for the JVM, but Nextflow is specialized for writing computational pipelines in a declarative manner. See {ref}`syntax-page` for a full description of the Nextflow language. -Nextflow scripts can also make full use of the Java and Groovy standard libraries; see the {ref}`stdlib-page` page for more information. 
+Nextflow scripts can also make full use of the Java and Groovy standard libraries. See {ref}`stdlib-page` for more information. :::{warning} Nextflow uses UTF-8 as the default character encoding for source files. Make sure to use UTF-8 encoding when editing Nextflow scripts with your preferred text editor. @@ -106,7 +106,7 @@ When adding two maps, the first map is copied and then appended with the keys fr Copying a map with the `+` operator is a safer way to modify maps in Nextflow, specifically when passing maps through channels. This way, a new instance of the map will be created, and any references to the original map won't be affected. ::: -Refer to the [Java](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Map.html) and [Groovy](http://docs.groovy-lang.org/latest/html/groovy-jdk/java/util/Map.html) standard libraries for the set of available map operations. +See the [Java](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Map.html) and [Groovy](http://docs.groovy-lang.org/latest/html/groovy-jdk/java/util/Map.html) standard libraries for the set of available map operations. ## Conditional execution @@ -194,7 +194,7 @@ def result = myLongCmdline.execute().text In the preceding example, `blastp` and its `-in`, `-out`, `-db` and `-html` switches and their arguments are effectively a single line. :::{warning} -When using backslashes to continue a multi-line command, make sure to not put any spaces after the backslash, otherwise it will be interpreted as an escaped space instead of a backslash, which will make your script incorrect. It will also print this warning: +Do not put any spaces after the backslash when using backslashes to continue a multi-line command. Spaces after the backslash will be interpreted as an escaped space and will make your script incorrect. 
It will also print this warning: ``` unknown recognition error type: groovyjarjarantlr4.v4.runtime.LexerNoViableAltException @@ -235,7 +235,7 @@ println y.class // prints java.util.regex.Matcher ``` -Refer to the linked Java documentation for the available operations for these classes. +See the linked Java documentation for the available operations for these classes. ### String replacement @@ -301,15 +301,15 @@ For example: def square = { v -> v * v } ``` -The above example defines a closure, which takes one parameter named `v` and returns the "square" of `v` (`v * v`), and assigns the closure to the variable `square`. +The above example defines a closure, which takes one parameter named `v` and returns the "square" of `v` (`v * v`). The closure is assigned to the variable `square`. -Now we can call `square` like a function: +`square` can now be called like a function: ```groovy println square(9) ``` -which prints `81`. +The above example prints `81`. The main use case for a closure, however, is as an argument to a higher-order function: @@ -409,4 +409,4 @@ workflow { } ``` -Refer to {ref}`workflow-page`, {ref}`process-page`, and {ref}`module-page` to learn how to use these features in your Nextflow scripts. +See {ref}`workflow-page`, {ref}`process-page`, and {ref}`module-page` for more information about how to use these features in your Nextflow scripts. diff --git a/docs/workflow.md b/docs/workflow.md index 300ef45637..4c3f85e9e3 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -27,17 +27,17 @@ workflow { } ``` -The above example defines a workflow named `my_workflow`, that can be called from another workflow as `my_workflow()`, just like any other function or process. Both `foo` and `bar` could be any other process or workflow. +The above example defines a workflow named `my_workflow` which can be called from another workflow as `my_workflow()`. Both `foo` and `bar` could be any other process or workflow. 
-Refer to {ref}`syntax-workflow` in the syntax reference for a full description of the workflow syntax. +See {ref}`syntax-workflow` for a full description of the workflow syntax. :::{note} -Workflows were introduced in DSL2. If you are still using DSL1, see the {ref}`dsl1-page` page to learn how to migrate your Nextflow pipelines to DSL2. +Workflows were introduced in DSL2. If you are still using DSL1, see {ref}`dsl1-page` for more information about how to migrate your Nextflow pipelines to DSL2. ::: ## Using parameters -Parameters can be defined in the script with a default value, which can be overridden by params from the CLI, params file, or config file. They can then be used by the entry workflow: +Parameters can be defined in the script with a default value that can be overridden from the CLI, params file, or config file. Params should only be used by the entry workflow: ```groovy params.data = '/some/data/file' @@ -111,7 +111,7 @@ workflow my_workflow { The result of the above workflow can be accessed using `my_workflow.out.my_data`. :::{note} -When multiple output channels are declared, every output must be assigned to a name. +Every output must be assigned to a name when multiple outputs are declared. ::: (workflow-process-invocation)= @@ -159,7 +159,7 @@ workflow { } ``` -Processes and workflows have a few extra rules around how they are called: +Processes and workflows have a few extra rules for how they can be called: - Processes and workflows can only be called by workflows @@ -230,11 +230,11 @@ workflow { ``` :::{note} -Process named outputs are defined using the `emit` option on a process output. See {ref}`naming process outputs ` for more details. +Process named outputs are defined using the `emit` option on a process output. See {ref}`naming process outputs ` for more information. ::: :::{note} -Process and workflow outputs can also be accessed by index (e.g. 
`foo.out[0]`, `foo.out[1]`, etc), but this practice is deprecated and will not be supported in the future. Access multiple outputs by name instead. +Process and workflow outputs can also be accessed by index (e.g., `foo.out[0]`, `foo.out[1]`, etc.). However, this practice is deprecated and will not be supported in the future. Multiple outputs should instead be accessed by name. ::: Workflows can be composed in the same way: From e0737009d71ecb46f20f0c8a212af1df52b60138 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 30 Sep 2024 23:13:35 -0500 Subject: [PATCH 14/28] minor edits Signed-off-by: Ben Sherman --- docs/snippets/collectfile-closure.nf | 6 +++--- docs/snippets/collectfile.nf | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/snippets/collectfile-closure.nf b/docs/snippets/collectfile-closure.nf index 3bec5e06fa..eeb949ec31 100644 --- a/docs/snippets/collectfile-closure.nf +++ b/docs/snippets/collectfile-closure.nf @@ -2,7 +2,7 @@ Channel.of('Hola', 'Ciao', 'Hello', 'Bonjour', 'Halo') .collectFile { item -> [ "${item[0]}.txt", item + '\n' ] } - .subscribe { txt -> - println "File '${txt.name}' contains:" - println txt.text + .subscribe { file -> + println "File '${file.name}' contains:" + println file.text } \ No newline at end of file diff --git a/docs/snippets/collectfile.nf b/docs/snippets/collectfile.nf index 00dc5342b2..ce931c87b3 100644 --- a/docs/snippets/collectfile.nf +++ b/docs/snippets/collectfile.nf @@ -1,6 +1,6 @@ Channel.of('alpha', 'beta', 'gamma') .collectFile(name: 'sample.txt', newLine: true) - .subscribe { txt -> - println "Entries are saved to file: $txt" - println "File content is: ${txt.text}" + .subscribe { file -> + println "Entries are saved to file: $file" + println "File content is: ${file.text}" } \ No newline at end of file From 6a66f71ea91ab6974fb93053395afb1f66c4b222 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 3 Oct 2024 16:17:02 -0500 Subject: [PATCH 15/28] Apply suggestions from 
review Signed-off-by: Ben Sherman --- docs/config.md | 4 ++-- docs/process.md | 2 +- docs/workflow.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/config.md b/docs/config.md index f1d6e2ffd1..a5acbe6bb6 100644 --- a/docs/config.md +++ b/docs/config.md @@ -38,9 +38,9 @@ docker.enabled = true process.maxErrors = 10 ``` -A config option consists of one or more names separated by dots. The names other than the last one are known as *config scopes*. See {ref}`config-options` for the full set of config options organized by scope. +A config option consists of an *option name* prefixed by any number of *scopes* separated by dots. Config scopes are used to group related config options. See {ref}`config-options` for the full set of config options. -The right-hand side is typically a literal value such as a number, boolean, or string, but can be any expression, such as a dynamic string: +The expression is typically a literal value such as a number, boolean, or string. However, any expression can be used: ```groovy params.helper_file = "${projectDir}/assets/helper.txt" diff --git a/docs/process.md b/docs/process.md index 9554575b9d..7897e6da44 100644 --- a/docs/process.md +++ b/docs/process.md @@ -478,7 +478,7 @@ In this case, `x.name` returns the file name with the parent directory (e.g. `my ### Multiple input files -A `path` input can also accept a collection of files instead of a single value. In this case, the input variable will be a list, and you can use it as such. +A `path` input can also accept a collection of files instead of a single value. In this case, the input variable will be a list. When the input has a fixed file name and a collection of files is received by the process, the file name will be appended with a numerical suffix representing its ordinal position in the list. 
For example: diff --git a/docs/workflow.md b/docs/workflow.md index 4c3f85e9e3..ecd140ac32 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -4,7 +4,7 @@ In Nextflow, a **workflow** is a function that is specialized for composing processes and dataflow logic (i.e. channels and operators). -A script can define a workflow without a name, known as the *entry workflow*, which is the entrypoint of the script: +A script can define up to one *entry workflow*, which does not have a name and serves as the entrypoint of the script: ```groovy workflow { From 0d9159409cb9e3c6a0d83fc8809e690711ea8719 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 3 Oct 2024 16:49:59 -0500 Subject: [PATCH 16/28] Apply suggestions from code review Signed-off-by: Ben Sherman --- docs/reference/syntax.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index afc5df4e8e..60fcb981ca 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -661,6 +661,13 @@ blastp \ """ ``` +Single-quoted strings are not interpolated: + +```groovy +println 'Hello, ${names.join(" and ")}!' +// -> Hello, ${names.join(" and ")}! +``` + ### List A list literal consists of a comma-separated list of zero or more expressions, enclosed in square brackets: @@ -882,7 +889,7 @@ The following binary operators are available: A ternary expression consists of a *test expression*, a *true expression*, and a *false expression*, separated by a question mark and a colon: ```groovy -x % 2 == 0 ? 'x is even!' : 'x is odd!' +println x % 2 == 0 ? 'x is even!' : 'x is odd!' ``` If the test expression is true, the true expression is evaluated, otherwise the false expression is evaluated. 
From 3d29edbe67ee771308db3612713e16aba32325c2 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 8 Oct 2024 08:21:44 -0500 Subject: [PATCH 17/28] Update description of expressions Signed-off-by: Ben Sherman --- docs/reference/syntax.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 60fcb981ca..0bf29acadb 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -535,7 +535,9 @@ The try block will be executed, and if an error is raised and matches the expect ## Expressions -An expression is a syntactic entity that produces a value. +An expression represents a value. A *literal* value is an expression whose value is known at compile-time, such as a number, string, or boolean. All other expressions must be evaluated at run-time. + +Every expression has a *type*, which may be resolved at compile-time or run-time. ### Variable From 71236c9028b27184981194eb2b24aa2c984ce2bc Mon Sep 17 00:00:00 2001 From: Christopher Hakkaart Date: Wed, 9 Oct 2024 13:24:25 +0200 Subject: [PATCH 18/28] Suggestions for syntax page Signed-off-by: Christopher Hakkaart --- docs/reference/syntax.md | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 0bf29acadb..7a25659699 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -73,13 +73,24 @@ nextflow.preview.topic = true An include declaration consists of an *include source* and one or more *include clauses*: +The include source should be a string literal and should refer to either a local path (e.g. `./module.nf`) or a plugin (e.g. `plugin/nf-hello`): + ```groovy -include { foo ; bar as baz } from './some/module' +include { foo } from './some/module' ``` -The include source should be a string literal and should refer to either a local path (e.g. `./module.nf`) or a plugin (e.g. `plugin/nf-hello`). 
+Each include clause should specify a name, and may also specify an *alias*: -Each include clause should specify a name, and may also specify an *alias*. In the example above, `bar` is included under the alias `baz`. +```groovy +include { bar as baz } from './some/module' +``` + +Include clauses may include multiple names: + +```groovy +// together +include { foo ; bar as baz } from './some/module' +``` Include clauses can be separated by newlines or semi-colons, or they can be specified as separate includes: From 505c98e4e11c5ceb4115ff31bee77a34de6e57e6 Mon Sep 17 00:00:00 2001 From: Christopher Hakkaart Date: Wed, 9 Oct 2024 13:28:10 +0200 Subject: [PATCH 19/28] Revert "Suggestions for syntax page" This reverts commit 597e06e410a79c20881936ecf7e994b382464de5. Signed-off-by: Christopher Hakkaart --- docs/reference/syntax.md | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 7a25659699..0bf29acadb 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -73,24 +73,13 @@ nextflow.preview.topic = true An include declaration consists of an *include source* and one or more *include clauses*: -The include source should be a string literal and should refer to either a local path (e.g. `./module.nf`) or a plugin (e.g. `plugin/nf-hello`): - -```groovy -include { foo } from './some/module' -``` - -Each include clause should specify a name, and may also specify an *alias*: - ```groovy -include { bar as baz } from './some/module' +include { foo ; bar as baz } from './some/module' ``` -Include clauses may include multiple names: +The include source should be a string literal and should refer to either a local path (e.g. `./module.nf`) or a plugin (e.g. `plugin/nf-hello`). -```groovy -// together -include { foo ; bar as baz } from './some/module' -``` +Each include clause should specify a name, and may also specify an *alias*. 
In the example above, `bar` is included under the alias `baz`. Include clauses can be separated by newlines or semi-colons, or they can be specified as separate includes: From 06457465129dd6d3e389df30817128f1628969c8 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 17 Oct 2024 16:52:40 -0500 Subject: [PATCH 20/28] Apply suggestions from review Signed-off-by: Ben Sherman --- docs/reference/syntax.md | 105 ++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 46 deletions(-) diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 0bf29acadb..1863afd6b8 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -6,11 +6,15 @@ This page provides a comprehensive description of the Nextflow language. ## Comments -Nextflow uses Java-style comments: `//` for a line comment, and `/* ... */` for a block comment: +A line comment starts with `//` and includes the rest of the line. ```groovy println 'Hello world!' // line comment +``` + +A block comment starts with `/*` and includes all subsequent characters up to the first `*/`. +```groovy /* * block comment */ @@ -31,11 +35,9 @@ A Nextflow script may contain the following top-level declarations: - Enum types - Output block -These declarations are in turn composed of statements and expressions. +Script declarations are in turn composed of statements and expressions. -Alternatively, a script may contain one or more [statements](#statements), as long as there are no top-level declarations. In this case, the entire script will be treated as an entry workflow. - -For example, the following script: +A script may contain one or more [statements](#statements), if there are no top-level declarations. In this case, the entire script will be treated as an entry workflow. For example: ```groovy println 'Hello world!' @@ -50,7 +52,7 @@ workflow { ``` :::{warning} -Top-level declarations and statements can not be mixed at the same level. 
If your script has top-level declarations, all statements must be contained within top-level declarations such as the entry workflow. +Statements and top-level declarations can not be mixed at the same level. If your script has top-level declarations, all statements must be contained within top-level declarations such as the entry workflow. ::: ### Shebang @@ -74,23 +76,27 @@ nextflow.preview.topic = true An include declaration consists of an *include source* and one or more *include clauses*: ```groovy -include { foo ; bar as baz } from './some/module' +include { foo as bar } from './some/module' ``` -The include source should be a string literal and should refer to either a local path (e.g. `./module.nf`) or a plugin (e.g. `plugin/nf-hello`). +The include source should be a string literal and should refer to either a local path (e.g. `./module.nf`) or a plugin (e.g. `plugin/nf-hello`). Each include clause should specify a name, and may also specify an *alias*. In the example above, `foo` is included under the alias `bar`. -Each include clause should specify a name, and may also specify an *alias*. In the example above, `bar` is included under the alias `baz`. - -Include clauses can be separated by newlines or semi-colons, or they can be specified as separate includes: +Include clauses can be separated by semi-colons or newlines: ```groovy +// semi-colons +include { foo ; bar as baz } from './some/module' + // newlines include { foo bar as baz } from './some/module' +``` + +Include clauses can also be specified as separate includes: -// separate includes +```groovy include { foo } from './some/module' include { bar as baz } from './some/module' ``` @@ -115,7 +121,9 @@ Parameters supplied via command line options, params files, and config files tak ### Workflow -A workflow consists of a name and a body. 
The workflow body consists of a *main* section, with additional sections for *takes*, *emits*, and *publishers* (shown later): +A workflow can be a *named workflow* or an *entry workflow*. + +A *named workflow* consists of a name and a body, and may consist of a *take*, *main*, *emit*, and *publish* section: ```groovy workflow greet { @@ -130,7 +138,7 @@ workflow greet { } ``` -- The take, emit, and publish sections are optional. If they are not specified, the `main:` section label can be omitted. +- The take, emit, and publish sections are optional. The `main:` section label can be omitted if they are not specified. - The take section consists of one or more parameters. @@ -138,7 +146,10 @@ workflow greet { - The emit section consists of one or more *emit statements*. An emit statement can be a [variable name](#variable), an [assignment](#assignment), or an [expression statement](#expression-statement). If an emit statement is an expression statement, it must be the only emit. -An alternative workflow form, known as an *entry workflow*, has no name and may only define a main and publish section: +- The publish section can be specified but is intended to be used in the entry workflow (see below). + + +An *entry workflow* has no name and may consist of a *main* and *publish* section: ```groovy workflow { @@ -158,8 +169,6 @@ workflow { - The publish section consists of one or more *publish statements*. A publish statement is a [right-shift expression](#binary-expressions), where the left-hand side is an expression that refers to a value in the workflow body, and the right-hand side is an expression that returns a string. -- The publish section can also be specified in named workflows as a convenience, but is intended mainly to be used in the entry workflow. - In order for a script to be executable, it must either define an entry workflow or use the implicit workflow syntax described [above](#top-level-declarations). 
Entry workflow definitions are ignored when a script is included as a module. This way, the same script can be included as a module or executed as a pipeline. @@ -205,13 +214,13 @@ process greet { } ``` -- Each of these additional sections are optional. Directives do not have an explicit section label, but are simply defined first. +- A process must define a script, shell, or exec section (see below). All other sections are optional. Directives do not have an explicit section label, but must be defined first. - The `script:` section label can be omitted only when there are no other sections in the body. -- Sections must be defined in the order shown above, with the exception of the output section, which can alternatively be specified after the script and stub. +- Sections must be defined in the order shown above, with the exception of the output section, which can also be specified after the script and stub. -Each section may contain one or more statements. For directives, inputs, and outputs, these statements must be [function calls](#function-call). Refer to {ref}`process-reference` for the set of available input qualifiers, output qualifiers, and directives. +Each section may contain one or more statements. For directives, inputs, and outputs, these statements must be [function calls](#function-call). See {ref}`process-reference` for the set of available input qualifiers, output qualifiers, and directives. The script section can be substituted with a shell or exec section: @@ -241,7 +250,7 @@ process greetExec { The script, shell, and stub sections must return a string in the same manner as a [function](#function). -Refer to {ref}`process-page` for more information on the semantics of each process section. +See {ref}`process-page` for more information on the semantics of each process section. 
(syntax-function)= @@ -277,7 +286,7 @@ def fib(x) { ### Enum type -An enum type declaration consists of a name and a body, which consists of a comma-separated list of identifiers: +An enum type declaration consists of a name and a body. The body consists of a comma-separated list of identifiers: ```groovy enum Day { @@ -291,7 +300,7 @@ enum Day { } ``` -Enum values can be accessed as `Day.MONDAY`, `Day.TUESDAY`, and so on. +Enum values in the above example can be accessed as `Day.MONDAY`, `Day.TUESDAY`, and so on. :::{note} Enum types cannot be included across modules at this time. @@ -312,11 +321,11 @@ output { } ``` -Only one output block may be defined in a script. Refer to {ref}`workflow-output-def` for the set of available target directives. +Only one output block may be defined in a script. See {ref}`workflow-output-def` for the set of available target directives. ## Statements -Statements should be separated by a newline or semi-colon: +Statements can be separated by either a newline or a semi-colon: ```groovy // newline @@ -335,19 +344,19 @@ Variables can be declared with the `def` keyword: def x = 42 ``` -Multiple variables can be declared in a single statement as long as the initializer is a [list literal](#list) with as many elements as declared variables: +Multiple variables can be declared in a single statement if the initializer is a [list literal](#list) with the same number of elements as declared variables: ```groovy def (x, y) = [ 1, 2 ] ``` -Every variable has a *scope*, which determines the region of code in which the variable is defined. +Each variable has a *scope*, which is the region of code in which the variable can be used. Variables declared in a function, as well as the parameters of that function, exist for the duration of that function call. The same applies to closures. Workflow inputs exist for the entire workflow body. Variables declared in the main section exist for the main, emit, and publish sections.
Named outputs are not considered variable declarations and therefore do not have any scope. -Process input variables exist for the entire process body. Variables declared in the process script, shell, exec, and stub sections exist only in their respective section, with one exception -- in these sections, a variable can be declared with the `def` keyword, in which case it will also exist in the output section. +Process input variables exist for the entire process body. Variables declared in the process script, shell, exec, and stub sections exist only in their respective section, with one exception -- variables declared without the `def` keyword also exist in the output section. Variables declared in an if or else branch exist only within that branch: @@ -363,7 +372,7 @@ if( true ) println x ``` -A variable cannot be declared with the same name as another variable in the same scope or any enclosing scope: +A variable cannot be declared with the same name as another variable in the same scope or an enclosing scope: ```groovy def clash(x) { @@ -385,7 +394,7 @@ map.key = 'value' The target expression must be a [variable](#variable), [index](#binary-expressions), or [property](#binary-expressions) expression. The source expression can be any expression. -Multiple variables can be assigned in a single statement as long as the source expression is a [list literal](#list) with as many elements as assigned variables: +Multiple variables can be assigned in a single statement as long as the source expression is a [list literal](#list) with the same number of elements as assigned variables: ```groovy (x, y) = [ 1, 2 ] @@ -393,7 +402,7 @@ Multiple variables can be assigned in a single statement as long as the source e ### Expression statement -Any [expression](#expressions) can also be a statement. +Any [expression](#expressions) can be a statement.
In general, the only expressions that can have any effect as expression statements are function calls that have side effects (e.g. `println`) or an implicit return statement (e.g. in a function or closure). @@ -407,9 +416,9 @@ assert 2 + 2 == 4 : 'The math broke!' If the condition is false, an error will be raised with the given error message. -### if / else +### if/else -An if/else statement consists of an *if branch* and an optional *else branch*. Each branch consists of a boolean expression in parentheses, followed by either a single statement or a *block statement* (one or more statements in curly braces). +An if/else statement consists of an *if branch* and an optional *else branch*. Each branch consists of a boolean expression in parentheses, followed by either a single statement or a *block statement* (one or more statements in curly braces). For example: ```groovy def x = Math.random() @@ -423,7 +432,7 @@ else { If the condition is true, the if branch will be executed, otherwise the else branch will be executed. -If / else statements can be chained any number of times by making the else branch another if / else statement: +If/else statements can be chained any number of times by making the else branch another if/else statement: ```groovy def grade = 89 @@ -439,7 +448,7 @@ else println 'You failed.' ``` -A more verbose way to write the same code would be: +A more verbose way to write the same code is: ```groovy def grade = 89 @@ -500,7 +509,9 @@ def isEven2(n) { } ``` -Note that if the last statement is not a return or expression statement (implicit return), it is equivalent to appending an empty return. +:::{note} +If the last statement is not a return or expression statement (implicit return), it is equivalent to appending an empty return. +::: ### throw @@ -517,9 +528,9 @@ error 'something failed!' 
``` ::: -### try / catch +### try/catch -A try / catch statement consists of a *try block* followed by any number of *catch clauses*: +A try/catch statement consists of a *try block* followed by any number of *catch clauses*: ```groovy def text = null @@ -531,7 +542,7 @@ catch( IOException e ) { } ``` -The try block will be executed, and if an error is raised and matches the expected error type of a catch clause, the code in that catch clause will be executed. If no catch clause is matched, the error will be raised to the next enclosing try / catch statement, or to the Nextflow runtime. +The try block will be executed, and if an error is raised and matches the expected error type of a catch clause, the code in that catch clause will be executed. If no catch clause is matched, the error will be raised to the next enclosing try/catch statement, or to the Nextflow runtime. ## Expressions @@ -631,11 +642,13 @@ Logic unconfined. / ``` -Note that a slashy string cannot be empty because it would become a line comment. +:::{note} +A slashy string cannot be empty because it would become a line comment. +::: ### Dynamic string -Double-quoted strings can be interpolated using the `${}` placeholder, which can contain any expression: +Double-quoted strings can be interpolated using the `${}` placeholder with an expression: ```groovy def names = ['Thing 1', 'Thing 2'] @@ -729,7 +742,7 @@ println [1, 2, 3].collect { v -> factor * v } // -> [2, 4, 6] ``` -And they can declare local variables that exist only for the lifetime of each closure invocation: +Closures can declare local variables that exist only for the lifetime of each closure invocation: ```groovy def result = 0 @@ -742,7 +755,7 @@ println result // -> 14 ``` -Refer to the {ref}`standard library ` and {ref}`operator ` reference pages for examples of closures being used in practice. +See {ref}`standard library ` and {ref}`operator ` for more examples of how closures are used in practice. 
### Index expression @@ -782,7 +795,7 @@ The argument list may contain any number of *positional arguments* and *named ar file('hello.txt', checkIfExists: true) ``` -The named arguments are collected into a map and provided as the first positional argument to the function. Thus the above function call can be rewritten as: +The named arguments are collected into a map and provided as the first positional argument to the function. The above function call can be rewritten as: ```groovy file([checkIfExists: true], 'hello.txt') @@ -790,7 +803,7 @@ file([checkIfExists: true], 'hello.txt') The argument name must be an identifier or string literal. -When the function call is also an [expression statement](#expression-statement) and there is at least one argument, the parentheses can be omitted: +The parentheses can be omitted when the function call is also an [expression statement](#expression-statement) and there is at least one argument: ```groovy // positional args @@ -827,7 +840,7 @@ If the type is implicitly available in the script, the *fully-qualified type nam new Date() ``` -Refer to {ref}`stdlib-default-imports` for the set of types which are implicitly available in Nextflow scripts. +See {ref}`stdlib-default-imports` for the set of types which are implicitly available in Nextflow scripts. 
### Unary expressions From 9444ed1f89acb0b82d764f1ba8844cef5844d43f Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 18 Oct 2024 14:11:19 -0500 Subject: [PATCH 21/28] Apply suggestions from review Signed-off-by: Ben Sherman --- docs/config.md | 2 +- docs/process.md | 2 +- docs/reference/syntax.md | 16 ++++++++-------- docs/script.md | 10 +++++----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/config.md b/docs/config.md index a5acbe6bb6..7b56edaaac 100644 --- a/docs/config.md +++ b/docs/config.md @@ -48,7 +48,7 @@ params.helper_file = "${projectDir}/assets/helper.txt" ### Blocks -A config scope can also be specified as a block, allowing multiple configuration options to be set within that block. For example: +A config scope can also be specified as a block, which may contain multiple configuration options. For example: ```groovy // dot syntax diff --git a/docs/process.md b/docs/process.md index 7897e6da44..cef57d76bd 100644 --- a/docs/process.md +++ b/docs/process.md @@ -260,7 +260,7 @@ Hello Mr. a Hello Mr. c ``` -A native process is very similar to a {ref}`function `, but provides additional capabilities such as parallelism, caching, and progress logging. +A native process is very similar to a {ref}`function `. However, it provides additional capabilities such as parallelism, caching, and progress logging. (process-stub)= diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 1863afd6b8..3d355696ee 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -37,7 +37,7 @@ A Nextflow script may contain the following top-level declarations: Script declarations are in turn composed of statements and expressions. -A script may contain one or more [statements](#statements), if there are no top-level declarations. In this case, the entire script will be treated as an entry workflow. 
For example: +If there are no top-level declarations, a script may contain one or more [statements](#statements), in which case the entire script is treated as an entry workflow. For example: ```groovy println 'Hello world!' @@ -65,7 +65,7 @@ The first line of a script can be a [shebang](https://en.wikipedia.org/wiki/Sheb ### Feature flag -A feature flag declaration is an assignment, where the target should be a valid {ref}`feature flag ` and the source should be a literal (i.e. number, string, boolean): +A feature flag declaration is an assignment. The target should be a valid {ref}`feature flag ` and the source should be a literal (i.e. number, string, boolean): ```groovy nextflow.preview.topic = true @@ -79,7 +79,7 @@ An include declaration consists of an *include source* and one or more *include include { foo as bar } from './some/module' ``` -The include source should be a string literal and should refer to either a local path (e.g. `./module.nf`) or a plugin (e.g. `plugin/nf-hello`). Each include clause should specify a name, and may also specify an *alias*. In the example above, `foo` is included under the alias `bar`. +The include source should be a string literal and should refer to either a local path (e.g. `./module.nf`) or a plugin (e.g. `plugin/nf-hello`). Each include clause should specify a name, and may also specify an *alias*. In the above example, `foo` is included under the alias `bar`. Include clauses can be separated by semi-colons or newlines: @@ -109,7 +109,7 @@ The following definitions can be included: ### Parameter -A parameter declaration is an assignment, where the target should be a pipeline parameter and the source should be an expression: +A parameter declaration is an assignment. The target should be a pipeline parameter and the source should be an expression: ```groovy params.message = 'Hello world!' 
@@ -404,7 +404,7 @@ Multiple variables can be assigned in a single statement as long as the source e Any [expression](#expressions) can be a statement. -In general, the only expressions that can have any effect as expression statements are function calls that have side effects (e.g. `println`) or an implicit return statement (e.g. in a function or closure). +In general, the only expressions that can have any effect as expression statements are function calls that have side effects (e.g. `println`) or an implicit return statement in a [function](#function) or [closure](#closure). ### assert @@ -693,7 +693,7 @@ A list literal consists of a comma-separated list of zero or more expressions, e ### Map -A map literal consists of a comma-separated list of one or more *map entries*, where each map entry consists of a *key expression* and *value expression* separated by a colon, enclosed in square brackets: +A map literal consists of a comma-separated list of one or more *map entries*, enclosed in square brackets. Each map entry consists of a *key expression* and *value expression* separated by a colon: ```groovy [foo: 1, bar: 2, baz: 3] @@ -952,6 +952,6 @@ Compound expressions are evaluated in the following order: The following legacy features were excluded from this page because they are deprecated: -- The `addParams` and `params` clauses of include declarations (see {ref}`module-params`) -- The `when:` section of a process definition (see {ref}`process-when`) +- The `addParams` and `params` clauses of include declarations. See {ref}`module-params` for more information. +- The `when:` section of a process definition. See {ref}`process-when` for more information. - The implicit `it` closure parameter diff --git a/docs/script.md b/docs/script.md index 850310096b..5476882b45 100644 --- a/docs/script.md +++ b/docs/script.md @@ -2,7 +2,7 @@ # Scripts -Nextflow is a workflow language that runs on the Java virtual machine (JVM). 
Nextflow's syntax is very similar to [Groovy](https://groovy-lang.org/), a scripting language for the JVM, but Nextflow is specialized for writing computational pipelines in a declarative manner. See {ref}`syntax-page` for a full description of the Nextflow language. +Nextflow is a workflow language that runs on the Java virtual machine (JVM). Nextflow's syntax is very similar to [Groovy](https://groovy-lang.org/), a scripting language for the JVM. However, Nextflow is specialized for writing computational pipelines in a declarative manner. See {ref}`syntax-page` for a full description of the Nextflow language. Nextflow scripts can also make full use of the Java and Groovy standard libraries. See {ref}`stdlib-page` for more information. @@ -45,7 +45,7 @@ println str ``` :::{warning} -Variables can also be declared without `def` in many cases, but this practice is discouraged outside of simple code snippets because it can lead to a {ref}`race condition `. +Variables can also be declared without `def` in some cases. However, this practice is discouraged outside of simple code snippets because it can lead to a {ref}`race condition `. ::: ## Lists @@ -311,7 +311,7 @@ println square(9) The above example prints `81`. -The main use case for a closure, however, is as an argument to a higher-order function: +The main use case for a closure is as an argument to a higher-order function: ```groovy [ 1, 2, 3, 4 ].collect(square) @@ -380,7 +380,7 @@ This way, the closure is fully "self-contained" because it doesn't access or mut So far, we have been focusing on the basic building blocks of Nextflow code, like variables, lists, strings, and closures. -In practice, however, Nextflow scripts are composed of *workflows*, *processes*, and *functions* (collectively known as *definitions*), and they can *include* definitions from other scripts. 
+In practice, however, Nextflow scripts are composed of *workflows*, *processes*, and *functions* (collectively known as *definitions*), and can *include* definitions from other scripts. To transition a code snippet into a proper workflow script, simply wrap it in a `workflow` block: @@ -390,7 +390,7 @@ workflow { } ``` -This block is called the *entry workflow*. A script can only have one entry workflow, and it serves as the entrypoint when the script is executed. In fact, whenever a script contains only simple statements like `println 'Hello!'`, Nextflow simply treats it as an entry workflow! +This block is called the *entry workflow*. It serves as the entrypoint when the script is executed. A script can only have one entry workflow. Whenever a script contains only simple statements like `println 'Hello!'`, Nextflow simply treats it as an entry workflow. You can also break up code into functions, for example: From 110ede7ca58d163ce7fc283b383cd5b1e0a51725 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 18 Oct 2024 14:15:41 -0500 Subject: [PATCH 22/28] Apply suggestions from review Signed-off-by: Ben Sherman --- docs/config.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/config.md b/docs/config.md index 7b56edaaac..fc98c085ef 100644 --- a/docs/config.md +++ b/docs/config.md @@ -90,7 +90,7 @@ process.memory = '10G' includeConfig 'path/foo.config' ``` -When a relative path is used, it is resolved against the location of the including file. +Relative paths are resolved against the location of the including file. :::{note} Config includes can also be specified within config blocks. However, config files should only be included at the top level or in a [profile](#config-profiles) so that the included config file is valid on its own and in the context in which it is included. 
From 48ac8b5309cc06474d94006bd55532ad191c82f7 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 21 Oct 2024 10:04:15 -0500 Subject: [PATCH 23/28] Update docs/dsl1.md Signed-off-by: Ben Sherman --- docs/dsl1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/dsl1.md b/docs/dsl1.md index 17e7f68f73..6083a9686a 100644 --- a/docs/dsl1.md +++ b/docs/dsl1.md @@ -88,7 +88,7 @@ In DSL1, the entire Nextflow pipeline must be defined in a single file (e.g. `ma DSL2 introduces the concept of "module scripts" (or "modules" for short), which are Nextflow scripts that can be "included" by other scripts. While modules are not essential to migrating to DSL2, nor are they mandatory in DSL2 by any means, modules can help you organize a large pipeline into multiple smaller files, and take advantage of modules created by others. Check out the {ref}`module-page` to get started. :::{note} -DSL2 scripts cannot exceed 64 KB in size. Large DSL1 scripts may need to be split into modules to avoid this limit. +Nextflow scripts cannot exceed 64 KB in size. ::: ## Deprecations From 30ea996e8cb469759bd318f5469f835bdba77d62 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 21 Oct 2024 10:10:50 -0500 Subject: [PATCH 24/28] Revert "Update docs/dsl1.md" This reverts commit 48ac8b5309cc06474d94006bd55532ad191c82f7. Signed-off-by: Ben Sherman --- docs/dsl1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/dsl1.md b/docs/dsl1.md index 6083a9686a..17e7f68f73 100644 --- a/docs/dsl1.md +++ b/docs/dsl1.md @@ -88,7 +88,7 @@ In DSL1, the entire Nextflow pipeline must be defined in a single file (e.g. `ma DSL2 introduces the concept of "module scripts" (or "modules" for short), which are Nextflow scripts that can be "included" by other scripts. 
While modules are not essential to migrating to DSL2, nor are they mandatory in DSL2 by any means, modules can help you organize a large pipeline into multiple smaller files, and take advantage of modules created by others. Check out the {ref}`module-page` to get started. :::{note} -Nextflow scripts cannot exceed 64 KB in size. +DSL2 scripts cannot exceed 64 KB in size. Large DSL1 scripts may need to be split into modules to avoid this limit. ::: ## Deprecations From 93154ee65fbbd6cd00eeea8aa38bb9174203029e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 21 Oct 2024 11:58:33 -0500 Subject: [PATCH 25/28] Update docs/config.md Signed-off-by: Ben Sherman --- docs/config.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/config.md b/docs/config.md index fc98c085ef..36b8c5f33a 100644 --- a/docs/config.md +++ b/docs/config.md @@ -80,7 +80,7 @@ executor { ### Includes -A config file can include any number of other config files using the `includeConfig` keyword: +A configuration file can include any number of other configuration files using the `includeConfig` keyword: ```groovy process.executor = 'sge' From 04a311d6c18c97b792a44876be509cd2bba19237 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 21 Oct 2024 18:15:20 -0500 Subject: [PATCH 26/28] Remove redundant subscriber code snippet Signed-off-by: Ben Sherman --- docs/reference/operator.md | 10 ---------- docs/snippets/subscribe-with-param.nf | 5 ----- docs/snippets/subscribe-with-param.out | 3 --- docs/snippets/subscribe.nf | 2 +- docs/snippets/subscribe.out | 6 +++--- 5 files changed, 4 insertions(+), 22 deletions(-) delete mode 100644 docs/snippets/subscribe-with-param.nf delete mode 100644 docs/snippets/subscribe-with-param.out diff --git a/docs/reference/operator.md b/docs/reference/operator.md index 0310224889..1f6d9483f1 100644 --- a/docs/reference/operator.md +++ b/docs/reference/operator.md @@ -1405,16 +1405,6 @@ The `subscribe` operator invokes a custom function for each item 
from a source c :language: console ``` -The closure parameter can be defined explicitly if needed, using a name other than `it` and, optionally, the expected type: - -```{literalinclude} ../snippets/subscribe-with-param.nf -:language: groovy -``` - -```{literalinclude} ../snippets/subscribe-with-param.out -:language: console -``` - The `subscribe` operator supports multiple types of event handlers: ```{literalinclude} ../snippets/subscribe-with-on-complete.nf diff --git a/docs/snippets/subscribe-with-param.nf b/docs/snippets/subscribe-with-param.nf deleted file mode 100644 index 26a82d0c18..0000000000 --- a/docs/snippets/subscribe-with-param.nf +++ /dev/null @@ -1,5 +0,0 @@ -Channel - .of( 'alpha', 'beta', 'lambda' ) - .subscribe { str -> - println "Got: ${str}; len: ${str.length()}" - } \ No newline at end of file diff --git a/docs/snippets/subscribe-with-param.out b/docs/snippets/subscribe-with-param.out deleted file mode 100644 index 3be852e6b9..0000000000 --- a/docs/snippets/subscribe-with-param.out +++ /dev/null @@ -1,3 +0,0 @@ -Got: alpha; len: 5 -Got: beta; len: 4 -Got: lambda; len: 6 \ No newline at end of file diff --git a/docs/snippets/subscribe.nf b/docs/snippets/subscribe.nf index 4d436d7df3..f17b933132 100644 --- a/docs/snippets/subscribe.nf +++ b/docs/snippets/subscribe.nf @@ -2,4 +2,4 @@ source = Channel.of( 'alpha', 'beta', 'delta' ) // subscribe to the channel with a function that prints each value -source.subscribe { v -> println "Got: $v" } \ No newline at end of file +source.subscribe { str -> println "Got: ${str}; len: ${str.length()}" } \ No newline at end of file diff --git a/docs/snippets/subscribe.out b/docs/snippets/subscribe.out index 27a68821f4..3be852e6b9 100644 --- a/docs/snippets/subscribe.out +++ b/docs/snippets/subscribe.out @@ -1,3 +1,3 @@ -Got: alpha -Got: beta -Got: delta \ No newline at end of file +Got: alpha; len: 5 +Got: beta; len: 4 +Got: delta; len: 5 \ No newline at end of file From
63633d1f825872bdffca6d150f15b63c1df74f63 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 23 Oct 2024 08:02:41 -0500 Subject: [PATCH 27/28] Add note about implicit closure parameter Signed-off-by: Ben Sherman --- docs/reference/syntax.md | 2 +- docs/script.md | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 3d355696ee..1d136f60ec 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -954,4 +954,4 @@ The following legacy features were excluded from this page because they are depr - The `addParams` and `params` clauses of include declarations. See {ref}`module-params` for more information. - The `when:` section of a process definition. See {ref}`process-when` for more information. -- The implicit `it` closure parameter +- The implicit `it` closure parameter. See {ref}`script-closure` for more information. diff --git a/docs/script.md b/docs/script.md index 5476882b45..c0d3994fb3 100644 --- a/docs/script.md +++ b/docs/script.md @@ -376,6 +376,16 @@ def result = counts.values().inject { sum, v -> sum + v } This way, the closure is fully "self-contained" because it doesn't access or mutate any variables outside of its scope. +:::{note} +When a closure takes a single parameter, the parameter can be omitted, in which case the implicit `it` parameter will be used: + +```groovy +[1, 2, 3].each { println it } +``` + +However, this practice is deprecated and will not be supported in the future. +::: + ## Script definitions So far, we have been focusing on the basic building blocks of Nextflow code, like variables, lists, strings, and closures. 
From 3f6f37796065fd644813a1b75ed91fea623080c7 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 23 Oct 2024 18:47:35 +0200 Subject: [PATCH 28/28] Update docs [ci skip] Signed-off-by: Paolo Di Tommaso --- docs/script.md | 2 -- docs/workflow.md | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/script.md b/docs/script.md index c0d3994fb3..8ea70050a6 100644 --- a/docs/script.md +++ b/docs/script.md @@ -382,8 +382,6 @@ When a closure takes a single parameter, the parameter can be omitted, in which ```groovy [1, 2, 3].each { println it } ``` - -However, this practice is deprecated and will not be supported in the future. ::: ## Script definitions diff --git a/docs/workflow.md b/docs/workflow.md index ecd140ac32..69a66b166e 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -51,7 +51,7 @@ workflow { ``` :::{note} -While params can also be used by named workflows, this practice is deprecated and will not be supported in the future. Named workflows should receive their inputs explicitly through the `take:` section. +While params can also be used by named workflows, this practice is discouraged. Named workflows should receive their inputs explicitly through the `take:` section. ::: ## Workflow inputs (`take`) @@ -234,7 +234,7 @@ Process named outputs are defined using the `emit` option on a process output. S ::: :::{note} -Process and workflow outputs can also be accessed by index (e.g., `foo.out[0]`, `foo.out[1]`, etc.). However, this practice is deprecated and will not be supported in the future. Multiple outputs should instead be accessed by name. +Process and workflow outputs can also be accessed by index (e.g., `foo.out[0]`, `foo.out[1]`, etc.). However, multiple outputs should be accessed by name instead. ::: Workflows can be composed in the same way: