diff --git a/.depend b/.depend index e34797649bb9..70c50965806a 100644 --- a/.depend +++ b/.depend @@ -62,16 +62,6 @@ utils/config.cmo : \ utils/config.cmx : \ utils/config.cmi utils/config.cmi : -utils/config_boot.cmo : \ - utils/config_boot.cmi -utils/config_boot.cmx : \ - utils/config_boot.cmi -utils/config_boot.cmi : -utils/config_main.cmo : \ - utils/config_main.cmi -utils/config_main.cmx : \ - utils/config_main.cmi -utils/config_main.cmi : utils/consistbl.cmo : \ utils/misc.cmi \ utils/consistbl.cmi @@ -221,6 +211,16 @@ utils/warnings.cmx : \ utils/warnings.cmi utils/warnings.cmi : \ utils/format_doc.cmi +utils/config/config_boot.cmo : \ + utils/config/config_boot.cmi +utils/config/config_boot.cmx : \ + utils/config/config_boot.cmi +utils/config/config_boot.cmi : +utils/config/config_main.cmo : \ + utils/config/config_main.cmi +utils/config/config_main.cmx : \ + utils/config/config_main.cmi +utils/config/config_main.cmi : parsing/ast_helper.cmo : \ parsing/syntaxerr.cmi \ parsing/parsetree.cmi \ @@ -2395,6 +2395,7 @@ bytecomp/bytelink.cmo : \ utils/consistbl.cmi \ utils/config.cmi \ utils/compression.cmi \ + driver/compenv.cmi \ file_formats/cmo_format.cmi \ utils/clflags.cmi \ utils/ccomp.cmi \ @@ -2414,6 +2415,7 @@ bytecomp/bytelink.cmx : \ utils/consistbl.cmx \ utils/config.cmx \ utils/compression.cmx \ + driver/compenv.cmx \ file_formats/cmo_format.cmi \ utils/clflags.cmx \ utils/ccomp.cmx \ @@ -2575,14 +2577,6 @@ bytecomp/instruct.cmi : \ lambda/lambda.cmi \ typing/ident.cmi \ typing/env.cmi -bytecomp/meta.cmo : \ - bytecomp/instruct.cmi \ - bytecomp/meta.cmi -bytecomp/meta.cmx : \ - bytecomp/instruct.cmx \ - bytecomp/meta.cmi -bytecomp/meta.cmi : \ - bytecomp/instruct.cmi bytecomp/opcodes.cmo : \ bytecomp/opcodes.cmi bytecomp/opcodes.cmx : \ @@ -2608,7 +2602,6 @@ bytecomp/symtable.cmo : \ lambda/runtimedef.cmi \ typing/predef.cmi \ utils/misc.cmi \ - bytecomp/meta.cmi \ parsing/location.cmi \ lambda/lambda.cmi \ typing/ident.cmi \ @@ -2622,7 +2615,6 @@ 
bytecomp/symtable.cmx : \ lambda/runtimedef.cmx \ typing/predef.cmx \ utils/misc.cmx \ - bytecomp/meta.cmx \ parsing/location.cmx \ lambda/lambda.cmx \ typing/ident.cmx \ @@ -2818,6 +2810,7 @@ asmcomp/asmlink.cmo : \ utils/consistbl.cmi \ utils/config.cmi \ middle_end/compilenv.cmi \ + driver/compenv.cmi \ file_formats/cmx_format.cmi \ asmcomp/cmm_helpers.cmi \ asmcomp/cmm.cmi \ @@ -2839,6 +2832,7 @@ asmcomp/asmlink.cmx : \ utils/consistbl.cmx \ utils/config.cmx \ middle_end/compilenv.cmx \ + driver/compenv.cmx \ file_formats/cmx_format.cmi \ asmcomp/cmm_helpers.cmx \ asmcomp/cmm.cmx \ @@ -7627,9 +7621,9 @@ toplevel/byte/topeval.cmo : \ typing/outcometree.cmi \ typing/out_type.cmi \ utils/misc.cmi \ - bytecomp/meta.cmi \ parsing/location.cmi \ utils/load_path.cmi \ + bytecomp/instruct.cmi \ typing/ident.cmi \ typing/env.cmi \ bytecomp/emitcode.cmi \ @@ -7657,9 +7651,9 @@ toplevel/byte/topeval.cmx : \ typing/outcometree.cmi \ typing/out_type.cmx \ utils/misc.cmx \ - bytecomp/meta.cmx \ parsing/location.cmx \ utils/load_path.cmx \ + bytecomp/instruct.cmx \ typing/ident.cmx \ typing/env.cmx \ bytecomp/emitcode.cmx \ @@ -7714,7 +7708,6 @@ toplevel/byte/trace.cmo : \ typing/predef.cmi \ typing/path.cmi \ utils/misc.cmi \ - bytecomp/meta.cmi \ parsing/longident.cmi \ typing/ctype.cmi \ parsing/asttypes.cmi \ @@ -7727,7 +7720,6 @@ toplevel/byte/trace.cmx : \ typing/predef.cmx \ typing/path.cmx \ utils/misc.cmx \ - bytecomp/meta.cmx \ parsing/longident.cmx \ typing/ctype.cmx \ parsing/asttypes.cmx \ @@ -9533,6 +9525,7 @@ ocamldoc/odoc_misc.cmo : \ typing/path.cmi \ ocamldoc/odoc_types.cmi \ ocamldoc/odoc_messages.cmi \ + ocamldoc/odoc_global.cmi \ parsing/longident.cmi \ typing/btype.cmi \ ocamldoc/odoc_misc.cmi @@ -9543,6 +9536,7 @@ ocamldoc/odoc_misc.cmx : \ typing/path.cmx \ ocamldoc/odoc_types.cmx \ ocamldoc/odoc_messages.cmx \ + ocamldoc/odoc_global.cmx \ parsing/longident.cmx \ typing/btype.cmx \ ocamldoc/odoc_misc.cmi diff --git a/.gitattributes b/.gitattributes 
index 194f03db903e..622a2600a0a5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -44,6 +44,21 @@ # the lines involved in the conflict, which is arguably worse #/Changes merge=union +testsuite/Makefile export-subst + +# Files and directories excluded from git-generated tarballs. +.github export-ignore +manual export-ignore +release-info export-ignore +testsuite/tests export-ignore +tools/ci export-ignore +.gitattributes export-ignore +.gitignore export-ignore +.gitmodules export-ignore +.mailmap export-ignore +ocaml-variants.install export-ignore +ocaml-variants.opam export-ignore + # No header for text and META files (would be too obtrusive). *.md typo.missing-header README* typo.missing-header @@ -92,6 +107,8 @@ build-aux typo.prune /manual typo.prune /manual/** typo.prune +tools/ci/appveyor/parallel linguist-vendored typo.prune + # configure is generated so do not check it configure typo.prune @@ -109,6 +126,7 @@ otherlibs/unix/symlink_win32.c typo.long-line # Some Unicode characters here and there utils/misc.ml typo.non-ascii runtime/sak.c typo.non-ascii +tools/opam/process.sh typo.non-ascii testsuite/tests/** typo.missing-header typo.long-line=may testsuite/tests/lib-bigarray-2/bigarrf.f typo.tab linguist-language=Fortran @@ -215,7 +233,6 @@ runtime/caml/sizeclasses.h typo.missing-header /tools/gdb_ocamlrun.py typo.long-line /tools/opam/gen_ocaml-system_config.ml.in typo.missing-header -/tools/opam/gen_ocaml_config.ml.in typo.very-long-line typo.missing-header # Tests which include references spanning multiple lines fail with \r\n # endings, so use \n endings only, even on Windows. 
diff --git a/.github/workflows/build-msvc.yml b/.github/workflows/build-msvc.yml index 6035ff3703c4..f32c2b7eff88 100644 --- a/.github/workflows/build-msvc.yml +++ b/.github/workflows/build-msvc.yml @@ -33,10 +33,12 @@ jobs: with: script: | // # Always test cl and clang-cl - let compilers = ['cl', 'clang-cl']; + let compilers = ['clang-cl']; // # Also test i686 MSVC let include = [ - {cc: 'cl', arch: 'i686', libdir: 'relative'}]; + {os: 'windows-latest', cc: 'cl', arch: 'i686', opam: 'false', prefix: '$PROGRAMFILES/Бактріан🐫', libdir: 'relative'}, + {os: 'windows-2025', cc: 'cl', arch: 'x86_64', opam: 'true', prefix: 'C:\\\\Бактріан🐫'}, + {os: 'windows-2025', cc: 'cl', arch: 'i686', opam: 'true', prefix: 'C:\\\\Бактріан🐫'}]; let libdir = ['absolute']; // # If this is a pull request, see if the PR has the // # 'CI: Full matrix' label. This is done using an API request, @@ -53,14 +55,14 @@ jobs: // # Test Cygwin as well compilers.push('gcc'); // # Test bytecode-only Cygwin - include.push({cc: 'gcc', arch: 'x86_64', libdir: 'absolute', config_arg: '--disable-native-toplevel --disable-native-compiler'}); + include.push({os: 'windows-latest', prefix: '$PROGRAMFILES/Бактріан🐫', opam: ['false'], cc: 'gcc', arch: 'x86_64', libdir: 'absolute', config_arg: '--disable-native-toplevel --disable-native-compiler --with-target-sh=sh'}); // # Test i686 MSVC absolute - include.push({cc: 'cl', arch: 'i686', libdir: 'absolute'}); + include.push({os: 'windows-latest', prefix: '$PROGRAMFILES/Бактріан🐫', opam: ['false'], cc: 'cl', arch: 'i686', libdir: 'absolute'}); // # Expand the main matrix to include relative testing libdir.push('relative'); } } - return {config_arg: [''], arch: ['x86_64'], cc: compilers, libdir: libdir, include: include}; + return {os: ['windows-latest'], prefix: ['$PROGRAMFILES/Бактріан🐫'], opam: ['false'], config_arg: [''], arch: ['x86_64'], cc: compilers, libdir: libdir, include: include}; - name: Determine if the testsuite should be skipped id: skip uses: 
actions/github-script@v7 @@ -78,7 +80,7 @@ jobs: build: permissions: {} - runs-on: windows-latest + runs-on: ${{ matrix.os }} needs: config @@ -88,7 +90,7 @@ jobs: strategy: matrix: ${{ fromJSON(needs.config.outputs.matrix) }} - fail-fast: true + fail-fast: false steps: @@ -107,7 +109,7 @@ jobs: - name: Install Cygwin uses: cygwin/cygwin-install-action@v3 with: - packages: make,${{ matrix.cc != 'gcc' && 'mingw64-x86_64-' || 'gcc-g++,gcc-fortran,' }}gcc-core + packages: make,${{ matrix.cc != 'gcc' && 'mingw64-x86_64-' || 'gcc-g++,gcc-fortran,' }}gcc-core,rsync,unzip install-dir: 'D:\cygwin' - name: Save Cygwin cache @@ -123,6 +125,13 @@ jobs: arch: ${{ matrix.arch == 'x86_64' && 'x64' || 'x86' }} if: matrix.cc != 'gcc' + - name: Install opam + if: matrix.opam == 'true' + shell: pwsh + run: | + winget install opam --accept-source-agreements + Add-Content -Path $env:GITHUB_PATH -Value "$env:LOCALAPPDATA\Microsoft\WinGet\Links" + - name: Compute a key to cache configure results id: autoconf-cache-key env: @@ -141,7 +150,8 @@ jobs: env: CONFIG_ARGS: >- --cache-file=config.cache - --prefix "${{ matrix.cc != 'gcc' && '$PROGRAMFILES\\Бактріан🐫' || '$(cygpath "$PROGRAMFILES/Бактріан🐫")'}}" + --prefix ${{ matrix.cc != 'gcc' && format('"{0}/_opam"', matrix.prefix) || format('"$(cygpath "{0}")"', matrix.prefix) }} + --docdir ${{ format((matrix.cc != 'gcc' && '"{0}/_opam/doc/ocaml"' || '"$(cygpath "{0}/doc/ocaml")"'), matrix.prefix) }} ${{ matrix.cc != 'gcc' && format('--host={0}-pc-windows', matrix.arch) || '' }} ${{ matrix.cc != 'gcc' && format('CC={0}', matrix.cc) || '' }} --enable-ocamltest @@ -200,8 +210,25 @@ jobs: make tests - name: Install the compiler + if: matrix.opam != 'true' run: make install + - name: Create opam switch + if: matrix.opam == 'true' + env: + OPAMSWITCH: ${{ matrix.prefix }} + run: | + make OPAM_PACKAGE_NAME=ocaml-variants INSTALL_MODE=opam install + opam init --cli=2.4 --bare --yes --disable-sandboxing --auto-setup --cygwin-local-install + # These 
commands intentionally run using opam's "default" CLI + opam switch create '${{ env.OPAMSWITCH }}' --empty + opam pin add --no-action --kind=path ocaml-variants . + opam pin add --no-action flexdll flexdll + opam pin add --no-action winpthreads winpthreads + opam install --yes flexdll winpthreads + opam install --yes --assume-built ocaml-variants + opam exec -- ocamlc -v + - name: Test in prefix run: | eval $(tools/msvs-promote-path) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b04161cb8dc8..9ce2cc998d84 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -78,6 +78,7 @@ jobs: # debug runtime and minor heap verification. # debug-s4096: select testsuite run with the debug runtime and a small # minor heap. +# opam: constructs an opam local switch from the compiler. normal: name: ${{ matrix.name }} needs: [build, config] @@ -88,11 +89,13 @@ jobs: - id: normal name: normal dependencies: texlive-latex-extra texlive-fonts-recommended texlive-luatex hevea sass gdb lldb + - id: opam + name: opam installation - id: debug name: extra (debug) - id: debug-s4096 name: extra (debug-s4096) - fail-fast: true + fail-fast: false steps: - name: Download Artifact uses: actions/download-artifact@v4 @@ -134,15 +137,19 @@ jobs: - name: Install if: matrix.id == 'normal' run: | - MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh install + MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh install + - name: Create opam switch + if: matrix.id == 'opam' + run: | + MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh opam - name: Test in prefix - if: matrix.id == 'normal' + if: matrix.id == 'normal' || matrix.id == 'opam' run: | - MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh test-in-prefix + MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh test-in-prefix - name: Test in prefix (alternate configuration) if: matrix.id == 'normal' && 
needs.config.outputs.full-matrix == 'true' run: | - MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh re-test-in-prefix + MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh re-test-in-prefix - name: Build the manual if: matrix.id == 'normal' && needs.build.outputs.manual_changed == 'true' run: | @@ -186,6 +193,7 @@ jobs: config_arg: '--with-target-sh=exe --enable-runtime-search-target=fallback', 'test-in-prefix': true}, {name: 'macos-x86_64', os: 'macos-15-intel', + config_arg: '--with-target-sh=exe', 'test-in-prefix': true}, {name: 'macos-arm64', os: 'macos-latest', config_arg: '--with-relative-libdir --enable-runtime-search', @@ -236,7 +244,7 @@ jobs: strategy: matrix: include: ${{ fromJSON(needs.config.outputs.jobs) }} - fail-fast: true + fail-fast: false steps: - name: Checkout uses: actions/checkout@v5 @@ -304,7 +312,7 @@ jobs: - name: configure tree run: | chown -R ocaml:ocaml . - MAKE_ARG=-j CONFIG_ARG='--disable-native-toplevel' su ocaml -c "bash -xe tools/ci/actions/runner.sh configure" + MAKE_ARG=-j CONFIG_ARG='--disable-native-toplevel --with-target-sh=exe' su ocaml -c "bash -xe tools/ci/actions/runner.sh configure" - name: Build run: | MAKE_ARG=-j su ocaml -c "bash -xe tools/ci/actions/runner.sh build" diff --git a/.gitignore b/.gitignore index c2a7936a3961..a86e005a5fbd 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ *.cmx[as] *.cmti *.annot +*.stripped *.exe *.exe.manifest .DS_Store @@ -320,12 +321,12 @@ META /toplevel/native/trace.mli /toplevel/native/topmain.mli -/utils/config_boot.ml -/utils/config_boot.mli +/utils/config/config_boot.ml +/utils/config/config_boot.mli +/utils/config/config_main.ml +/utils/config/config_main.mli /utils/config.common.ml /utils/config.generated.ml -/utils/config_main.ml -/utils/config_main.mli /utils/config.ml /utils/domainstate.ml /utils/domainstate.mli diff --git a/Changes b/Changes index d3cd0665e9d7..f365488539f1 100644 --- a/Changes +++ b/Changes @@ -120,6 +120,35 @@ 
Working version continuations without calling `caml_continuation_use`. (Max Slater, review by Nick Barnes and Stephen Dolan) +- #14???: The Windows version of the executable launcher (stdlib/header.c) no + longer assumes that paths are limited to 260 (MAX_PATH) characters. + (David Allsopp, review by ???) + +- #14???: Change semantics of the internal caml_attempt_open bytecode runtime + function to require the caller to perform PATH-resolution prior to calling the + function. Affects the resolution of CAML_DEBUG_FILE, which is still searched + in PATH, but at program startup instead of when the debug information is first + loaded. + (David Allsopp, review by ???) + +- #14???: The CAML_DEBUG_FILE environment variable is now ignored if it set to + the empty string. + (David Allsopp, review by ???) + +* #14???: When set to the empty string, OCAMLRUNPARAM, OCAMLLIB and CAMLLIB are + now ignored (i.e. treated as if they were unset). In particular, this means + that a blank value for OCAMLLIB no longer prevents the compiler's default + Standard Library from being used. CAML_LD_LIBRARY_PATH now ignores empty + segments (instead of interpreting them as being ".") which also means that + CAML_LD_LIBRARY_PATH is ignored if it is just set to the empty string. Blank + lines in ld.conf are now likewise ignored. + (David Allsopp, review by ???) + +* #14???: Windows and Unix. When configured with --disable-shared, the runtime + now rejects bytecode executables which require DLLs to be loaded earlier, and + with a less unclear error message. + (David Allsopp, review by ???) + ### Code generation and optimizations: ### Standard library: @@ -178,6 +207,11 @@ Working version (Émile Trotignon, review by Nicolás Ojeda Bär, Jan Midtgaard and Damien Doligez) +- #14???: On Unix, the Filename module now treats TMPDIR='' (i.e. TMPDIR set, + but to an empty string) as TMPDIR=/tmp (as it already does if TMPDIR is not + set at all). + (David Allsopp, review by ???) 
+ ### Type system: - #13781: Set scope of internal type nodes during abbreviation expansion @@ -243,6 +277,24 @@ Working version do so. (Fixes #14323) (Nicolás Ojeda Bär, review by Vincent Laviron) +- #14???: Improved the error message when bytecode Dynlink is asked to load an + archive which needs dynamic loading on a runtime which doesn't support it. + Previously, the full machinery to search for DLLs was invoked, and the error + confusingly included the name of a non-existent DLL. The entire process is now + skipped, with a general message that dynamic loading is needed and is + unavailable. + (David Allsopp, review by ???) + +- #14???: Remove duplicated linker options from unix.cma and unix.cmxa on + Windows. + (David Allsopp, review by ???) + +- #14???: Build and install threads.cmxs. + (David Allsopp, review by ???) + +- #14???: Stop installing "nat" versions of the stubs DLLs. + (David Allsopp, review by ???) + ### Tools: - #14055: Invert BUILD_PATH_PREFIX_MAP in directories loaded at startup @@ -282,6 +334,17 @@ Working version executable (either from the RNTM section or by analysing the shebang lines) (David Allsopp, review by Damien Doligez and Samuel Hym) +- #14???: ocamlyacc treats TMPDIR='' (i.e. set, but to the empty string) as + TMPDIR=/tmp (as it already does if TMPDIR is not set at all). + (David Allsopp, review by ???) + +### Toplevel: + +- #14???: Empty segments in OCAMLTOP_INCLUDE_PATH are no longer interpreted as + adding the current working directory to the search path. The current working + directory can of course still be included in the search path by adding ".". + (David Allsopp, review by ???) + ### Manual and documentation: - #14397: Improve documentation of type-directed disambiguation of array @@ -379,6 +442,10 @@ Working version hard-coded path. (David Allsopp, review by Damien Doligez and Samuel Hym) +- #14???: Extend -set-runtime-default to allow the default values of all runtime + parameters to be overridden for any given binary. 
+ (David Allsopp and Antonin Décimo, review by ???) + - #14190: `ocaml -e` now also processes `-init` (previously it was ignored). (Emile Trotignon, review by David Allsopp and @ygrek) @@ -456,6 +523,12 @@ Working version * #14388: Remove support for `let rec (module M : S) = e1 in e2`. (Alistair O'Brien, review by Vincent Laviron and Gabriel Scherer) +- #14???: `-l` libraries are no longer given to the partial linker (for + `-output-complete-obj`) if `ocamlc`/`ocamlopt` cannot resolve them in the + current search path. Allows system libraries (e.g. `-lzstd`) to be specified + in .cma or .cmxa files without breaking `-output-complete-obj`. + (David Allsopp, review by ???) + ### Build system: - #13810: Support build of cross compilers to native freestanding targets @@ -547,6 +620,11 @@ Working version - #14370: Fix headers for C++ inclusion. (Antonin Décimo, review by Gabriel Scherer) +- #14???: Fix the hand-off of the bytecode image to the runtime from the + executable header. In particular, running `ocamlc.byte` rather than + `ocamlc.byte.exe` now invokes the compiler instead of ocamlrun itself! + (David Allsopp, review by ???) + OCaml 5.4.0 (9 October 2025) ---------------------------- diff --git a/Makefile b/Makefile index 96938fdc317f..6120b0d6835f 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,8 @@ # The main Makefile ROOTDIR = . 
+SUBDIR_NAME = + # NOTE: it is important that the OCAMLDEP and OCAMLLEX variables # are defined *before* Makefile.common gets included, so that # their local definitions here take precedence over their @@ -52,8 +54,6 @@ PERVASIVES=$(STDLIB_MODULES) outcometree topprinters topdirs toploop LIBFILES=stdlib.cma std_exit.cmo *.cmi $(HEADER_NAME) -COMPLIBDIR=$(LIBDIR)/compiler-libs - TOPINCLUDES=$(addprefix -I otherlibs/,$(filter-out %threads,$(OTHERLIBRARIES))) expunge := expunge$(EXE) @@ -188,7 +188,6 @@ comp_SOURCES = \ file_formats/cmo_format.mli \ file_formats/cmx_format.mli \ file_formats/cmxs_format.mli \ - bytecomp/meta.mli bytecomp/meta.ml \ bytecomp/opcodes.mli bytecomp/opcodes.ml \ bytecomp/bytesections.mli bytecomp/bytesections.ml \ bytecomp/dll.mli bytecomp/dll.ml \ @@ -414,12 +413,12 @@ partialclean:: cd toplevel/byte ; rm -f $(TOPLEVEL_SHARED_ARTEFACTS) cd toplevel/native ; rm -f $(TOPLEVEL_SHARED_ARTEFACTS) -ALL_CONFIG_CMO = utils/config_main.cmo utils/config_boot.cmo +ALL_CONFIG_CMO = utils/config/config_main.cmo utils/config/config_boot.cmo -utils/config_%.mli: utils/config.mli +utils/config/config_%.mli: utils/config.mli | utils/config cp $^ $@ -beforedepend:: utils/config_main.mli utils/config_boot.mli +beforedepend:: utils/config/config_main.mli utils/config/config_boot.mli $(addprefix compilerlibs/ocamlcommon., cma cmxa): \ OC_COMMON_LINKFLAGS += -linkall @@ -476,12 +475,15 @@ partialclean:: # The configuration file utils/config.ml: \ - utils/config_$(if $(filter true,$(IN_COREBOOT_CYCLE)),boot,main).ml + utils/config/config_$(if $(filter true,$(IN_COREBOOT_CYCLE)),boot,main).ml $(V_GEN)cp $< $@ -utils/config_boot.ml: utils/config.fixed.ml utils/config.common.ml +utils/config: + $(MKDIR) $@ +utils/config/config_boot.ml: \ + utils/config.fixed.ml utils/config.common.ml | utils/config $(V_GEN)cat $^ > $@ - -utils/config_main.ml: utils/config.generated.ml utils/config.common.ml +utils/config/config_main.ml: \ + utils/config.generated.ml 
utils/config.common.ml | utils/config $(V_GEN)cat $^ > $@ ADDITIONAL_CONFIGURE_ARGS ?= @@ -502,13 +504,13 @@ configure: tools/autogen configure.ac aclocal.m4 build-aux/ocaml_version.m4 .PHONY: partialclean partialclean:: rm -f utils/config.ml \ - utils/config_main.ml utils/config_main.mli \ - utils/config_boot.ml utils/config_boot.mli \ + utils/config/config_main.ml utils/config/config_main.mli \ + utils/config/config_boot.ml utils/config/config_boot.mli \ utils/domainstate.ml utils/domainstate.mli .PHONY: beforedepend beforedepend:: \ - utils/config.ml utils/config_boot.ml utils/config_main.ml \ + utils/config.ml utils/config/config_boot.ml utils/config/config_main.ml \ utils/domainstate.ml utils/domainstate.mli ocamllex_PROGRAMS = $(addprefix lex/,ocamllex ocamllex.opt) @@ -634,7 +636,7 @@ USE_STDLIB = -nostdlib -I ../stdlib FLEXDLL_OBJECTS = \ flexdll_$(FLEXDLL_CHAIN).$(O) flexdll_initer_$(FLEXDLL_CHAIN).$(O) FLEXLINK_BUILD_ENV = \ - MSVCC_ROOT= \ + MSVCC_ROOT= MSVCC=$(CC) MSVCC64=$(CC) \ MSVC_DETECT=0 OCAML_CONFIG_FILE=../Makefile.config \ CHAINS=$(FLEXDLL_CHAIN) ROOTDIR=.. ifneq ($(RC),) @@ -712,7 +714,7 @@ boot/ocamlrun$(EXE): # Start up the system from the distribution compiler .PHONY: coldstart -coldstart: boot/ocamlrun$(EXE) runtime/libcamlrun.$(A) +coldstart: boot/ocamlrun$(EXE) stdlib/libcamlrun.$(A) $(MAKE) -C stdlib OCAMLRUN='$$(ROOTDIR)/$<' USE_BOOT_OCAMLC=true all rm -f $(addprefix boot/, libcamlrun.$(A) $(LIBFILES)) cp $(addprefix stdlib/, $(LIBFILES)) boot @@ -737,13 +739,6 @@ CMPCMD ?= $(OCAMLRUN) tools/cmpbyt$(EXE) .PHONY: compare compare: -# The core system has to be rebuilt after bootstrap anyway, so strip ocamlc -# and ocamllex, which means the artefacts should be identical. 
- mv ocamlc$(EXE) ocamlc.tmp - $(OCAMLRUN) tools/stripdebug$(EXE) -all ocamlc.tmp ocamlc$(EXE) - mv lex/ocamllex$(EXE) ocamllex.tmp - $(OCAMLRUN) tools/stripdebug$(EXE) -all ocamllex.tmp lex/ocamllex$(EXE) - rm -f ocamllex.tmp ocamlc.tmp @if $(CMPCMD) boot/ocamlc ocamlc$(EXE) \ && $(CMPCMD) boot/ocamllex lex/ocamllex$(EXE); \ then echo "Fixpoint reached, bootstrap succeeded."; \ @@ -754,12 +749,10 @@ compare: # Promote a compiler -PROMOTE ?= cp - .PHONY: promote-common promote-common: - $(PROMOTE) ocamlc$(EXE) boot/ocamlc - $(PROMOTE) lex/ocamllex$(EXE) boot/ocamllex + cp ocamlc$(EXE) boot/ocamlc + cp lex/ocamllex$(EXE) boot/ocamllex cd stdlib; cp $(LIBFILES) ../boot # Promote the newly compiled system to the rank of cross compiler @@ -770,7 +763,6 @@ promote-cross: promote-common # Promote the newly compiled system to the rank of bootstrap compiler # (Runs on the new runtime, produces code for the new runtime) .PHONY: promote -promote: PROMOTE = $(OCAMLRUN) tools/stripdebug$(EXE) -all promote: promote-common rm -f boot/ocamlrun$(EXE) cp runtime/ocamlrun$(EXE) boot/ocamlrun$(EXE) @@ -935,8 +927,6 @@ partialclean:: rm -f flexlink.opt flexlink.opt.exe \ $(OPT_BINDIR)/flexlink $(OPT_BINDIR)/flexlink.exe -INSTALL_COMPLIBDIR = $(DESTDIR)$(COMPLIBDIR) -INSTALL_FLEXDLLDIR = $(INSTALL_LIBDIR)/flexdll FLEXDLL_MANIFEST = default$(filter-out _i386,_$(ARCH)).manifest DOC_FILES=\ @@ -960,11 +950,14 @@ clean:: # Build the manual latex files from the etex source files # (see manual/README.md) .PHONY: manual-pregen -manual-pregen: opt.opt - cd manual; $(MAKE) clean && $(MAKE) pregen-etex +manual-pregen: opt.opt | manual + $(MAKE) -C manual clean + $(MAKE) -C manual pregen-etex +ifneq "$(wildcard manual)" "" clean:: $(MAKE) -C manual clean +endif # The clean target clean:: partialclean @@ -984,11 +977,12 @@ ocamlc_SOURCES = driver/main.mli driver/main.ml ocamlc_BYTECODE_LINKFLAGS = -compat-32 -g ifeq "$(IN_COREBOOT_CYCLE)" "true" -ocamlc_BYTECODE_LINKFLAGS += -set-runtime-default 
standard_library_default=. +ocamlc_BYTECODE_LINKFLAGS += \ +-no-g -without-runtime -set-runtime-default standard_library_default=. endif partialclean:: - rm -f ocamlc ocamlc.exe ocamlc.opt ocamlc.opt.exe + rm -f ocamlc ocamlc.exe ocamlc.opt ocamlc.opt.exe ocamlc*.stripped # The native-code compiler @@ -999,7 +993,7 @@ ocamlopt_SOURCES = driver/optmain.mli driver/optmain.ml ocamlopt_BYTECODE_LINKFLAGS = -g partialclean:: - rm -f ocamlopt ocamlopt.exe ocamlopt.opt ocamlopt.opt.exe + rm -f ocamlopt ocamlopt.exe ocamlopt.opt ocamlopt.opt.exe ocamlopt*.stripped # The toplevel @@ -1264,6 +1258,10 @@ runtime_COMMON_C_SOURCES = \ $(UNIX_OR_WIN32) \ weak +ifeq "$(UNIX_OR_WIN32)" "unix" +runtime_COMMON_C_SOURCES += unix_executable +endif + runtime_BYTECODE_ONLY_C_SOURCES = \ backtrace_byt \ fail_byt \ @@ -1631,11 +1629,11 @@ include $(addprefix $(DEPDIR)/, $(runtime_DEP_FILES)) endif .PHONY: runtime -runtime: stdlib/libcamlrun.$(A) +runtime: stdlib/libcamlrun.$(A) runtime-all .PHONY: makeruntime makeruntime: runtime-all -stdlib/libcamlrun.$(A): runtime-all +stdlib/libcamlrun.$(A): runtime/libcamlrun.$(A) cd stdlib; $(LN) ../runtime/libcamlrun.$(A) . clean:: rm -f $(addprefix runtime/, *.o *.obj *.a *.lib *.so *.dll) @@ -1685,7 +1683,7 @@ library: ocamlc $(MAKE) -C stdlib all .PHONY: library-cross -library-cross: +library-cross: stdlib/libcamlrun.$(A) $(MAKE) -C stdlib OCAMLRUN=../runtime/ocamlrun$(EXE) all .PHONY: libraryopt @@ -1729,7 +1727,8 @@ ocamllex.opt: ocamlopt ocamllex_BYTECODE_LINKFLAGS = -compat-32 ifeq "$(IN_COREBOOT_CYCLE)" "true" -ocamllex_BYTECODE_LINKFLAGS += -set-runtime-default standard_library_default=. +ocamllex_BYTECODE_LINKFLAGS += \ +-no-g -without-runtime -set-runtime-default standard_library_default=. 
endif partialclean:: @@ -2646,8 +2645,8 @@ endif $(V_OCAMLOPT)$(COMPILE_NATIVE_MODULE) -c $< partialclean:: - for d in utils parsing typing bytecomp asmcomp middle_end file_formats \ - lambda middle_end/closure middle_end/flambda \ + for d in utils utils/config parsing typing bytecomp asmcomp middle_end \ + file_formats lambda middle_end/closure middle_end/flambda \ middle_end/flambda/base_types \ driver toplevel toplevel/byte toplevel/native tools debugger; do \ rm -f $$d/*.cm[ioxt] $$d/*.cmti $$d/*.annot $$d/*.s $$d/*.asm \ @@ -2708,10 +2707,11 @@ $(foreach file, asmcomp/emit.ml $(ARCH_SPECIFIC),\ $(eval $(call MV_FILE,$(file).depend,$(file)))) DEP_DIRS = \ - utils parsing typing bytecomp asmcomp middle_end lambda file_formats \ - middle_end/closure middle_end/flambda middle_end/flambda/base_types driver \ - toplevel toplevel/byte toplevel/native lex tools debugger ocamldoc ocamltest \ - testsuite/lib testsuite/tools otherlibs/dynlink + utils utils/config parsing typing bytecomp asmcomp middle_end lambda \ + file_formats middle_end/closure middle_end/flambda \ + middle_end/flambda/base_types driver toplevel toplevel/byte toplevel/native \ + lex tools debugger ocamldoc ocamltest testsuite/lib testsuite/tools \ + otherlibs/dynlink DEP_FILES = $(addsuffix .depend, $(DEP_DIRS)) @@ -2727,7 +2727,9 @@ distclean: clean ifneq "$(FLEXDLL_SUBMODULE_PRESENT)" "" $(MAKE) -C flexdll distclean MSVC_DETECT=0 endif +ifneq "$(wildcard manual)" "" $(MAKE) -C manual distclean +endif rm -f ocamldoc/META rm -f $(addprefix ocamltest/,ocamltest_config.ml ocamltest_unix.ml) rm -f otherlibs/dynlink/META otherlibs/dynlink/dynlink_config.ml \ @@ -2740,6 +2742,7 @@ endif $(MAKE) -C testsuite distclean rm -f tools/eventlog_metadata tools/*.bak rm -f utils/config.common.ml utils/config.generated.ml + rm -rf utils/config rm -f compilerlibs/META rm -f boot/ocamlrun boot/ocamlrun.exe boot/$(HEADER_NAME) \ boot/flexdll_*.o boot/flexdll_*.obj \ @@ -2749,21 +2752,69 @@ endif $(BYTE_BUILD_TREE) 
$(OPT_BUILD_TREE) rm -f config.log config.status libtool -INSTALL_LIBDIR_DYNLINK = $(INSTALL_LIBDIR)/dynlink +# COMPILER_ARTEFACT_DIRS adds the common compiler-libs directories as prefixes +# to a sequence of patterns in the first argument, e.g. +# $(call COMPILER_ARTEFACT_DIRS, *.cmi) expands to utils/*.cmi, parsing/*.cmi, +# and so forth. Multiple wildcard patterns may be supplied. An optional second +# argument includes additional directories beyond the common ones (e.g. asmcomp, +# etc.) +COMPILER_ARTEFACT_DIRS = \ + $(foreach dir, \ + utils parsing typing bytecomp file_formats lambda driver toplevel \ + $(if $(filter-out undefined, $(origin 2)), $(2)), \ + $(addprefix $(dir)/, $(1))) +NATIVE_ARTEFACT_DIRS = \ + asmcomp toplevel/native \ + middle_end middle_end/closure middle_end/flambda middle_end/flambda/base_types # Installation +# Historically, the install target dynamically installed what had been built, +# for example, if only world had been built then make install simply didn't +# install the native tools. That infrastructure is potentially convenient when +# working on the compiler, but potentially masks bugs. It is better to have the +# installation targets require everything configure mandated to have built. +# There are three entry points to installation: +# install - installs everything +# installopt - installs the native code compiler _and_ the extra .opt tools +# installoptopt - installs just the extra .opt tools +# The installopt targets have been maintained for now, but may be removed in the +# future. 
-.PHONY: install -install:: - $(MKDIR) "$(INSTALL_BINDIR)" - $(MKDIR) "$(INSTALL_LIBDIR)" -ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" - $(MKDIR) "$(INSTALL_STUBLIBDIR)" -endif - $(MKDIR) "$(INSTALL_COMPLIBDIR)" - $(MKDIR) "$(INSTALL_DOCDIR)" - $(MKDIR) "$(INSTALL_INCDIR)" - $(MKDIR) "$(INSTALL_LIBDIR_PROFILING)" +ifeq "$(NATIVE_COMPILER)" "true" +install: full-installoptopt + $(call INSTALL_END) +else +install: common-install + $(call INSTALL_END) +endif + +# These three targets are the slightly esoteric special sauce that avoid +# recursive make invocations in the install targets. +# There are three basic install recipes: +# - The old install target is available to common-install, but never recurses to +# installopt +# - The old installopt target is available as both full-installopt and +# native-install +# - The old installoptopt target is also available as full-installoptopt and +# installopt +# These sets of recipes are then welded together by these three dependency +# specifications +# - When configured with --disable-native-compiler, the install target simply +# depends on common-install (see above) +# - Otherwise, install depends on full-installoptopt (see above) +# - The recipe for full-installoptopt installs the .opt versions of the tools, +# but it _depends on_ full-installopt. 
+# - full-installopt installs the native compiler, but it _depends on_ +# common-install +installopt: native-install + +full-installopt:: common-install + +full-installoptopt: full-installopt + +.PHONY: common-install +common-install:: + $(call INSTALL_BEGIN) ifeq "$(SUFFIXING)" "true" MANGLE_RUNTIME_NAME = $(TARGET)-$(1)-$(BYTECODE_RUNTIME_ID)$(EXE) @@ -2774,341 +2825,264 @@ MANGLE_RUNTIME_DLL_NAME = lib$(1)_shared$(EXT_DLL) endif define INSTALL_RUNTIME -install:: - $(INSTALL_PROG) \ - runtime/$(1)$(EXE) \ - "$(INSTALL_BINDIR)/$(call MANGLE_RUNTIME_NAME,$(1))" -ifeq "$(SUFFIXING)" "true" - cd "$(INSTALL_BINDIR)" && \ - $(LN) "$(TARGET)-$(1)-$(BYTECODE_RUNTIME_ID)$(EXE)" "$(1)$(EXE)" - cd "$(INSTALL_BINDIR)" && \ - $(LN) "$(TARGET)-$(1)-$(BYTECODE_RUNTIME_ID)$(EXE)" \ - "$(1)-$(ZINC_RUNTIME_ID)$(EXE)" -endif +common-install:: + $$(call INSTALL_ITEM, runtime/$(1)$(EXE), bin, , \ + $(call MANGLE_RUNTIME_NAME,$(1)), $(if $(filter true, $(SUFFIXING)), \ + $(1)$(EXE) $(1)-$(ZINC_RUNTIME_ID)$(EXE))) endef define INSTALL_RUNTIME_LIB ifeq "$(2)" "BYTECODE" -install:: +common-install:: else -installopt:: -endif - $(INSTALL_PROG) \ - runtime/lib$(1)_shared$(EXT_DLL) \ - "$(INSTALL_LIBDIR)/$(call MANGLE_RUNTIME_DLL_NAME,$(1),$(2))" -ifeq "$(SUFFIXING)" "true" - cd "$(INSTALL_LIBDIR)" && \ - $(LN) "$(call MANGLE_RUNTIME_DLL_NAME,$(1),$(2))" \ - "lib$(1)_shared$(EXT_DLL)" +full-installopt native-install:: endif + $$(call INSTALL_ITEM, runtime/lib$(1)_shared$(EXT_DLL), libexec, , \ + $(call MANGLE_RUNTIME_DLL_NAME,$(1),$(2)), \ + $(if $(filter true, $(SUFFIXING)), lib$(1)_shared$(EXT_DLL))) endef $(foreach runtime, $(runtime_PROGRAMS), \ $(eval $(call INSTALL_RUNTIME,$(runtime)))) -install:: - $(INSTALL_DATA) runtime/ld.conf $(runtime_BYTECODE_STATIC_LIBRARIES) \ - "$(INSTALL_LIBDIR)" +common-install:: + $(call INSTALL_ITEMS, runtime/ld.conf $(runtime_BYTECODE_STATIC_LIBRARIES), \ + lib) $(foreach shared_runtime, $(runtime_BYTECODE_SHARED_LIBRARIES), \ $(eval $(call 
INSTALL_RUNTIME_LIB,$(shared_runtime),BYTECODE))) -install:: - $(INSTALL_DATA) runtime/caml/domain_state.tbl runtime/caml/*.h \ - "$(INSTALL_INCDIR)" - $(INSTALL_PROG) ocaml$(EXE) "$(INSTALL_BINDIR)" +common-install:: + $(call INSTALL_ITEMS, \ + runtime/caml/domain_state.tbl runtime/caml/*.h, \ + lib, $(INSTALL_LIBDIR_CAML)) + $(call INSTALL_ITEMS, ocaml$(EXE), bin) ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(call INSTALL_STRIPPED_BYTE_PROG,\ - ocamlc$(EXE),"$(INSTALL_BINDIR)/ocamlc.byte$(EXE)") + $(call STRIP_BYTE_PROG, ocamlc$(EXE)) +ifeq "$(NATIVE_COMPILER)" "true" + $(call INSTALL_ITEM, \ + ocamlc$(EXE).stripped, bin, , ocamlc.byte$(EXE)) +else + $(call INSTALL_ITEM, \ + ocamlc$(EXE).stripped, bin, , ocamlc.byte$(EXE), ocamlc$(EXE)) +endif endif $(MAKE) -C stdlib install + +define INSTALL_ONE_NAT_TOOL +common-install:: +ifeq "$(NATIVE_COMPILER)" "true" ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(INSTALL_PROG) lex/ocamllex$(EXE) \ - "$(INSTALL_BINDIR)/ocamllex.byte$(EXE)" - for i in $(TOOLS_TO_INSTALL_NAT); \ - do \ - $(INSTALL_PROG) "tools/$$i$(EXE)" "$(INSTALL_BINDIR)/$$i.byte$(EXE)";\ - if test -f "tools/$$i".opt$(EXE); then \ - $(INSTALL_PROG) "tools/$$i.opt$(EXE)" "$(INSTALL_BINDIR)" && \ - (cd "$(INSTALL_BINDIR)" && $(LN) "$$i.opt$(EXE)" "$$i$(EXE)"); \ - else \ - (cd "$(INSTALL_BINDIR)" && $(LN) "$$i.byte$(EXE)" "$$i$(EXE)"); \ - fi; \ - done + $$(call INSTALL_ITEM, tools/$(1)$(EXE), bin, , $(1).byte$(EXE)) +endif + $$(call INSTALL_ITEM, tools/$(1).opt$(EXE), bin, , , $(1)$(EXE)) else - for i in $(TOOLS_TO_INSTALL_NAT); \ - do \ - if test -f "tools/$$i".opt$(EXE); then \ - $(INSTALL_PROG) "tools/$$i.opt$(EXE)" "$(INSTALL_BINDIR)"; \ - (cd "$(INSTALL_BINDIR)" && $(LN) "$$i.opt$(EXE)" "$$i$(EXE)"); \ - fi; \ - done + $$(call INSTALL_ITEM, tools/$(1)$(EXE), bin, , $(1).byte$(EXE), $(1)$(EXE)) endif - for i in $(TOOLS_TO_INSTALL_BYT); \ - do \ - $(INSTALL_PROG) "tools/$$i$(EXE)" "$(INSTALL_BINDIR)";\ - done - $(INSTALL_PROG) $(ocamlyacc_PROGRAM)$(EXE) 
"$(INSTALL_BINDIR)" - $(INSTALL_DATA) \ - utils/*.cmi \ - parsing/*.cmi \ - typing/*.cmi \ - bytecomp/*.cmi \ - file_formats/*.cmi \ - lambda/*.cmi \ - driver/*.cmi \ - toplevel/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - toplevel/byte/*.cmi \ - "$(INSTALL_COMPLIBDIR)" +endef + +ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" +common-install:: +ifeq "$(NATIVE_COMPILER)" "true" + $(call INSTALL_ITEM, \ + lex/ocamllex$(EXE), bin, , ocamllex.byte$(EXE)) +else + $(call INSTALL_ITEM, \ + lex/ocamllex$(EXE), bin, , ocamllex.byte$(EXE), ocamllex$(EXE)) +endif +endif + +$(foreach tool, $(TOOLS_TO_INSTALL_NAT), \ + $(eval $(call INSTALL_ONE_NAT_TOOL,$(tool)))) + +define INSTALL_ONE_BYT_TOOL +common-install:: + $$(call INSTALL_ITEMS, tools/$(1)$(EXE), bin) +endef + +$(foreach tool, $(TOOLS_TO_INSTALL_BYT), \ + $(eval $(call INSTALL_ONE_BYT_TOOL,$(tool)))) + +common-install:: + $(call INSTALL_ITEMS, $(ocamlyacc_PROGRAM)$(EXE), bin) + $(call INSTALL_ITEMS, \ + $(call COMPILER_ARTEFACT_DIRS, *.cmi), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - utils/*.cmt utils/*.cmti utils/*.mli \ - parsing/*.cmt parsing/*.cmti parsing/*.mli \ - typing/*.cmt typing/*.cmti typing/*.mli \ - file_formats/*.cmt file_formats/*.cmti file_formats/*.mli \ - lambda/*.cmt lambda/*.cmti lambda/*.mli \ - bytecomp/*.cmt bytecomp/*.cmti bytecomp/*.mli \ - driver/*.cmt driver/*.cmti driver/*.mli \ - toplevel/*.cmt toplevel/*.cmti toplevel/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - toplevel/byte/*.cmt \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - tools/profiling.cmt tools/profiling.cmti \ - "$(INSTALL_LIBDIR_PROFILING)" -endif - $(INSTALL_DATA) \ - compilerlibs/*.cma compilerlibs/META \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - $(ocamlc_CMO_FILES) $(ocaml_CMO_FILES) \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_PROG) $(expunge) "$(INSTALL_LIBDIR)" + $(call INSTALL_ITEMS, \ + $(call COMPILER_ARTEFACT_DIRS, *.cmt *.cmti *.mli), 
\ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, toplevel/byte/*.cmt, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, tools/profiling.cmt tools/profiling.cmti, \ + lib, $(INSTALL_LIBDIR_PROFILING)) +endif + $(call INSTALL_ITEMS, compilerlibs/*.cma compilerlibs/META, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, $(ocamlc_CMO_FILES) $(ocaml_CMO_FILES), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, $(expunge), libexec) # If installing over a previous OCaml version, ensure some modules are removed # from the previous installation. - rm -f "$(INSTALL_LIBDIR)"/topdirs.cm* "$(INSTALL_LIBDIR)/topdirs.mli" - rm -f "$(INSTALL_LIBDIR)"/profiling.cm* "$(INSTALL_LIBDIR)/profiling.$(O)" - $(INSTALL_DATA) \ - tools/profiling.cmi tools/profiling.cmo \ - "$(INSTALL_LIBDIR_PROFILING)" + $(call INSTALL_RM, \ + "$(INSTALL_LIBDIR)"/topdirs.cm* "$(INSTALL_LIBDIR)/topdirs.mli") + $(call INSTALL_RM, \ + "$(INSTALL_LIBDIR)"/profiling.cm* "$(INSTALL_LIBDIR)/profiling.$(O)") + $(call INSTALL_ITEMS, tools/profiling.cmi tools/profiling.cmo, \ + lib, $(INSTALL_LIBDIR_PROFILING)) ifeq "$(UNIX_OR_WIN32)" "unix" # Install manual pages only on Unix $(MAKE) -C man install endif # For dynlink, if installing over a previous OCaml version, ensure # dynlink is removed from the previous installation. 
- rm -f "$(INSTALL_LIBDIR)"/dynlink.cm* "$(INSTALL_LIBDIR)/dynlink.mli" \ - "$(INSTALL_LIBDIR)/dynlink.$(A)" \ - $(addprefix "$(INSTALL_LIBDIR)/", $(notdir $(dynlink_CMX_FILES))) - $(MKDIR) "$(INSTALL_LIBDIR_DYNLINK)" - $(INSTALL_DATA) \ - otherlibs/dynlink/dynlink.cmi otherlibs/dynlink/dynlink.cma \ - otherlibs/dynlink/META \ - "$(INSTALL_LIBDIR_DYNLINK)" + $(call INSTALL_RM, \ + "$(INSTALL_LIBDIR)"/dynlink.cm* \ + "$(INSTALL_LIBDIR)/dynlink.mli" \ + "$(INSTALL_LIBDIR)/dynlink.$(A)" \ + $(addprefix "$(INSTALL_LIBDIR)/", $(notdir $(dynlink_CMX_FILES)))) + $(call INSTALL_ITEMS, \ + otherlibs/dynlink/dynlink.cmi otherlibs/dynlink/dynlink.cma \ + otherlibs/dynlink/META, \ + lib, $(INSTALL_LIBDIR_DYNLINK)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - otherlibs/dynlink/dynlink.cmti otherlibs/dynlink/dynlink.mli \ - "$(INSTALL_LIBDIR_DYNLINK)" + $(call INSTALL_ITEMS, \ + otherlibs/dynlink/dynlink.cmti otherlibs/dynlink/dynlink.mli, \ + lib, $(INSTALL_LIBDIR_DYNLINK)) endif for i in $(OTHERLIBS); do \ $(MAKE) -C otherlibs/$$i install || exit $$?; \ done ifeq "$(build_ocamldoc)" "true" - $(MKDIR) "$(INSTALL_LIBDIR)/ocamldoc" - $(INSTALL_PROG) $(OCAMLDOC) "$(INSTALL_BINDIR)" - $(INSTALL_DATA) \ - ocamldoc/ocamldoc.hva ocamldoc/*.cmi ocamldoc/odoc_info.cma \ - ocamldoc/META \ - "$(INSTALL_LIBDIR)/ocamldoc" - $(INSTALL_DATA) \ - $(OCAMLDOC_LIBCMIS) \ - "$(INSTALL_LIBDIR)/ocamldoc" + $(call INSTALL_ITEMS, ocamldoc/ocamldoc$(EXE), bin) + $(call INSTALL_ITEMS, \ + ocamldoc/ocamldoc.hva ocamldoc/*.cmi ocamldoc/odoc_info.cma \ + ocamldoc/META, \ + lib, $(INSTALL_LIBDIR_OCAMLDOC)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - $(OCAMLDOC_LIBMLIS) $(OCAMLDOC_LIBCMTS) \ - "$(INSTALL_LIBDIR)/ocamldoc" + $(call INSTALL_ITEMS, $(OCAMLDOC_LIBMLIS) $(OCAMLDOC_LIBCMTS), \ + lib, $(INSTALL_LIBDIR_OCAMLDOC)) endif endif ifeq "$(build_libraries_manpages)" "true" $(MAKE) -C api_docgen install endif - if test -n "$(WITH_DEBUGGER)"; then \ - $(INSTALL_PROG) 
debugger/ocamldebug$(EXE) "$(INSTALL_BINDIR)"; \ - fi +ifneq "$(WITH_DEBUGGER)" "" + $(call INSTALL_ITEMS, debugger/ocamldebug$(EXE), bin) +endif ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" ifeq "$(TOOLCHAIN)" "msvc" - $(INSTALL_DATA) $(FLEXDLL_SOURCE_DIR)/$(FLEXDLL_MANIFEST) \ - "$(INSTALL_BINDIR)/" + # Technically this should not be installed with "executable" + # permissions, but in practice that request will be ignored. + $(call INSTALL_ITEMS, $(FLEXDLL_SOURCE_DIR)/$(FLEXDLL_MANIFEST), bin) endif ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(INSTALL_PROG) \ - flexlink.byte$(EXE) "$(INSTALL_BINDIR)" -endif # ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(MKDIR) "$(INSTALL_FLEXDLLDIR)" - $(INSTALL_DATA) $(FLEXDLL_OBJECTS) "$(INSTALL_FLEXDLLDIR)" -endif # ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" - $(INSTALL_DATA) Makefile.config "$(INSTALL_LIBDIR)" - $(INSTALL_DATA) $(DOC_FILES) "$(INSTALL_DOCDIR)" -ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - if test -f ocamlopt$(EXE); then $(MAKE) installopt; else \ - cd "$(INSTALL_BINDIR)"; \ - $(LN) ocamlc.byte$(EXE) ocamlc$(EXE); \ - $(LN) ocamllex.byte$(EXE) ocamllex$(EXE); \ - (test -f flexlink.byte$(EXE) && \ - $(LN) flexlink.byte$(EXE) flexlink$(EXE)) || true; \ - fi +ifeq "$(NATIVE_COMPILER)" "true" + $(call INSTALL_ITEMS, flexlink.byte$(EXE), bin) else - if test -f ocamlopt$(EXE); then $(MAKE) installopt; fi + $(call INSTALL_ITEM, flexlink.byte$(EXE), bin, , , flexlink$(EXE)) endif +endif # ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" + $(call INSTALL_ITEMS, $(FLEXDLL_OBJECTS), lib, $(INSTALL_LIBDIR_FLEXDLL)) +endif # ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" + $(call INSTALL_ITEMS, Makefile.config, lib) + $(call INSTALL_ITEMS, $(DOC_FILES), doc) # Installation of the native-code compiler -.PHONY: installopt -installopt:: - $(INSTALL_DATA) $(runtime_NATIVE_STATIC_LIBRARIES) "$(INSTALL_LIBDIR)" +.PHONY: full-installopt native-install +full-installopt native-install:: + $(call INSTALL_ITEMS, $(runtime_NATIVE_STATIC_LIBRARIES), lib) 
$(foreach shared_runtime, $(runtime_NATIVE_SHARED_LIBRARIES), \ $(eval $(call INSTALL_RUNTIME_LIB,$(shared_runtime),NATIVE))) -installopt:: +full-installopt native-install:: ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(call INSTALL_STRIPPED_BYTE_PROG,\ - ocamlopt$(EXE),"$(INSTALL_BINDIR)/ocamlopt.byte$(EXE)") + $(call STRIP_BYTE_PROG, ocamlopt$(EXE)) + $(call INSTALL_ITEM, ocamlopt$(EXE).stripped, bin, , ocamlopt.byte$(EXE)) endif $(MAKE) -C stdlib installopt - $(INSTALL_DATA) \ - middle_end/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/closure/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/flambda/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/flambda/base_types/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - asmcomp/*.cmi \ - "$(INSTALL_COMPLIBDIR)" + $(call INSTALL_ITEMS, \ + middle_end/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + middle_end/closure/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + middle_end/flambda/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + middle_end/flambda/base_types/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + asmcomp/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - middle_end/*.cmt middle_end/*.cmti \ - middle_end/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/closure/*.cmt middle_end/closure/*.cmti \ - middle_end/closure/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/flambda/*.cmt middle_end/flambda/*.cmti \ - middle_end/flambda/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/flambda/base_types/*.cmt \ - middle_end/flambda/base_types/*.cmti \ - middle_end/flambda/base_types/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - asmcomp/*.cmt asmcomp/*.cmti \ - asmcomp/*.mli \ - "$(INSTALL_COMPLIBDIR)" -endif - $(INSTALL_DATA) \ 
- $(ocamlopt_CMO_FILES) \ - "$(INSTALL_COMPLIBDIR)" + $(call INSTALL_ITEMS, \ + $(addprefix middle_end/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(addprefix middle_end/closure/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(addprefix middle_end/flambda/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(addprefix middle_end/flambda/base_types/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(addprefix asmcomp/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) +endif + $(call INSTALL_ITEMS, $(ocamlopt_CMO_FILES), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) ifeq "$(build_ocamldoc)" "true" - $(MKDIR) "$(INSTALL_LIBDIR)/ocamldoc" - $(INSTALL_PROG) $(OCAMLDOC_OPT) "$(INSTALL_BINDIR)" - $(INSTALL_DATA) \ - $(OCAMLDOC_LIBCMIS) \ - "$(INSTALL_LIBDIR)/ocamldoc" -ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - $(OCAMLDOC_LIBMLIS) $(OCAMLDOC_LIBCMTS) \ - "$(INSTALL_LIBDIR)/ocamldoc" -endif - $(INSTALL_DATA) \ - ocamldoc/ocamldoc.hva ocamldoc/*.cmx ocamldoc/odoc_info.$(A) \ - ocamldoc/odoc_info.cmxa \ - "$(INSTALL_LIBDIR)/ocamldoc" + $(call INSTALL_ITEMS, ocamldoc/ocamldoc.opt$(EXE), bin) + $(call INSTALL_ITEMS, \ + ocamldoc/*.cmx ocamldoc/odoc_info.$(A) ocamldoc/odoc_info.cmxa, \ + lib, $(INSTALL_LIBDIR_OCAMLDOC)) endif ifeq "$(strip $(NATDYNLINK))" "true" - $(INSTALL_DATA) \ - $(dynlink_CMX_FILES) otherlibs/dynlink/dynlink.cmxa \ - otherlibs/dynlink/dynlink.$(A) \ - "$(INSTALL_LIBDIR_DYNLINK)" + $(call INSTALL_ITEMS, \ + $(dynlink_CMX_FILES) otherlibs/dynlink/dynlink.cmxa \ + otherlibs/dynlink/dynlink.$(A), \ + lib, $(INSTALL_LIBDIR_DYNLINK)) endif for i in $(OTHERLIBS); do \ $(MAKE) -C otherlibs/$$i installopt || exit $$?; \ done -ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - if test -f ocamlopt.opt$(EXE); then $(MAKE) installoptopt; else \ - cd "$(INSTALL_BINDIR)"; \ - 
$(LN) ocamlc.byte$(EXE) ocamlc$(EXE); \ - $(LN) ocamlopt.byte$(EXE) ocamlopt$(EXE); \ - $(LN) ocamllex.byte$(EXE) ocamllex$(EXE); \ - (test -f flexlink.byte$(EXE) && \ - $(LN) flexlink.byte$(EXE) flexlink$(EXE)) || true; \ - fi -else - if test -f ocamlopt.opt$(EXE); then $(MAKE) installoptopt; fi -endif - $(INSTALL_DATA) \ - tools/profiling.cmx tools/profiling.$(O) \ - "$(INSTALL_LIBDIR_PROFILING)" - -.PHONY: installoptopt -installoptopt: - $(INSTALL_PROG) ocamlc.opt$(EXE) "$(INSTALL_BINDIR)" - $(INSTALL_PROG) ocamlopt.opt$(EXE) "$(INSTALL_BINDIR)" - $(INSTALL_PROG) lex/ocamllex.opt$(EXE) "$(INSTALL_BINDIR)" - cd "$(INSTALL_BINDIR)"; \ - $(LN) ocamlc.opt$(EXE) ocamlc$(EXE); \ - $(LN) ocamlopt.opt$(EXE) ocamlopt$(EXE); \ - $(LN) ocamllex.opt$(EXE) ocamllex$(EXE) + $(call INSTALL_ITEMS, tools/profiling.cmx tools/profiling.$(O), \ + lib, $(INSTALL_LIBDIR_PROFILING)) + +.PHONY: full-installoptopt installopt installoptopt +full-installoptopt installopt installoptopt: + $(call INSTALL_ITEM, ocamlc.opt$(EXE), bin, , , ocamlc$(EXE)) + $(call INSTALL_ITEM, ocamlopt.opt$(EXE), bin, , , ocamlopt$(EXE)) + $(call INSTALL_ITEM, lex/ocamllex.opt$(EXE), bin, , , ocamllex$(EXE)) ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" - $(INSTALL_PROG) flexlink.opt$(EXE) "$(INSTALL_BINDIR)" - cd "$(INSTALL_BINDIR)"; \ - $(LN) flexlink.opt$(EXE) flexlink$(EXE) -endif - $(INSTALL_DATA) \ - utils/*.cmx parsing/*.cmx typing/*.cmx bytecomp/*.cmx \ - toplevel/*.cmx toplevel/native/*.cmx \ - toplevel/native/tophooks.cmi \ - file_formats/*.cmx \ - lambda/*.cmx \ - driver/*.cmx asmcomp/*.cmx middle_end/*.cmx \ - middle_end/closure/*.cmx \ - middle_end/flambda/*.cmx \ - middle_end/flambda/base_types/*.cmx \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - compilerlibs/*.cmxa compilerlibs/*.$(A) \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - $(ocamlc_CMX_FILES) $(ocamlc_CMX_FILES:.cmx=.$(O)) \ - $(ocamlopt_CMX_FILES) $(ocamlopt_CMX_FILES:.cmx=.$(O)) \ - $(ocamlnat_CMX_FILES:.cmx=.$(O)) \ - 
"$(INSTALL_COMPLIBDIR)" + $(call INSTALL_ITEM, flexlink.opt$(EXE), bin, , , flexlink$(EXE)) +endif + $(call INSTALL_ITEMS, \ + $(call COMPILER_ARTEFACT_DIRS, *.cmx, $(NATIVE_ARTEFACT_DIRS)) \ + toplevel/native/tophooks.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, compilerlibs/*.cmxa compilerlibs/*.$(A), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(ocamlc_CMX_FILES:.cmx=.$(O)) \ + $(ocamlopt_CMX_FILES:.cmx=.$(O)) \ + $(ocamlnat_CMX_FILES:.cmx=.$(O)), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) ifeq "$(INSTALL_OCAMLNAT)" "true" - $(INSTALL_PROG) ocamlnat$(EXE) "$(INSTALL_BINDIR)" + $(call INSTALL_ITEMS, ocamlnat$(EXE), bin) endif # Installation of the *.ml sources of compiler-libs .PHONY: install-compiler-sources install-compiler-sources: ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - utils/*.ml parsing/*.ml typing/*.ml bytecomp/*.ml driver/*.ml \ - file_formats/*.ml \ - lambda/*.ml \ - toplevel/*.ml toplevel/byte/*.ml \ - middle_end/*.ml middle_end/closure/*.ml \ - middle_end/flambda/*.ml middle_end/flambda/base_types/*.ml \ - asmcomp/*.ml \ - asmcmp/debug/*.ml \ - "$(INSTALL_COMPLIBDIR)" + $(call INSTALL_ITEMS, \ + $(call COMPILER_ARTEFACT_DIRS, *.ml, $(NATIVE_ARTEFACT_DIRS)) \ + toplevel/byte/*.ml, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) endif include .depend diff --git a/Makefile.common b/Makefile.common index 59b7f5b62ae5..ae9a1cb0d762 100644 --- a/Makefile.common +++ b/Makefile.common @@ -86,15 +86,351 @@ V_ODOC = endif DESTDIR ?= -INSTALL_BINDIR := $(DESTDIR)$(BINDIR) -INSTALL_LIBDIR := $(DESTDIR)$(LIBDIR) -INSTALL_INCDIR=$(INSTALL_LIBDIR)/caml -INSTALL_STUBLIBDIR := $(DESTDIR)$(STUBLIBDIR) -INSTALL_LIBDIR_PROFILING = $(INSTALL_LIBDIR)/profiling -INSTALL_MANDIR := $(DESTDIR)$(MANDIR) -INSTALL_PROGRAMS_MAN_DIR := $(DESTDIR)$(PROGRAMS_MAN_DIR) -INSTALL_LIBRARIES_MAN_DIR := $(DESTDIR)$(LIBRARIES_MAN_DIR) -INSTALL_DOCDIR := $(DESTDIR)$(DOCDIR) + +# Augment directories from Makefile.config / 
Makefile.build_config with +# $(DESTDIR). i.e. each of these 5 directories may be overridden by the user, +# and the compiler distribution makes no assumptions about where they are +# relative to each other. +INSTALL_BINDIR = $(DESTDIR)$(BINDIR) +INSTALL_DOCDIR = $(DESTDIR)$(DOCDIR) +INSTALL_LIBDIR = $(DESTDIR)$(LIBDIR) +INSTALL_MANDIR = $(DESTDIR)$(MANDIR) +INSTALL_STUBLIBDIR = $(DESTDIR)$(STUBLIBDIR) + +# Library subdirectories. The compiler distribution does make assumptions about +# these, and they cannot be freely overridden by the user. +INSTALL_LIBDIR_CAML = caml +INSTALL_LIBDIR_COMPILERLIBS = compiler-libs +INSTALL_LIBDIR_DYNLINK = dynlink +INSTALL_LIBDIR_FLEXDLL = flexdll +INSTALL_LIBDIR_OCAMLDOC = ocamldoc +INSTALL_LIBDIR_PROFILING = profiling +INSTALL_LIBDIR_STDLIB = stdlib +INSTALL_LIBDIR_SYSTHREADS = threads + +INSTALL_MANDIR_PROGRAMS = man1 +INSTALL_MANDIR_LIBRARIES = man3 + +INSTALL_MODE ?= install + +# The scripts and commands generated by this installation system allow the user +# to be installing OCaml to any kind of tortuously difficult path they choose, +# but it is written assuming that the directory and file names which the +# distribution is in control of will follow some more restrictive rules, for +# simplicity. +# Paths in the installation system should always use forward slashes (these will +# be automatically translated to backslashes on Windows where required). Both +# the single and double quote characters are prohibited in all names and paths +# (this vastly simplifies the quoting assumptions between Unix/Windows). The @ +# symbol is not permitted in directory names because it used internally in +# filename mangling to represent forward slashes; it is permitted in filenames. +# Principally owing to escaping limitations of GNU make, it is not possible to +# use spaces in either source or target file names or in subdirectory names. 
+# tools/opam/generate.ml contains some sanity checking on the paths and names +# generated by these macros - make INSTALL_MODE=clone install is a good +# confidence check that all rules have been adhered to. + +# INSTALL_ITEM installs a single file, possibly with a different name and +# possibly creating additional symlinks/copies +# $1 = source file (may include directories) +# $2 = section (bin, doc, lib, libexec, man, stublibs) +# $3 = directory within section (may be empty) +# $4 = target basename (may be empty) +# $5 = additional basenames (either symlinked or copied, depending on what the +# platform supports) +# The $(origin n) dance is necessary to suppress warnings about undefined +# variables. +INSTALL_ITEM = \ + $(INSTALL_$(INSTALL_MODE)_PREFIX)$(call INSTALL_ENSURE_DIR,$\ + $(strip $(2)),$(if $(filter-out undefined,$(origin 3)),$(strip $(3))))$\ + $(call INSTALL_DESPATCH_$(INSTALL_MODE)_ITEM,$\ + $(strip $(1)),$\ + $(strip $(2)),$\ + $(if $(filter-out undefined,$(origin 3)),$(strip $(3))),$\ + $(if $(filter-out undefined,$(origin 4)),$(strip $(4))),$\ + $(if $(filter-out undefined,$(origin 5)),$(strip $(5)))) + +# INSTALL_ITEMS installs a series of files to a single directory +# $1 = source file(s) (may include directories and glob patterns) +# $2 = section (as for INSTALL_ITEM) +# $3 = directory within section (may be omitted) +# INSTALL_ITEMS is sometimes an alias for INSTALL_ITEM. For simplicity with +# undefined variable warnings, INSTALL_DESPATCH_foo_ITEMS is passed 5 parameters +# but $4 and $5 are always empty. 
+INSTALL_ITEMS = \ + $(INSTALL_$(INSTALL_MODE)_PREFIX)$(call INSTALL_ENSURE_DIR,$\ + $(strip $(2)),$(if $(filter-out undefined,$(origin 3)),$(strip $(3))))$\ + $(call INSTALL_DESPATCH_$(INSTALL_MODE)_ITEMS,$\ + $(strip $(1)),$\ + $(strip $(2)),$\ + $(if $(filter-out undefined,$(origin 3)),$(strip $(3))),,) + +# INSTALL_ITEMS_OPT is INSTALL_ITEMS, but does nothing if the source file(s) do +# not exist +INSTALL_ITEMS_OPT = \ + $(if $(wildcard $(1)),$(call INSTALL_ITEMS, \ + $(1), $(2), $(if $(filter-out undefined,$(origin 3)), $(3)))) + +INSTALL_ENSURE_DIR = \ + $(if $(filter undefined,$(origin DIR_CREATED_$(subst exec,,$(1))_$(2))),$\ + $(eval DIR_CREATED_$(subst exec,,$(1))_$(2):=)$\ + $(call INSTALL_DESPATCH_$(INSTALL_MODE)_MKDIR,$\ + $(subst exec,,$(1)),$(2))) + +# INSTALL_RM takes a single argument which may include glob patterns of files to +# be removed when performing a physical install. +INSTALL_RM = $(call INSTALL_DESPATCH_$(INSTALL_MODE)_RM,$(strip $(1))) + +# INSTALL_BEGIN and INSTALL_END are used in the root Makefile's install target +INSTALL_BEGIN = $(INSTALL_DESPATCH_$(INSTALL_MODE)_BEGIN) +INSTALL_END = $(INSTALL_DESPATCH_$(INSTALL_MODE)_END) + +# Normal installation +INSTALL_CMD_bin = $(INSTALL_PROG) +INSTALL_CMD_doc = $(INSTALL_DATA) +INSTALL_CMD_lib = $(INSTALL_DATA) +INSTALL_CMD_libexec = $(INSTALL_PROG) +INSTALL_CMD_man = $(INSTALL_DATA) +INSTALL_CMD_stublibs = $(INSTALL_PROG) + +INSTALL_SECTION_bin = $(INSTALL_BINDIR) +INSTALL_SECTION_doc = $(INSTALL_DOCDIR) +INSTALL_SECTION_lib = $(INSTALL_LIBDIR) +INSTALL_SECTION_libexec = $(INSTALL_LIBDIR) +INSTALL_SECTION_man = $(INSTALL_MANDIR) +INSTALL_SECTION_stublibs = $(INSTALL_STUBLIBDIR) + +QUOTE_SINGLE = '$(subst ','\'',$(1))' + +define NEWLINE + + +endef +SH_AND = && \$(NEWLINE) + +INSTALL_install_PREFIX = + +INSTALL_DESPATCH_install_RM = rm -f $(1) + +INSTALL_DESPATCH_install_MKDIR = \ + $(MKDIR) $(call QUOTE_SINGLE,$(INSTALL_SECTION_$(1))$(addprefix /,$(2))) \ + $(SH_AND) + +MK_LINK = \ + (cd 
"$(INSTALL_SECTION_$(2))$(addprefix /,$(3))" && \ + $(LN) $(call QUOTE_SINGLE,$(1)) $(call QUOTE_SINGLE,$(4))) + +INSTALL_DESPATCH_install_ITEM = \ + $(INSTALL_CMD_$(2)) $(1) \ + $(call QUOTE_SINGLE,$\ + $(INSTALL_SECTION_$(2))$(addprefix /,$(3))$(addprefix /,$(4))) \ + $(foreach link, $(5),$(SH_AND)$\ + $(call MK_LINK,$(if $(4),$(4),$(notdir $(1))),$(2),$(3),$(link))) + +INSTALL_DESPATCH_install_ITEMS = $(INSTALL_DESPATCH_install_ITEM) + +INSTALL_DESPATCH_install_BEGIN = @ +INSTALL_DESPATCH_install_END = @ + +INSTALL_display_PREFIX = @ + +INSTALL_DESPATCH_display_RM = @ + +INSTALL_DESPATCH_display_MKDIR = \ + echo $(call QUOTE_SINGLE,$\ + -> MKDIR $(INSTALL_SECTION_$(1))$(addprefix /,$(2))) $(SH_AND) + +MKLINK_display = \ + echo $(call QUOTE_SINGLE,-> LN \ + $(abspath $(INSTALL_SECTION_$(2))$(addprefix /,$(3))/$(1)) -> \ + $(if $(4),$(4),$(notdir $(1)))) + +INSTALL_DESPATCH_display_ITEM = \ + echo $(call QUOTE_SINGLE,-> INSTALL $(1) \ + $(INSTALL_SECTION_$(2))$(addprefix /,$(3))$(addprefix /,$(4))) \ + $(foreach link, $(5), && \ + $(call MKLINK_display,$(if $(4),$(4),$(notdir $(1))),$(2),$(3),$(link))) + +INSTALL_DESPATCH_display_ITEMS = $(INSTALL_DESPATCH_display_ITEM) + +INSTALL_DESPATCH_display_BEGIN = @ +INSTALL_DESPATCH_display_END = @ + +INSTALL_list_PREFIX = @ + +INSTALL_DESPATCH_list_RM = @ + +INSTALL_DESPATCH_list_MKDIR = + +MKLINK_list = \ + echo $(call QUOTE_SINGLE,-> \ + $(abspath $(INSTALL_SECTION_$(2))$(addprefix /,$(3))/$\ + $(if $(4),$(4),$(notdir $(1))))) + +INSTALL_DESPATCH_list_ITEM = \ + echo $(call QUOTE_SINGLE,-> \ + $(abspath $(INSTALL_SECTION_$(2))$(addprefix /,$(3))/$\ + $(if $(4),$(4),$(notdir $(1))))) \ + $(foreach link, $(5), && \ + $(call MKLINK_list,$(if $(4),$(4),$(notdir $(1))),$(2),$(3),$(link))) + +INSTALL_DESPATCH_list_ITEMS = \ + $(foreach file, $(wildcard $(1)), \ + echo $(call QUOTE_SINGLE,-> \ + $(INSTALL_SECTION_$(2))$(addprefix /,$(3))/$(notdir $(file)));) \ + true + +INSTALL_DESPATCH_list_BEGIN = @ 
+INSTALL_DESPATCH_list_END = @ + +OPAM_PACKAGE_NAME ?= ocaml-compiler + +# Generate $(OPAM_PACKAGE_NAME).install and $(OPAM_PACKAGE_NAME)-fixup.sh +# (INSTALL_MODE=opam) +# opam's .install format isn't quite rich enough at present to express the +# installation of the compiler. In particular, we can't install the doc files to +# doc/ocaml using a .install and we can't create symlinks. The things which +# can't be handled by the .install file are dealt with by the fixup script +# instead. + +INVOKE = $(strip $(1)) $(call QUOTE_SINGLE,$(strip $(2))) +ADD_LINE = $(call INVOKE, echo, $(2)) >> $(1) + +# RECORD_SYMLINK_TO_INSTALL +# $1 = file to install, implicitly relative to $(ROOTDIR) +# $2 = section +# $3 = subdirectory within $2 (may be empty) +# $4 = name to install $1 (must be specified) +# $5 = single name of symlink +# If symlinks are supported, $1 is ignored and the three pieces of information +# are recorded in create-symlinks: the directory, implicitly relative to the +# prefix, in which the symlink is to be created, the source file and name of the +# symlink. +# These can then be munged to a cd + ln combination in the fixup script. +# If symlinks are not supported, $1 is instead used to create an additional copy +# of the file, using the .install file. 
+ifeq "$(firstword $(LN))" "ln" +RECORD_SYMLINK_TO_INSTALL = \ + $(call ADD_LINE, $(ROOTDIR)/create-symlinks, \ + $(patsubst lib%,lib,$(2))$(addprefix /,$(3)) $(4) $(5)) +else +# Symlinks aren't available, so copy the file again using the target name +RECORD_SYMLINK_TO_INSTALL = \ + $(call RECORD_$(INSTALL_MODE)_ITEM_TO_INSTALL,$(1),$(2),$(3),$(5)) +endif + +# Process the arguments to pass to RECORD_$(INSTALL_MODE)_ITEM_TO_INSTALL: +# - Items installed to the stublibs section need to be remapped to the stublibs +# subdirectory of libexec (since we install to lib/ocaml/stublibs rather than +# opam's default lib/stublibs) +# - Source files must be given implicitly relative to $(ROOTDIR), so prefix with +# $(SUBDIR_NAME) if necessary +# - Items installed to the lib/libexec sections will in fact be installed to +# lib_root/libexec_root, so remap the installation directory to ocaml (i.e. to +# install to lib/ocaml rather than lib) +# - If no target basename has been explictly given, use the source's basename +RECORD_ITEM_TO_INSTALL = \ + $(if $(filter stublibs,$(2)),\ + $(call RECORD_ITEM_TO_INSTALL,$\ + $(1),libexec,stublibs$(addprefix /,$(3)),$(4),$(5)),\ + $(call RECORD_$(INSTALL_MODE)_ITEM_TO_INSTALL,$\ + $(addsuffix /,$(SUBDIR_NAME))$(1),$\ + $(2),$\ + $(if $(filter doc lib%,$(2)),ocaml$(addprefix /,$(3)),$(3)),$\ + $(if $(4),$(4),$(notdir $(1))),$\ + $(5))) + +# All files must be explicitly installed, so evaluate the wildcards and call +# INSTALL_DESPATCH_opam_ITEM for each file. +INSTALL_EVALUATE_GLOBS = \ + $(foreach file, $(wildcard $(1)), \ + $(call INSTALL_DESPATCH_$(INSTALL_MODE)_ITEM,$(file),$(2),$(3));) \ + true + +# RECORD_FILE_TO_INSTALL +# $1 = file to install, implicitly relative to $(ROOTDIR) +# $2 = bin/lib/libexec/man +# $3 = subdirectory within $2 (may be empty, but otherwise must end with "/") +# $4 = name to install $1 (must be specified) +# Writes an opam .install line to the section file for $(2). 
Each line consists +# of a double-quoted implicit filename relative to $(ROOTDIR) and optionally a +# second double-quoted implicit filename relative to the $(2) for the name to +# install the file under. +# e.g. "lex/ocamllex" {"ocamllex.byte"} or "expunge" {"ocaml/expunge"} +RECORD_FILE_TO_INSTALL = \ + $(call ADD_LINE, $(ROOTDIR)/opam-$(2), \ + "$(1)" $(if $(3)$(filter-out $(notdir $(1)),$(4)), {"$(3)$(4)"})) + +# RECORD_FILE_TO_CLONE +# $1 = file to install, implicitly relative to $(ROOTDIR) +# $2 = subdirectory (may be empty, but otherwise must end with "/") +# $3 = name to install $1 (must be specified) +# The compiler is installed as the ocaml package in opam, but the actual files +# are installed from other packages (typically ocaml-compiler). For the lib +# directory, the lib_root and libexec_root sections allow files to be installed +# to lib/ocaml, but there's no equivalent mechanism for the doc directory. These +# files are recorded to be copied manually in the fixup script. +RECORD_FILE_TO_CLONE = \ + $(call ADD_LINE, $(ROOTDIR)/clone-$(subst /,@,$(2)), $(1) $(3)) + +# RECORD_opam_ITEM_TO_INSTALL despatches the processed arguments of +# INSTALL_DESPATCH_opam_ITEM to the appropriate RECORD_ macro. 
+RECORD_opam_ITEM_TO_INSTALL = \ + $(if $(filter doc,$(2)),\ + $(call RECORD_FILE_TO_CLONE,$(1),doc/$(3),$(4)), \ + $(call RECORD_FILE_TO_INSTALL,$(1),$(2),$(addsuffix /,$(3)),$(4))) \ + $(foreach link, $(5), && \ + $(call RECORD_SYMLINK_TO_INSTALL,$(1),$(2),$(3),$(4),$(link))) + +INSTALL_DESPATCH_opam_ITEM = $(RECORD_ITEM_TO_INSTALL) + +INSTALL_DESPATCH_opam_ITEMS = $(INSTALL_EVALUATE_GLOBS) + +INSTALL_opam_PREFIX = @ + +INSTALL_DESPATCH_opam_RM = @ + +# INSTALL_MKDIR is ignored (opam creates them when executing the .install file) +INSTALL_DESPATCH_opam_MKDIR = + +INSTALL_DESPATCH_opam_BEGIN = \ + rm -f opam-bin clone-* opam-lib opam-libexec opam-man create-symlinks + +# Munge opam-bin, opam-lib, opam-libexec and opam-man into a .install file and +# then munge clone-* and create-symlinks into the fixup script. +INSTALL_DESPATCH_opam_END = \ + $(OCAMLRUN) ./ocaml$(EXE) $(STDLIBFLAGS) \ + tools/opam/generate.ml $(INSTALL_MODE) $(OPAM_PACKAGE_NAME) '$(LN)' + +# Generate $(OPAM_PACKAGE_NAME)-clone.sh (INSTALL_MODE=clone) + +# ld.conf is explicitly copied, rather than cloned, to allow (in principle, if +# not in practice) the cloning installation to edit it. +RECORD_clone_ITEM_TO_INSTALL = \ + $(if $(filter runtime/ld.conf Makefile.config, $(1)), true, \ + $(if $(filter libexec,$(2)), \ + $(call RECORD_clone_ITEM_TO_INSTALL,$(1),lib,$(3),$(4),$(5)), \ + $(call ADD_LINE, \ + $(ROOTDIR)/clone-$(2)$(addprefix @,$(subst /,@,$(3))), \ + $(2)$(addprefix /,$(3))/$(if $(4),$(4),$(notdir $(1)))) \ + $(foreach link, $(5), && \ + $(call RECORD_SYMLINK_TO_INSTALL,$(1),$(2),$(3),$(4),$(link))))) + +INSTALL_DESPATCH_clone_ITEM = $(RECORD_ITEM_TO_INSTALL) + +INSTALL_DESPATCH_clone_ITEMS = $(INSTALL_EVALUATE_GLOBS) + +INSTALL_clone_PREFIX = @ + +INSTALL_DESPATCH_clone_RM = @ + +# INSTALL_MKDIR is ignored - INSTALL_DESPATCH_clone_END automatically creates +# directories for each cp file. 
+INSTALL_DESPATCH_clone_MKDIR = + +INSTALL_DESPATCH_clone_BEGIN = rm -f clone-* create-symlinks + +INSTALL_DESPATCH_clone_END = $(INSTALL_DESPATCH_opam_END) FLEXDLL_SUBMODULE_PRESENT := $(wildcard $(ROOTDIR)/flexdll/Makefile) @@ -510,12 +846,11 @@ $(eval $(call _OCAML_BYTECODE_LIBRARY,$(1))) $(eval $(call _OCAML_NATIVE_LIBRARY,$(1))) endef # OCAML_LIBRARY -# Installing a bytecode executable, with debug information removed -define INSTALL_STRIPPED_BYTE_PROG -$(OCAMLRUN) $(ROOTDIR)/tools/stripdebug$(EXE) $(1) $(1).tmp \ -&& $(INSTALL_PROG) $(1).tmp $(2) \ -&& rm $(1).tmp -endef # INSTALL_STRIPPED_BYTE_PROG +# Strip debug information from a bytecode executable +define STRIP_BYTE_PROG +$(OCAMLRUN) $(ROOTDIR)/tools/stripdebug$(EXE) \ + $(strip $(1)) $(strip $(1)).stripped +endef # STRIP_BYTE_PROG # ocamlc has several mechanisms for linking a bytecode image to the runtime # which executes it. The exact mechanism depends on the platform and the precise diff --git a/Makefile.cross b/Makefile.cross index 71a15b0bd2ef..dff7b38021fa 100644 --- a/Makefile.cross +++ b/Makefile.cross @@ -69,10 +69,10 @@ CROSSCOMPILERLIBS := $(addprefix compilerlibs/,$(addsuffix .cmxa,\ .PHONY: crossopt ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" crossopt: cross-flexdll - $(MAKE) runtime-all $(OLDS) + $(MAKE) runtime $(OLDS) else # In that case, $(OLDS) is empty, we can depend directly on runtime-all -crossopt: runtime-all +crossopt: runtime endif $(MAKE) ocamlc $(TOOLS_BYTECODE_TARGETS) expunge$(EXE) \ $(CROSS_COMPILER_OVERRIDES) $(OLDS) diff --git a/api_docgen/Makefile b/api_docgen/Makefile index 07254645812a..4ad898ea6bfd 100644 --- a/api_docgen/Makefile +++ b/api_docgen/Makefile @@ -14,6 +14,7 @@ #************************************************************************** # Used by included Makefiles ROOTDIR = .. 
+SUBDIR_NAME = api_docgen -include ../Makefile.build_config odoc-%: diff --git a/api_docgen/ocamldoc/Makefile b/api_docgen/ocamldoc/Makefile index 058f88c26d95..f173b7690001 100644 --- a/api_docgen/ocamldoc/Makefile +++ b/api_docgen/ocamldoc/Makefile @@ -14,6 +14,7 @@ #************************************************************************** # Used by included Makefiles ROOTDIR = ../.. +SUBDIR_NAME = api_docgen/ocamldoc include ../Makefile.common vpath %.mli ../../stdlib $(DOC_COMPILERLIBS_DIRS) $(DOC_STDLIB_DIRS) @@ -121,7 +122,4 @@ build/latex/compilerlibs_input.tex: | build/latex .PHONY: install install: - $(MKDIR) "$(INSTALL_LIBRARIES_MAN_DIR)" - if test -d build/man; then \ - $(INSTALL_DATA) build/man/*.3o "$(INSTALL_LIBRARIES_MAN_DIR)"; \ - fi + $(call INSTALL_ITEMS_OPT, build/man/*.3o, man, $(INSTALL_MANDIR_LIBRARIES)) diff --git a/api_docgen/odoc/Makefile b/api_docgen/odoc/Makefile index c40ed778c41e..8f2c0195571a 100644 --- a/api_docgen/odoc/Makefile +++ b/api_docgen/odoc/Makefile @@ -15,6 +15,7 @@ # Used by included Makefiles ROOTDIR = ../.. +SUBDIR_NAME = api_docgen/odoc include ../Makefile.common @@ -191,13 +192,10 @@ $(ALL_PAGED_DOC:%=build/%.3o.stamp):build/%.3o.stamp:build/%.odocl | build/ # Man pages are the only installed documentation .PHONY: install install: - $(MKDIR) "$(INSTALL_LIBRARIES_MAN_DIR)" - if test -d build/man/libref ; then \ - $(INSTALL_DATA) build/man/libref/* "$(INSTALL_LIBRARIES_MAN_DIR)"; \ - fi - if test -d build/man/compilerlibref ; then \ - $(INSTALL_DATA) build/man/libref/* "$(INSTALL_LIBRARIES_MAN_DIR)"; \ - fi + $(call INSTALL_ITEMS_OPT, \ + build/man/libref/*, man, $(INSTALL_MANDIR_LIBRARIES)) + $(call INSTALL_ITEMS_OPT, \ + build/man/compilerlibref/*, man, $(INSTALL_MANDIR_LIBRARIES)) # Dependencies for stdlib modules. # Use the same dependencies used for compiling .cmx files. 
diff --git a/appveyor.yml b/appveyor.yml index bc4cdef38dfb..bfef4674d4af 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -17,6 +17,13 @@ branches: - /4\.*/ - /5\.*/ - trunk + - relocatable-base-trunk + - relocatable-testing + - installation-tests + - installation-tests-unified-header + +# Do not build feature branch with open Pull Requests +skip_branch_with_pr: true # Compile the 64 bits version platform: @@ -53,7 +60,7 @@ environment: # "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 matrix: - fast_finish: true + fast_finish: false cache: - C:\cygwin64\var\cache\setup diff --git a/asmcomp/asmlink.ml b/asmcomp/asmlink.ml index 19d1f045898b..cb87df072ef3 100644 --- a/asmcomp/asmlink.ml +++ b/asmcomp/asmlink.ml @@ -234,6 +234,9 @@ let make_startup_file ~ppf_dump units_list ~crc_interfaces = Array.iteri (fun i name -> compile_phrase (Cmm_helpers.predef_exception i name)) Runtimedef.builtin_exceptions; + compile_phrase (Cmm_helpers.emit_global_char_os_constant + "caml_executable_ocamlrunparam" + (Compenv.overridden_runtime_parameters ())); if need_stdlib then begin let standard_library_default = Option.value ~default:Config.standard_library_default diff --git a/asmcomp/cmm_helpers.ml b/asmcomp/cmm_helpers.ml index abb21f13d965..1b1b9f0cb3ce 100644 --- a/asmcomp/cmm_helpers.ml +++ b/asmcomp/cmm_helpers.ml @@ -2663,6 +2663,37 @@ let predef_exception i name = let emit_global_string_constant name value = Cdata (emit_string_constant (name, Global) value []) +module String = Misc.Stdlib.String + +let os_bytes_of_string = + if Config.target_win32 then + fun s -> + let b = Buffer.create (String.length s * 2) in + Seq.iter (Buffer.add_utf_16le_uchar b) (String.to_utf_8_seq s); + Buffer.add_utf_16le_uchar b (Uchar.of_int 0); + Buffer.contents b + else + fun s -> s ^ "\000" + +let emit_global_char_os_constant name value = + let data = + Cglobal_symbol name + :: Cdefine_symbol name + :: match value with + | Some s -> + if String.is_valid_utf_8 s then + let 
value = os_bytes_of_string s in + let value_sym = Compilenv.new_const_symbol () in + [Csymbol_address value_sym; + Cdefine_symbol value_sym; + Cstring value] + else + invalid_arg "Cmm_helpers.emit_global_string_constant" + | None -> + [cint_zero] + in + Cdata data + (* Header for a plugin *) let plugin_header units = diff --git a/asmcomp/cmm_helpers.mli b/asmcomp/cmm_helpers.mli index f874b37dd7e2..d6a255cce906 100644 --- a/asmcomp/cmm_helpers.mli +++ b/asmcomp/cmm_helpers.mli @@ -628,6 +628,10 @@ val code_segment_table: string list -> phrase (** Generate data for a predefined exception *) val predef_exception: int -> string -> phrase +(** Generate data for a global NUL-terminated string represented using [char_os] + (i.e. [wchar_t] on Windows; [char] on Unix). Must be valid UTF-8. *) +val emit_global_char_os_constant: string -> string option -> phrase + (** Generate data for a global string constant *) val emit_global_string_constant: string -> string -> phrase diff --git a/boot/ocamlc b/boot/ocamlc index 1b15fe7be386..b2db1a407775 100755 Binary files a/boot/ocamlc and b/boot/ocamlc differ diff --git a/boot/ocamllex b/boot/ocamllex index 54a92395a909..282448a23b18 100755 Binary files a/boot/ocamllex and b/boot/ocamllex differ diff --git a/bytecomp/bytegen.ml b/bytecomp/bytegen.ml index 4b0b82046601..bba38494db4b 100644 --- a/bytecomp/bytegen.ml +++ b/bytecomp/bytegen.ml @@ -440,7 +440,8 @@ let comp_primitive stack_info p sz args = | Ostype_win32 -> "ostype_win32" | Ostype_cygwin -> "ostype_cygwin" | Backend_type -> "backend_type" - | Standard_library_default -> "standard_library_default" in + | Standard_library_default -> "standard_library_default" + | Shared_libraries -> "shared_libraries" in Kccall(Printf.sprintf "caml_sys_const_%s" const_name, 1) | Pisint -> Kisint | Pisout -> Kisout diff --git a/bytecomp/bytelink.ml b/bytecomp/bytelink.ml index e68bb64b7cf3..5926b9f5e02e 100644 --- a/bytecomp/bytelink.ml +++ b/bytecomp/bytelink.ml @@ -228,10 +228,11 @@ let 
link_compunit accu output_fun currpos_fun inchan file_name compunit = debug_info := (currpos_fun(), debug_event_list, debug_dirs) :: !debug_info end; output_fun code_block; - let fold_primitive needs_stdlib name = + let fold_primitive (needs_stdlib, uses_dynlink) name = if !Clflags.link_everything then Symtable.require_primitive name; - (needs_stdlib || name = "%standard_library_default") + (needs_stdlib || name = "%standard_library_default", + uses_dynlink || name = "caml_reify_bytecode") in List.fold_left fold_primitive accu compunit.cu_primitives @@ -265,7 +266,7 @@ let link_file output_fun currpos_fun accu = function link_archive accu output_fun currpos_fun file_name units let link_files output_fun currpos_fun = - List.fold_left (link_file output_fun currpos_fun) false + List.fold_left (link_file output_fun currpos_fun) (false, false) (* Output the debugging information *) (* Format is: @@ -577,7 +578,10 @@ let link_bytecode ?final_name tolink exec_name standalone = let output_fun buf = Out_channel.output_bigarray outchan buf 0 (Bigarray.Array1.dim buf) and currpos_fun () = pos_out outchan - start_code in - let needs_stdlib = + (* link_files returns true if any module refers to caml_reify_bytecode, + which is used solely by the toplevel and dynlink libraries and is used + to control whether we included the CRCS section. 
*) + let needs_stdlib, uses_dynlink = link_files output_fun currpos_fun tolink in if check_dlls then Dll.close_all_dlls(); @@ -623,12 +627,21 @@ let link_bytecode ?final_name tolink exec_name standalone = output_string outchan standard_library_default; Bytesections.record toc_writer OSLD end; + begin match Compenv.overridden_runtime_parameters () with + | Some ocamlrunparam when standalone -> + (* Embedded runtime defaults *) + output_string outchan ocamlrunparam; + Bytesections.record toc_writer ORUN; + | _ -> () + end; (* The map of global identifiers *) Symtable.output_global_map outchan; Bytesections.record toc_writer SYMB; (* CRCs for modules *) - output_value outchan (extract_crc_interfaces()); - Bytesections.record toc_writer CRCS; + if uses_dynlink then begin + output_value outchan (extract_crc_interfaces()); + Bytesections.record toc_writer CRCS + end; (* Debug info *) if !Clflags.debug then begin output_debug_info outchan; @@ -729,6 +742,10 @@ let c_string_literal_of_string s = Buffer.add_char b '"'; Buffer.contents b +let emit_global_constant outchan name value = + let value = Option.fold ~none:"NULL" ~some:c_string_literal_of_string value in + Printf.fprintf outchan "const char_os * %s = %s;\n" name value + let emit_runtime_standard_library_default outchan = let stdlib = let default = Config.standard_library_default in @@ -771,7 +788,7 @@ static int caml_code[] = { output_code_string outchan code; currpos := !currpos + (Bigarray.Array1.dim code) and currpos_fun () = !currpos in - ignore (link_files output_fun currpos_fun tolink); + let _, uses_dynlink = link_files output_fun currpos_fun tolink in (* The final STOP instruction *) Printf.fprintf outchan "\n0x%x};\n" Opcodes.opSTOP; (* The table of global data *) @@ -784,21 +801,25 @@ static char caml_data[] = { }; |}; (* The sections *) - let sections : (string * Obj.t) array = - [| Bytesections.Name.to_string SYMB, - Symtable.data_global_map(); - Bytesections.Name.to_string CRCS, - 
Obj.repr(extract_crc_interfaces()) |] + let sections : (string * Obj.t) list = + (Bytesections.Name.to_string SYMB, Symtable.data_global_map()) :: + if uses_dynlink then + [ Bytesections.Name.to_string CRCS, + Obj.repr(extract_crc_interfaces()) ] + else + [] in output_string outchan {| static char caml_sections[] = { |}; output_data_string outchan - (Marshal.to_string sections []); + (Marshal.to_string (Array.of_list sections) []); output_string outchan {| }; |}; + emit_global_constant outchan "caml_executable_ocamlrunparam" + (Compenv.overridden_runtime_parameters ()); emit_runtime_standard_library_default outchan; (* The table of primitives *) Symtable.output_primitive_table outchan; @@ -965,6 +986,8 @@ const enum caml_byte_program_mode caml_byte_program_mode = APPENDED; |}; Symtable.output_primitive_table poc; + emit_global_constant poc "caml_executable_ocamlrunparam" + (Compenv.overridden_runtime_parameters ()); emit_runtime_standard_library_default poc; output_string poc {| #ifdef __cplusplus diff --git a/bytecomp/bytesections.ml b/bytecomp/bytesections.ml index dd848a7de719..64939d545ff6 100644 --- a/bytecomp/bytesections.ml +++ b/bytecomp/bytesections.ml @@ -26,6 +26,7 @@ module Name = struct | DBUG (** debug info *) | DLLS (** dll names *) | DLPT (** dll paths *) + | ORUN (** embedded runtime parameters *) | OSLD (** OCaml Standard Library Default location *) | PRIM (** primitives names *) | RNTM (** The path to the bytecode interpreter (use_runtime mode) *) @@ -38,6 +39,7 @@ module Name = struct | "DLPT" -> DLPT | "DLLS" -> DLLS | "DATA" -> DATA + | "ORUN" -> ORUN | "OSLD" -> OSLD | "PRIM" -> PRIM | "SYMB" -> SYMB @@ -54,6 +56,7 @@ module Name = struct | DLPT -> "DLPT" | DLLS -> "DLLS" | DATA -> "DATA" + | ORUN -> "ORUN" | OSLD -> "OSLD" | PRIM -> "PRIM" | SYMB -> "SYMB" diff --git a/bytecomp/bytesections.mli b/bytecomp/bytesections.mli index e6f9e6af1d56..b4dc3ea6dbf5 100644 --- a/bytecomp/bytesections.mli +++ b/bytecomp/bytesections.mli @@ -27,6 +27,7 @@ 
module Name : sig | DBUG (** debug info *) | DLLS (** dll names *) | DLPT (** dll paths *) + | ORUN (** embedded runtime parameters *) | OSLD (** OCaml Standard Library Default location *) | PRIM (** primitives names *) | RNTM (** The path to the bytecode interpreter (use_runtime mode) *) diff --git a/bytecomp/dll.ml b/bytecomp/dll.ml index 3443c34df6ba..f8407a3448a1 100644 --- a/bytecomp/dll.ml +++ b/bytecomp/dll.ml @@ -154,6 +154,7 @@ let ld_library_path_contents () = | exception Not_found -> [] | s -> + (* NB: Misc.split_path_contents "" = [] *) Misc.split_path_contents s (* Initialization for separate compilation *) diff --git a/bytecomp/meta.ml b/bytecomp/meta.ml deleted file mode 100644 index c1ec15f3db9a..000000000000 --- a/bytecomp/meta.ml +++ /dev/null @@ -1,28 +0,0 @@ -(**************************************************************************) -(* *) -(* OCaml *) -(* *) -(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) -(* *) -(* Copyright 1996 Institut National de Recherche en Informatique et *) -(* en Automatique. *) -(* *) -(* All rights reserved. This file is distributed under the terms of *) -(* the GNU Lesser General Public License version 2.1, with the *) -(* special exception on linking described in the file LICENSE. 
*) -(* *) -(**************************************************************************) - -external global_data : unit -> Obj.t array = "caml_get_global_data" -external realloc_global_data : int -> unit = "caml_realloc_global" -type closure = unit -> Obj.t -type bytecode -external reify_bytecode : - (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t -> - Instruct.debug_event list array -> string option -> - bytecode * closure - = "caml_reify_bytecode" -external release_bytecode : bytecode -> unit - = "caml_static_release_bytecode" -external invoke_traced_function : Obj.raw_data -> Obj.t -> Obj.t -> Obj.t - = "caml_invoke_traced_function" diff --git a/bytecomp/meta.mli b/bytecomp/meta.mli deleted file mode 100644 index e48898b2bd76..000000000000 --- a/bytecomp/meta.mli +++ /dev/null @@ -1,30 +0,0 @@ -(**************************************************************************) -(* *) -(* OCaml *) -(* *) -(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) -(* *) -(* Copyright 1996 Institut National de Recherche en Informatique et *) -(* en Automatique. *) -(* *) -(* All rights reserved. This file is distributed under the terms of *) -(* the GNU Lesser General Public License version 2.1, with the *) -(* special exception on linking described in the file LICENSE. 
*) -(* *) -(**************************************************************************) - -(* To control the runtime system and bytecode interpreter *) - -external global_data : unit -> Obj.t array = "caml_get_global_data" -external realloc_global_data : int -> unit = "caml_realloc_global" -type closure = unit -> Obj.t -type bytecode -external reify_bytecode : - (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t -> - Instruct.debug_event list array -> string option -> - bytecode * closure - = "caml_reify_bytecode" -external release_bytecode : bytecode -> unit - = "caml_static_release_bytecode" -external invoke_traced_function : Obj.raw_data -> Obj.t -> Obj.t -> Obj.t - = "caml_invoke_traced_function" diff --git a/bytecomp/symtable.ml b/bytecomp/symtable.ml index 0c3a7481b703..2f2fb66ac609 100644 --- a/bytecomp/symtable.ml +++ b/bytecomp/symtable.ml @@ -326,12 +326,15 @@ let data_global_map () = (* Functions for toplevel use *) +external global_data : unit -> Obj.t array = "caml_get_global_data" +external realloc_global_data : int -> unit = "caml_realloc_global" + (* Update the in-core table of globals *) let update_global_table () = let ng = !global_table.cnt in - if ng > Array.length(Meta.global_data()) then Meta.realloc_global_data ng; - let glob = Meta.global_data() in + if ng > Array.length(global_data()) then realloc_global_data ng; + let glob = global_data() in List.iter (fun (slot, cst) -> glob.(slot) <- cst) !literal_table; @@ -361,9 +364,9 @@ let init_toplevel () = let get_global_position = slot_for_getglobal let get_global_value global = - (Meta.global_data()).(slot_for_getglobal global) + (global_data()).(slot_for_getglobal global) let assign_global_value global v = - (Meta.global_data()).(slot_for_getglobal global) <- v + (global_data()).(slot_for_getglobal global) <- v (* Check that all compilation units referenced in the given patch list have already been initialized *) diff --git a/configure b/configure index 
ea76d5078980..a55751a53d3f 100755 --- a/configure +++ b/configure @@ -3638,10 +3638,6 @@ ac_config_files="$ac_config_files Makefile.config" ac_config_files="$ac_config_files stdlib/sys.ml" -ac_config_files="$ac_config_files manual/src/version.tex" - -ac_config_files="$ac_config_files manual/src/html_processing/src/common.ml" - ac_config_files="$ac_config_files otherlibs/dynlink/dynlink_config.ml" ac_config_files="$ac_config_files utils/config.common.ml" @@ -3664,6 +3660,13 @@ ac_config_files="$ac_config_files otherlibs/runtime_events/META" ac_config_files="$ac_config_files stdlib/META" +if test -d manual +then : + ac_config_files="$ac_config_files manual/src/version.tex" + + ac_config_files="$ac_config_files manual/src/html_processing/src/common.ml" + +fi # Definitions related to the version of OCaml printf "%s\n" "#define OCAML_VERSION_MAJOR 5" >>confdefs.h @@ -25829,8 +25832,6 @@ do "Makefile.build_config") CONFIG_FILES="$CONFIG_FILES Makefile.build_config" ;; "Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;; "stdlib/sys.ml") CONFIG_FILES="$CONFIG_FILES stdlib/sys.ml" ;; - "manual/src/version.tex") CONFIG_FILES="$CONFIG_FILES manual/src/version.tex" ;; - "manual/src/html_processing/src/common.ml") CONFIG_FILES="$CONFIG_FILES manual/src/html_processing/src/common.ml" ;; "otherlibs/dynlink/dynlink_config.ml") CONFIG_FILES="$CONFIG_FILES otherlibs/dynlink/dynlink_config.ml" ;; "utils/config.common.ml") CONFIG_FILES="$CONFIG_FILES utils/config.common.ml" ;; "utils/config.generated.ml") CONFIG_FILES="$CONFIG_FILES utils/config.generated.ml" ;; @@ -25842,6 +25843,8 @@ do "otherlibs/dynlink/META") CONFIG_FILES="$CONFIG_FILES otherlibs/dynlink/META" ;; "otherlibs/runtime_events/META") CONFIG_FILES="$CONFIG_FILES otherlibs/runtime_events/META" ;; "stdlib/META") CONFIG_FILES="$CONFIG_FILES stdlib/META" ;; + "manual/src/version.tex") CONFIG_FILES="$CONFIG_FILES manual/src/version.tex" ;; + "manual/src/html_processing/src/common.ml") 
CONFIG_FILES="$CONFIG_FILES manual/src/html_processing/src/common.ml" ;; "native-symlinks") CONFIG_COMMANDS="$CONFIG_COMMANDS native-symlinks" ;; "ocamldoc/META") CONFIG_FILES="$CONFIG_FILES ocamldoc/META" ;; "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; diff --git a/configure.ac b/configure.ac index 2c7d07245f7a..5f039d300bca 100644 --- a/configure.ac +++ b/configure.ac @@ -295,8 +295,6 @@ AC_SUBST([runtime_search_target]) AC_CONFIG_FILES([Makefile.build_config]) AC_CONFIG_FILES([Makefile.config]) AC_CONFIG_FILES([stdlib/sys.ml]) -AC_CONFIG_FILES([manual/src/version.tex]) -AC_CONFIG_FILES([manual/src/html_processing/src/common.ml]) AC_CONFIG_FILES([otherlibs/dynlink/dynlink_config.ml]) AC_CONFIG_FILES([utils/config.common.ml]) AC_CONFIG_FILES([utils/config.generated.ml]) @@ -308,6 +306,9 @@ AC_CONFIG_FILES([compilerlibs/META]) AC_CONFIG_FILES([otherlibs/dynlink/META]) AC_CONFIG_FILES([otherlibs/runtime_events/META]) AC_CONFIG_FILES([stdlib/META]) +AS_IF([test -d manual], + [AC_CONFIG_FILES([manual/src/version.tex]) + AC_CONFIG_FILES([manual/src/html_processing/src/common.ml])]) # Definitions related to the version of OCaml AC_DEFINE([OCAML_VERSION_MAJOR], [OCAML__VERSION_MAJOR]) diff --git a/debugger/unix_tools.ml b/debugger/unix_tools.ml index e054615addca..3ca0403ffeb1 100644 --- a/debugger/unix_tools.ml +++ b/debugger/unix_tools.ml @@ -136,6 +136,8 @@ let rec expand_path ch = in try Filename.concat (Sys.getenv "HOME") tail with Not_found -> + (* If Sys.getenv "LOGNAME" = "" then getpwnam will raise + Not_found instead *) concat_root (Sys.getenv "LOGNAME") tail) | n -> concat_root (String.sub ch 1 (n - 1)) diff --git a/driver/compenv.ml b/driver/compenv.ml index 15fbdea0a389..12cd20a71df0 100644 --- a/driver/compenv.ml +++ b/driver/compenv.ml @@ -781,13 +781,105 @@ let parse_arguments ?(current=ref 0) argv f program = Printf.printf "%s\n%s" help_msg err_msg; raise (Exit_with_status 0) +(* Parse and apply multipliers to runtime parameter values + cf. 
runtime/startup_aux.c *) +let scanmult name opt = + let val_mult v = function + | 'k' -> v * 1024 + | 'M' -> v * 1024 * 1024 + | 'G' -> v * 1024 * 1024 * 1024 + | c -> + fatalf "-set-runtime-default: unknown multiplier %c in %s=%s." + c name opt + in + let scanners = [ + (fun () -> Scanf.sscanf_opt opt "0x%x%c%!" val_mult); + (fun () -> Scanf.sscanf_opt opt "0x%x%!" Fun.id); + (fun () -> Scanf.sscanf_opt opt "%u%c%!" val_mult); + (fun () -> Scanf.sscanf_opt opt "%u%!" Fun.id)] in + match List.find_map (fun f -> f ()) scanners with + | Some v -> v + | None -> + fatalf "-set-runtime-default: could not parse integer value %s for %s." + opt name + +(* The list of runtime parameters for which "=1" can be omitted. Note that while + b is technically a boolean, it has two values for true as b>1 causes + backtrace information to be loaded on bytecode startup *) +let boolish_runtime_parameters = + ["b"; "c"; "p"; "R"] +let integer_runtime_parameters = + ["d"; "e"; "l"; "M"; "m"; "n"; "o"; "s"; "t"; "v"; "V"; "W"] + +(* To keep in sync with startup_aux.c *) let parse_runtime_parameter opt = - let k, setting = - try Misc.cut_at opt '=' - with Not_found -> - fatalf "-set-runtime-default: invalid runtime parameter '%s'. \ - Expected =." opt in + if List.mem opt boolish_runtime_parameters then + Hashtbl.replace Clflags.runtime_parameters opt "1" + else if opt <> "" then + let k, setting = + try Misc.cut_at opt '=' + with Not_found -> + if List.mem opt integer_runtime_parameters then + fatalf "-set-runtime-default: runtime parameter %s requires a \ + parameter." opt + else + fatalf "-set-runtime-default: invalid runtime parameter %s. \ + Expected [=]." opt in + let set_parameter k setting = + let () = + let v = scanmult k setting in + match k with + | "b" -> + if v > 2 then + fatal "-set-runtime-default: runtime parameter b can only be set \ + to 0, 1, or 2." + | "d" -> + (* cf. 
Max_domains_max in runtime/caml/domain.h *) + let max_domains_max = 4096 in + if v < 1 then + fatal "-set-runtime-default: max_domains(d) must be at least 1"; + if v > max_domains_max then + fatalf "-set-runtime-default: max_domains(d) is too large. \ + The maximum value is %d." max_domains_max + | k when List.mem k boolish_runtime_parameters -> + if v > 1 then + fatalf "-set-runtime-default: runtime parameter %s can only be \ + set to 0 or 1." k + | k -> + if not (List.mem k integer_runtime_parameters) then + fatalf "-set-runtime-default: unrecognized runtime parameter \ + %s." k + in + Hashtbl.replace Clflags.runtime_parameters k setting + in if k = "standard_library_default" then Clflags.standard_library_default := Some setting else - fatalf "-set-runtime-default: unrecognized runtime parameter %s." k + set_parameter k setting + +let overridden_runtime_parameters () = + if Hashtbl.length Clflags.runtime_parameters = 0 then + None + else + let sort (l, _) (r, _) = + (* Parameters in alphabetical order; if an option has both upper/lower + then upper first cf. caml_runtime_parameters *) + let l', r' = String.lowercase_ascii l, String.lowercase_ascii r in + if l' = r' then + String.compare l r + else + String.compare l' r' + in + let convert (parameter, value) = + if List.mem parameter boolish_runtime_parameters && value = "1" then + parameter + else + parameter ^ "=" ^ value + in + let ocamlrunparam = + Hashtbl.fold (fun k v acc -> (k, v) :: acc) Clflags.runtime_parameters [] + |> List.sort sort + |> List.map convert + |> String.concat "," + in + Some ocamlrunparam diff --git a/driver/compenv.mli b/driver/compenv.mli index 56512e7d3c18..3fdb5c29b693 100644 --- a/driver/compenv.mli +++ b/driver/compenv.mli @@ -83,3 +83,7 @@ val parse_arguments : ?current:(int ref) (** Validate a single -set-runtime-default parameter specification. 
*) val parse_runtime_parameter : string -> unit + +(** Return {!Clflags.runtime_parameters} in the same format as the contents of + the [OCAMLRUNPARAM] environment variable. *) +val overridden_runtime_parameters : unit -> string option diff --git a/driver/compmisc.ml b/driver/compmisc.ml index f2179c6ba2dd..ddd096cc5523 100644 --- a/driver/compmisc.ml +++ b/driver/compmisc.ml @@ -81,16 +81,16 @@ let initial_env () = ~open_implicit_modules:(List.rev !Clflags.open_modules) let set_from_env flag Clflags.{ parse; usage; env_var } = - try - match parse (Sys.getenv env_var) with - | None -> - Location.prerr_warning Location.none - (Warnings.Bad_env_variable (env_var, usage)) - | Some x -> match !flag with - | None -> flag := Some x - | Some _ -> () - with - Not_found -> () + match Sys.getenv_opt env_var with + | None | Some "" -> () + | Some value -> + match parse value with + | None -> + Location.prerr_warning Location.none + (Warnings.Bad_env_variable (env_var, usage)) + | Some x -> match !flag with + | None -> flag := Some x + | Some _ -> () let read_clflags_from_env () = set_from_env Clflags.color Clflags.color_reader; diff --git a/driver/compmisc.mli b/driver/compmisc.mli index 3359b481988f..263da1358d7d 100644 --- a/driver/compmisc.mli +++ b/driver/compmisc.mli @@ -18,7 +18,9 @@ val init_path : -> ?dir:string -> unit -> unit val initial_env : unit -> Env.t -(* Support for flags that can also be set from an environment variable *) +(* Support for flags that can also be set from an environment variable. + Environment variables which are set, but to the empty string are ignored + (i.e. 
they are treated as if they were not set at all) *) val set_from_env : 'a option ref -> 'a Clflags.env_reader -> unit val read_clflags_from_env : unit -> unit diff --git a/flexdll b/flexdll index 3400287999af..1fbab5748323 160000 --- a/flexdll +++ b/flexdll @@ -1 +1 @@ -Subproject commit 3400287999afcdc737f35c1d0e1447c7d2ae5a83 +Subproject commit 1fbab57483231da60cf26f12ed99bf9cbb2ce584 diff --git a/lambda/lambda.ml b/lambda/lambda.ml index e18a3b49cb05..41ef61532337 100644 --- a/lambda/lambda.ml +++ b/lambda/lambda.ml @@ -26,6 +26,7 @@ type compile_time_constant = | Ostype_cygwin | Backend_type | Standard_library_default + | Shared_libraries type immediate_or_pointer = | Immediate diff --git a/lambda/lambda.mli b/lambda/lambda.mli index c5d2c714bd58..32c183793303 100644 --- a/lambda/lambda.mli +++ b/lambda/lambda.mli @@ -27,6 +27,7 @@ type compile_time_constant = | Ostype_cygwin | Backend_type | Standard_library_default + | Shared_libraries type immediate_or_pointer = | Immediate diff --git a/lambda/printlambda.ml b/lambda/printlambda.ml index 144b46cb1b32..c4ee4ff4e414 100644 --- a/lambda/printlambda.ml +++ b/lambda/printlambda.ml @@ -270,7 +270,8 @@ let primitive ppf = function | Ostype_win32 -> "ostype_win32" | Ostype_cygwin -> "ostype_cygwin" | Backend_type -> "backend_type" - | Standard_library_default -> "standard_library_default" in + | Standard_library_default -> "standard_library_default" + | Shared_libraries -> "shared_libraries" in fprintf ppf "sys.constant_%s" const_name | Pisint -> fprintf ppf "isint" | Pisout -> fprintf ppf "isout" diff --git a/lambda/translprim.ml b/lambda/translprim.ml index d5a9ebde5226..dca5b1e67f69 100644 --- a/lambda/translprim.ml +++ b/lambda/translprim.ml @@ -161,6 +161,7 @@ let primitives_table = "%boolnot", Primitive (Pnot, 1); "%big_endian", Primitive ((Pctconst Big_endian), 1); "%backend_type", Primitive ((Pctconst Backend_type), 1); + "%shared_libraries", Primitive ((Pctconst Shared_libraries), 1); "%word_size", 
Primitive ((Pctconst Word_size), 1); "%int_size", Primitive ((Pctconst Int_size), 1); "%max_wosize", Primitive ((Pctconst Max_wosize), 1); diff --git a/man/Makefile b/man/Makefile index 05424bab4737..e43a3451bc66 100644 --- a/man/Makefile +++ b/man/Makefile @@ -14,6 +14,7 @@ #************************************************************************** ROOTDIR = .. +SUBDIR_NAME = man include $(ROOTDIR)/Makefile.common MANPAGES = $(addsuffix .1,\ @@ -22,5 +23,4 @@ MANPAGES = $(addsuffix .1,\ .PHONY: install install: - $(MKDIR) $(call QUOTE_SINGLE,$(INSTALL_PROGRAMS_MAN_DIR)) - $(INSTALL_DATA) $(MANPAGES) $(call QUOTE_SINGLE,$(INSTALL_PROGRAMS_MAN_DIR)) + $(call INSTALL_ITEMS, $(MANPAGES), man, $(INSTALL_MANDIR_PROGRAMS)) diff --git a/man/ocamlrun.1 b/man/ocamlrun.1 index 4e6e3db99d7a..e08c01407332 100644 --- a/man/ocamlrun.1 +++ b/man/ocamlrun.1 @@ -150,8 +150,10 @@ See the Gc module documentation for details. .TP .B b Trigger the printing of a stack backtrace -when an uncaught exception aborts the program. -This option takes no argument. +when an uncaught exception aborts the program. Possible values +are 0 for disabled, 1 for enabled, and 2 which, in bytecode, causes the +required debugging information to be loaded when the program starts instead +of when the program aborts. .TP .B c (cleanup_on_exit) Shut the runtime down gracefully on exit. The option @@ -219,13 +221,12 @@ Turn on debugging support for .BR ocamlyacc -generated parsers. When this option is on, the pushdown automaton that executes the parsers prints a -trace of its actions. This option takes no argument. +trace of its actions. .TP .BR R Turn on randomization of all hash tables by default (see the .B Hashtbl -module of the standard library). This option takes no -argument. +module of the standard library). .TP .BR s " (minor_heap_size)" The size of the minor heap (in words). 
diff --git a/manual/src/cmds/runtime.etex b/manual/src/cmds/runtime.etex index ac7e827d2186..5ebc7456be96 100644 --- a/manual/src/cmds/runtime.etex +++ b/manual/src/cmds/runtime.etex @@ -167,7 +167,7 @@ The following environment variables are also consulted: \item[p] (parser trace) Turn on debugging support for "ocamlyacc"-generated parsers. When this option is on, the pushdown automaton that executes the parsers prints a - trace of its actions. This option takes no argument. + trace of its actions. \item[R] (randomize) Turn on randomization of all hash tables by default (see \ifouthtml @@ -175,7 +175,6 @@ The following environment variables are also consulted: \else section~\ref{Hashtbl}). \fi - This option takes no argument. \item[s] ("minor_heap_size") Size of the minor heap. (in words) \item[t] Set the trace level for the debug runtime (ignored by the standard runtime). \item[v] ("verbose") What GC messages to print to stderr. This diff --git a/middle_end/closure/closure.ml b/middle_end/closure/closure.ml index 0d2f70937ca0..6d92c266e59d 100644 --- a/middle_end/closure/closure.ml +++ b/middle_end/closure/closure.ml @@ -1078,6 +1078,7 @@ let rec close ({ backend; fenv; cenv ; mutable_vars } as env) lam = let dbg = Debuginfo.from_location loc in let id = Ident.name Compilenv.stdlib_symbol_name in Uprim(P.Pread_symbol id, [], dbg), Value_const (Uconst_ref (id, None)) + | Shared_libraries -> make_const_bool Config.supports_shared_libraries end | Lprim(Pignore, [arg], _loc) -> let expr, approx = make_const_int 0 in diff --git a/middle_end/flambda/closure_conversion.ml b/middle_end/flambda/closure_conversion.ml index 9d28a6bc6d6a..bc8ec40933f9 100644 --- a/middle_end/flambda/closure_conversion.ml +++ b/middle_end/flambda/closure_conversion.ml @@ -407,6 +407,8 @@ let rec close t env (lam : Lambda.lambda) : Flambda.t = let symbol = t.symbol_for_global' Compilenv.stdlib_symbol_name in t.imported_symbols <- Symbol.Set.add symbol t.imported_symbols; name_expr (Symbol 
symbol) ~name:Names.pgetglobal + | Shared_libraries -> + cst lambda_const_bool Config.supports_shared_libraries end | Lprim (Pfield _, [Lprim (Pgetglobal id, [],_)], _) when Ident.same id t.current_unit_id -> diff --git a/ocaml-variants.opam b/ocaml-variants.opam index 7de297ba21cf..fb9a245fa8c6 100644 --- a/ocaml-variants.opam +++ b/ocaml-variants.opam @@ -39,18 +39,18 @@ depends: [ # facility is not yet available for other platforms. "host-arch-x86_32" {os != "win32" & arch = "x86_32" & post} ("host-arch-x86_64" {os != "win32" & arch = "x86_64" & post} | - ("host-arch-x86_32" {os != "win32" & arch = "x86_64" & post} & "ocaml-option-32bit" {os != "win32" & arch = "x86_64"})) + ("host-arch-x86_32" {os != "win32" & arch = "x86_64" & post} & "ocaml-option-32bit" {build & os != "win32" & arch = "x86_64"})) "host-arch-unknown" {os != "win32" & arch != "arm32" & arch != "arm64" & arch != "ppc64" & arch != "riscv64" & arch != "s390x" & arch != "x86_32" & arch != "x86_64" & post} # Port selection (Windows) # amd64 mingw-w64 / MSVC - (("arch-x86_64" {os = "win32" & arch = "x86_64"} & - (("system-mingw" & "mingw-w64-shims" {os-distribution = "cygwin" & build}) | - ("system-msvc" & "winpthreads" & "ocaml-option-no-compression" {os = "win32"}))) | + (("arch-x86_64" {build & os = "win32" & arch = "x86_64"} & + (("system-mingw" {build} & "mingw-w64-shims" {os-distribution = "cygwin" & build}) | + ("system-msvc" {build} & "winpthreads" {os = "win32"} & "ocaml-option-no-compression" {build & os = "win32"}))) | # i686 mingw-w64 / MSVC - ("arch-x86_32" {os = "win32"} & "ocaml-option-bytecode-only" {os = "win32"} & - (("system-mingw" & "mingw-w64-shims" {os-distribution = "cygwin" & build}) | - ("system-msvc" & "winpthreads" & "ocaml-option-no-compression" {os = "win32"}))) | + ("arch-x86_32" {build & os = "win32"} & "ocaml-option-bytecode-only" {build & os = "win32"} & + (("system-mingw" {build} & "mingw-w64-shims" {os-distribution = "cygwin" & build}) | + ("system-msvc" {build} & 
"winpthreads" {os = "win32"} & "ocaml-option-no-compression" {build & os = "win32"}))) | # Non-Windows systems "host-system-other" {os != "win32" & post}) @@ -81,7 +81,7 @@ build: [ "--enable-runtime-search" "--enable-runtime-search-target=fallback" "--with-flexdll=%{flexdll:share}%" {os = "win32" & flexdll:installed} - "--with-winpthreads-msvc=%{winpthreads:share}%" {system-msvc:installed} + "--with-winpthreads-msvc=%{winpthreads:share}%" {winpthreads:installed & system-msvc:installed} "-C" "--with-afl" {ocaml-option-afl:installed} "--disable-native-compiler" {ocaml-option-bytecode-only:installed} @@ -107,8 +107,9 @@ build: [ "--disable-warn-error" ] [make "-j%{jobs}%"] + [make "INSTALL_MODE=opam" "install"] ] -install: [make "install"] +install: ["sh" "./%{name}%-fixup.sh" prefix] depopts: [ "ocaml-option-32bit" "ocaml-option-afl" diff --git a/ocamldoc/odoc_man.ml b/ocamldoc/odoc_man.ml index c8a4d9bf9dfb..5d7f03f0ea91 100644 --- a/ocamldoc/odoc_man.ml +++ b/ocamldoc/odoc_man.ml @@ -901,7 +901,7 @@ class man = let b = new_buf () in bs b (".TH \""^cl.cl_name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); @@ -959,7 +959,7 @@ class man = let b = new_buf () in bs b (".TH \""^ct.clt_name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); @@ -1051,7 +1051,7 @@ class man = let b = new_buf () in bs b (".TH \""^mt.mt_name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); @@ -1133,7 +1133,7 @@ class man = let b = new_buf () in bs b (".TH \""^m.m_name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs 
b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); @@ -1239,7 +1239,7 @@ class man = let b = new_buf () in bs b (".TH \""^name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); bs b ".SH NAME\n"; diff --git a/ocamldoc/odoc_misc.ml b/ocamldoc/odoc_misc.ml index 1e587af681b4..66878915a01e 100644 --- a/ocamldoc/odoc_misc.ml +++ b/ocamldoc/odoc_misc.ml @@ -247,14 +247,28 @@ let string_of_date ?(absolute=false) ?(hour=true) d = "" ) +let date_warning = ref (Fun.const ()) + let current_date = let time = - try - float_of_string (Sys.getenv "SOURCE_DATE_EPOCH") - with - Not_found -> Unix.time () + match Sys.getenv_opt "SOURCE_DATE_EPOCH" with + | None -> Unix.time () + | Some value -> + match Float.of_string_opt value with + | Some stamp -> stamp + | None -> + date_warning := (fun () -> + Odoc_global.pwarning + "The SOURCE_DATE_EPOCH environment variable could not be \ + parsed and has been ignored."; + date_warning := Fun.const ()); + Unix.time () in string_of_date ~absolute: true ~hour: false time +let current_date () = + (* Displays a warning the first time this is called if SOURCE_DATE_EPOCH was + set but could not be parsed. *) + !date_warning (); current_date let rec text_list_concat sep l = match l with diff --git a/ocamldoc/odoc_misc.mli b/ocamldoc/odoc_misc.mli index 86db543940d9..6cb927ec1f8a 100644 --- a/ocamldoc/odoc_misc.mli +++ b/ocamldoc/odoc_misc.mli @@ -66,7 +66,7 @@ val string_of_date : ?absolute:bool -> ?hour:bool -> float -> string (* Value returned by string_of_date for current time. * Uses environment variable SOURCE_DATE_EPOCH if set; falls back to * current timestamp otherwise. *) -val current_date : string +val current_date : unit -> string (** Return the first sentence (until the first dot) of a text. 
Don't stop in the middle of [Code], [Verbatim], [List], [Lnum], diff --git a/ocamltest/main.ml b/ocamltest/main.ml index e22acc24bc5b..9c0f9e8d61c2 100644 --- a/ocamltest/main.ml +++ b/ocamltest/main.ml @@ -251,7 +251,7 @@ let test_file test_filename = let reference_filename = Filename.concat test_source_directory (test_prefix ^ ".reference") in - let make = try Sys.getenv "MAKE" with Not_found -> "make" in + let make = Sys.getenv_with_default_value "MAKE" "make" in let initial_environment = Environments.from_bindings [ Builtin_variables.dev_null, "/dev/null"; diff --git a/ocamltest/ocamltest_stdlib.ml b/ocamltest/ocamltest_stdlib.ml index 75f4ee146865..c38098e437fa 100644 --- a/ocamltest/ocamltest_stdlib.ml +++ b/ocamltest/ocamltest_stdlib.ml @@ -223,7 +223,9 @@ module Sys = struct Fun.protect ~finally:(fun () -> Sys.chdir oldcwd) f let getenv_with_default_value variable default_value = - try Sys.getenv variable with Not_found -> default_value + match Sys.getenv_opt variable with + | None | Some "" -> default_value + | Some value -> value let safe_getenv variable = getenv_with_default_value variable "" end diff --git a/otherlibs/Makefile b/otherlibs/Makefile index d76643bd297b..83dce3235d50 100644 --- a/otherlibs/Makefile +++ b/otherlibs/Makefile @@ -14,6 +14,7 @@ #************************************************************************** ROOTDIR=.. +SUBDIR_NAME=otherlibs include $(ROOTDIR)/Makefile.common # Although the OTHERLIBS variable is defined in ../Makefile.config, diff --git a/otherlibs/Makefile.otherlibs.common b/otherlibs/Makefile.otherlibs.common index 06cbca6e4099..6fb6c34f7366 100644 --- a/otherlibs/Makefile.otherlibs.common +++ b/otherlibs/Makefile.otherlibs.common @@ -16,6 +16,7 @@ # Common Makefile for otherlibs ROOTDIR=../.. 
+SUBDIR_NAME=otherlibs/$(LIBNAME) include $(ROOTDIR)/Makefile.common include $(ROOTDIR)/Makefile.best_binaries @@ -101,56 +102,46 @@ else endif $(LIBNAME).cmxs: $(LIBNAME).cmxa $(STUBSLIB_NATIVE) - $(V_OCAMLOPT)$(CAMLOPT) -shared -o $(LIBNAME).cmxs -I . $(LIBNAME).cmxa + $(V_OCAMLOPT)$(CAMLOPT) -shared -o $@ -I . $< $(addprefix -cclib , $(LDOPTS)) lib$(CLIBNAME_BYTECODE).$(A): $(COBJS) $(V_OCAMLMKLIB)$(MKLIB) -oc $(CLIBNAME_BYTECODE) $(COBJS_BYTECODE) $(LDOPTS) lib$(CLIBNAME_NATIVE).$(A): $(COBJS) - $(V_OCAMLMKLIB)$(MKLIB) -oc $(CLIBNAME_NATIVE) $(COBJS_NATIVE) $(LDOPTS) - -INSTALL_LIBDIR_LIBNAME = $(INSTALL_LIBDIR)/$(LIBNAME) + $(V_OCAMLMKLIB)$(MKLIB) \ + -custom -oc $(CLIBNAME_NATIVE) $(COBJS_NATIVE) $(LDOPTS) install:: ifneq "$(STUBSLIB_BYTECODE)" "" ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" - $(INSTALL_PROG) $(STUBSDLL) "$(INSTALL_STUBLIBDIR)" + $(call INSTALL_ITEMS, $(STUBSDLL), stublibs) endif - $(INSTALL_DATA) $(STUBSLIB_BYTECODE) "$(INSTALL_LIBDIR)/" + $(call INSTALL_ITEMS, $(STUBSLIB_BYTECODE), lib) endif # If installing over a previous OCaml version, ensure the library is removed # from the previous installation. 
- rm -f $(addprefix "$(INSTALL_LIBDIR)"/, \ - $(LIBNAME).cma $(CMIFILES) \ - $(CMIFILES:.cmi=.mli) $(CMIFILES:.cmi=.cmti) \ - $(CAMLOBJS_NAT) $(LIBNAME).cmxa $(LIBNAME).cmxs $(LIBNAME).$(A)) - $(MKDIR) "$(INSTALL_LIBDIR_LIBNAME)" - $(INSTALL_DATA) \ - $(LIBNAME).cma $(CMIFILES) META \ - "$(INSTALL_LIBDIR_LIBNAME)/" + $(call INSTALL_RM, \ + $(addprefix "$(INSTALL_LIBDIR)"/, \ + $(LIBNAME).cma $(CMIFILES) \ + $(CMIFILES:.cmi=.mli) $(CMIFILES:.cmi=.cmti) \ + $(CAMLOBJS_NAT) $(LIBNAME).cmxa $(LIBNAME).cmxs $(LIBNAME).$(A))) + $(call INSTALL_ITEMS, $(LIBNAME).cma $(CMIFILES) META, lib, $(LIBNAME)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - $(CMIFILES:.cmi=.mli) \ - $(CMIFILES:.cmi=.cmti) \ - "$(INSTALL_LIBDIR_LIBNAME)/" + $(call INSTALL_ITEMS, $(CMIFILES:.cmi=.mli) $(CMIFILES:.cmi=.cmti), \ + lib, $(LIBNAME)) +endif +ifneq "$(HEADERS)" "" + $(call INSTALL_ITEMS, $(HEADERS), lib, $(INSTALL_LIBDIR_CAML)) endif - if test -n "$(HEADERS)"; then \ - $(INSTALL_DATA) $(HEADERS) "$(INSTALL_INCDIR)/"; \ - fi installopt: - $(INSTALL_DATA) \ - $(CAMLOBJS_NAT) $(LIBNAME).cmxa $(LIBNAME).$(A) \ - "$(INSTALL_LIBDIR_LIBNAME)/" - if test -f $(LIBNAME).cmxs; then \ - $(INSTALL_PROG) $(LIBNAME).cmxs "$(INSTALL_LIBDIR_LIBNAME)"; \ - fi - if test -f dll$(CLIBNAME_NATIVE)$(EXT_DLL); then \ - $(INSTALL_PROG) \ - dll$(CLIBNAME_NATIVE)$(EXT_DLL) "$(INSTALL_STUBLIBDIR)"; \ - fi + $(call INSTALL_ITEMS, \ + $(CAMLOBJS_NAT) $(LIBNAME).cmxa $(LIBNAME).$(A), lib, $(LIBNAME)) +ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" + $(call INSTALL_ITEMS, $(LIBNAME).cmxs, libexec, $(LIBNAME)) +endif ifneq "$(STUBSLIB_NATIVE)" "" - $(INSTALL_DATA) $(STUBSLIB_NATIVE) "$(INSTALL_LIBDIR)/" + $(call INSTALL_ITEMS, $(STUBSLIB_NATIVE), lib) endif partialclean: diff --git a/otherlibs/dynlink/byte/dynlink.ml b/otherlibs/dynlink/byte/dynlink.ml index 209218c318b5..256a33e17ee9 100644 --- a/otherlibs/dynlink/byte/dynlink.ml +++ b/otherlibs/dynlink/byte/dynlink.ml @@ -171,6 +171,8 @@ module Bytecode = 
struct (DT.Error (Library's_module_initializers_failed exn)) (Printexc.get_raw_backtrace ()) + external supports_shared_libraries : unit -> bool = "%shared_libraries" + let load ~filename:file_name ~priv = let ic = try open_in_bin file_name @@ -200,7 +202,11 @@ module Bytecode = struct let toc_pos = input_binary_int ic in (* Go to table of contents *) seek_in ic toc_pos; let lib = (input_value ic : library) in - Symtable.open_dlls lib.lib_dllibs; + if supports_shared_libraries () then + Symtable.open_dlls lib.lib_dllibs + else + raise (DT.Error (Cannot_open_dynamic_library (Failure + "loading shared libraries not supported by this runtime"))); handle, lib.lib_units end else begin raise (DT.Error (Not_a_bytecode_file file_name)) diff --git a/otherlibs/dynlink/byte/dynlink_symtable.ml b/otherlibs/dynlink/byte/dynlink_symtable.ml index 0b770f121e72..cca0005e0ce8 100644 --- a/otherlibs/dynlink/byte/dynlink_symtable.ml +++ b/otherlibs/dynlink/byte/dynlink_symtable.ml @@ -242,19 +242,15 @@ let patch_object buff patchlist = #327 "bytecomp/symtable.ml" (* Functions for toplevel use *) -(* Update the in-core table of globals *) -#247 "otherlibs/dynlink/byte/dynlink_symtable.ml" -module Meta = struct -#16 "bytecomp/meta.ml" external global_data : unit -> Obj.t array = "caml_get_global_data" external realloc_global_data : int -> unit = "caml_realloc_global" -#252 "otherlibs/dynlink/byte/dynlink_symtable.ml" -end -#331 "bytecomp/symtable.ml" + +(* Update the in-core table of globals *) + let update_global_table () = let ng = !global_table.cnt in - if ng > Array.length(Meta.global_data()) then Meta.realloc_global_data ng; - let glob = Meta.global_data() in + if ng > Array.length(global_data()) then realloc_global_data ng; + let glob = global_data() in List.iter (fun (slot, cst) -> glob.(slot) <- cst) !literal_table; @@ -274,16 +270,16 @@ external get_bytecode_sections : unit -> bytecode_sections = let init_toplevel () = let sect = get_bytecode_sections () in global_table := 
sect.symb; -#278 "otherlibs/dynlink/byte/dynlink_symtable.ml" +#274 "otherlibs/dynlink/byte/dynlink_symtable.ml" Dll.init ~dllpaths:sect.dlpt ~prims:sect.prim; -#357 "bytecomp/symtable.ml" +#360 "bytecomp/symtable.ml" sect.crcs (* Find the value of a global identifier *) -#363 "bytecomp/symtable.ml" +#366 "bytecomp/symtable.ml" let get_global_value global = - (Meta.global_data()).(slot_for_getglobal global) -#368 "bytecomp/symtable.ml" + (global_data()).(slot_for_getglobal global) +#371 "bytecomp/symtable.ml" (* Check that all compilation units referenced in the given patch list have already been initialized *) @@ -324,17 +320,17 @@ let check_global_initialized patchlist = type global_map = GlobalMap.t let current_state () = !global_table -#411 "bytecomp/symtable.ml" +#414 "bytecomp/symtable.ml" let hide_additions (st : global_map) = if st.cnt > !global_table.cnt then -#331 "otherlibs/dynlink/byte/dynlink_symtable.ml" +#327 "otherlibs/dynlink/byte/dynlink_symtable.ml" failwith "Symtable.hide_additions"; -#414 "bytecomp/symtable.ml" +#417 "bytecomp/symtable.ml" global_table := {GlobalMap. cnt = !global_table.cnt; tbl = st.tbl } -#433 "bytecomp/symtable.ml" +#436 "bytecomp/symtable.ml" let is_defined_in_global_map (gmap : global_map) global = Global.Map.mem global gmap.tbl diff --git a/otherlibs/runtime_events/runtime_events.mli b/otherlibs/runtime_events/runtime_events.mli index e7f87e0f27a7..a20cead3876e 100644 --- a/otherlibs/runtime_events/runtime_events.mli +++ b/otherlibs/runtime_events/runtime_events.mli @@ -34,16 +34,16 @@ The runtime events system's behaviour can be controlled by the following environment variables: - - OCAML_RUNTIME_EVENTS_START if set will cause the runtime events system - to be started as part of the OCaml runtime initialization. + - OCAML_RUNTIME_EVENTS_START if non-empty will cause the runtime events + system to be started as part of the OCaml runtime initialization. 
- OCAML_RUNTIME_EVENTS_DIR sets the directory where the runtime events ring buffers will be located. If not present the program's working directory will be used. - - OCAML_RUNTIME_EVENTS_PRESERVE if set will prevent the OCaml runtime from - removing its ring buffers when it terminates. This can help if monitoring - very short running programs. + - OCAML_RUNTIME_EVENTS_PRESERVE if non-empty will prevent the OCaml runtime + from removing its ring buffers when it terminates. This can help if + monitoring very short running programs. *) (** The type for counter events emitted by the runtime. Counter events are used diff --git a/otherlibs/systhreads/Makefile b/otherlibs/systhreads/Makefile index 2b97c9c62c83..2b410b0724c1 100644 --- a/otherlibs/systhreads/Makefile +++ b/otherlibs/systhreads/Makefile @@ -14,6 +14,7 @@ #************************************************************************** ROOTDIR=../.. +SUBDIR_NAME=otherlibs/systhreads include $(ROOTDIR)/Makefile.common include $(ROOTDIR)/Makefile.best_binaries @@ -45,9 +46,6 @@ LIBNAME=threads # That's why this dependency is handled in the Makefile directly # and removed from the output of the C compiler during make depend -BYTECODE_C_OBJS=st_stubs.b.$(O) -NATIVECODE_C_OBJS=st_stubs.n.$(O) - THREADS_SOURCES = thread.ml event.ml THREADS_BCOBJS = $(THREADS_SOURCES:.ml=.cmo) @@ -61,15 +59,20 @@ all: lib$(LIBNAME).$(A) $(LIBNAME).cma $(CMIFILES) allopt: lib$(LIBNAME)nat.$(A) $(LIBNAME).cmxa $(CMIFILES) +ifeq "$(NATDYNLINK)" "true" +allopt: $(LIBNAME).cmxs +endif + lib$(LIBNAME).$(A): OC_CFLAGS = $(OC_BYTECODE_CFLAGS) -lib$(LIBNAME).$(A): $(BYTECODE_C_OBJS) - $(V_OCAMLMKLIB)$(MKLIB) -o $(LIBNAME) $(BYTECODE_C_OBJS) +lib$(LIBNAME).$(A): st_stubs.b.$(O) st_stubs_shared.b.$(O) + @$(MKLIB) -o $(LIBNAME) st_stubs_shared.b.$(O) + $(V_OCAMLMKLIB)$(MKLIB) -custom -o $(LIBNAME) $< lib$(LIBNAME)nat.$(A): OC_CFLAGS = $(OC_NATIVE_CFLAGS) -lib$(LIBNAME)nat.$(A): $(NATIVECODE_C_OBJS) - $(V_OCAMLMKLIB)$(MKLIB) -o $(LIBNAME)nat $^ 
+lib$(LIBNAME)nat.$(A): st_stubs.n.$(O) + $(V_OCAMLMKLIB)$(MKLIB) -custom -o $(LIBNAME)nat $^ $(LIBNAME).cma: $(THREADS_BCOBJS) $(V_OCAMLMKLIB)$(MKLIB) -o $(LIBNAME) -ocamlc '$(CAMLC)' -linkall $^ @@ -78,14 +81,20 @@ $(LIBNAME).cma: $(THREADS_BCOBJS) $(LIBNAME).cmxa: $(THREADS_NCOBJS) $(V_LINKOPT)$(CAMLOPT) -linkall -a -cclib -lthreadsnat -o $@ $^ -# The following lines produce two object files st_stubs.b.$(O) and -# st_stubs.n.$(O) from the same source file st_stubs.c (it is compiled -# twice, each time with different options). +st_stubs_shared.n.$(O): OC_CFLAGS = $(OC_NATIVE_CFLAGS) + +$(LIBNAME).cmxs: $(THREADS_NCOBJS) st_stubs_shared.n.$(O) + $(V_LINKOPT)$(CAMLOPT) -linkall -shared -o $@ $^ + +# The following lines produce object files based on st_stubs.c. Four objects are +# produced - a static and shared version in both bytecode and native versions. + +st_stubs_shared.%.$(O): OC_CPPFLAGS += -DSYSTHREADS_SHARED ifeq "$(COMPUTE_DEPS)" "true" -st_stubs.%.$(O): st_stubs.c +st_stubs%.$(O): st_stubs.c else -st_stubs.%.$(O): st_stubs.c $(RUNTIME_HEADERS) $(wildcard *.h) +st_stubs%.$(O): st_stubs.c $(RUNTIME_HEADERS) $(wildcard *.h) endif $(V_CC)$(CC) $(OC_CFLAGS) $(CFLAGS) $(OC_CPPFLAGS) $(CPPFLAGS) \ $(OUTPUTOBJ)$@ -c $< @@ -103,30 +112,25 @@ clean: partialclean distclean: clean rm -f META -INSTALL_THREADSLIBDIR=$(INSTALL_LIBDIR)/$(LIBNAME) - install: ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" - $(INSTALL_PROG) $(DLLTHREADS) "$(INSTALL_STUBLIBDIR)" + $(call INSTALL_ITEMS, $(DLLTHREADS), stublibs) endif - $(INSTALL_DATA) libthreads.$(A) "$(INSTALL_LIBDIR)" - $(MKDIR) "$(INSTALL_THREADSLIBDIR)" - $(INSTALL_DATA) \ - $(CMIFILES) threads.cma META \ - "$(INSTALL_THREADSLIBDIR)" + $(call INSTALL_ITEMS, libthreads.$(A), lib) + $(call INSTALL_ITEMS, $(CMIFILES) threads.cma META, lib, $(LIBNAME)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - $(CMIFILES:.cmi=.cmti) \ - "$(INSTALL_THREADSLIBDIR)" - $(INSTALL_DATA) $(MLIFILES) "$(INSTALL_THREADSLIBDIR)" + 
$(call INSTALL_ITEMS, $(CMIFILES:.cmi=.cmti), lib, $(LIBNAME)) + $(call INSTALL_ITEMS, $(MLIFILES), lib, $(LIBNAME)) endif - $(INSTALL_DATA) caml/threads.h "$(INSTALL_INCDIR)" + $(call INSTALL_ITEMS, caml/threads.h, lib, $(INSTALL_LIBDIR_CAML)) installopt: - $(INSTALL_DATA) libthreadsnat.$(A) "$(INSTALL_LIBDIR)" - $(INSTALL_DATA) \ - $(THREADS_NCOBJS) threads.cmxa threads.$(A) \ - "$(INSTALL_THREADSLIBDIR)" + $(call INSTALL_ITEMS, libthreadsnat.$(A), lib) + $(call INSTALL_ITEMS, $(THREADS_NCOBJS) threads.cmxa threads.$(A), \ + lib, $(LIBNAME)) +ifeq "$(NATDYNLINK)" "true" + $(call INSTALL_ITEMS, $(LIBNAME).cmxs, libexec, $(LIBNAME)) +endif %.cmi: %.mli $(V_OCAMLC)$(CAMLC) -c $(COMPFLAGS) $< diff --git a/otherlibs/systhreads/st_stubs.c b/otherlibs/systhreads/st_stubs.c index 971c8992c949..b18c75a93182 100644 --- a/otherlibs/systhreads/st_stubs.c +++ b/otherlibs/systhreads/st_stubs.c @@ -19,16 +19,13 @@ any reason. In mingw-w64 13.0.0, a subtle change meant that time.h causes pthread_compat.h to be read. For this reason, this next block must appear before anything headers are included. */ -#if defined(_WIN32) && !defined(NATIVE_CODE) && !defined(_MSC_VER) +#if defined(_WIN32) && defined(SYSTHREADS_SHARED) /* Ensure that pthread.h marks symbols __declspec(dllimport) so that they can be picked up from the runtime (which will have linked winpthreads statically). mingw-w64 11.0.0 introduced WINPTHREADS_USE_DLLIMPORT to do this explicitly; prior versions co-opted this on the internal DLL_EXPORT, but this is ignored in 11.0 and later unless IN_WINPTHREAD is also defined, so we can safely - define both to support both versions. - When compiling with MSVC, we currently link directly the winpthreads objects - into our runtime, so we do not want to mark its symbols with - __declspec(dllimport). */ + define both to support both versions. 
*/ #define WINPTHREADS_USE_DLLIMPORT #define DLL_EXPORT #endif diff --git a/otherlibs/unix/Makefile b/otherlibs/unix/Makefile index 9cedb6f36370..d64ca05d4bab 100644 --- a/otherlibs/unix/Makefile +++ b/otherlibs/unix/Makefile @@ -35,9 +35,7 @@ unixLabels.cmi: \ EXTRACAMLFLAGS += -pp "$(AWK) -f $(ROOTDIR)/stdlib/expand_module_aliases.awk" ifeq "$(UNIX_OR_WIN32)" "win32" -WIN32_LIBS=$(call SYSLIB,ws2_32) $(call SYSLIB,advapi32) -LINKOPTS=$(addprefix -cclib ,$(WIN32_LIBS)) -LDOPTS=$(addprefix -ldopt ,$(WIN32_LIBS)) +LDOPTS=-lws2_32 -ladvapi32 else # Unix # dllunix.so particularly requires libm for modf symbols LDOPTS=$(NATIVECCLIBS) diff --git a/otherlibs/unix/unix_win32.ml b/otherlibs/unix/unix_win32.ml index ee4b6cd37ca9..4a351882d3c5 100644 --- a/otherlibs/unix/unix_win32.ml +++ b/otherlibs/unix/unix_win32.ml @@ -655,7 +655,7 @@ type group_entry = gr_gid : int; gr_mem : string array } -let getlogin () = try Sys.getenv "USERNAME" with Not_found -> "" +let getlogin () = Option.value (Sys.getenv_opt "USERNAME") ~default:"" let getpwnam _x = raise Not_found let getgrnam = getpwnam let getpwuid = getpwnam @@ -1115,8 +1115,9 @@ let open_process_args_full prog args = let open_process_shell fn cmd = let shell = - try Sys.getenv "COMSPEC" - with Not_found -> raise(Unix_error(ENOEXEC, "open_process_shell", cmd)) in + match Sys.getenv_opt "COMSPEC" with + | None | Some "" -> raise(Unix_error(ENOEXEC, "open_process_shell", cmd)) + | Some shell -> shell in fn shell (shell ^ " /c " ^ cmd) let open_process_in cmd = open_process_shell open_process_cmdline_in cmd diff --git a/runtime/backtrace_byt.c b/runtime/backtrace_byt.c index 27fe41b800ef..790e8f40620d 100644 --- a/runtime/backtrace_byt.c +++ b/runtime/backtrace_byt.c @@ -50,8 +50,6 @@ /* The table of debug information fragments */ struct ext_table caml_debug_info; -CAMLexport char_os * caml_cds_file = NULL; - /* Location of fields in the Instruct.debug_event record */ enum { EV_POS = 0, @@ -443,7 +441,7 @@ static void 
read_main_debug_info(struct debug_info *di) { CAMLparam0(); CAMLlocal3(events, evl, l); - char_os *exec_name; + const char_os *exec_name; int fd, num_events, orig; struct channel *chan; struct exec_trailer trail; @@ -460,12 +458,12 @@ static void read_main_debug_info(struct debug_info *di) CAMLreturn0; if (caml_params->cds_file != NULL) { - exec_name = (char_os*) caml_params->cds_file; + exec_name = caml_params->cds_file; } else { - exec_name = (char_os*) caml_params->exe_name; + exec_name = caml_params->exe_name; } - fd = caml_attempt_open(&exec_name, &trail, 1); + fd = caml_attempt_open(exec_name, &trail, 1); if (fd < 0) { /* Record the failure of caml_attempt_open in di->already-read */ di->already_read = fd; diff --git a/runtime/caml/backtrace.h b/runtime/caml/backtrace.h index 7f8b3fae476f..6431b4799dfd 100644 --- a/runtime/caml/backtrace.h +++ b/runtime/caml/backtrace.h @@ -111,9 +111,6 @@ CAMLextern void caml_record_backtraces(int); #ifndef NATIVE_CODE -/* Path to the file containing debug information, if any, or NULL. */ -CAMLextern char_os * caml_cds_file; - /* Primitive called _only_ by runtime to record unwinded frames to * backtrace. A similar primitive exists for native code, but with a * different prototype. 
*/ diff --git a/runtime/caml/gc_ctrl.h b/runtime/caml/gc_ctrl.h index af160f3f4761..97e540d4d6cf 100644 --- a/runtime/caml/gc_ctrl.h +++ b/runtime/caml/gc_ctrl.h @@ -18,6 +18,8 @@ #ifdef CAML_INTERNALS +#include + #include "misc.h" CAMLextern atomic_uintnat caml_max_stack_wsize; @@ -28,6 +30,7 @@ void caml_init_gc (void); value caml_gc_stat(value); value caml_gc_major(value); +extern atomic_bool caml_runtime_randomized; #define caml_stat_top_heap_wsz caml_top_heap_words(Caml_state->shared_heap) #define caml_stat_compactions 0 diff --git a/runtime/caml/misc.h b/runtime/caml/misc.h index 0f8f7f0f0719..d2b451de12fa 100644 --- a/runtime/caml/misc.h +++ b/runtime/caml/misc.h @@ -524,6 +524,7 @@ extern double caml_log1p(double); #define strlen_os wcslen #define sscanf_os swscanf #define strcpy_os wcscpy +#define strdup_os wcsdup #define mktemp_os _wmktemp #define fopen_os _wfopen @@ -569,6 +570,7 @@ extern double caml_log1p(double); #define strlen_os strlen #define sscanf_os sscanf #define strcpy_os strcpy +#define strdup_os strdup #define mktemp_os mktemp #define fopen_os fopen diff --git a/runtime/caml/osdeps.h b/runtime/caml/osdeps.h index ab4c11d33724..84e310d3ee54 100644 --- a/runtime/caml/osdeps.h +++ b/runtime/caml/osdeps.h @@ -156,11 +156,17 @@ CAMLextern value caml_win32_get_temp_path(void); #define CAML_DIR_SEP T("\\") #define Is_separator(c) (c == '\\' || c == '/') +#define EXT_DLL L".dll" #else #define CAML_DIR_SEP T("/") #define Is_separator(c) (c == '/') +#define EXT_DLL ".so" + +/* As caml_search_exe_in_path, but returns NULL if the file cannot be found in + any of the directories specified in PATH. 
Used by stdlib/header.c */ +caml_stat_string caml_search_in_system_path(const char *); #endif /* _WIN32 */ diff --git a/runtime/caml/startup.h b/runtime/caml/startup.h index 628e06c627c1..34e0723dc9a8 100644 --- a/runtime/caml/startup.h +++ b/runtime/caml/startup.h @@ -39,7 +39,7 @@ CAMLextern value caml_startup_code_exn( /* These enum members should all be negative */ enum { FILE_NOT_FOUND = -1, BAD_BYTECODE = -2, WRONG_MAGIC = -3, NO_FDS = -4 }; -extern int caml_attempt_open(char_os **name, struct exec_trailer *trail, +extern int caml_attempt_open(const char_os *name, struct exec_trailer *trail, int do_open_script); extern int caml_read_trailer(int fd, struct exec_trailer *trail); extern void caml_read_section_descriptors(int fd, struct exec_trailer *trail); diff --git a/runtime/caml/startup_aux.h b/runtime/caml/startup_aux.h index d0a44affbdf0..050a554988b0 100644 --- a/runtime/caml/startup_aux.h +++ b/runtime/caml/startup_aux.h @@ -43,8 +43,6 @@ struct caml_params { uintnat trace_level; uintnat runtime_events_log_wsize; uintnat verify_heap; - uintnat print_magic; - uintnat print_config; uintnat init_percent_free; uintnat init_minor_heap_wsz; @@ -62,6 +60,8 @@ struct caml_params { extern const struct caml_params* const caml_params; +extern const char_os *caml_executable_ocamlrunparam; + extern void caml_parse_ocamlrunparam (void); /* Common entry point to caml_startup. 
diff --git a/runtime/dynlink.c b/runtime/dynlink.c index 53251d3ab9cf..4645fe9cc53c 100644 --- a/runtime/dynlink.c +++ b/runtime/dynlink.c @@ -54,12 +54,19 @@ struct ext_table caml_prim_table; /* The names of primitives */ struct ext_table caml_prim_name_table; +#ifndef DEBUG +/* The buffer for the strings in caml_prim_name_table */ +static char *prim_names = NULL; +#endif /* The table of shared libraries currently opened */ static struct ext_table shared_libs; /* The search path for shared libraries */ struct ext_table caml_shared_libs_path; +/* Buffers under-pinning caml_shared_libs_path */ +static char_os *shared_libs_buffer1 = NULL; +static char_os *shared_libs_buffer2 = NULL; /* Look up the given primitive name in the built-in primitive table, then in the opened shared libraries (shared_libs) */ @@ -136,7 +143,7 @@ CAMLexport char_os * caml_parse_ld_conf(const char_os * stdlib, /* Use a temporary ext_table to hold the individually-allocated entries */ caml_ext_table_init(&entries, 8); for (int i = 0; i < sizeof(locations) / sizeof(locations[0]); i++) { - if (locations[i] != NULL) { + if (locations[i] != NULL && *locations[i] != '\0') { libroot = caml_stat_strdup_os(locations[i]); size_t libroot_length = strlen_os(libroot); while (libroot_length > 0 && Is_separator(libroot[libroot_length - 1])) @@ -175,10 +182,12 @@ CAMLexport char_os * caml_parse_ld_conf(const char_os * stdlib, while (q > p && *(q - 1) == '\r') q--; } - *q = '\0'; - char_os *entry = make_relative_path_absolute(p, libroot); - length += strlen_os(entry) + 1; - caml_ext_table_add(&entries, entry); + if (p < q) { + *q = '\0'; + char_os *entry = make_relative_path_absolute(p, libroot); + length += strlen_os(entry) + 1; + caml_ext_table_add(&entries, entry); + } p = r; } @@ -229,6 +238,17 @@ CAMLprim value caml_dynlink_parse_ld_conf(value vstdlib) #ifndef NATIVE_CODE +char_os * caml_search_dll_in_path(struct ext_table * path, const char_os * name) +{ + char_os * dllname; + char_os * res; + + dllname 
= caml_stat_strconcat_os(2, name, EXT_DLL); + res = caml_search_in_path(path, dllname); + caml_stat_free(dllname); + return res; +} + /* Open the given shared library and add it to shared_libs. Abort on error. */ static void open_shared_lib(char_os * name) @@ -288,13 +308,15 @@ void caml_build_primitive_table(char_os * lib_path, caml_shared_libs_path and caml_prim_name_table are not freed afterwards: they may later be used by caml_dynlink_get_bytecode_sections. */ - caml_decompose_path(&caml_shared_libs_path, - caml_secure_getenv(T("CAML_LD_LIBRARY_PATH"))); + shared_libs_buffer1 = + caml_decompose_path(&caml_shared_libs_path, + caml_secure_getenv(T("CAML_LD_LIBRARY_PATH"))); if (lib_path != NULL) for (char_os *p = lib_path; *p != 0; p += strlen_os(p) + 1) caml_ext_table_add(&caml_shared_libs_path, p); - caml_parse_ld_conf(caml_runtime_standard_library_effective, - &caml_shared_libs_path); + shared_libs_buffer2 = + caml_parse_ld_conf(caml_runtime_standard_library_effective, + &caml_shared_libs_path); /* Open the shared libraries */ caml_ext_table_init(&shared_libs, 8); if (libs != NULL) @@ -303,14 +325,18 @@ void caml_build_primitive_table(char_os * lib_path, /* Build the primitive table */ caml_ext_table_init(&caml_prim_table, 0x180); caml_ext_table_init(&caml_prim_name_table, 0x180); - if (req_prims != NULL) + if (req_prims != NULL) { +#ifndef DEBUG + prim_names = req_prims; +#endif for (char *q = req_prims; *q != 0; q += strlen(q) + 1) { c_primitive prim = lookup_primitive(q); if (prim == NULL) caml_fatal_error("unknown C primitive `%s'", q); caml_ext_table_add(&caml_prim_table, (void *) prim); - caml_ext_table_add(&caml_prim_name_table, caml_stat_strdup(q)); + caml_ext_table_add(&caml_prim_name_table, q); } + } } /* Build the table of primitives as a copy of the builtin primitive table. 
@@ -394,6 +420,13 @@ CAMLprim value caml_dynlink_get_bytecode_sections(value unit) list = caml_alloc_2(Tag_cons, str, list); } Store_field(ret, 2, list); +#ifndef DEBUG + if (caml_prim_name_table.size > 0) { + /* caml_prim_name_table is no longer required */ + caml_ext_table_free(&caml_prim_name_table, 0); + caml_stat_free(prim_names); + } +#endif list = Val_emptylist; for (int i = caml_shared_libs_path.size - 1; i >= 0; i--) { @@ -402,6 +435,13 @@ CAMLprim value caml_dynlink_get_bytecode_sections(value unit) } Store_field(ret, 3, list); + if (caml_shared_libs_path.size > 0) { + /* caml_shared_libs_path is no longer required */ + caml_ext_table_free(&caml_shared_libs_path, 0); + caml_stat_free(shared_libs_buffer1); + caml_stat_free(shared_libs_buffer2); + } + CAMLreturn (ret); } diff --git a/runtime/extern.c b/runtime/extern.c index dcb860957d75..8d14f9ee0fac 100644 --- a/runtime/extern.c +++ b/runtime/extern.c @@ -19,6 +19,7 @@ /* The interface of this file is "caml/intext.h" */ +#include #include #include @@ -111,6 +112,10 @@ struct caml_extern_state { struct caml_output_block * extern_output_first; struct caml_output_block * extern_output_block; + + /* extern_value sets this to true if the value written was 32-bit compatible. 
+ Set regardless of COMPAT_32 (see caml_output_value_with_compat) */ + bool compat_32; }; static void init_extern_stack(struct caml_extern_state* s) @@ -571,6 +576,7 @@ Caml_inline void extern_int(struct caml_extern_state* s, intnat n) writecode16(s, CODE_INT16, n); #ifdef ARCH_SIXTYFOUR } else if (n < -((intnat)1 << 30) || n >= ((intnat)1 << 30)) { + s->compat_32 = false; if (s->extern_flags & COMPAT_32) extern_failwith(s, "output_value: integer cannot be read back on " "32-bit platform"); @@ -609,9 +615,12 @@ Caml_inline void extern_header(struct caml_extern_state* s, } else { header_t hd = Make_header(sz, tag, NOT_MARKABLE); #ifdef ARCH_SIXTYFOUR - if (sz > 0x3FFFFF && (s->extern_flags & COMPAT_32)) - extern_failwith(s, "output_value: array cannot be read back on " - "32-bit platform"); + if (sz > 0x3FFFFF) { + s->compat_32 = false; + if (s->extern_flags & COMPAT_32) + extern_failwith(s, "output_value: array cannot be read back on " + "32-bit platform"); + } if (hd < (uintnat)1 << 32) writecode32(s, CODE_BLOCK32, hd); else @@ -633,9 +642,12 @@ Caml_inline void extern_string(struct caml_extern_state *s, writecode8(s, CODE_STRING8, len); } else { #ifdef ARCH_SIXTYFOUR - if (len > 0xFFFFFB && (s->extern_flags & COMPAT_32)) - extern_failwith(s, "output_value: string cannot be read back on " - "32-bit platform"); + if (len > 0xFFFFFB) { + s->compat_32 = false; + if (s->extern_flags & COMPAT_32) + extern_failwith(s, "output_value: string cannot be read back on " + "32-bit platform"); + } if (len < (uintnat)1 << 32) writecode32(s, CODE_STRING32, len); else @@ -664,9 +676,12 @@ Caml_inline void extern_double_array(struct caml_extern_state* s, writecode8(s, CODE_DOUBLE_ARRAY8_NATIVE, nfloats); } else { #ifdef ARCH_SIXTYFOUR - if (nfloats > 0x1FFFFF && (s->extern_flags & COMPAT_32)) - extern_failwith(s, "output_value: float array cannot be read back on " - "32-bit platform"); + if (nfloats > 0x1FFFFF) { + s->compat_32 = false; + if (s->extern_flags & COMPAT_32) + 
extern_failwith(s, "output_value: float array cannot be read back on " + "32-bit platform"); + } if (nfloats < (uintnat) 1 << 32) writecode32(s, CODE_DOUBLE_ARRAY32_NATIVE, nfloats); else @@ -927,6 +942,7 @@ static intnat extern_value(struct caml_extern_state* s, value v, value flags, intnat res_len; /* Parse flag list */ s->extern_flags = caml_convert_flag_list(flags, extern_flag_values); + s->compat_32 = true; /* Turn compression off if Zlib missing or if called from caml_output_value_to_block */ #ifdef HAS_ZSTD @@ -950,14 +966,16 @@ static intnat extern_value(struct caml_extern_state* s, value v, value flags, res_len = extern_output_length(s); /* Check lengths if compat32 mode is requested */ #ifdef ARCH_SIXTYFOUR - if (s->extern_flags & COMPAT_32 - && (uncompressed_len >= (uintnat)1 << 32 - || res_len >= (uintnat)1 << 32 - || s->size_32 >= (uintnat)1 << 32 - || s->size_64 >= (uintnat)1 << 32)) { - free_extern_output(s); - caml_failwith("output_value: object too big to be read back on " - "32-bit platform"); + if (uncompressed_len >= (uintnat)1 << 32 + || res_len >= (uintnat)1 << 32 + || s->size_32 >= (uintnat)1 << 32 + || s->size_64 >= (uintnat)1 << 32) { + s->compat_32 = false; + if (s->extern_flags & COMPAT_32) { + free_extern_output(s); + caml_failwith("output_value: object too big to be read back on " + "32-bit platform"); + } } #endif /* Write the header in compressed format */ @@ -979,6 +997,7 @@ static intnat extern_value(struct caml_extern_state* s, value v, value flags, s->size_32 >= ((intnat)1 << 32) || s->size_64 >= ((intnat)1 << 32)) { /* The object is too big for the small header format. Fail if we are in compat32 mode, or use big header. 
*/ + s->compat_32 = false; if (s->extern_flags & COMPAT_32) { free_extern_output(s); caml_failwith("output_value: object too big to be read back on " @@ -1039,6 +1058,13 @@ CAMLprim value caml_output_value(value vchan, value v, value flags) CAMLreturn (Val_unit); } +CAMLprim value caml_output_value_with_compat(value vchan, value v, value flags) +{ + struct caml_extern_state* s = init_extern_state (); + caml_output_value(vchan, v, flags); + return Val_bool(s->compat_32); +} + CAMLprim value caml_output_value_to_bytes(value v, value flags) { char header[MAX_INTEXT_HEADER_SIZE]; @@ -1281,6 +1307,7 @@ CAMLprim value caml_obj_reachable_words(value v) s->obj_counter = 0; s->extern_flags = 0; + s->compat_32 = true; extern_init_position_table(s); sp = s->extern_stack; size = 0; diff --git a/runtime/gc_ctrl.c b/runtime/gc_ctrl.c index 85946e63309e..8adf48679292 100644 --- a/runtime/gc_ctrl.c +++ b/runtime/gc_ctrl.c @@ -372,15 +372,30 @@ CAMLprim value caml_runtime_variant (value unit) #endif } +atomic_bool caml_runtime_randomized = false; + +CAMLprim value caml_runtime_randomize(value vunit) +{ + caml_runtime_randomized = true; + return Val_unit; +} + +CAMLprim value caml_runtime_is_randomized(value vunit) +{ + return Val_bool(caml_runtime_randomized); +} + CAMLprim value caml_runtime_parameters (value unit) { #define F_Z CAML_PRIuNAT #define F_S CAML_PRIuSZT CAMLassert (unit == Val_unit); + /* Parameters in alphabetical order; if an option has both upper/lower then + upper first cf. 
Compenv.overridden_runtime_parameters */ return caml_alloc_sprintf ("b=%d,c=%"F_Z",e=%"F_Z",l=%"F_Z",M=%"F_Z",m=%"F_Z",n=%"F_Z"," - "o=%"F_Z",p=%d,s=%"F_S",t=%"F_Z",v=%"F_Z",V=%"F_Z",W=%"F_Z"", + "o=%"F_Z",p=%d,R=%u,s=%"F_S",t=%"F_Z",V=%"F_Z",v=%"F_Z",W=%"F_Z"", /* b */ (int) Caml_state->backtrace_active, /* c */ caml_params->cleanup_on_exit, /* e */ caml_params->runtime_events_log_wsize, @@ -390,11 +405,11 @@ CAMLprim value caml_runtime_parameters (value unit) /* n */ caml_custom_minor_max_bsz, /* o */ caml_percent_free, /* p */ Caml_state->parser_trace, - /* R */ /* missing */ + /* R */ caml_runtime_randomized, /* s */ Caml_state->minor_heap_wsz, /* t */ caml_params->trace_level, - /* v */ caml_verb_gc, /* V */ caml_params->verify_heap, + /* v */ caml_verb_gc, /* W */ caml_runtime_warnings ); #undef F_Z diff --git a/runtime/gen_primsc.sh b/runtime/gen_primsc.sh index 9630501a7c60..77dec42c1594 100755 --- a/runtime/gen_primsc.sh +++ b/runtime/gen_primsc.sh @@ -67,8 +67,10 @@ echo ' 0 };' # ocamlrun values for symbols which are provided by the bytecode linker # - ocamlrun is able to use any of the mechanisms to load the bytecode # - caml_runtime_standard_library_default for bytecode images on this runtime +# - stub default OCAMLRUNPARAM string cat <<'EOF' const enum caml_byte_program_mode caml_byte_program_mode = STANDARD; const char_os *caml_runtime_standard_library_default = OCAML_STDLIB_DIR; +const char_os *caml_executable_ocamlrunparam = NULL; EOF diff --git a/runtime/meta.c b/runtime/meta.c index 40cb506f7266..5d6e18c4a9a2 100644 --- a/runtime/meta.c +++ b/runtime/meta.c @@ -204,31 +204,13 @@ CAMLprim value caml_invoke_traced_function(value codeptr, value env, value arg) value caml_get_global_data(value unit) { - caml_invalid_argument("Meta.get_global_data"); + caml_invalid_argument("Symtable.get_global_data"); return Val_unit; /* not reached */ } value caml_realloc_global(value size) { - caml_invalid_argument("Meta.realloc_global"); - return Val_unit; /* not 
reached */ -} - -value caml_invoke_traced_function(value codeptr, value env, value arg) -{ - caml_invalid_argument("Meta.invoke_traced_function"); - return Val_unit; /* not reached */ -} - -value caml_reify_bytecode(value prog, value len) -{ - caml_invalid_argument("Meta.reify_bytecode"); - return Val_unit; /* not reached */ -} - -value caml_static_release_bytecode(value prog, value len) -{ - caml_invalid_argument("Meta.static_release_bytecode"); + caml_invalid_argument("Symtable.realloc_global"); return Val_unit; /* not reached */ } diff --git a/runtime/runtime_events.c b/runtime/runtime_events.c index 5b9578b7589a..67ee95c1e5e9 100644 --- a/runtime/runtime_events.c +++ b/runtime/runtime_events.c @@ -138,17 +138,19 @@ void caml_runtime_events_init(void) { runtime_events_path = caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_DIR")); - if (runtime_events_path) { + if (runtime_events_path && *runtime_events_path != '\0') { /* caml_secure_getenv's return shouldn't be cached */ runtime_events_path = caml_stat_strdup_os(runtime_events_path); } ring_size_words = 1 << caml_params->runtime_events_log_wsize; + char_os *value = caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_PRESERVE")); preserve_ring = - caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_PRESERVE")) ? 1 : 0; + (value && *value != '\0') ? 1 : 0; - if (caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_START"))) { + value = caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_START")); + if (value && *value != '\0') { runtime_events_create_from_stw_single(); /* stw_single: mutators and domains have not started yet. 
*/ } diff --git a/runtime/startup_aux.c b/runtime/startup_aux.c index 70f13ed94bc7..21535de8ab12 100644 --- a/runtime/startup_aux.c +++ b/runtime/startup_aux.c @@ -27,12 +27,14 @@ #ifndef NATIVE_CODE #include "caml/dynlink.h" #endif +#include "caml/gc_ctrl.h" #include "caml/gc_stats.h" #include "caml/osdeps.h" #include "caml/shared_heap.h" #include "caml/startup_aux.h" #include "caml/prims.h" #include "caml/signals.h" +#include "caml/platform.h" #ifdef _WIN32 extern void caml_win32_unregister_overflow_detection (void); @@ -62,19 +64,20 @@ static void init_startup_params(void) atomic_store_relaxed(&caml_verb_gc, CAML_GC_MSG_VERBOSE | CAML_GC_MSG_MINOR); #endif #ifndef NATIVE_CODE + /* TODO #4703 The .cds file should be determined from exe_name */ cds_file = caml_secure_getenv(T("CAML_DEBUG_FILE")); - if (cds_file != NULL) { - params.cds_file = caml_stat_strdup_os(cds_file); + /* Ignore CAML_DEBUG_FILE if it's "Set But Null" */ + if (cds_file != NULL && *cds_file != '\0') { + /* Largely by historical accident, resolve CAML_DEBUG_FILE in PATH */ + params.cds_file = caml_search_exe_in_path(cds_file); } #endif params.trace_level = 0; params.cleanup_on_exit = 0; - params.print_magic = 0; - params.print_config = 0; params.event_trace = 0; } -static void scanmult (char_os *opt, uintnat *var) +static void scanmult (const char_os *opt, uintnat *var) { char_os mult = ' '; unsigned int val = 1; @@ -88,14 +91,10 @@ static void scanmult (char_os *opt, uintnat *var) } } -void caml_parse_ocamlrunparam(void) +/* To keep in sync with Compenv.parse_runtime_parameter */ +static void parse_ocamlrunparam(const char_os *opt) { - init_startup_params(); uintnat val; - - char_os *opt = caml_secure_getenv (T("OCAMLRUNPARAM")); - if (opt == NULL) opt = caml_secure_getenv (T("CAMLRUNPARAM")); - if (opt != NULL){ while (*opt != '\0'){ switch (*opt++){ @@ -109,7 +108,10 @@ void caml_parse_ocamlrunparam(void) case 'n': scanmult (opt, ¶ms.init_custom_minor_max_bsz); break; case 'o': scanmult (opt, 
¶ms.init_percent_free); break; case 'p': scanmult (opt, ¶ms.parser_trace); break; - case 'R': break; /* see stdlib/hashtbl.mli */ + case 'R': + scanmult (opt, &val); + caml_runtime_randomized = !!val; + break; case 's': scanmult (opt, ¶ms.init_minor_heap_wsz); break; case 't': scanmult (opt, ¶ms.trace_level); break; case 'v': @@ -136,6 +138,19 @@ void caml_parse_ocamlrunparam(void) } } +void caml_parse_ocamlrunparam(void) +{ + init_startup_params(); + + /* Update any of this runtime's default parameter values with the defaults + specified in the executable/image */ + parse_ocamlrunparam(caml_executable_ocamlrunparam); + + /* Now parse OCAMLRUNPARAM/CAMLRUNPARAM for values specified by the user */ + const char_os *opt = caml_secure_getenv (T("OCAMLRUNPARAM")); + if (opt == NULL || *opt == '\0') opt = caml_secure_getenv (T("CAMLRUNPARAM")); + parse_ocamlrunparam(opt); +} /* The number of outstanding calls to caml_startup */ static int startup_count = 0; diff --git a/runtime/startup_byt.c b/runtime/startup_byt.c index 6beca392a918..6ca017b334a9 100644 --- a/runtime/startup_byt.c +++ b/runtime/startup_byt.c @@ -22,11 +22,13 @@ #include #include #include +#include #include "caml/config.h" #ifndef _WIN32 #include #endif #ifdef _WIN32 +#include #include #include #endif @@ -74,10 +76,13 @@ const char_os * caml_runtime_standard_library_effective = NULL; +static bool print_magic = false; +static bool print_config = false; + static char magicstr[EXEC_MAGIC_LENGTH+1]; /* Print the specified error message followed by an end-of-line and exit */ -static void error(const char *msg, ...) +CAMLnoret static void error(const char *msg, ...) 
{ va_list ap; va_start(ap, msg); @@ -106,7 +111,7 @@ int caml_read_trailer(int fd, struct exec_trailer *trail) memcpy(magicstr, trail->magic, EXEC_MAGIC_LENGTH); magicstr[EXEC_MAGIC_LENGTH] = 0; - if (caml_params->print_magic) { + if (print_magic) { printf("%s\n", magicstr); exit(0); } @@ -115,44 +120,41 @@ int caml_read_trailer(int fd, struct exec_trailer *trail) ? 0 : WRONG_MAGIC; } -int caml_attempt_open(char_os **name, struct exec_trailer *trail, +int caml_attempt_open(const char_os *name, struct exec_trailer *trail, int do_open_script) { - char_os * truename; int fd; int err; - char buf [2], * u8; + char *u8; - truename = caml_search_exe_in_path(*name); - u8 = caml_stat_strdup_of_os(truename); + u8 = caml_stat_strdup_of_os(name); CAML_GC_MESSAGE(STARTUP, "Opening bytecode executable %s\n", u8); caml_stat_free(u8); - fd = open_os(truename, O_RDONLY | O_BINARY); + fd = open_os(name, O_RDONLY | O_BINARY); if (fd == -1) { - caml_stat_free(truename); CAML_GC_MESSAGE(STARTUP, "Cannot open file\n"); if (errno == EMFILE) return NO_FDS; else return FILE_NOT_FOUND; } +#ifndef _WIN32 + char buf[2]; if (!do_open_script) { err = read (fd, buf, 2); - if (err < 2 || (buf [0] == '#' && buf [1] == '!')) { + if (err < 2 || (buf[0] == '#' && buf[1] == '!')) { close(fd); - caml_stat_free(truename); CAML_GC_MESSAGE(STARTUP, "Rejected #! 
script\n"); return BAD_BYTECODE; } } +#endif err = caml_read_trailer(fd, trail); if (err != 0) { close(fd); - caml_stat_free(truename); CAML_GC_MESSAGE(STARTUP, "Not a bytecode executable\n"); return err; } - *name = truename; return fd; } @@ -300,12 +302,12 @@ static void do_print_help(void) /* Parse options on the command line */ -static int parse_command_line(char_os **argv) +static int parse_command_line(char_os **argv, + uintnat *trace_level, + uintnat *backtrace_enabled, + uintnat *event_trace) { int i, len, parsed; - /* cast to make caml_params mutable; this assumes we are only called - by one thread at startup */ - struct caml_params* params = (struct caml_params*)caml_params; for(i = 1; argv[i] != NULL && argv[i][0] == '-'; i++) { len = strlen_os(argv[i]); @@ -317,7 +319,7 @@ static int parse_command_line(char_os **argv) return i + 1; break; case 't': - params->trace_level += 1; /* ignored unless DEBUG mode */ + *trace_level += 1; /* ignored unless DEBUG mode */ break; case 'v': atomic_store_relaxed(&caml_verb_gc, CAML_GC_MSG_VERBOSE); @@ -328,7 +330,7 @@ static int parse_command_line(char_os **argv) exit(0); break; case 'b': - params->backtrace_enabled = 1; + *backtrace_enabled = 1; break; case 'I': if (argv[i + 1] != NULL) { @@ -339,7 +341,7 @@ static int parse_command_line(char_os **argv) } break; case 'm': - params->print_magic = 1; + print_magic = true; break; case 'M': printf("%s\n", EXEC_MAGIC); @@ -357,13 +359,13 @@ static int parse_command_line(char_os **argv) printf("%s\n", OCAML_VERSION_STRING); exit(0); } else if (!strcmp_os(argv[i], T("-events"))) { - params->event_trace = 1; /* Ignored unless DEBUG mode */ + *event_trace = 1; /* Ignored unless DEBUG mode */ } else if (!strcmp_os(argv[i], T("-help")) || !strcmp_os(argv[i], T("--help"))) { do_print_help(); exit(0); } else if (!strcmp_os(argv[i], T("-config"))) { - params->print_config = 1; + print_config = true; } else { parsed = 0; } @@ -389,8 +391,6 @@ static const char_os * 
get_stdlib_location(void) freed, since the runtime will terminate after calling this. */ static void do_print_config(void) { - const char_os * dir; - /* Print the runtime configuration */ printf("version: %s\n", OCAML_VERSION_STRING); printf("standard_library_default: %s\n", @@ -436,17 +436,8 @@ static void do_print_config(void) caml_secure_getenv(T("CAML_LD_LIBRARY_PATH"))); caml_parse_ld_conf(caml_runtime_standard_library_effective, &caml_shared_libs_path); - for (int i = 0; i < caml_shared_libs_path.size; i++) { - dir = caml_shared_libs_path.contents[i]; - if (dir[0] == 0) -#ifdef _WIN32 - /* See caml_search_in_path in win32.c */ - continue; -#else - dir = "."; -#endif - printf(" %s\n", caml_stat_strdup_of_os(dir)); - } + for (int i = 0; i < caml_shared_libs_path.size; i++) + printf(" %s\n", caml_stat_strdup_of_os(caml_shared_libs_path.contents[i])); } #ifdef _WIN32 @@ -470,117 +461,278 @@ CAMLexport void caml_main(char_os **argv) value res; char * req_prims; char_os * shared_lib_path, * shared_libs; - char_os * exe_name, * proc_self_exe, * argv0; + char_os * exe_name = NULL, * proc_self_exe, * argv0, * tofree = NULL; + /* Only one thread at startup - caml_params won't be mutated once the VM + starts */ + struct caml_params* params = (struct caml_params*)caml_params; + uintnat trace_level = 0, backtrace_enabled = 0, event_trace = 0; - /* Determine options */ +#ifdef _MSC_VER + caml_install_invalid_parameter_handler(); +#endif + + /* Parse OCAMLRUNPARAM - for -custom, -output-obj, etc. this will take + caml_executable_ocamlrunparam into account, but for tendered bytecode + images (or for explicit invocation as ocamlrun ./foo.byte) the ORUN section + has not yet been read. The only relevant setting between here and ORUN + being read is c=1 (pooling). If ORUN includes c=1 and OCAMLRUNPARAM does + not include c=0, then a brief memory dance is done to re-initialise the + runtime in pooling mode. 
*/ caml_parse_ocamlrunparam(); if (!caml_startup_aux(/* pooling */ caml_params->cleanup_on_exit)) return; - caml_init_codefrag(); - - caml_init_locale(); -#ifdef _MSC_VER - caml_install_invalid_parameter_handler(); -#endif - caml_init_custom_operations(); - caml_init_os_params(); - caml_ext_table_init(&caml_shared_libs_path, 8); - /* Determine position of bytecode file */ pos = 0; argv0 = proc_self_exe = caml_executable_name(); - /* In APPENDED mode (i.e. with -custom), we always want to load the bytecode - from the running executable, and argv[0] should never be used. However, - some platforms still don't implement caml_executable_name, so there is an - escape hatch here to fallback to checking argv[0] if proc_self_exe is - NULL. - For STANDARD mode (i.e. the current executable is ocamlrun), argv[0] is - tried first, as this should be the path to shebang-script/executable - originally executed by the user. */ - CAMLassert(caml_byte_program_mode != EMBEDDED); - if (caml_byte_program_mode != APPENDED || proc_self_exe == NULL) { - exe_name = argv[0]; - fd = caml_attempt_open(&exe_name, &trail, 0); - } - - /* Little grasshopper wonders why we do that at all, since - "The current executable is ocamlrun itself, it's never a bytecode - program". Little grasshopper "ocamlc -custom" in mind should keep. - With -custom, we have an executable that is ocamlrun itself - concatenated with the bytecode. So, if the attempt with argv[0] - failed, it is worth trying again with executable_name. 
*/ - if (caml_byte_program_mode == APPENDED || fd < 0) { - if (proc_self_exe != NULL) { - exe_name = proc_self_exe; - fd = caml_attempt_open(&exe_name, &trail, 0); - } - if (fd < 0 && caml_byte_program_mode == APPENDED) - error("unable to open file '%s'", caml_stat_strdup_of_os(exe_name)); - } - - if (argv0 == NULL) - argv0 = caml_search_exe_in_path(exe_name); + /* caml_shared_libs_path is used by parse_command_line */ + caml_ext_table_init(&caml_shared_libs_path, 8); - if (fd < 0) { - pos = parse_command_line(argv); - if (caml_params->print_config) { - caml_runtime_standard_library_effective = - caml_locate_standard_library(argv0, - caml_runtime_standard_library_default, - NULL); + char_os *str_fd = NULL; + /* -custom executables do not inspect __OCAML_EXEC_FD */ + if (caml_byte_program_mode != APPENDED) + str_fd = caml_secure_getenv(T("__OCAML_EXEC_FD")); - do_print_config(); - exit(0); + if (str_fd != NULL) { +#ifdef _WIN32 + /* On Windows, __OCAML_EXEC_FD must be exactly one wchar_t and the scalar + value of that character is the fd number */ + if (wcslen(str_fd) != 1) + error("descriptor passed via environment is invalid"); + else + fd = (int)str_fd[0]; + + DWORD len = + GetFinalPathNameByHandle((HANDLE)_get_osfhandle(fd), + NULL, 0, VOLUME_NAME_DOS); + if (len > 0) { + exe_name = caml_stat_alloc((len + 1) * sizeof(wchar_t)); + if (GetFinalPathNameByHandle((HANDLE)_get_osfhandle(fd), + exe_name, len, VOLUME_NAME_DOS) != 0) { + CAMLassert(len > 4 && exe_name[0] == '\\' && exe_name[1] == '\\' + && exe_name[2] == '?' && exe_name[3] == '\\'); + wchar_t *p, *w; + /* GetFinalPathNameByHandle always returns a string beginning \\?\ and + returns \\?\UNC\ for a UNC path. 
Setup p and w to copy the string + back either 4 characters (for this first case) or 6 characters (for + the second) so that \\?\C:\Foo becomes C:\Foo and \\?\UNC\Server\Foo + becomes \\Server\Foo */ + if (len >= 8 && exe_name[4] == 'U' && exe_name[5] == 'N' + && exe_name[6] == 'C' && exe_name[7] == '\\') { + p = exe_name + 8; + w = exe_name + 2; + } else { + p = exe_name + 4; + w = exe_name; + } + while ((*w++ = *p++)); + } else { + error("descriptor passed via environment is invalid"); + } + } else { + error("descriptor passed via environment is invalid"); } - if (argv[pos] == 0) { - error("no bytecode file specified"); +#else + int offset; + if (sscanf_os(str_fd, T("%u,%n"), &fd, &offset) <= 0) + error("descriptor passed via environment is invalid"); + exe_name = caml_stat_strdup(str_fd + offset); +#endif + int err = caml_read_trailer(fd, &trail); + if (err != 0) { + close(fd); + CAML_GC_MESSAGE(STARTUP, "Descriptor is not a bytecode image\n"); + /* Termination code shared with normal startup route */ + fd = err; + } else { +#if defined(_WIN32) + _wputenv(L"__OCAML_EXEC_FD="); +#elif defined(HAS_SETENV_UNSETENV) + unsetenv("__OCAML_EXEC_FD"); +#endif + } + if (proc_self_exe == NULL) + argv0 = exe_name; + } else { + /* In APPENDED mode (i.e. with -custom), we always want to load the bytecode + from the running executable, and argv[0] should never be used. However, + some platforms still don't implement caml_executable_name, so there is an + escape hatch here to fallback to checking argv[0] if proc_self_exe is + NULL. + For STANDARD mode (i.e. the current executable is ocamlrun), argv[0] is + tried first, as this should be the path to shebang-script/executable + originally executed by the user. 
*/ + CAMLassert(caml_byte_program_mode != EMBEDDED); + if (caml_byte_program_mode != APPENDED || proc_self_exe == NULL) { + exe_name = caml_search_exe_in_path(argv[0]); + fd = caml_attempt_open(exe_name, &trail, 0); + if (proc_self_exe == NULL) { + argv0 = exe_name; + if (fd < 0) + tofree = exe_name; + } else if (fd < 0) { + caml_stat_free(exe_name); + } } - exe_name = argv[pos]; - fd = caml_attempt_open(&exe_name, &trail, 1); - switch(fd) { - case FILE_NOT_FOUND: - error("cannot find file '%s'", - caml_stat_strdup_of_os(argv[pos])); - break; - case BAD_BYTECODE: - error( - "the file '%s' is not a bytecode executable file", - caml_stat_strdup_of_os(exe_name)); - break; - case WRONG_MAGIC: - error( - "the file '%s' has not the right magic number: "\ - "expected %s, got %s", - caml_stat_strdup_of_os(exe_name), - EXEC_MAGIC, - magicstr); - break; + + /* Little grasshopper wonders why we do that at all, since + "The current executable is ocamlrun itself, it's never a bytecode + program". Little grasshopper "ocamlc -custom" in mind should keep. + With -custom, we have an executable that is ocamlrun itself + concatenated with the bytecode. So, if the attempt with argv[0] + failed, it is worth trying again with executable_name. 
*/ + if (caml_byte_program_mode == APPENDED || fd < 0) { + if (proc_self_exe != NULL) { + exe_name = proc_self_exe; + fd = caml_attempt_open(exe_name, &trail, 0); + } + if (fd < 0 && caml_byte_program_mode == APPENDED) + error("unable to open file '%s'", caml_stat_strdup_of_os(exe_name)); + } + + if (fd < 0) { + pos = + parse_command_line(argv, + &trace_level, &backtrace_enabled, &event_trace); + if (print_config) { + caml_runtime_standard_library_effective = + caml_locate_standard_library(argv0, + caml_runtime_standard_library_default, + NULL); + + do_print_config(); + exit(0); + } + if (argv[pos] == 0) { + error("no bytecode file specified"); + } + exe_name = caml_search_exe_in_path(argv[pos]); + fd = caml_attempt_open(exe_name, &trail, 1); } } + + params->trace_level += trace_level; + if (backtrace_enabled) + params->backtrace_enabled = 1; + if (event_trace) + params->event_trace = 1; + switch(fd) { + case FILE_NOT_FOUND: + error("cannot find file '%s'", + caml_stat_strdup_of_os(exe_name)); + break; + case BAD_BYTECODE: + error( + "the file '%s' is not a bytecode executable file", + caml_stat_strdup_of_os(exe_name)); + break; + case WRONG_MAGIC: + error( + "the file '%s' has not the right magic number: "\ + "expected %s, got %s", + caml_stat_strdup_of_os(exe_name), + EXEC_MAGIC, + magicstr); + break; + } + /* Read the table of contents (section descriptors) */ caml_read_section_descriptors(fd, &trail); + /* If caml_executable_ocamlrunparam was set, don't also process ORUN */ + if (!caml_executable_ocamlrunparam) { + /* Load the embedded runtime parameters */ + char_os *orun = read_section_to_os(fd, &trail, "ORUN"); + /* Re-parse options, taking these defaults into account (see note when + caml_parse-ocamlrunparam was previously called in this function) */ + if (orun != NULL) { + int pooling = caml_params->cleanup_on_exit; + caml_executable_ocamlrunparam = orun; + caml_parse_ocamlrunparam(); + + /* caml_parse_ocamlrunparam resets the params fields: re-apply the 
three + which are affected by command-line parsing. */ + params->trace_level += trace_level; + if (backtrace_enabled) + params->backtrace_enabled = 1; + if (event_trace) + params->event_trace = 1; + + /* c=1 was specified in ORUN, but c not included in OCAMLRUNPARAM */ + if (caml_params->cleanup_on_exit && !pooling) { + /* In order to re-start with pooling, everything which has been + allocated with caml_stat_alloc (i.e. malloc) must be passed to + caml_stat_free (i.e. free) and then reallocated */ + char_os *old_proc_self_exe = NULL; + char_os *old_exe_name = strdup_os(exe_name); + if (proc_self_exe) + old_proc_self_exe = strdup_os(proc_self_exe); + int search_path_size = caml_shared_libs_path.size; + char_os **search_path = + (char_os **)malloc(sizeof(char_os *) * search_path_size); + if (search_path) + memcpy(search_path, caml_shared_libs_path.contents, + sizeof(char_os *) * search_path_size); + else + search_path_size = 0; + + /* caml_stat_free everything which is currently allocated */ + caml_stat_free(orun); + caml_stat_free(proc_self_exe); + caml_stat_free(exe_name); + caml_stat_free(trail.section); + caml_ext_table_free(&caml_shared_libs_path, 0); + + /* Enable pooling */ + caml_stat_create_pool(); + + /* Re-initialise state with pooled memory */ + if (old_proc_self_exe) { + proc_self_exe = caml_stat_strdup_os(old_proc_self_exe); + free(old_proc_self_exe); + } + exe_name = caml_stat_strdup_os(old_exe_name); + free(old_exe_name); + + /* Re-read the table of contents (section descriptors) */ + caml_read_section_descriptors(fd, &trail); + caml_executable_ocamlrunparam = read_section_to_os(fd, &trail, "ORUN"); + + /* Re-initialise caml_shared_libs_path */ + caml_ext_table_init(&caml_shared_libs_path, 8); + for (int i = 0; i < search_path_size; i++) { + caml_ext_table_add(&caml_shared_libs_path, search_path[i]); + } + free(search_path); + } + } + } + caml_runtime_standard_library_effective = caml_locate_standard_library(argv0, 
caml_runtime_standard_library_default, NULL); - if (argv0 != proc_self_exe) - caml_stat_free(argv0); + caml_stat_free(tofree); /* Load the embedded overridden caml_standard_library_default value, if one is - available. Note that although -custom executables come through this - mechanism, they don't define OSLD sections because - caml_runtime_standard_library_default and caml_standard_library_default are - fundamentally equal and caml_runtime_standard_library_default is set when - the -custom executable is linked. */ + available. This value is set _after_ caml_standard_library_effective has + been set via caml_locate_standard_library, because ocamlrun must use the + value it was configured with. For -custom executables, the value is the + same (albeit irrelevantly) - they specify caml_standard_library_default via + the primitives object, rather than via the OSLD section. */ char_os *image_standard_library_default = read_section_to_os(fd, &trail, "OSLD"); if (image_standard_library_default != NULL) caml_standard_library_default = image_standard_library_default; + caml_init_codefrag(); + + caml_init_locale(); + caml_init_custom_operations(); + caml_init_os_params(); + /* Initialize the abstract machine */ caml_init_gc (); @@ -601,12 +753,19 @@ CAMLexport void caml_main(char_os **argv) /* Build the table of primitives */ shared_lib_path = read_section_to_os(fd, &trail, "DLPT"); shared_libs = read_section_to_os(fd, &trail, "DLLS"); +#ifndef SUPPORT_DYNAMIC_LINKING + if (shared_libs != NULL) + error( + "the file '%s' requires shared libraries to be loaded, which this " + "runtime does not support", + caml_stat_strdup_of_os(exe_name)); +#endif req_prims = read_section(fd, &trail, "PRIM"); if (req_prims == NULL) caml_fatal_error("no PRIM section"); caml_build_primitive_table(shared_lib_path, shared_libs, req_prims); + /* caml_build_primitive_table is responsible for freeing req_prims */ caml_stat_free(shared_lib_path); caml_stat_free(shared_libs); - 
caml_stat_free(req_prims); /* Load the globals */ caml_seek_section(fd, &trail, "DATA"); chan = caml_open_descriptor_in(fd); @@ -621,7 +780,7 @@ CAMLexport void caml_main(char_os **argv) /* ensure all globals are in major heap */ caml_minor_collection(); #ifdef _WIN32 - /* Start a thread to handle signals */ + /* Start a thread to handle signals - used by ocamlbrowser */ if (caml_secure_getenv(T("CAMLSIGPIPE"))) _beginthread(caml_signal_thread, 4096, NULL); #endif @@ -652,6 +811,10 @@ CAMLexport value caml_startup_code_exn( char_os * exe_name, * proc_self_exe; value res; +#ifdef _MSC_VER + caml_install_invalid_parameter_handler(); +#endif + /* Determine options */ caml_parse_ocamlrunparam(); @@ -663,9 +826,6 @@ CAMLexport value caml_startup_code_exn( caml_init_codefrag(); caml_init_locale(); -#ifdef _MSC_VER - caml_install_invalid_parameter_handler(); -#endif caml_init_custom_operations(); caml_init_os_params(); caml_ext_table_init(&caml_shared_libs_path, 8); diff --git a/runtime/unix.c b/runtime/unix.c index 68a53db8441e..2f64ee32f084 100644 --- a/runtime/unix.c +++ b/runtime/unix.c @@ -57,9 +57,6 @@ #ifdef HAS_LIBGEN_H #include #endif -#ifdef __APPLE__ -#include -#endif #ifdef HAS_SYS_MMAN_H #include #endif @@ -125,9 +122,15 @@ caml_stat_string caml_decompose_path(struct ext_table * tbl, char * path) if (path == NULL) return NULL; p = caml_stat_strdup(path); q = p; + while (1) { + /* Skip any prefixing colons */ + while (*q == ':') + q++; + /* Find the end of this entry */ for (n = 0; q[n] != 0 && q[n] != ':'; n++) /*nothing*/; - caml_ext_table_add(tbl, q); + if (n > 0) + caml_ext_table_add(tbl, q); q = q + n; if (*q == 0) break; *q = 0; @@ -157,81 +160,29 @@ caml_stat_string caml_search_in_path(struct ext_table * path, const char * name) return caml_stat_strdup(name); } -#ifdef __CYGWIN__ - -/* Cygwin needs special treatment because of the implicit ".exe" at the - end of executable file names */ - -static int cygwin_file_exists(const char * name) -{ - int fd, 
ret; - struct stat st; - /* Cannot use stat() here because it adds ".exe" implicitly */ - fd = open(name, O_RDONLY); - if (fd == -1) return 0; - ret = fstat(fd, &st); - close(fd); - return ret == 0 && S_ISREG(st.st_mode); -} - -static caml_stat_string cygwin_search_exe_in_path(struct ext_table * path, - const char * name) -{ - char * dir, * fullname; - for (const char *p = name; *p != 0; p++) { - if (*p == '/' || *p == '\\') goto not_found; - } - for (int i = 0; i < path->size; i++) { - dir = path->contents[i]; - if (dir[0] == 0) dir = "."; /* empty path component = current dir */ - fullname = caml_stat_strconcat(3, dir, "/", name); - if (cygwin_file_exists(fullname)) return fullname; - caml_stat_free(fullname); - fullname = caml_stat_strconcat(4, dir, "/", name, ".exe"); - if (cygwin_file_exists(fullname)) return fullname; - caml_stat_free(fullname); - } - not_found: - if (cygwin_file_exists(name)) return caml_stat_strdup(name); - fullname = caml_stat_strconcat(2, name, ".exe"); - if (cygwin_file_exists(fullname)) return fullname; - caml_stat_free(fullname); - return caml_stat_strdup(name); -} - -#endif - caml_stat_string caml_search_exe_in_path(const char * name) { - struct ext_table path; - char * tofree; - caml_stat_string res; - - caml_ext_table_init(&path, 8); - tofree = caml_decompose_path(&path, getenv("PATH")); -#ifndef __CYGWIN__ - res = caml_search_in_path(&path, name); -#else - res = cygwin_search_exe_in_path(&path, name); -#endif - caml_stat_free(tofree); - caml_ext_table_free(&path, 0); + /* caml_search_in_system_path treats PATH being "Set But Null" (i.e. equal to + "") as being equivalent to being set to "." 
*/ + caml_stat_string res = caml_search_in_system_path(name); + if (res == NULL) + res = caml_stat_strdup(name); return res; } -caml_stat_string caml_search_dll_in_path(struct ext_table * path, - const char * name) +/* Primitive is defined here rather than sys.c as otherwise sys.c would need + duplicating for libcamlrun_non_shared */ +CAMLprim value caml_sys_const_shared_libraries(value unit) { - caml_stat_string dllname; - caml_stat_string res; - - dllname = caml_stat_strconcat(2, name, ".so"); - res = caml_search_in_path(path, dllname); - caml_stat_free(dllname); - return res; +#ifdef WITH_DYNAMIC_LINKING + return Val_true; +#else + return Val_false; +#endif } #ifdef WITH_DYNAMIC_LINKING + #ifdef __CYGWIN__ /* Use flexdll */ @@ -355,57 +306,6 @@ CAMLexport int caml_read_directory(char * dirname, struct ext_table * contents) return 0; } -/* Recover executable name from /proc/self/exe if possible */ - -char * caml_executable_name(void) -{ -#if defined(__linux__) - int namelen, retcode; - char * name; - struct stat st; - - /* lstat("/proc/self/exe") returns st_size == 0 so we cannot use it - to determine the size of the buffer. Instead, we guess and adjust. */ - namelen = 256; - while (1) { - name = caml_stat_alloc(namelen); - retcode = readlink("/proc/self/exe", name, namelen); - if (retcode == -1) { caml_stat_free(name); return NULL; } - if (retcode < namelen) break; - caml_stat_free(name); - if (namelen >= 1024*1024) return NULL; /* avoid runaway and overflow */ - namelen *= 2; - } - /* readlink() does not zero-terminate its result. - There is room for a final zero since retcode < namelen. */ - name[retcode] = 0; - /* Make sure that the contents of /proc/self/exe is a regular file. - (Old Linux kernels return an inode number instead.) */ - if (stat(name, &st) == -1 || ! 
S_ISREG(st.st_mode)) { - caml_stat_free(name); return NULL; - } - return name; - -#elif defined(__APPLE__) - unsigned int namelen; - char * name; - - namelen = 256; - name = caml_stat_alloc(namelen); - if (_NSGetExecutablePath(name, &namelen) == 0) return name; - caml_stat_free(name); - /* Buffer is too small, but namelen now contains the size needed */ - name = caml_stat_alloc(namelen); - if (_NSGetExecutablePath(name, &namelen) == 0) return name; - caml_stat_free(name); - return NULL; - -#else - return NULL; - -#endif -} - char *caml_secure_getenv (char const *var) { #ifdef HAS_SECURE_GETENV diff --git a/runtime/unix_executable.c b/runtime/unix_executable.c new file mode 100644 index 000000000000..567bf3b66d76 --- /dev/null +++ b/runtime/unix_executable.c @@ -0,0 +1,109 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ +/* */ +/* Copyright 1998 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. 
*/ +/* */ +/**************************************************************************/ + +/* caml_search_in_system_path and caml_executable_name live here as they're + shared with ../stdlib/header.c */ + +#define CAML_INTERNALS +#include "caml/memory.h" + +#ifdef __APPLE__ +#include +#endif + +#include +#include +#include + +caml_stat_string caml_search_in_system_path(const char * name) +{ + char * fullname; + char * path; + struct stat st; + size_t len = 0; + + for (const char *p = name; *p != 0; p++, len++) { + if (*p == '/') return NULL; + } + if ((path = getenv("PATH")) == NULL) return NULL; + /* len is now strlen(name) + strlen(path) + separator + terminator */ + len += strlen(path) + 2; + if ((fullname = (char *)caml_stat_alloc(len)) == NULL) return NULL; + while(1) { + char * p; + for (p = fullname; *path != 0 && *path != ':'; p++, path++) + if (p < fullname + len) *p = *path; + if (p != fullname && p < fullname + len) + *p++ = '/'; + for (char *q = (char *)name; *q != 0; p++, q++) + if (p < fullname + len) *p = *q; + *p = 0; + if (stat(fullname, &st) == 0 && S_ISREG(st.st_mode)) break; + if (*path == 0) { caml_stat_free(fullname); return NULL; } + path++; + } + return fullname; +} + +/* Recover executable name from /proc/self/exe if possible */ + +char * caml_executable_name(void) +{ +#if defined(__linux__) + int namelen, retcode; + char * name; + struct stat st; + + /* lstat("/proc/self/exe") returns st_size == 0 so we cannot use it + to determine the size of the buffer. Instead, we guess and adjust. */ + namelen = 256; + while (1) { + name = caml_stat_alloc(namelen); + retcode = readlink("/proc/self/exe", name, namelen); + if (retcode == -1) { caml_stat_free(name); return NULL; } + if (retcode < namelen) break; + caml_stat_free(name); + if (namelen >= 1024*1024) return NULL; /* avoid runaway and overflow */ + namelen *= 2; + } + /* readlink() does not zero-terminate its result. + There is room for a final zero since retcode < namelen. 
*/ + name[retcode] = 0; + /* Make sure that the contents of /proc/self/exe is a regular file. + (Old Linux kernels return an inode number instead.) */ + if (stat(name, &st) == -1 || ! S_ISREG(st.st_mode)) { + caml_stat_free(name); return NULL; + } + return name; + +#elif defined(__APPLE__) + unsigned int namelen; + char * name; + + namelen = 256; + name = caml_stat_alloc(namelen); + if (_NSGetExecutablePath(name, &namelen) == 0) return name; + caml_stat_free(name); + /* Buffer is too small, but namelen now contains the size needed */ + name = caml_stat_alloc(namelen); + if (_NSGetExecutablePath(name, &namelen) == 0) return name; + caml_stat_free(name); + return NULL; + +#else + return NULL; + +#endif +} diff --git a/runtime/win32.c b/runtime/win32.c index 346db16a5305..19b3000954c1 100644 --- a/runtime/win32.c +++ b/runtime/win32.c @@ -141,8 +141,12 @@ wchar_t * caml_decompose_path(struct ext_table * tbl, wchar_t * path) p = caml_stat_wcsdup(path); q = p; while (1) { + /* Don't include blank entries */ + while (*q == ';') + q++; for (n = 0; q[n] != 0 && q[n] != L';'; n++) /*nothing*/; - caml_ext_table_add(tbl, q); + if (n > 0) + caml_ext_table_add(tbl, q); q = q + n; if (*q == 0) break; *q = 0; @@ -210,15 +214,15 @@ CAMLexport wchar_t * caml_search_exe_in_path(const wchar_t * name) } } -wchar_t * caml_search_dll_in_path(struct ext_table * path, const wchar_t * name) +/* Primitive is defined here rather than sys.c as otherwise sys.c would need + duplicating for libcamlrun_non_shared */ +CAMLprim value caml_sys_const_shared_libraries(value unit) { - wchar_t * dllname; - wchar_t * res; - - dllname = caml_stat_wcsconcat(2, name, L".dll"); - res = caml_search_in_path(path, dllname); - caml_stat_free(dllname); - return res; +#ifdef WITH_DYNAMIC_LINKING + return Val_true; +#else + return Val_false; +#endif } #ifdef WITH_DYNAMIC_LINKING @@ -451,7 +455,10 @@ CAMLexport int caml_read_directory(wchar_t * dirname, #ifndef NATIVE_CODE -/* Set up a new thread for control-C 
emulation and termination */ +/* Set up a new thread for control-C emulation and termination. This mechanism + is used by the ocamlbrowser program which was part of the OCaml distribution + until OCaml 4.02 but now lives at https://github.com/garrigue/labltk. The + mechanism isn't known to be used by any other software. */ void caml_signal_thread(void * lpParam) { diff --git a/stdlib/.depend b/stdlib/.depend index d6c5987c0a09..c4c054ffe600 100644 --- a/stdlib/.depend +++ b/stdlib/.depend @@ -405,7 +405,6 @@ stdlib__Gc.cmi : gc.mli \ stdlib__Printexc.cmi stdlib__Hashtbl.cmo : hashtbl.ml \ stdlib__Sys.cmi \ - stdlib__String.cmi \ stdlib__Seq.cmi \ stdlib__Random.cmi \ stdlib__Obj.cmi \ @@ -416,7 +415,6 @@ stdlib__Hashtbl.cmo : hashtbl.ml \ stdlib__Hashtbl.cmi stdlib__Hashtbl.cmx : hashtbl.ml \ stdlib__Sys.cmx \ - stdlib__String.cmx \ stdlib__Seq.cmx \ stdlib__Random.cmx \ stdlib__Obj.cmx \ diff --git a/stdlib/Makefile b/stdlib/Makefile index 22614ad0372d..ecab2d587d2c 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -14,6 +14,7 @@ #************************************************************************** ROOTDIR = .. +SUBDIR_NAME = stdlib # NOTE: it is important that the OCAMLDEP variable is defined *before* # Makefile.common gets included, so that its local definition here # take precedence over its general shared definitions in Makefile.common. 
@@ -47,6 +48,9 @@ endif OPTCOMPILER=$(ROOTDIR)/ocamlopt$(EXE) CAMLOPT=$(OCAMLRUN) $(OPTCOMPILER) +# At present, only META is installed to the package directory +LIBNAME = stdlib + include StdlibModules OBJS=$(addsuffix .cmo,$(STDLIB_MODULES)) @@ -60,29 +64,21 @@ all: stdlib.cma std_exit.cmo $(HEADER_NAME) allopt: stdlib.cmxa std_exit.cmx opt.opt: allopt -INSTALL_STDLIB_META_DIR=$(DESTDIR)$(LIBDIR)/stdlib - .PHONY: install install:: - $(INSTALL_DATA) \ - stdlib.cma std_exit.cmo *.cmi "$(INSTALL_LIBDIR)" - $(MKDIR) "$(INSTALL_STDLIB_META_DIR)" - $(INSTALL_DATA) META "$(INSTALL_STDLIB_META_DIR)" + $(call INSTALL_ITEMS, stdlib.cma std_exit.cmo *.cmi, lib) + $(call INSTALL_ITEMS, META, lib, $(LIBNAME)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - *.cmt *.cmti *.mli *.ml *.ml.in \ - "$(INSTALL_LIBDIR)" + $(call INSTALL_ITEMS, *.cmt *.cmti *.mli *.ml *.ml.in, lib) endif - $(INSTALL_DATA) $(HEADER_NAME) "$(INSTALL_LIBDIR)/$(HEADER_NAME)" + $(call INSTALL_ITEMS, $(HEADER_NAME), lib) .PHONY: installopt installopt: installopt-default .PHONY: installopt-default installopt-default: - $(INSTALL_DATA) \ - stdlib.cmxa stdlib.$(A) std_exit.$(O) *.cmx \ - "$(INSTALL_LIBDIR)" + $(call INSTALL_ITEMS, stdlib.cmxa stdlib.$(A) std_exit.$(O) *.cmx, lib) MANGLING = $(filter true,$(SUFFIXING)) runtime-launch-info: tmpheader.exe @@ -122,7 +118,7 @@ HEADERLIBS = endif .INTERMEDIATE: tmpheader.exe -tmpheader.exe: header.$(O) +tmpheader.exe: header.$(O) libcamlrun.$(A) $(V_MKEXE)$(call MKEXE_VIA_CC,$@,$^ $(HEADERLIBS)) # Do not strip the header produced by cl ifneq "$(TOOLCHAIN)" "msvc" diff --git a/stdlib/filename.ml b/stdlib/filename.ml index c1af643c883d..9739d0ab731c 100644 --- a/stdlib/filename.ml +++ b/stdlib/filename.ml @@ -102,7 +102,9 @@ module Unix : SYSDEPS = struct String.ends_with ~suffix:suff name let temp_dir_name = - try Sys.getenv "TMPDIR" with Not_found -> "/tmp" + match Sys.getenv_opt "TMPDIR" with + | None | Some "" -> "/tmp" + | Some dir -> dir let quote 
= generic_quote "'\\''" let quote_command cmd ?stdin ?stdout ?stderr args = String.concat " " (List.map quote (cmd :: args)) diff --git a/stdlib/hashtbl.ml b/stdlib/hashtbl.ml index dee5f0fe94ba..363b0d5c0dd7 100644 --- a/stdlib/hashtbl.ml +++ b/stdlib/hashtbl.ml @@ -46,15 +46,24 @@ let flip_ongoing_traversal h = (* To pick random seeds if requested *) -let randomized_default = - let params = - try Sys.getenv "OCAMLRUNPARAM" with Not_found -> - try Sys.getenv "CAMLRUNPARAM" with Not_found -> "" in - String.contains params 'R' +(* The runtime stores the initial value of "R" in caml_runtime_randomized. We + choose to copy this initial value here and then keep them in sync in order to + avoid adding a C call to every call to Hashtbl.create. *) +external randomized : unit -> bool = "caml_runtime_is_randomized" [@@noalloc] +let randomized = Atomic.make (randomized ()) + +external randomize : unit -> unit = "caml_runtime_randomize" [@@noalloc] +let randomize () = + Atomic.set randomized true; + (* Update the runtime's value so that the result from Sys.runtime_parameters + includes "R". There is technically a race here where Hashtbl.create () + creates randomized hash tables, but Sys.runtime_parameters doesn't yet 
We choose not to care - Hashtbl.is_randomized will always + return the correct value, and making Sys.runtime_parameters always be in + sync would either add a C call to every Hashtbl.create call or would + introduce a complicated dependency cycle between Sys and Hashtbl *) + randomize () -let randomized = Atomic.make randomized_default - -let randomize () = Atomic.set randomized true let is_randomized () = Atomic.get randomized let prng_key = Domain.DLS.new_key Random.State.make_self_init diff --git a/stdlib/header.c b/stdlib/header.c index e42f9f78d513..7603098628ae 100644 --- a/stdlib/header.c +++ b/stdlib/header.c @@ -39,22 +39,49 @@ typedef wchar_t * argv_t; #define ITOT(i) ITOL(i) #define PATH_NAME L"%Path%" +/* The header is written to be able to cope with paths greater than MAX_PATH, + so undefine it to stop it being used in error. */ +#undef MAX_PATH + +#if defined(__MINGW32__) && defined(PATH_MAX) +/* mingw-w64 has a limits.h which defines PATH_MAX as an alias for MAX_PATH */ +#undef PATH_MAX +#endif + #if WINDOWS_UNICODE #define CP CP_UTF8 -/* The characters in RNTM will be converted from UTF-8 to UTF-16. Parasitically, - there could be 4 bytes in RNTM for every wchar_t in the actual value. */ -#define RNTM_ENCODING_LENGTH 4 #else #define CP CP_ACP #endif -/* mingw-w64 has a limits.h which defines PATH_MAX as an alias for MAX_PATH */ -#if !defined(PATH_MAX) -#define PATH_MAX MAX_PATH +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif + +#if __has_attribute(fallthrough) + #define fallthrough __attribute__ ((fallthrough)) +#else + #define fallthrough ((void) 0) #endif +/* The maximum representable path for any API function, after internal expansion + of \\?\ etc. is 32767 characters. PATH_MAX includes the terminator. 
*/ +#define PATH_MAX 0x8000 + +/* Initialised as the first statement of wmainCRTStartup */ +static HANDLE hProcessHeap; + +#define malloc(size) HeapAlloc(hProcessHeap, 0, (size)) +#define free(memblock) HeapFree(hProcessHeap, 0, (memblock)) + #define SEEK_END FILE_END +/* Initialised as the first statement of wmainCRTStartup */ +static HANDLE hProcessHeap; + +#define malloc(size) HeapAlloc(hProcessHeap, 0, (size)) +#define free(memblock) HeapFree(hProcessHeap, 0, (memblock)) + #define lseek(h, offset, origin) SetFilePointer((h), (offset), NULL, (origin)) typedef HANDLE file_descriptor; @@ -76,54 +103,51 @@ static BOOL WINAPI ctrl_handler(DWORD event) return FALSE; } -static int exec_file(wchar_t *file, wchar_t *cmdline) +static int exec_file(wchar_t *file, wchar_t *cmdline, STARTUPINFO *stinfo) { - wchar_t truename[MAX_PATH]; - STARTUPINFO stinfo; + LPWSTR truename = (LPWSTR)malloc(PATH_MAX * sizeof(WCHAR)); PROCESS_INFORMATION procinfo; - DWORD retcode; + DWORD retcode = ENOMEM; - if (SearchPath(NULL, file, L".exe", sizeof(truename)/sizeof(wchar_t), - truename, NULL)) { + if (truename && SearchPath(NULL, file, L".exe", PATH_MAX, truename, NULL)) { /* Need to ignore ctrl-C and ctrl-break, otherwise we'll die and take the underlying OCaml program with us! 
*/ SetConsoleCtrlHandler(ctrl_handler, TRUE); - stinfo.cb = sizeof(stinfo); - stinfo.lpReserved = NULL; - stinfo.lpDesktop = NULL; - stinfo.lpTitle = NULL; - stinfo.dwFlags = 0; - stinfo.cbReserved2 = 0; - stinfo.lpReserved2 = NULL; if (CreateProcess(truename, cmdline, NULL, NULL, TRUE, 0, NULL, NULL, - &stinfo, &procinfo)) { + stinfo, &procinfo)) { + free(truename); CloseHandle(procinfo.hThread); WaitForSingleObject(procinfo.hProcess, INFINITE); GetExitCodeProcess(procinfo.hProcess, &retcode); CloseHandle(procinfo.hProcess); ExitProcess(retcode); } else { - return ENOEXEC; + retcode = ENOEXEC; } } else { - return ENOENT; + retcode = ENOENT; } + + free(truename); + + return retcode; } static void write_error(const wchar_t *wstr, HANDLE hOut) { DWORD consoleMode, numwritten, len; - char str[MAX_PATH]; + char *str; if (GetConsoleMode(hOut, &consoleMode) != 0) { /* The output stream is a Console */ WriteConsole(hOut, wstr, lstrlen(wstr), &numwritten, NULL); } else { /* The output stream is redirected */ - len = - WideCharToMultiByte(CP, 0, wstr, lstrlen(wstr), str, sizeof(str), - NULL, NULL); - WriteFile(hOut, str, len, &numwritten, NULL); + len = WideCharToMultiByte(CP, 0, wstr, -1, NULL, 0, NULL, NULL); + str = (char *)malloc(len); + WideCharToMultiByte(CP, 0, wstr, -1, str, len, NULL, NULL); + /* len includes the terminator */ + WriteFile(hOut, str, len - 1, &numwritten, NULL); } } @@ -153,7 +177,6 @@ NORETURN static void exit_with_error(const wchar_t *wstr1, #include #endif #include -#include /* O_BINARY is defined in Gnulib, but is not POSIX */ #ifndef O_BINARY @@ -180,87 +203,18 @@ typedef char ** argv_t; #define unsafe_copy(dst, src, dstsize) strcpy(dst, src) #endif -#ifndef __CYGWIN__ - -/* Normal Unix search path function */ - -static char * searchpath(char * name) -{ - static char fullname[PATH_MAX + 1]; - char * path; - struct stat st; - - for (char *p = name; *p != 0; p++) { - if (*p == '/') return name; - } - path = getenv("PATH"); - if (path == NULL) 
return name; - while(1) { - char * p; - for (p = fullname; *path != 0 && *path != ':'; p++, path++) - if (p < fullname + PATH_MAX) *p = *path; - if (p != fullname && p < fullname + PATH_MAX) - *p++ = '/'; - for (char *q = name; *q != 0; p++, q++) - if (p < fullname + PATH_MAX) *p = *q; - *p = 0; - if (stat(fullname, &st) == 0 && S_ISREG(st.st_mode)) break; - if (*path == 0) return name; - path++; - } - return fullname; -} - -#else - -/* Special version for Cygwin32: takes care of the ".exe" implicit suffix */ - -static int file_ok(char * name) +/* caml_search_in_system_path uses caml_stat_alloc and caml_executable_name also + uses caml_stat_free */ +void *caml_stat_alloc(size_t size) { - int fd; - /* Cannot use stat() here because it adds ".exe" implicitly */ - fd = open(name, O_RDONLY); - if (fd == -1) return 0; - close(fd); - return 1; + return malloc(size); } -static char * searchpath(char * name) +void caml_stat_free(void *ptr) { - char * path, * fullname; - - path = getenv("PATH"); - fullname = malloc(strlen(name) + (path == NULL ? 
0 : strlen(path)) + 6); - /* 6 = "/" plus ".exe" plus final "\0" */ - if (fullname == NULL) return name; - /* Check for absolute path name */ - for (char *p = name; *p != 0; p++) { - if (*p == '/' || *p == '\\') { - if (file_ok(name)) return name; - strcpy(fullname, name); - strcat(fullname, ".exe"); - if (file_ok(fullname)) return fullname; - return name; - } - } - /* Search in path */ - if (path == NULL) return name; - while(1) { - char * p; - for (p = fullname; *path != 0 && *path != ':'; p++, path++) *p = *path; - if (p != fullname) *p++ = '/'; - strcpy(p, name); - if (file_ok(fullname)) return fullname; - strcat(fullname, ".exe"); - if (file_ok(fullname)) return fullname; - if (*path == 0) break; - path++; - } - return name; + free(ptr); } -#endif - NORETURN static void exit_with_error(const char *str1, const char *str2, const char *str3) @@ -272,7 +226,7 @@ NORETURN static void exit_with_error(const char *str1, exit(2); } -static int exec_file(const char *file, char * const argv[]) +static int exec_file(const char *file, char * const argv[], void *_stinfo) { return (execvp(file, argv) == -1 ? 
errno : 0); } @@ -292,14 +246,10 @@ static uint32_t read_size(const char *ptr) ((uint32_t) p[2] << 8) | p[3]; } -#ifndef RNTM_ENCODING_LENGTH -#define RNTM_ENCODING_LENGTH 1 -#endif - static char * read_runtime_path(file_descriptor fd, uint32_t *rntm_strlen) { char buffer[TRAILER_SIZE]; - static char runtime_path[PATH_MAX * RNTM_ENCODING_LENGTH]; + char *runtime_path; int num_sections; long ofs; @@ -319,12 +269,10 @@ static char * read_runtime_path(file_descriptor fd, uint32_t *rntm_strlen) ofs += read_size(buffer + 4); } if (*rntm_strlen == 0) return NULL; - /* The last character of runtime_path must be '\0', so RNTM must be strictly - less than PATH_MAX */ - if (*rntm_strlen >= PATH_MAX * RNTM_ENCODING_LENGTH) return NULL; + if ((runtime_path = (char *)malloc(*rntm_strlen + 1)) == NULL) return NULL; if (lseek(fd, -ofs, SEEK_END) == -1) return NULL; if (read(fd, runtime_path, *rntm_strlen) != *rntm_strlen) return NULL; - + runtime_path[*rntm_strlen] = 0; return runtime_path; } @@ -335,7 +283,8 @@ static char * read_runtime_path(file_descriptor fd, uint32_t *rntm_strlen) Decode rntm and search for a runtime (using argv0_dirname if non-NULL and required) and exec the first runtime found passing argv. */ NORETURN void search_and_exec_runtime(char_os *rntm, uint32_t rntm_bsz, - argv_t argv, char_os *argv0_dirname) + argv_t argv, char_os *argv0_dirname, + void *stinfo) { /* rntm_end points to the NUL "terminator" of rntm (_not_ the last character of the RNTM section */ @@ -356,7 +305,7 @@ NORETURN void search_and_exec_runtime(char_os *rntm, uint32_t rntm_bsz, NUL-terminated full path we can attempt to exec. */ if (rntm_bindir_end != rntm_end) *rntm_bindir_end = Directory_separator_character; - int status = exec_file(rntm, argv); + int status = exec_file(rntm, argv, stinfo); /* exec failed. For Disable mode, there's nothing else to be tried. For Fallback, if the failure was for any other reason than ENOENT then there is also nothing else to be tried. 
*/ @@ -370,7 +319,9 @@ NORETURN void search_and_exec_runtime(char_os *rntm, uint32_t rntm_bsz, /* Searching takes place first in the directory containing this executable, if it's known. */ if (argv0_dirname != NULL) { - char_os root[PATH_MAX]; + char_os *root = (char_os *)malloc((PATH_MAX + 1) * sizeof(char_os)); + if (root == NULL) + exit_with_error(T("Out of memory"), NULL, NULL); unsafe_copy(root, argv0_dirname, PATH_MAX); /* Ensure root ends with a directory separator. root_basename points to @@ -387,13 +338,13 @@ NORETURN void search_and_exec_runtime(char_os *rntm, uint32_t rntm_bsz, (rntm_end - rntm) is strlen_os(rntm). */ if ((rntm_end - rntm) <= PATH_MAX - (root_basename - root) - 1) { unsafe_copy(root_basename, rntm, PATH_MAX - (root_basename - root)); - if (exec_file(root, argv) != ENOENT) + if (exec_file(root, argv, stinfo) != ENOENT) exit_with_error(T("Cannot exec "), root, NULL); } } /* Otherwise, search in PATH */ - if (exec_file(rntm, argv) != ENOENT) + if (exec_file(rntm, argv, stinfo) != ENOENT) exit_with_error(T("Cannot exec "), rntm, NULL); } @@ -405,63 +356,189 @@ NORETURN void search_and_exec_runtime(char_os *rntm, uint32_t rntm_bsz, #ifdef _WIN32 +#undef RtlMoveMemory +void __declspec(dllimport) __stdcall RtlMoveMemory(void *Destination, + const void *Source, + size_t Length); + NORETURN void __cdecl wmainCRTStartup(void) { - wchar_t module[MAX_PATH]; - wchar_t truename[MAX_PATH]; + LPWSTR truename; + LPWSTR dirname; uint32_t rntm_strlen = 0, rntm_bsz = 0; char *runtime_path; - wchar_t wruntime_path[MAX_PATH], *dirname; + wchar_t *wruntime_path, *basename; HANDLE h; - if (GetModuleFileName(NULL, module, sizeof(module)/sizeof(wchar_t)) == 0) - exit_with_error(L"Out of memory", NULL, NULL); - - h = CreateFile(module, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, OPEN_EXISTING, 0, NULL); + hProcessHeap = GetProcessHeap(); + truename = (LPWSTR)malloc(PATH_MAX * sizeof(WCHAR)); + dirname = (LPWSTR)malloc(PATH_MAX * sizeof(WCHAR)); - /* 
read_runtime_path returns the actual size of RNTM, but the buffer returned - is guaranteed to have a null character following the final character of - RNTM. */ + if (truename == NULL || dirname == NULL + || GetModuleFileName(NULL, truename, PATH_MAX) == 0 + || GetFullPathName(truename, PATH_MAX, dirname, &basename) >= PATH_MAX) + exit_with_error(L"Out of memory", NULL, NULL); + /* GetFullPathName leaves basename pointing to the first character of the + basename, so setting that to NUL means the string pointed to by dirname + is the dirname of the currently running executable with a trailing + separator (although search_and_exec_runtime will check that anyway) */ + *basename = 0; + + /* Mark the HANDLE as inheritable so ocamlrun can use it */ + SECURITY_ATTRIBUTES sa; + sa.nLength = sizeof(sa); + sa.lpSecurityDescriptor = NULL; + sa.bInheritHandle = TRUE; + h = CreateFile(truename, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, + &sa, OPEN_EXISTING, 0, NULL); if (h == INVALID_HANDLE_VALUE || (runtime_path = read_runtime_path(h, &rntm_strlen)) == NULL - || (rntm_bsz = - MultiByteToWideChar(CP, 0, runtime_path, rntm_strlen + 1, - wruntime_path, - sizeof(wruntime_path)/sizeof(wchar_t))) == 0 - || GetFullPathName(module, sizeof(truename)/sizeof(wchar_t), truename, - &dirname) >= sizeof(truename)/sizeof(wchar_t)) + || (wruntime_path = + (wchar_t *)malloc((rntm_strlen + 1) * sizeof(wchar_t))) == NULL + || (rntm_bsz = MultiByteToWideChar(CP, 0, runtime_path, rntm_strlen + 1, + wruntime_path, rntm_strlen + 1)) == 0) exit_with_error(NULL, truename, L" not found or is not a bytecode executable file"); - CloseHandle(h); - - if (dirname) { - /* GetFullPathName leaves dirname pointing to the first character of the - basename, so setting that to NUL means the string pointed to by truename - is the dirname of the currently running executable with a trailing - separator (although search_and_exec_runtime will check that anyway) */ - *dirname = 0; - dirname = truename; + 
free(runtime_path); + free(truename); + STARTUPINFO stinfo; + /* Retrieve the existing STARTUPINFO structure - however this header was + invoked is morally how we should invoke ocamlrun, but we also need to + set-up or augment the cbReserved2 / lpReserved2 members in order to pass + the HANDLE h to ocamlrun as a CRT fd. The cloexec.ml test checks that + existing fds are passed through successfully. The use of lpReserved2 by the + CRT can be seen in the Universal CRT sources info exec/spawnv.cpp for the + code which sets the buffer up and in lowio/ioinit.cpp which reads the + buffer provided to the process. The semantics of this buffer are unchanged + since the very beginning of Windows NT. + It is a relatively well-documented "trick" to be able to pass up to 64KiB + of information to a new process using lpReserved2, on condition that the + data respects the CRT's requirements. The CRT processes lpReserved2 if it + is not NULL and if cbReserved2 is non-zero - it performs no further + checking beyond that. Applications can therefore embed additional data by + setting cbReserved2 to the actual size of lpReserved2 and simply ensuring + that the first 4 bytes pointed to by lpReserved2 are zero. + Cygwin uses this mechanism when invoking processes to allow the Cygwin DLL + to pick up the required information about the caller, amongst other things + to implement fork (it's also used as part of argument passing). + The code below must therefore cater for three cases: + 1. cbReserved2 == 0 / lpReserved2 == NULL, in which case the structure must + be created + 2. cbReserved2 > 0 but there are fewer than 3 fds in the structure, in + which case empty handles must be added so that our HANDLE is fd 3 + 3. 
cbReserved2 > 0 and there are already 3 or more fds in the structure, in + which case our HANDLE is appended to the end of the structure */ + GetStartupInfo(&stinfo); + + /* This header avoids the CRT to keep its size down - the Windows API doesn't + have anything sprintf-like, however, the largest fd-number fits comfortably + within a 16-bit wide character and we know that it will never be zero - the + number of the fd is therefore passed to ocamlrun as a single wide-character + string where the code-point represents the fd. + Nemo nunc te poteste servare. */ + WCHAR fd[2] = {0, 0}; + + /* Match the CRT's check - ignore the existing values if either cbReserved2 is + zero _or_ lpReserved2 is NULL */ + if (stinfo.cbReserved2 > 0 && stinfo.lpReserved2 == NULL) + stinfo.cbReserved2 = 0; + + int existing_count = 0; + /* Work out the fd number for h */ + if (stinfo.cbReserved2 > 0) { + existing_count = *(int *)stinfo.lpReserved2; + fd[0] = existing_count; + /* If there is a structure present, but it has no fds, discard it. */ + if (existing_count == 0) + stinfo.cbReserved2 = 0; + } + /* Allow for the standard handles */ + if (fd[0] < 3) + fd[0] = 3; + + WORD buffer_size = sizeof(int) + (fd[0] + 1) * (1 + sizeof(HANDLE)); + LPBYTE buffer = (LPBYTE)malloc(buffer_size); + + /* Store the total number of handles */ + *(int *)buffer = fd[0] + 1; + + /* Copy the existing flags and HANDLEs */ + if (stinfo.cbReserved2 > 0) { + RtlMoveMemory(buffer + sizeof(int), stinfo.lpReserved2 + sizeof(int), + existing_count); + RtlMoveMemory(buffer + sizeof(int) + fd[0] + 1, + stinfo.lpReserved2 + sizeof(int) + existing_count, + existing_count * sizeof(HANDLE)); + } + + /* Pointers to the next slot for flags and the next slot for a HANDLE */ + LPBYTE osflags = + buffer + sizeof(int) + existing_count; + LPHANDLE oshandles = + (LPHANDLE)(buffer + sizeof(int) + fd[0] + 1 + + existing_count * sizeof(HANDLE)); + + /* Ensure the standard fds are populated. 
Unrolled to prevent cl requiring the + memset intrinsic. */ + switch (existing_count) { + case 0: + *osflags++ = 0; + *oshandles++ = INVALID_HANDLE_VALUE; + fallthrough; + case 1: + *osflags++ = 0; + *oshandles++ = INVALID_HANDLE_VALUE; + fallthrough; + case 2: + *osflags++ = 0; + *oshandles++ = INVALID_HANDLE_VALUE; } - search_and_exec_runtime(wruntime_path, rntm_bsz, GetCommandLine(), dirname); + /* Add h to the structure */ + *osflags = 1; + *oshandles = h; + + stinfo.cbReserved2 = buffer_size; + stinfo.lpReserved2 = buffer; + + SetEnvironmentVariable(L"__OCAML_EXEC_FD", fd); + search_and_exec_runtime(wruntime_path, rntm_bsz, + GetCommandLine(), dirname, &stinfo); } #else +/* Borrowed from libcamlrun */ +char * caml_search_in_system_path(const char *); +char * caml_executable_name(void); + int main(int argc, char *argv[]) { char *truename, *runtime_path, *argv0_dirname; uint32_t rntm_strlen = 0; int fd; - truename = searchpath(argv[0]); + if (argc < 1) + exit_with_error("Unable to load bytecode image", NULL, NULL); + + truename = caml_executable_name(); + if (truename == NULL) truename = caml_search_in_system_path(argv[0]); + if (truename == NULL) truename = argv[0]; fd = open(truename, O_RDONLY | O_BINARY); if (fd == -1 || (runtime_path = read_runtime_path(fd, &rntm_strlen)) == NULL) exit_with_error(NULL, truename, " not found or is not a bytecode executable file"); - close(fd); + + size_t truename_len = strlen(truename); + char *value = (char *)malloc(10 + 1 + truename_len + 1); + snprintf(value, 11, "%u,", fd); + strcat(value, truename); +#ifdef HAS_SETENV_UNSETENV + setenv("__OCAML_EXEC_FD", value, 1); +#else +#error "Require a way to set environment variables" +#endif #ifdef HAS_LIBGEN_H argv0_dirname = dirname(strdup(truename)); @@ -469,11 +546,11 @@ int main(int argc, char *argv[]) argv0_dirname = NULL; #endif - argv[0] = truename; /* read_runtime_path returns the actual size of RNTM, but the buffer returned is guaranteed to have a null character 
following the final character of RNTM. */ - search_and_exec_runtime(runtime_path, rntm_strlen + 1, argv, argv0_dirname); + search_and_exec_runtime(runtime_path, rntm_strlen + 1, argv, argv0_dirname, + NULL); } #endif /* defined(_WIN32) */ diff --git a/testsuite/Makefile b/testsuite/Makefile index 4cad7738954a..a8302c6922dc 100644 --- a/testsuite/Makefile +++ b/testsuite/Makefile @@ -166,7 +166,7 @@ all: @$(MAKE) --no-print-directory report .PHONY: new-without-report -new-without-report: +new-without-report: | tests @rm -f $(failstamp) @($(ocamltest) -find-test-dirs tests | while $(IFS_LINE) read -r dir; do \ echo Running tests from \'$$dir\' ... ; \ @@ -185,7 +185,7 @@ check-failstamp: fi .PHONY: all-% -all-%: +all-%: | tests @for dir in tests/$**; do \ $(MAKE) --no-print-directory exec-one DIR=$$dir; \ done 2>&1 | tee $(TESTLOG) @@ -223,7 +223,7 @@ all-%: J_ARGUMENT = $(filter-out -j,$(filter -j%,$(MAKEFLAGS))) .PHONY: parallel-% -parallel-%: +parallel-%: | tests @echo | parallel >/dev/null 2>/dev/null \ || (echo "Unable to run the GNU parallel tool;";\ echo "You should install it before using the parallel* targets.";\ @@ -327,3 +327,25 @@ distclean: clean report: @if [ ! -f $(TESTLOG) ]; then echo "No $(TESTLOG) file."; exit 1; fi @$(AWK) -f ./summarize.awk < $(TESTLOG) + +# When an archive is created by git-archive, this is expanded to the SHA of the +# commit. The filter-out causes this to be blank if it's run when the Format +# tag has not been expanded +GIT_ARCHIVE_SHA = $(filter-out ormat%, $Format:%H$ ) + +tests: + @echo "There are no tests in the tests directory!" 
+ @echo "This happens when the sources of OCaml are extracted from a \ +tarball" + @echo "generated by git-archive (which includes those generated by \ +GitHub)" + @head -n 1 $(ROOTDIR)/VERSION | grep -Fq + || \ + echo "Note that the release tarballs published at \ +https://caml.inria.fr/pub/distrib/ include all the manual and testsuite sources" + @$(if $(GIT_ARCHIVE_SHA),,false) + @echo "The required files are in commit $(GIT_ARCHIVE_SHA), for \ +example:" + @echo " git clone https://github.com/ocaml/ocaml \ +--revision $(GIT_ARCHIVE_SHA) --depth 1 git-sources" + @echo " mv git-sources/$@ ." + @false diff --git a/testsuite/in_prefix/Makefile.test b/testsuite/in_prefix/Makefile.test index 16ffe3a30dd6..1f7b410815e0 100644 --- a/testsuite/in_prefix/Makefile.test +++ b/testsuite/in_prefix/Makefile.test @@ -63,9 +63,6 @@ test-in-prefix: $(DRIVER) ../tools/main_in_c.$(O) ../tools/poisonedruntime$(EXE) @rm -rf poisoned-runtime @rm -f test-ocamlrun* -SCRUB_ENV = \ - CAML_LD_LIBRARY_PATH OCAMLLIB CAMLLIB OCAMLPARAM OCAMLRUNPARAM CAMLRUNPARAM - # Generates --without-$(1) if $(2) = false or --with-$(1) otherwise bool_to_with = --with$(if $(filter false,$(2)),out)-$(strip $(1)) diff --git a/testsuite/tests/backtrace/backtrace2.reference b/testsuite/tests/backtrace/backtrace2.reference index dcd377bef5f5..0c3d90412ea5 100644 --- a/testsuite/tests/backtrace/backtrace2.reference +++ b/testsuite/tests/backtrace/backtrace2.reference @@ -35,7 +35,7 @@ Uncaught exception Invalid_argument("index out of bounds") Raised by primitive operation at Backtrace2.run in file "backtrace2.ml", line 62, characters 14-22 test_Not_found Uncaught exception Not_found -Raised at Stdlib__Hashtbl.find in file "hashtbl.ml", line 584, characters 13-28 +Raised at Stdlib__Hashtbl.find in file "hashtbl.ml", line 593, characters 13-28 Called from Backtrace2.test_Not_found in file "backtrace2.ml", line 43, characters 9-42 Re-raised at Backtrace2.test_Not_found in file "backtrace2.ml", line 43, characters 
61-70 Called from Backtrace2.run in file "backtrace2.ml", line 62, characters 11-23 @@ -50,7 +50,7 @@ Called from CamlinternalLazy.do_force_block in file "camlinternalLazy.ml", line Re-raised at CamlinternalLazy.do_force_block in file "camlinternalLazy.ml", line 56, characters 4-11 Called from Backtrace2.run in file "backtrace2.ml", line 62, characters 11-23 Uncaught exception Not_found -Raised at Stdlib__Hashtbl.find in file "hashtbl.ml", line 584, characters 13-28 +Raised at Stdlib__Hashtbl.find in file "hashtbl.ml", line 593, characters 13-28 Called from Backtrace2.test_lazy.exception_raised_internally in file "backtrace2.ml", line 50, characters 8-41 Re-raised at CamlinternalLazy.do_force_block.(fun) in file "camlinternalLazy.ml", line 54, characters 43-50 Called from CamlinternalLazy.do_force_block in file "camlinternalLazy.ml", line 49, characters 17-27 diff --git a/testsuite/tests/lib-unix/common/cloexec.ml b/testsuite/tests/lib-unix/common/cloexec.ml index 3cb1d3a6ff3e..7e97f9138f70 100644 --- a/testsuite/tests/lib-unix/common/cloexec.ml +++ b/testsuite/tests/lib-unix/common/cloexec.ml @@ -1,6 +1,6 @@ (* TEST include unix; - readonly_files = "fdstatus_aux.c fdstatus_main.ml"; + readonly_files = "fdstatus_aux.c fdstatus_main.ml cloexec_leap.ml"; hasunix; { @@ -9,6 +9,9 @@ program = "${test_build_directory}/fdstatus.exe"; all_modules = "fdstatus_aux.c fdstatus_main.ml"; ocamlc.byte; + program = "${test_build_directory}/cloexec_leap.exe"; + all_modules = "cloexec_leap.ml"; + ocamlc.byte; program = "${test_build_directory}/cloexec.byte"; all_modules = "fdstatus_aux.c cloexec.ml"; ocamlc.byte; @@ -29,6 +32,9 @@ program = "${test_build_directory}/fdstatus.exe"; all_modules = "fdstatus_aux.c fdstatus_main.ml"; ocamlopt.byte; + program = "${test_build_directory}/cloexec_leap.exe"; + all_modules = "cloexec_leap.ml"; + ocamlopt.byte; program = "${test_build_directory}/cloexec.opt"; all_modules = "fdstatus_aux.c cloexec.ml"; ocamlopt.byte; @@ -49,7 +55,7 @@ external 
fd_of_file_descr : Unix.file_descr -> int = "caml_fd_of_filedescr" let string_of_fd fd = Int.to_string (fd_of_file_descr fd) -let status_checker = "fdstatus.exe" +let status_checker = "cloexec_leap.exe" let _ = let f0 = @@ -86,18 +92,19 @@ let _ = seen on slower machines, where the test begins running before this call has happened, and which results in tmp.txt still being locked when fdstatus_main.ml tries to delete it, leading to a "Permission denied" - exception. To prevent this, lock.txt is created and locked for writing - by this process. If the checker then sees that lock.txt exists, it - attempts to acquire a write lock on it, which will succeed only after - this process has completely exited and its lock has been automatically - released via process termination. + exception. To prevent this, lock1.txt is created and locked for writing + by this process, and then lock2.txt similarly by cloexec_leap.ml. If + the checker then sees that these exist, it attempts to acquire write + locks on them, which will succeed only after these processes have both + completely exited and their locks have been automatically released via + process termination. This dance is strictly done on native on Windows only, because execv hanging on to open files in this way with a Unix kernel is a very serious misimplementation of execv! 
*) if Sys.win32 then let lock = - Unix.(openfile "lock.txt" [O_WRONLY; O_CREAT; - O_TRUNC; O_CLOEXEC] 0o600) in + Unix.(openfile "lock1.txt" [O_WRONLY; O_CREAT; + O_TRUNC; O_CLOEXEC] 0o600) in Unix.lockf lock Unix.F_LOCK 0 in Unix.execv (Filename.concat Filename.current_dir_name status_checker) diff --git a/testsuite/tests/lib-unix/common/cloexec_leap.ml b/testsuite/tests/lib-unix/common/cloexec_leap.ml new file mode 100644 index 000000000000..2def123b7575 --- /dev/null +++ b/testsuite/tests/lib-unix/common/cloexec_leap.ml @@ -0,0 +1,21 @@ +let status_checker = "fdstatus.exe" + +let _ = + let args = Array.copy Sys.argv in + let image = Filename.concat Filename.current_dir_name status_checker in + args.(0) <- status_checker; + if Sys.argv.(1) = "execv" then + let () = + (* As in cloexec.ml, on Windows take out a write lock on a file so that + fdstatus_main.ml can be sure that both ancestor processes have actually + terminated before it tries to delete tmp.txt *) + if Sys.win32 then + let lock = + Unix.(openfile "lock2.txt" [O_WRONLY; O_CREAT; + O_TRUNC; O_CLOEXEC] 0o600) in + Unix.lockf lock Unix.F_LOCK 0 in + Unix.execv image args + else + let pid = + Unix.create_process image args Unix.stdin Unix.stdout Unix.stderr in + ignore (Unix.waitpid [] pid) diff --git a/testsuite/tests/lib-unix/common/fdstatus_main.ml b/testsuite/tests/lib-unix/common/fdstatus_main.ml index 6e881fa7f46e..f92ef7db9426 100644 --- a/testsuite/tests/lib-unix/common/fdstatus_main.ml +++ b/testsuite/tests/lib-unix/common/fdstatus_main.ml @@ -3,8 +3,8 @@ external delete_on_close : string -> unit = "caml_win32_delete_on_close" let () = if Sys.win32 then - (* Ensure the ancestor process has definitely terminated (and therefore - closed its handles to tmp.txt) *) + (* Ensure both ancestor processes have definitely terminated (and therefore + closed their handles to tmp.txt) *) let wait_until file = if Sys.file_exists file then let fd = Unix.openfile file [O_RDWR] 0o600 in @@ -12,7 +12,7 @@ let () = 
Unix.close fd; Sys.remove file in - wait_until "lock.txt" + List.iter wait_until ["lock1.txt"; "lock2.txt"] let () = (* Windows virus scanning can easily get in the way here on slower VMs. When diff --git a/testsuite/tests/tool-toplevel/pr6468.compilers.reference b/testsuite/tests/tool-toplevel/pr6468.compilers.reference index 297306afd4b0..3a523d6f3d75 100644 --- a/testsuite/tests/tool-toplevel/pr6468.compilers.reference +++ b/testsuite/tests/tool-toplevel/pr6468.compilers.reference @@ -10,5 +10,5 @@ Exception: Not_found. Raised at f in file "//toplevel//", line 2, characters 11-26 Called from g in file "//toplevel//", line 1, characters 11-15 Called from in file "//toplevel//", line 1, characters 0-4 -Called from Topeval.load_lambda in file "toplevel/byte/topeval.ml", line 93, characters 4-14 +Called from Topeval.load_lambda in file "toplevel/byte/topeval.ml", line 103, characters 4-14 diff --git a/testsuite/tests/tool-toplevel/pr9701.compilers.reference b/testsuite/tests/tool-toplevel/pr9701.compilers.reference index f1c15be91634..03ad45abb437 100644 --- a/testsuite/tests/tool-toplevel/pr9701.compilers.reference +++ b/testsuite/tests/tool-toplevel/pr9701.compilers.reference @@ -1,4 +1,4 @@ Exception: Failure "test". 
Raised at Stdlib.failwith in file "stdlib.ml", line 29, characters 17-33 Called from in file "pr9701.ml", line 5, characters 9-16 -Called from Topeval.load_lambda in file "toplevel/byte/topeval.ml", line 93, characters 4-14 +Called from Topeval.load_lambda in file "toplevel/byte/topeval.ml", line 103, characters 4-14 diff --git a/testsuite/tools/testBytecodeBinaries.ml b/testsuite/tools/testBytecodeBinaries.ml index 81532a5de933..ad0718427a30 100644 --- a/testsuite/tools/testBytecodeBinaries.ml +++ b/testsuite/tools/testBytecodeBinaries.ml @@ -31,10 +31,6 @@ let run config env = let bindir = Environment.bindir env in Format.printf "\nTesting bytecode binaries in %a\n" (Environment.pp_path env) bindir; - let ocamlrun = Environment.ocamlrun env in - let exec_magic = - Environment.run_process env ocamlrun ["-M"] - in let test_binary failed binary = if String.starts_with ~prefix:"ocaml" binary || String.starts_with ~prefix:"flexlink" binary then @@ -161,32 +157,9 @@ let run config env = be likely distinct from the behaviour of any of the distribution's tools when called with -M. *) let without_exe = Filename.remove_extension binary in - let (this_exit_code, _) as this = - let fails = not (String.contains without_exe '.') in + let _exit_code, _output = Environment.run_process - ~fails env program ~argv0:without_exe ["-M"] - in - if this_exit_code = 0 then - if this = exec_magic then - let (that_exit_code, _) as that = - Environment.run_process - ~fails:true env program ~argv0:binary ["-M"] - in - if this = that then - Harness.fail_because - "Neither %s nor %s seem to load the bytecode image" - without_exe binary - else if that_exit_code = 0 then - Harness.fail_because - "%s is not expected to return with exit code 0" - binary - else if not (String.contains without_exe '.') then - Harness.fail_because - "%s is not expected to return the exec magic number!" 
- without_exe - else () (* Expected outcome was the exec magic number *) - else () (* Expected outcome is a zero exit code *) - else () (* Expected outcome is a non-zero exit code *) + ~fails:true env program ~argv0:without_exe ["-M"] in () end; failed | _ -> diff --git a/testsuite/tools/testDynlink.ml b/testsuite/tools/testDynlink.ml index ad6611eff343..74076270a3f5 100644 --- a/testsuite/tools/testDynlink.ml +++ b/testsuite/tools/testDynlink.ml @@ -107,10 +107,7 @@ let () = | None -> (* Systems configured with --disable-shared can't load bytecode libraries which need C stubs *) - if (Sys.cygwin && mode = Native && List.mem "unix" libraries) - || (not Config.supports_shared_libraries && has_c_stubs) then - (* cf. ocaml/flexdll#146 - Cygwin's natdynlink can't load - unix.cmxs *) + if not Config.supports_shared_libraries && has_c_stubs then 2 else 0 @@ -124,19 +121,6 @@ let () = Harness.fail_because "%s is expected to return with exit code %d" test_program expected_exit_code; in - let test_libraries_in_prog ?expected_exit_code env libraries = - if mode = Native && List.mem "threads" libraries then - (* cf. 
ocaml/ocaml#12250 - no threads.cmxs *) - let threads_plugin = - Environment.in_libdir env (Filename.concat "threads" "threads.cmxs") - in - if Sys.file_exists threads_plugin then - Harness.fail_because "threads.cmxs is not expected to exist" - else - () - else - test_libraries_in_prog ?expected_exit_code env libraries - in let not_dynlink l = not (List.mem "dynlink" l) in let files, re_compile = compile_test_program () in let expected_exit_code = diff --git a/testsuite/tools/testLinkModes.ml b/testsuite/tools/testLinkModes.ml index 861941ce3f4f..71f05a7d6d7f 100644 --- a/testsuite/tools/testLinkModes.ml +++ b/testsuite/tools/testLinkModes.ml @@ -300,21 +300,13 @@ let test_runs usr_bin_sh test_program_path test_program Success {executable_name = test_program_path; argv0 = test_program_path} | Tendered {header = Header_exe; _} -> - if argv0_not_ocaml then - if Sys.win32 then - (* stdlib/header.c will find ocamlrun (because it effectively - uses caml_executable_name) but fails to hand off the bytecode - image, which causes ocamlrun to exit with code 127 *) - Fail 127 - else - (* stdlib/header.c will fail to find ocamlrun, because it never - uses caml_executable_name and so will either fail to find the - executable or will identify that it is not a bytecode - executable. Somewhat confusingly, it exits with code 2 *) - Fail 2 - else if Sys.win32 then - (* stdlib/header.c correctly preserves argv[0] for Windows *) - Success {executable_name = test_program_path; argv0} + if argv0_not_ocaml + && Harness.no_caml_executable_name then + (* stdlib/header.c will fail to find ocamlrun because + caml_executable_name isn't implemented so will either fail to + find the executable or will identify that it is not a bytecode + executable. 
Somewhat confusingly, it exits with code 2 *) + Fail 2 else if Harness.no_caml_executable_name && config.has_relative_libdir <> None then (* Without caml_executable_name, ocamlrun will be forced to @@ -322,9 +314,13 @@ let test_runs usr_bin_sh test_program_path test_program which will fail. *) Fail 134 else - (* stdlib/header.c does not preserve argv[0] for Unix *) - Success {executable_name = argv0_resolved; - argv0 = argv0_resolved} + let executable_name = + if Harness.no_caml_executable_name then + argv0_resolved + else + test_program_path + in + Success {executable_name; argv0} | Custom -> if Harness.no_caml_executable_name then if argv0_not_ocaml then @@ -552,22 +548,13 @@ let compile_test usr_bin_sh config env test test_program description = ~clibs:["-lcomprmarsh"; "-lunixnat"; Config.compression_c_libraries] ~linker_exit_code ["-output-obj"] | Output_complete_obj(C_ocamlc, Static) -> - (* At the moment, the partial linker will pass -lws2_32 and -ladvapi32 - on to the partial linker on mingw-w64 which causes a failure. Until - this is fixed, pass the libraries manually, using -noautolink. *) - f ~clibs:[] - ["-output-complete-obj"; "-noautolink"; "-cclib"; "-lunixbyt"] + f ~clibs:[] ["-output-complete-obj"] | Output_complete_obj(C_ocamlc, Shared) -> - (* The partial linker doesn't correctly process - -runtime-variant _shared, as the .so gets passed to the partial - linker. On macOS, this causes a warning; on other systems, it's an - error. *) - let compilation_exit_code = fails_if (Config.system <> "macosx") in (* Shared compilation isn't available on native Windows and fails on Cygwin *) let linker_exit_code = fails_if (Sys.win32 || Sys.cygwin) in - f ~use_shared_runtime:true ~clibs:[] ~compilation_exit_code - ~linker_exit_code ["-output-complete-obj"] + f ~use_shared_runtime:true ~clibs:[] ~linker_exit_code + ["-output-complete-obj"] | Output_complete_obj(C_ocamlopt, Static) -> let linker_exit_code = (* cf. 
ocaml/ocaml#13692 - linking fails on ppc64 *) @@ -576,21 +563,18 @@ let compile_test usr_bin_sh config env test test_program description = else 0 in - (* At the moment, the partial linker will pass -lzstd to ld -r which - will (normally) fail). Until this is done, pass the libraries - manually, using -noautolink. *) f ~mode:Native ~clibs:[Config.compression_c_libraries] - ~linker_exit_code - ["-output-complete-obj"; "-noautolink"; "-cclib"; "-lunixnat"; - "-cclib"; "-lcomprmarsh"] + ~linker_exit_code ["-output-complete-obj"] | Output_complete_obj(C_ocamlopt, Shared) -> - (* ocamlopt allows the .so to be passed to the partial linker which - fails with GNU ld, but not with the macOS linker *) - let compilation_exit_code = fails_if (Config.system <> "macosx") in - f ~mode:Native ~use_shared_runtime:true - ~compilation_exit_code ~clibs:[Config.compression_c_libraries] - ["-output-complete-obj"; "-noautolink"; "-cclib"; "-lunixnat"; - "-cclib"; "-lcomprmarsh"] + (* cf. ocaml/ocaml#13693 - on Fedora/RHEL, this executable + segfaults *) + let may_segfault = List.mem Config.architecture ["s390x"; "riscv"] in + (* Shared compilation isn't available on native Windows and fails on + Cygwin *) + let linker_exit_code = fails_if (Sys.win32 || Sys.cygwin) in + f ~mode:Native ~use_shared_runtime:true ~may_segfault + ~linker_exit_code ~clibs:[Config.compression_c_libraries] + ["-output-complete-obj"] | Output_complete_exe Static -> f ~calls_linker:true ["-output-complete-exe"] | Output_complete_exe Shared -> @@ -616,7 +600,7 @@ let compile_test usr_bin_sh config env test test_program description = test_program_path in let with_unix = (Config.supports_shared_libraries || not tendered) in - let is_randomized = false in + let is_randomized = Environment.is_renamed env in let verbose = Environment.verbose env in write_test_program ~verbose ~is_randomized ~with_unix description; let options = @@ -653,6 +637,12 @@ let compile_test usr_bin_sh config env test test_program description = let 
args = "-I" :: "+compiler-libs" :: Harness.lib mode "ocamlcommon" :: args in + let args = + if is_randomized then + "-set-runtime-default" :: "R" :: args + else + args + in let args = if verbose then "-verbose" :: args diff --git a/testsuite/tools/testRelocation.ml b/testsuite/tools/testRelocation.ml index 14a854f8629e..0102e7facdeb 100644 --- a/testsuite/tools/testRelocation.ml +++ b/testsuite/tools/testRelocation.ml @@ -178,8 +178,7 @@ let libdir_rules config file = else if List.mem ext [".cma"; ".cmo"; ".cmt"; ".cmti"] then let stdlib = (* via Config.standard_library *) config.has_relative_libdir = None - && List.mem basename ["config.cmt"; "config_main.cmt"; - "ocamlcommon.cma"] in + && (basename = "config.cmt" || basename = "ocamlcommon.cma") in (* The compiler's artefacts are all compiled with -g *) (~stdlib, ~ocaml_debug:true, ~c_debug:false, ~s:false) else if ext = ".cmxs" then diff --git a/testsuite/tools/testToplevel.ml b/testsuite/tools/testToplevel.ml index 699ead0dd8b5..6d2b9235aedc 100644 --- a/testsuite/tools/testToplevel.ml +++ b/testsuite/tools/testToplevel.ml @@ -34,27 +34,6 @@ let run config env mode = (* dynlink.cmxs does not exist, for obvious reasons, but we can check loading the library in ocamlnat "works". *) "cmxa" - else if library = "threads" then - let threads_plugin = - let plugin = Filename.concat "threads" "threads.cmxs" in - Environment.in_libdir env plugin - in - if Sys.file_exists threads_plugin then - Harness.fail_because - "threads.cmxs is not expected to exist" - else if Sys.win32 then - (* cf. note in ocaml/ocaml#13520 - threads.cmxa is - correctly compiled assuming winpthreads is statically - in the same image (so without defining - WINPTHREADS_USE_DLLIMPORT), but this is incorrect for - threads.cmxs, as threads.cmxs may load more than 2GiB - away from the main executable. For native Windows, it's - not possible to rely on ocamlnat's automatic - cmxa -> cmxs recompilation. *) - "cmxs" - else - (* cf. 
ocaml/ocaml#12250 - no threads.cmxs *) - "cmxa" else "cmxs" | Bytecode -> @@ -77,12 +56,7 @@ let run config env mode = let expected_exit_code = (* Systems configured with --disable-shared can't load bytecode libraries which need C stubs *) - if Sys.cygwin && mode = Native && List.mem "unix" libraries - || Sys.win32 && mode = Native && List.mem "threads" libraries - || has_c_stubs && not Config.supports_shared_libraries then - (* cf. ocaml/flexdll#146 - Cygwin's ocamlnat can't load unix.cmxs and - the lines above will have triggered native Windows being unable to - load threads.cmxs *) + if has_c_stubs && not Config.supports_shared_libraries then 125 else 0 diff --git a/testsuite/tools/test_ld_conf.ml b/testsuite/tools/test_ld_conf.ml index 81bbc96e8a2b..fd6b8a68da25 100644 --- a/testsuite/tools/test_ld_conf.ml +++ b/testsuite/tools/test_ld_conf.ml @@ -73,6 +73,8 @@ let tests config env = with Invalid_argument _ -> libdir in let (/) = Filename.concat in let data = [ + (* Blank line - should be ignored on all platforms *) + "", "", None; (* Root directory (both forms) preserved *) "/", "/", None; "//", "//", None; @@ -101,51 +103,20 @@ let tests config env = in List.fold_left fold ([], [], []) (List.rev data) in + let main_outcome = List.tl main_outcome in + let main_outcome_cr = List.tl main_outcome_cr in let tests = (* Various test lines above all fed via ld.conf in the Standard Library *) - let outcome = - (* Known issue: Windows strips out the blank entries in the search path - (somewhat counterintuitively!) *) - if Sys.win32 then - main_outcome - else - "." 
:: main_outcome - in [{base with description = "Base ld.conf test"; - stdlib = "" :: main; - outcome = if_ld_conf_found outcome}] in + stdlib = main; + outcome = if_ld_conf_found main_outcome}] in let tests = (* As first, but with the same entries in CAML_LD_LIBRARY_PATH too *) - let stdlib = - if Sys.win32 then - (* Known issue: Windows ignores empty entries in the search path, and - it's slightly easier to test this only once in this test *) - main - else - "" :: main - in - (* Part of the outcome from ld.conf *) - let outcome_ld_conf = - if Sys.win32 then - main_outcome - else - "." :: main_outcome - in - (* Part of the outcome from CAML_LD_LIBRARY_PATH *) - let outcome_caml_ld_library_path = - if Sys.win32 then - (* No blank entry at the start: Windows returns the same entries *) - main - else - (* Unix displays "." for the blank, but otherwise returns the same - entries *) - "." :: main - in {base with description = "Base ld.conf + CAML_LD_LIBRARY_PATH"; - caml_ld_library_path = Set stdlib; - stdlib; - outcome = outcome_caml_ld_library_path - @ if_ld_conf_found outcome_ld_conf} :: tests in + caml_ld_library_path = Set main; + stdlib = main; + outcome = List.tl main + @ if_ld_conf_found main_outcome} :: tests in let tests = (* As first, but with entries in CAML_LD_LIBRARY_PATH including quotes and separators. No effect on Unix, as the colon separator is always @@ -184,49 +155,25 @@ let tests config env = @ if_ld_conf_found main_outcome} :: tests in let tests = (* As first, but with a CR at the end of each line *) - let outcome = - (* Known issue: Windows strips out the blank entries in the search - path (somewhat counterintuitively!) *) - if Sys.win32 then - main_outcome_cr - else - "." 
:: main_outcome_cr - in {base with description = "Base ld.conf with CRLF endings"; stdlib = List.map (Fun.flip (^) "\r") ("" :: main); - outcome = if_ld_conf_found outcome} :: tests in + outcome = if_ld_conf_found main_outcome_cr} :: tests in tests in (* Batch 2: effects of empty (vs unset) environment variables *) let tests = let tests = - (* Empty CAML_LD_LIBRARY_PATH should add "." to the start of the search - path *) - let outcome_caml_ld_library_path = - if Sys.win32 then - [] - else - ["."] - in + (* Empty CAML_LD_LIBRARY_PATH - should be ignored *) {base with description = "Empty CAML_LD_LIBRARY_PATH"; caml_ld_library_path = Empty; stdlib = ["ld.conf"]; - outcome = outcome_caml_ld_library_path - @ if_ld_conf_found ["ld.conf"]} :: tests in + outcome = if_ld_conf_found ["ld.conf"]} :: tests in let tests = - (* Embedded empty entries in CAML_LD_LIBRARY_PATH should add equivalent - "." entries to the search path *) - let outcome_caml_ld_library_path = - if Sys.win32 then - [] - else - ["."; "."] - in + (* Empty segments in CAML_LD_LIBRARY_PATH - should be ignored *) {base with description = "Embedded empty entry in CAML_LD_LIBRARY_PATH"; caml_ld_library_path = Set [""; ""]; stdlib = ["ld.conf"]; - outcome = outcome_caml_ld_library_path - @ if_ld_conf_found ["ld.conf"]} :: tests in + outcome = if_ld_conf_found ["ld.conf"]} :: tests in let ld_conf_outcome = if_ld_conf_found ["masked-stdlib"] in let tests = (* An empty CAMLLIB shouldn't hide ld.conf in the Standard Library *) @@ -305,17 +252,8 @@ let () = && Sys.getenv_opt "OCAMLLIB" <> Some "") let () = - let print s = - (* Known issue: ocamlrun -config suppresses blank lines on Windows, but - displays them as "." on other platforms. Do a similar transformation - here, but suppress the lines entirely on Windows. *) - if s <> "" then - print_endline s - else if not Sys.win32 then - print_endline "." 
- in Dll.init_compile false; - List.iter print (Dll.search_path ()) + List.iter print_endline (Dll.search_path ()) |}) in let compile_test_program mode files test_program description = @@ -358,25 +296,11 @@ let () = mode = Bytecode && not config.target_launcher_searches_for_ocamlrun && config.has_relative_libdir = None in - let run run_process test = + let run run_process _test = let code, lines = run_process ~runtime test_program [] in if code = 0 then - let lines = - (* Known issues: - - Misc.split_path_contents ignores empty strings where - caml_decompose_path does not - - Sys.getenv can't return empty environment variables on Windows, - but _wgetenv can - - Windows strips out the blank entries in the search path - (somewhat counterintuitively!) *) - if not Sys.win32 && (test.caml_ld_library_path = Set [] - || test.caml_ld_library_path = Empty) then - "." :: lines - else - lines - in description :: lines else Harness.fail_because "%s is expected to exit with code 0" @@ -556,7 +480,7 @@ let run config env = if not (Sys.file_exists dir) then Sys.mkdir dir 0o775 else if not (Sys.is_directory dir) then begin - Sys.rmdir dir; + Sys.remove dir; Sys.mkdir dir 0o775 end in diff --git a/tools/ci/actions/runner.sh b/tools/ci/actions/runner.sh index f3a0f749dfa2..e289b138b44b 100755 --- a/tools/ci/actions/runner.sh +++ b/tools/ci/actions/runner.sh @@ -16,7 +16,8 @@ set -xe -PREFIX=~/local +# The prefix is designed to be usable as an opam local switch +PREFIX=~/local/_opam MAKE="make $MAKE_ARG" SHELL=dash @@ -56,11 +57,12 @@ EOF # $CONFIG_ARG also appears last to allow settings specified here to be # overridden by the workflows. 
call-configure --prefix="$PREFIX" \ + --docdir="$PREFIX/doc/ocaml" \ --enable-flambda-invariants \ --enable-ocamltest \ --enable-native-toplevel \ --disable-dependency-generation \ - $CONFIG_ARG + -C $CONFIG_ARG } Build () { @@ -125,7 +127,26 @@ API_Docs () { } Install () { - $MAKE install + $MAKE INSTALL_MODE=list install | grep '^->' | sort | uniq -d > duplicates + if [ -s duplicates ]; then + echo "The installation duplicates targets:" + cat duplicates + exit 1 + fi + rm duplicates + $MAKE DESTDIR="$PWD/install" install + find $PWD/install -name _opam -type d + $MAKE INSTALL_MODE=clone install + ret="$PWD" + script="$PWD/ocaml-compiler-clone.sh" + cd "$(find $PWD/install -name _opam -type d)" + mkdir -p "share/ocaml" + cp "$ret/config.status" "$ret/config.cache" "share/ocaml" + cp "$ret/ocaml-compiler-clone.sh" "share/ocaml/clone" + sh $script ~/local/_opam + cd "$ret" + rm -rf install + rm ocaml-compiler-clone.sh } target_libdir_is_relative='^ *TARGET_LIBDIR_IS_RELATIVE *= *false' @@ -225,7 +246,7 @@ Checks () { # we would need to redo (small parts of) world.opt afterwards to # use the compiler again $MAKE check_all_arches - # Ensure that .gitignore is up-to-date - this will fail if any untreacked or + # Ensure that .gitignore is up-to-date - this will fail if any untracked or # altered files exist. 
test -z "$(git status --porcelain)" # check that the 'clean' target also works @@ -234,7 +255,9 @@ Checks () { $MAKE -C manual distclean # check that the `distclean` target definitely cleans the tree $MAKE distclean - # Check the working tree is clean + # Check the working tree is clean - config.cache is intentionally not deleted + # by any of the clean targets + rm config.cache test -z "$(git status --porcelain)" # Check that there are no ignored files test -z "$(git ls-files --others -i --exclude-standard)" @@ -298,6 +321,24 @@ BasicCompiler () { ReportBuildStatus 0 } +CreateSwitch () { + # This can be switched to use the Ubuntu package when Ubuntu 26.04 is deployed + # (opam 2.1.5 in Ubuntu 24.04 is too old) + curl -Lo opam \ + 'https://github.com/ocaml/opam/releases/download/2.4.1/opam-2.4.1-x86_64-linux' + chmod +x opam + ./opam init --cli=2.4 --bare --disable-sandboxing --yes --auto-setup + # This is intentionally done before the switch is created - if the install + # target creates _opam then the switch creation will fail. + $MAKE INSTALL_MODE=opam OPAM_PACKAGE_NAME=ocaml-variants install + # These commands intentionally run using opam's "default" CLI + ./opam switch create ~/local --empty + ./opam switch --switch ~/local set-invariant --no-action ocaml-option-flambda + ./opam pin add --switch ~/local --no-action --kind=path ocaml-variants . 
+ ./opam install --switch ~/local --yes --assume-built ocaml-variants + ./opam exec --switch ~/local -- ocamlopt -v +} + case $1 in configure) Configure;; build) Build;; @@ -311,6 +352,7 @@ re-test-in-prefix) Re-Test-In-Prefix;; manual) BuildManual;; other-checks) Checks;; basic-compiler) BasicCompiler;; +opam) CreateSwitch;; *) echo "Unknown CI instruction: $1" exit 1;; esac diff --git a/tools/ci/appveyor/appveyor_build.cmd b/tools/ci/appveyor/appveyor_build.cmd index 48c2ad7e8a2e..be5e4022819d 100644 --- a/tools/ci/appveyor/appveyor_build.cmd +++ b/tools/ci/appveyor/appveyor_build.cmd @@ -22,7 +22,7 @@ chcp 65001 > nul set BUILD_PREFIX=🐫реализация -set OCAMLROOT=%PROGRAMFILES%\Бактріан🐫 +set OCAMLROOT=C:\Бактріан🐫 if "%1" neq "install" goto %1 setlocal enabledelayedexpansion @@ -69,7 +69,13 @@ if %CYGWIN_UPGRADE_REQUIRED% equ 1 ( ) ) if "%CYGWIN_INSTALL_PACKAGES%" neq "" "%CYG_ROOT%\setup-x86_64.exe" --quiet-mode --no-shortcuts --no-startmenu --no-desktop --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" %CYGWIN_FLAGS% --packages %CYGWIN_INSTALL_PACKAGES:~1% -for %%P in (%CYGWIN_COMMANDS%) do "%CYG_ROOT%\bin\%%P.exe" --version 2> nul > nul || set CYGWIN_UPGRADE_REQUIRED=1 +for %%P in (%CYGWIN_COMMANDS%) do ( + if %%P equ unzip ( + "%CYG_ROOT%\bin\%%P.exe" -v 2> nul > nul || set CYGWIN_UPGRADE_REQUIRED=1 + ) else ( + "%CYG_ROOT%\bin\%%P.exe" --version 2> nul > nul || set CYGWIN_UPGRADE_REQUIRED=1 + ) +) "%CYG_ROOT%\bin\bash.exe" -lc "cygcheck -dc %CYGWIN_PACKAGES%" if %CYGWIN_UPGRADE_REQUIRED% equ 1 ( echo Cygwin package upgrade required - please go and drink coffee @@ -87,6 +93,11 @@ if not defined SDK ( if "%PORT%" equ "mingw32" set SDK=call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars32.bat" ) %SDK% +rem The environment block becomes very large on AppVeyor, which can cause +rem problems for xargs in Cygwin. 
These two environment variables from the SDK +rem infrastructure can be safely junked to reduce the size of the block. +set __VSCMD_PREINIT_PATH= +set EXTERNAL_INCLUDE= goto :EOF :install @@ -103,6 +114,9 @@ if "%BOOTSTRAP_FLEXDLL%" equ "true" ( cd "%APPVEYOR_BUILD_FOLDER%" appveyor DownloadFile "https://github.com/ocaml/flexdll/archive/%FLEXDLL_VERSION%.tar.gz" -FileName "flexdll.tar.gz" || exit /b 1 appveyor DownloadFile "https://github.com/ocaml/flexdll/releases/download/%FLEXDLL_VERSION%/flexdll-bin-%FLEXDLL_VERSION%.zip" -FileName "flexdll.zip" || exit /b 1 +appveyor DownloadFile "https://github.com/ocaml/opam/releases/download/2.4.1/opam-2.4.1-x86_64-windows.exe" -FileName "opam.exe" || exit /b 1 +md "%PROGRAMFILES%\flexdll" +move opam.exe "%PROGRAMFILES%\flexdll" rem flexdll.zip is processed here, rather than in appveyor_build.sh because the rem unzip command comes from MSYS2 (via Git for Windows) and it has to be rem invoked via cmd /c in a bash script which is weird(er). @@ -115,8 +129,8 @@ rem in the list just so that the Cygwin version is always displayed on the log). rem CYGWIN_COMMANDS is a corresponding command to run with --version to test rem whether the package works. This is used to verify whether the installation rem needs upgrading. -set CYGWIN_PACKAGES=cygwin make diffutils -set CYGWIN_COMMANDS=cygcheck make diff +set CYGWIN_PACKAGES=cygwin make diffutils unzip +set CYGWIN_COMMANDS=cygcheck make diff unzip if "%PORT%" equ "mingw32" ( rem mingw64-i686-runtime does not need explicitly installing, but it's useful rem to have the version reported. 
diff --git a/tools/ci/appveyor/appveyor_build.sh b/tools/ci/appveyor/appveyor_build.sh index 06fee742bda7..3a970c3ff144 100755 --- a/tools/ci/appveyor/appveyor_build.sh +++ b/tools/ci/appveyor/appveyor_build.sh @@ -59,7 +59,10 @@ function set_configuration { CACHE_FILE_PREFIX="$CACHE_DIRECTORY/config.cache-$1" CACHE_FILE="$CACHE_FILE_PREFIX-$CACHE_KEY" - args=('--cache-file' "$CACHE_FILE" '--prefix' "$2" '--enable-ocamltest') + args=('--cache-file' "$CACHE_FILE" \ + '--prefix' "$2/_opam" \ + '--docdir' "$2/_opam/doc/ocaml" \ + '--enable-ocamltest') case "$1" in cygwin*) @@ -96,13 +99,17 @@ function set_configuration { if ((failed)) ; then cat config.log ; exit $failed ; fi fi + cp "$CACHE_FILE" config.cache + # FILE=$(pwd | cygpath -f - -m)/Makefile.config # run "Content of $FILE" cat Makefile.config } -PARALLEL_URL='https://git.savannah.gnu.org/cgit/parallel.git/plain/src/parallel' +PARALLEL_URL=\ +'https://git.savannah.gnu.org/cgit/parallel.git/plain/src/parallel?h=20241222' APPVEYOR_BUILD_FOLDER=$(echo "$APPVEYOR_BUILD_FOLDER" | cygpath -f -) FLEXDLLROOT="$PROGRAMFILES/flexdll" +export OPAMSWITCH="$OCAMLROOT" if [[ $BOOTSTRAP_FLEXDLL = 'false' ]] ; then case "$PORT" in @@ -118,14 +125,7 @@ fi case "$1" in install) - mkdir -p "$CACHE_DIRECTORY" - if [ ! 
-e "$CACHE_DIRECTORY/parallel-source" ] || \ - [ "$PARALLEL_URL" != "$(cat "$CACHE_DIRECTORY/parallel-source")" ] ; then - # Download latest version directly from the repo - curl -Ls $PARALLEL_URL -o "$CACHE_DIRECTORY/parallel" - echo "$PARALLEL_URL" > "$CACHE_DIRECTORY/parallel-source" - fi - cp "$CACHE_DIRECTORY/parallel" /usr/bin + cp "$APPVEYOR_BUILD_FOLDER/tools/ci/appveyor/parallel" /usr/bin chmod +x /usr/bin/parallel parallel --version if [[ $install_flexdll = 'true' ]] ; then @@ -145,7 +145,7 @@ case "$1" in ;; test) FULL_BUILD_PREFIX="$APPVEYOR_BUILD_FOLDER/../$BUILD_PREFIX" - run 'ocamlc.opt -version' "$FULL_BUILD_PREFIX-$PORT/ocamlc.opt" -version + #run 'ocamlc.opt -version' "$FULL_BUILD_PREFIX-$PORT/ocamlc.opt" -version if [[ $PORT =~ mingw* ]] ; then run "Check runtime symbols" \ "$FULL_BUILD_PREFIX-$PORT/tools/check-symbol-names" \ @@ -164,15 +164,50 @@ case "$1" in # tests now (to include natdynlink) run "test dynlink $PORT" \ $MAKE -C "$FULL_BUILD_PREFIX-$PORT/testsuite" parallel-lib-dynlink - # Now reconfigure ocamltest to run in bytecode-only mode - sed -i '/native_/s/true/false/' \ - "$FULL_BUILD_PREFIX-$PORT/ocamltest/ocamltest_config.ml" - $MAKE -C "$FULL_BUILD_PREFIX-$PORT" -j ocamltest ocamltest.opt + case "$PORT" in + *64) + # Now reconfigure ocamltest to run in bytecode-only mode + sed -i '/native_/s/true/false/' \ + "$FULL_BUILD_PREFIX-$PORT/ocamltest/ocamltest_config.ml" + $MAKE -C "$FULL_BUILD_PREFIX-$PORT" -j ocamltest ocamltest.opt;; + esac # And run the entire testsuite, skipping all the native-code tests run "test $PORT" \ make -C "$FULL_BUILD_PREFIX-$PORT/testsuite" SHOW_TIMINGS=1 all fi run "install $PORT" $MAKE -C "$FULL_BUILD_PREFIX-$PORT" install + make -C "$FULL_BUILD_PREFIX-$PORT" INSTALL_MODE=clone install + ( + cd "$OCAMLROOT" + mv _opam destdir + #ret="$PWD" + #script="$PWD/ocaml-compiler-clone.sh" + #cd "$(find $PWD/install -name _opam -type d)" + mkdir -p "destdir/share/ocaml" + cp 
"$FULL_BUILD_PREFIX-$PORT/config."{cache,status} 'destdir/share/ocaml/' + cp "$FULL_BUILD_PREFIX-$PORT/ocaml-compiler-clone.sh" \ + 'destdir/share/ocaml/clone' + cd destdir + sh "$FULL_BUILD_PREFIX-$PORT/ocaml-compiler-clone.sh" "$OCAMLROOT/_opam" + ) + rm -rf "$OCAMLROOT" + $MAKE -C "$FULL_BUILD_PREFIX-$PORT" OPAM_PACKAGE_NAME=ocaml-variants \ + INSTALL_MODE=opam install + ( + cd "$FULL_BUILD_PREFIX-$PORT" + export PATH="$FLEXDLLROOT:$PATH" + opam init --cli=2.4 --bare --yes --disable-sandboxing --auto-setup \ + --cygwin-local-install + # These commands intentionally run using opam's "default" CLI + opam switch create "$OPAMSWITCH" --empty + opam pin add --no-action --kind=path ocaml-variants . + opam pin add --no-action flexdll flexdll + opam install --yes flexdll winpthreads + opam install --yes --assume-built ocaml-variants + git checkout -- ocaml-variants.install + rm -f config.cache ocaml-variants-fixup.sh ocaml-compiler-clone.sh + opam exec -- ocamlc -v + ) run "test $PORT in prefix" \ $MAKE -f Makefile.test -C "$FULL_BUILD_PREFIX-$PORT/testsuite/in_prefix" \ test-in-prefix diff --git a/tools/ci/appveyor/parallel b/tools/ci/appveyor/parallel new file mode 100755 index 000000000000..5c0adb415791 --- /dev/null +++ b/tools/ci/appveyor/parallel @@ -0,0 +1,15971 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2024 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2007-2024 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +# open3 used in Job::start +use IPC::Open3; +use POSIX; +# gensym used in Job::start +use Symbol qw(gensym); +# tempfile used in Job::start +use File::Temp qw(tempfile tempdir); +# mkpath used in openresultsfile +use File::Path; +# GetOptions used in get_options_from_array +use Getopt::Long; +# Used to ensure code quality +use strict; +use File::Basename; + +sub set_input_source_header($$) { + my ($command_ref,$input_source_fh_ref) = @_; + if(defined $opt::header and not $opt::pipe) { + # split with colsep or \t + # $header force $colsep = \t if undef? + my $delimiter = defined $opt::colsep ? $opt::colsep : "\t"; + # regexp for {= + my $left = "\Q$Global::parensleft\E"; + my $l = $Global::parensleft; + # regexp for =} + my $right = "\Q$Global::parensright\E"; + my $r = $Global::parensright; + if($opt::header ne "0") { + my $id = 1; + for my $fh (@$input_source_fh_ref) { + my $line = <$fh>; + chomp($line); + $line =~ s/\r$//; + ::debug("init", "Delimiter: '$delimiter'"); + for my $s (split /$delimiter/o, $line) { + ::debug("init", "Colname: '$s'"); + # Replace {colname} with {2} + for(@$command_ref, @Global::ret_files, + @Global::transfer_files, $opt::tagstring, + $opt::workdir, $opt::results, $opt::retries, + @Global::template_contents, @Global::template_names, + @opt::filter) { + # Skip if undefined + $_ or next; + s<\{$s( + (\.\d+)? # Sub position = {3.2} + (|/|//|\.|/\.) # Known replacement strings + (:%.*?)? # Formatting + )\}><\{$id$1\}>gx; + # {=header1 ... =} => {=1 ... =} + s:$left $s (.*?) 
$right:$l$id$1$r:gx; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } + } + # Make it possible to do: + # parallel --header 0 echo {file2} {file1} :::: file1 file2 + my $id = 1; + for my $s (@opt::a) { + # ::: are put into files and given a filehandle + # ignore these and only keep the filenames. + fileno $s and next; + for(@$command_ref, @Global::ret_files, + @Global::transfer_files, $opt::tagstring, + $opt::workdir, $opt::results, $opt::retries, + @Global::template_contents, @Global::template_names, + @opt::filter) { + # Skip if undefined + $_ or next; + s:\{\Q$s\E(|/|//|\.|/\.)\}:\{$id$1\}:g; + # {=header1 ... =} => {=1 ... =} + s:$left \Q$s\E (.*?) $right:$l$id$1$r:gx; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } else { + my $id = 1; + for my $fh (@$input_source_fh_ref) { + $Global::input_source_header{$id} = $id; + $id++; + } + } +} + +sub max_jobs_running() { + # Compute $Global::max_jobs_running as the max number of jobs + # running on each sshlogin. 
+ # Returns: + # $Global::max_jobs_running + if(not $Global::max_jobs_running) { + for my $sshlogin (values %Global::host) { + $sshlogin->max_jobs_running(); + } + } + if(not $Global::max_jobs_running) { + ::error("Cannot run any jobs."); + wait_and_exit(255); + } + return $Global::max_jobs_running; +} + +sub halt() { + # Compute exit value, + # wait for children to complete + # and exit + if($opt::halt and $Global::halt_when ne "never") { + if(not defined $Global::halt_exitstatus) { + if($Global::halt_pct) { + $Global::halt_exitstatus = + ::ceil($Global::total_failed / + ($Global::total_started || 1) * 100); + } elsif($Global::halt_count) { + $Global::halt_exitstatus = + ::min(undef_as_zero($Global::total_failed),101); + } + } + wait_and_exit($Global::halt_exitstatus); + } else { + if($Global::semaphore) { + # --semaphore runs a single job: + # Use exit value of that + wait_and_exit($Global::halt_exitstatus); + } else { + # 0 = all jobs succeeded + # 1-100 = n jobs failed + # 101 = >100 jobs failed + wait_and_exit(min(undef_as_zero($Global::exitstatus),101)); + } + } +} + + +sub __FAST_MODE__() {} + + +sub fast() { + my $jobslots = $Global::host{':'}->user_requested_processes($opt::jobs); + my $i; + my $jobs_per_chunk; + my $buffer_len = 100; + my $splitstring = ("\n#\0\n"); + # n = jobslots + # Read 100 jobs + # Split into n chunks + # Read 3*100 jobs + # Split into n chunks + # Read 3*3*100 jobs (up to at most 300K jobs) + # Split into n chunks + # Receiver: parallel --block 1k --pipe -N1 --recend '\n#\0\n' dash + # TODO --group? --halt? --tag? 
$PARALLEL_JOBSLOT + # _PARALLEL_TAG="foo" + # | tagger + # _PARALLEL_EXIT + # maybe_exit + my $executer_fh; + if($opt::D eq "fast") { + open($executer_fh, "|-", "cat") || die; + } else { + open($executer_fh, "|-", "parallel --plain -j $jobslots --block 1k --pipe -N1 --recend '\\n#\\0\\n' $Global::shell") || die; + } + # Do some testing of a stuck job - dns = 5 sec + my $block_start = q{ + error() { + _exit=$((_exit+1)) + # --halt-on-error stuff + if [ $_exit -gt 40 ]; then + echo "ERROR: More than 40" + exit 1 + fi + } + + _tagger() { + { + rm -f "$1" + # This will fail for either /1 or /2 + rmdir "$2" 2>/dev/null + # TODO use awk (see fasttag) + perl -pe 's{^}{$ENV{PARALLEL_TAGSTRING}}' + } < "$1" + } + + _taggerwrap() { + _dir=`mktemp -d` + mkfifo "$_dir"/1 "$_dir"/2 + _tagger "$_dir"/1 "$_dir" >&1 & + _tagger "$_dir"/2 "$_dir" >&2 & + # ( ... ) is needed to deal correctly with 'exit 1' + ( eval $@ ; ) >"$_dir"/1 2>"$_dir"/2 || error + wait + } + PARALLEL_TAGSTRING=dummy + export PARALLEL_TAGSTRING + }; + my $block_end = "\n# Define get exit value\necho EXIT=\$_exit\n".$splitstring."\n"; + do { + $i = 0; + my @jobbuffer; + while(my $job = $Global::JobQueue->get()) { + push (@jobbuffer, "PARALLEL_TAGSTRING=".Q($job->tag()). + "\n_taggerwrap ". 
Q($job->replaced())); + ($i++ < $buffer_len) or last; + } + my $jobs_per_chunk = 1 + ($i / $jobslots); + my $j = 0; + print $executer_fh $block_start; + for(@jobbuffer) { + print $executer_fh $_,"\n"; + (++$j % $jobs_per_chunk) or print $executer_fh $block_end,$block_start; + } + print $executer_fh $block_end; + if($buffer_len < 100000) { $buffer_len *= 3; } + } while ($i); + close $executer_fh; + exit(0); +} + + +sub __PIPE_MODE__() {} + + +sub pipepart_setup() { + # Compute the blocksize + # Generate the commands to extract the blocks + # Push the commands on queue + # Changes: + # @Global::cat_prepends + # $Global::JobQueue + if($opt::tee) { + # Prepend each command with + # < file + my $cat_string = "< ".Q($opt::a[0]); + for(1..$Global::JobQueue->total_jobs()) { + push @Global::cat_appends, $cat_string; + push @Global::cat_prepends, ""; + } + } else { + if(not $opt::blocksize) { + # --blocksize with 10 jobs per jobslot + $opt::blocksize = -10; + } + if($opt::roundrobin) { + # --blocksize with 1 job per jobslot + $opt::blocksize = -1; + } + if($opt::blocksize < 0) { + my $size = 0; + # Compute size of -a + for(@opt::a) { + if(-f $_) { + $size += -s $_; + } elsif(-b $_) { + $size += size_of_block_dev($_); + } elsif(-e $_) { + ::error("$_ is neither a file nor a block device"); + wait_and_exit(255); + } else { + ::error("File not found: $_"); + wait_and_exit(255); + } + } + # Run in total $job_slots*(- $blocksize) jobs + # Set --blocksize = size / no of proc / (- $blocksize) + $Global::dummy_jobs = 1; + $Global::blocksize = 1 + + int($size / max_jobs_running() / + -multiply_binary_prefix($opt::blocksize)); + } + @Global::cat_prepends = (map { pipe_part_files($_) } + # ::: are put into files and given a filehandle + # ignore these and only keep the filenames. + grep { ! 
fileno $_ } @opt::a); + # Unget the empty arg as many times as there are parts + $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget( + map { [Arg->new("\0noarg")] } @Global::cat_prepends + ); + } +} + +sub pipe_tee_setup() { + # Create temporary fifos + # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background + # This will spread the input to fifos + # Generate commands that reads from fifo1..N: + # cat fifo | user_command + # Changes: + # @Global::cat_prepends + my @fifos; + for(1..$Global::JobQueue->total_jobs()) { + push @fifos, tmpfifo(); + } + # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null + if(not fork()){ + # Test if tee supports --output-error=warn-nopipe + `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`; + my $opt = $? ? "" : "--output-error=warn-nopipe"; + ::debug("init","tee $opt"); + if($opt::dryrun) { + # This is not exactly what is run, but it gives the basic idea + print "mkfifo @fifos\n"; + print "tee $opt @fifos >/dev/null &\n"; + } else { + # Let tee inherit our stdin + # and redirect stdout to null + open STDOUT, ">","/dev/null"; + if($opt) { + exec "tee", $opt, @fifos; + } else { + exec "tee", @fifos; + } + } + exit(0); + } + # For each fifo + # (rm fifo1; grep 1) < fifo1 + # (rm fifo2; grep 2) < fifo2 + # (rm fifo3; grep 3) < fifo3 + # Remove the tmpfifo as soon as it is open + @Global::cat_prepends = map { "(rm $_;" } shell_quote(@fifos); + @Global::cat_appends = map { ") < $_" } shell_quote(@fifos); +} + + +sub parcat_script() { + # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos + my $script = q'{ + use POSIX qw(:errno_h); + use IO::Select; + use strict; + use threads; + use Thread::Queue; + use Fcntl qw(:DEFAULT :flock); + + my $opened :shared; + my $q = Thread::Queue->new(); + my $okq = Thread::Queue->new(); + my @producers; + + if(not @ARGV) { + if(-t *STDIN) { + print "Usage:\n"; + print " parcat file(s)\n"; + print " cat argfile | parcat\n"; + } else { + # Read arguments 
from stdin + chomp(@ARGV = ); + } + } + my $files_to_open = 0; + # Default: fd = stdout + my $fd = 1; + for (@ARGV) { + # --rm = remove file when opened + /^--rm$/ and do { $opt::rm = 1; next; }; + # -1 = output to fd 1, -2 = output to fd 2 + /^-(\d+)$/ and do { $fd = $1; next; }; + push @producers, threads->create("producer", $_, $fd); + $files_to_open++; + } + + sub producer { + # Open a file/fifo, set non blocking, enqueue fileno of the file handle + my $file = shift; + my $output_fd = shift; + open(my $fh, "<", $file) || do { + print STDERR "parcat: Cannot open $file: $!\n"; + exit(1); + }; + # Remove file when it has been opened + if($opt::rm) { + unlink $file; + } + set_fh_non_blocking($fh); + $opened++; + # Pass the fileno to parent + $q->enqueue(fileno($fh),$output_fd); + # Get an OK that the $fh is opened and we can release the $fh + while(1) { + my $ok = $okq->dequeue(); + if($ok == fileno($fh)) { last; } + # Not ours - very unlikely to happen + $okq->enqueue($ok); + } + return; + } + + my $s = IO::Select->new(); + my %buffer; + + sub add_file { + my $infd = shift; + my $outfd = shift; + open(my $infh, "<&=", $infd) || die; + open(my $outfh, ">&=", $outfd) || die; + $s->add($infh); + # Tell the producer now opened here and can be released + $okq->enqueue($infd); + # Initialize the buffer + @{$buffer{$infh}{$outfd}} = (); + $Global::fh{$outfd} = $outfh; + } + + sub add_files { + # Non-blocking dequeue + my ($infd,$outfd); + do { + ($infd,$outfd) = $q->dequeue_nb(2); + if(defined($outfd)) { + add_file($infd,$outfd); + } + } while(defined($outfd)); + } + + sub add_files_block { + # Blocking dequeue + my ($infd,$outfd) = $q->dequeue(2); + add_file($infd,$outfd); + } + + + my $fd; + my (@ready,$infh,$rv,$buf); + do { + # Wait until at least one file is opened + add_files_block(); + while($q->pending or keys %buffer) { + add_files(); + while(keys %buffer) { + @ready = $s->can_read(0.01); + if(not @ready) { + add_files(); + } + for $infh (@ready) { + # There is 
only one key, namely the output file descriptor + for my $outfd (keys %{$buffer{$infh}}) { + # TODO test if 60800 is optimal (2^17 is used elsewhere) + $rv = sysread($infh, $buf, 60800); + if (!$rv) { + if($! == EAGAIN) { + # Would block: Nothing read + next; + } else { + # Nothing read, but would not block: + # This file is done + $s->remove($infh); + for(@{$buffer{$infh}{$outfd}}) { + syswrite($Global::fh{$outfd},$_); + } + delete $buffer{$infh}; + # Closing the $infh causes it to block + # close $infh; + add_files(); + next; + } + } + # Something read. + # Find \n or \r for full line + my $i = (rindex($buf,"\n")+1); + if($i) { + # Print full line + for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) { + syswrite($Global::fh{$outfd},$_); + } + # @buffer = remaining half line + $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)]; + } else { + # Something read, but not a full line + push @{$buffer{$infh}{$outfd}}, $buf; + } + redo; + } + } + } + } + } while($opened < $files_to_open); + + for (@producers) { + $_->join(); + } + + sub set_fh_non_blocking { + # Set filehandle as non-blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + my $flags; + fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= &O_NONBLOCK; # Add non-blocking to the flags + fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle + } + }'; + return ::spacefree(3, $script); +} + +sub sharder_script() { + my $script = q{ + use B; + # Column separator + my $sep = shift; + # Which columns to shard on (count from 1) + my $col = shift; + # Which columns to shard on (count from 0) + my $col0 = $col - 1; + # Perl expression + my $perlexpr = shift; + my $bins = @ARGV; + # Open fifos for writing, fh{0..$bins} + my $t = 0; + my %fh; + for(@ARGV) { + open $fh{$t++}, ">", $_; + # open blocks until it is opened by reader + # so unlink only happens when it is ready + unlink $_; + } + if($perlexpr) { + my $subref = 
eval("sub { no strict; no warnings; $perlexpr }"); + while() { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + { + local $_ = $F[$col0]; + &$subref(); + $fh = $fh{ hex(B::hash($_))%$bins }; + } + print $fh $_; + } + } else { + while() { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + $fh = $fh{ hex(B::hash($F[$col0]))%$bins }; + print $fh $_; + } + } + # Close all open fifos + close values %fh; + }; + return ::spacefree(1, $script); +} + +sub binner_script() { + my $script = q{ + use B; + # Column separator + my $sep = shift; + # Which columns to shard on (count from 1) + my $col = shift; + # Which columns to shard on (count from 0) + my $col0 = $col - 1; + # Perl expression + my $perlexpr = shift; + my $bins = @ARGV; + # Open fifos for writing, fh{0..$bins} + my $t = 0; + my %fh; + # Let the last output fifo be the 0'th + open $fh{$t++}, ">", pop @ARGV; + for(@ARGV) { + open $fh{$t++}, ">", $_; + # open blocks until it is opened by reader + # so unlink only happens when it is ready + unlink $_; + } + if($perlexpr) { + my $subref = eval("sub { no strict; no warnings; $perlexpr }"); + while() { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + { + local $_ = $F[$col0]; + &$subref(); + $fh = $fh{ $_%$bins }; + } + print $fh $_; + } + } else { + while() { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + $fh = $fh{ $F[$col0]%$bins }; + print $fh $_; + } + } + # Close all open fifos + close values %fh; + }; + return ::spacefree(1, $script); +} + +sub pipe_shard_setup() { + # Create temporary fifos + # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... 
fifoN' in the background + # This will spread the input to fifos + # Generate commands that reads from fifo1..N: + # cat fifo | user_command + # Changes: + # @Global::cat_prepends + my @shardfifos; + my @parcatfifos; + # TODO $opt::jobs should be evaluated (100%) + # TODO $opt::jobs should be number of total_jobs if there are arguments + max_jobs_running(); + my $njobs = $Global::max_jobs_running; + for my $m (0..$njobs-1) { + for my $n (0..$njobs-1) { + # sharding to A B C D + # parcatting all As together + $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo(); + } + } + my $shardbin = ($opt::shard || $opt::bin); + my $script; + if($opt::bin) { + $script = binner_script(); + } else { + $script = sharder_script(); + } + + # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN + + if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) { + # Group by column name + # (Yes, this will also wrongly match a perlexpr like: chop) + my($read,$char,@line); + # A full line, but nothing more (the rest must be read by the child) + # $Global::header used to prepend block to each job + do { + $read = sysread(STDIN,$char,1); + push @line, $char; + } while($read and $char ne "\n"); + $Global::header = join "", @line; + } + my ($col, $perlexpr, $subref) = + column_perlexpr($shardbin, $Global::header, $opt::colsep); + if(not fork()) { + # Let the sharder inherit our stdin + # and redirect stdout to null + open STDOUT, ">","/dev/null"; + # The PERL_HASH_SEED must be the same for all sharders + # so B::hash will return the same value for any given input + $ENV{'PERL_HASH_SEED'} = $$; + exec qw(parallel -0 --block 100k -q --pipe -j), $njobs, + qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","), + $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos); + } + # For each fifo + # (rm fifo1; grep 1) < fifo1 + # (rm fifo2; grep 2) < fifo2 + # (rm fifo3; grep 3) < fifo3 + my $parcat = Q(parcat_script()); + if(not $parcat) { + ::error("'parcat' must be in path."); + ::wait_and_exit(255); 
+ } + @Global::cat_prepends = + map { "perl -e $parcat ". + join(" ",shell_quote(@$_))." | "} @parcatfifos; +} + +sub pipe_part_files(@) { + # Given the bigfile: + # - find header and split positions + # - make commands that 'cat's the partial file + # Input: + # $file = the file to read + # Returns: + # @commands that will cat_partial each part + my ($file) = @_; + my $buf = ""; + if(not -f $file and not -b $file) { + ::error("--pipepart only works on seekable files, not streams/pipes.", + "$file is not a seekable file."); + ::wait_and_exit(255); + } + + my $fh = open_or_exit("<",$file); + my $firstlinelen = 0; + if($opt::skip_first_line) { + my $newline; + # Read a full line one byte at a time + while($firstlinelen += sysread($fh,$newline,1,0)) { + $newline eq "\n" and last; + } + } + my $header = find_header(\$buf,$fh); + # find positions + my @pos = find_split_positions($file,int($Global::blocksize), + $header,$firstlinelen); + # Make @cat_prepends + my @cat_prepends = (); + for(my $i=0; $i<$#pos; $i++) { + push(@cat_prepends, + cat_partial($file, $firstlinelen, $firstlinelen+length($header), + $pos[$i], $pos[$i+1])); + } + return @cat_prepends; +} + +sub find_header($$) { + # Compute the header based on $opt::header + # Input: + # $buf_ref = reference to read-in buffer + # $fh = filehandle to read from + # Uses: + # $opt::header + # $Global::blocksize + # $Global::header + # Returns: + # $header string + my ($buf_ref, $fh) = @_; + my $header = ""; + # $Global::header may be set in group_by_loop() + if($Global::header) { return $Global::header } + if($opt::header) { + if($opt::header eq ":") { $opt::header = "(.*\n)"; } + # Number = number of lines + $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; + while(sysread($fh,$$buf_ref,int($Global::blocksize),length $$buf_ref)) { + if($$buf_ref =~ s/^($opt::header)//) { + $header = $1; + last; + } + } + } + return $header; +} + +sub find_split_positions($$$) { + # Find positions in bigfile where recend is followed by recstart 
+ # Input: + # $file = the file to read + # $block = (minimal) --block-size of each chunk + # $header = header to be skipped + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # @positions of block start/end + my($file, $block, $header, $firstlinelen) = @_; + my $skiplen = $firstlinelen + length $header; + my $size = -s $file; + if(-b $file) { + # $file is a blockdevice + $size = size_of_block_dev($file); + } + $block = int $block; + if($opt::groupby) { + return split_positions_for_group_by($file,$size,$block, + $header,$firstlinelen); + } + # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 + # The optimal dd blocksize for freebsd = 2^15..2^17 + # The optimal dd blocksize for ubuntu (AMD6376) = 2^16 + my $dd_block_size = 131072; # 2^17 + my @pos; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $fh = ::open_or_exit("<",$file); + push(@pos,$skiplen); + for(my $pos = $block+$skiplen; $pos < $size; $pos += $block) { + my $buf; + if($recendrecstart eq "") { + # records ends anywhere + push(@pos,$pos); + } else { + # Seek the the block start + if(not sysseek($fh, $pos, 0)) { + ::error("Cannot seek to $pos in $file"); + edit(255); + } + while(sysread($fh,$buf,$dd_block_size,length $buf)) { + if($opt::regexp) { + # If match /$recend$recstart/ => Record position + if($buf =~ m:^(.*$recend)$recstart:os) { + # Start looking for next record _after_ this match + $pos += length($1); + push(@pos,$pos); + last; + } + } else { + # If match $recend$recstart => Record position + # TODO optimize to only look at the appended + # $dd_block_size + len $recendrecstart + # TODO increase $dd_block_size to optimize for longer records + my $i = index64(\$buf,$recendrecstart); + if($i != -1) { + # Start looking for next record _after_ this match + $pos += $i + length($recend); + push(@pos,$pos); + last; + } + } + } + } + } + if($pos[$#pos] != $size) { + # Last splitpoint was not at end of the file: add $size as the 
last + push @pos, $size; + } + close $fh; + return @pos; +} + +sub split_positions_for_group_by($$$$) { + my($fh); + my %value; + sub value_at($) { + my $pos = shift; + if(not defined $value{$pos}) { + if($pos != 0) { + seek($fh, $pos-1, 0) || die; + # Read half line + <$fh>; + } + # Read full line + my $linepos = tell($fh); + if(not defined $value{$linepos}) { + $_ = <$fh>; + if(defined $_) { + # Not end of file + my @F; + if(defined $group_by::col) { + $opt::colsep ||= "\t"; + @F = split /$opt::colsep/, $_; + $_ = $F[$group_by::col]; + } + eval $group_by::perlexpr; + } + $value{$linepos} = [$_,$linepos]; + } + $value{$pos} = $value{$linepos}; + } + return (@{$value{$pos}}); + } + + sub binary_search_end($$$) { + my ($s,$spos,$epos) = @_; + # value_at($spos) == $s + # value_at($epos) != $s + my $posdif = $epos - $spos; + my ($v,$vpos); + while($posdif) { + ($v,$vpos) = value_at($spos+$posdif); + if($v eq $s) { + $spos = $vpos; + $posdif = $epos - $spos; + } else { + $epos = $vpos; + } + $posdif = int($posdif/2); + } + return($v,$vpos); + } + + sub binary_search_start($$$) { + my ($s,$spos,$epos) = @_; + # value_at($spos) != $s + # value_at($epos) == $s + my $posdif = $epos - $spos; + my ($v,$vpos); + while($posdif) { + ($v,$vpos) = value_at($spos+$posdif); + if($v eq $s) { + $epos = $vpos; + } else { + $spos = $vpos; + $posdif = $epos - $spos; + } + $posdif = int($posdif/2); + } + return($v,$vpos); + } + + my ($file,$size,$block,$header,$firstlinelen) = @_; + my @pos; + $fh = open_or_exit("<",$file); + # Set $Global::group_by_column $Global::group_by_perlexpr + group_by_loop($fh,$opt::recsep); + if($opt::max_args) { + # Split after n values + my ($a,$apos); + # $xpos = linestart, $x = value at $xpos + $apos = $firstlinelen + length $header; + for(($a,$apos) = value_at($apos); $apos < $size;) { + push @pos, $apos; + ($a,$apos) = binary_search_end($a,$apos,$size); + if(eof($fh)) { + push @pos, $size; last; + } + } + # @pos = start of every value + # Merge n values + 
# -nX = keep every X'th position + my $i = 0; + @pos = grep { not ($i++ % $opt::max_args) } @pos; + } else { + # Split after any value group + # Preferable < $blocksize + my ($a,$b,$c,$apos,$bpos,$cpos); + # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos + $apos = $firstlinelen + length $header; + for(($a,$apos) = value_at($apos); $apos < $size;) { + push @pos, $apos; + $bpos = $apos + $block; + ($b,$bpos) = value_at($bpos); + if(eof($fh)) { + # EOF is less than 1 block away + push @pos, $size; last; + } + $cpos = $bpos + $block; + ($c,$cpos) = value_at($cpos); + if($a eq $b) { + while($b eq $c) { + # Move bpos, cpos a block forward until $a == $b != $c + $bpos = $cpos; + $cpos += $block; + ($c,$cpos) = value_at($cpos); + if($cpos >= $size) { + $cpos = $size; + last; + } + } + # $a == $b != $c + # Binary search for $b ending between ($bpos,$cpos) + ($b,$bpos) = binary_search_end($b,$bpos,$cpos); + } else { + if($b eq $c) { + # $a != $b == $c + # Binary search for $b starting between ($apos,$bpos) + ($b,$bpos) = binary_search_start($b,$apos,$bpos); + } else { + # $a != $b != $c + # Binary search for $b ending between ($bpos,$cpos) + ($b,$bpos) = binary_search_end($b,$bpos,$cpos); + } + } + ($a,$apos) = ($b,$bpos); + } + } + if($pos[$#pos] != $size) { + # Last splitpoint was not at end of the file: add it + push @pos, $size; + } + return @pos; +} + +sub cat_partial($@) { + # Efficient command to copy from byte X to byte Y + # Input: + # $file = the file to read + # ($start, $end, [$start2, $end2, ...]) = start byte, end byte + # Returns: + # Efficient command to copy $start..$end, $start2..$end2, ... 
to stdout + my($file, @start_end) = @_; + my($start, $i); + # Convert (start,end) to (start,len) + my @start_len = map { + if(++$i % 2) { $start = $_; } else { $_-$start } + } @start_end; + # The optimal block size differs + # It has been measured on: + # AMD 6376: n*4k-1; small n + # AMD Neo N36L: 44k-200k + # Intel i7-3632QM: 55k- + # ARM Cortex A53: 4k-28k + # Intel i5-2410M: 36k-46k + # + # I choose 2^15-1 = 32767 + # q{ + # expseq() { + # perl -E ' + # $last = pop @ARGV; + # $first = shift || 1; + # $inc = shift || 1.03; + # for($i=$first; $i<=$last;$i*=$inc) { say int $i } + # ' "$@" + # } + # + # seq 111111111 > big; + # f() { ppar --test $1 -a big --pipepart --block -1 'md5sum > /dev/null'; } + # export -f f; + # expseq 1000 1.001 300000 | shuf | parallel -j1 --jl jl-md5sum f; + # }; + my $script = spacefree + (0, + q{ + while(@ARGV) { + sysseek(STDIN,shift,0) || die; + $left = shift; + while($read = + sysread(STDIN,$buf, $left > 32767 ? 32767 : $left)){ + $left -= $read; + syswrite(STDOUT,$buf); + } + } + }); + return "<". Q($file) . 
+ " perl -e '$script' @start_len |"; +} + +sub column_perlexpr($$$) { + # Compute the column number (if any), perlexpression from combined + # string (such as --shard key, --groupby key, {=n perlexpr=} + # Input: + # $column_perlexpr = string with column and perl expression + # $header = header from input file (if column is column name) + # $colsep = column separator regexp + # Returns: + # $col = column number + # $perlexpr = perl expression + # $subref = compiled perl expression as sub reference + my ($column_perlexpr, $header, $colsep) = @_; + my ($col, $perlexpr, $subref); + if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) { + # Column name/number (possibly prefix) + if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) { + # Column number (possibly prefix) + $col = $1; + } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) { + # Column name (possibly prefix) + my $colname = $1; + # Split on --colsep pattern + my @headers = split /$colsep/, $header; + my %headers; + @headers{@headers} = (1..($#headers+1)); + $col = $headers{$colname}; + if(not defined $col) { + ::error("Column '$colname' $colsep not found in header",keys %headers); + ::wait_and_exit(255); + } + } + } + # What is left of $column_perlexpr is $perlexpr (possibly empty) + $perlexpr = $column_perlexpr; + $subref = eval("sub { no strict; no warnings; $perlexpr }"); + return($col, $perlexpr, $subref); +} + +sub group_by_loop($$) { + # Generate perl code for group-by loop + # Insert a $recsep when the column value changes + # The column value can be computed with $perlexpr + my($fh,$recsep) = @_; + my $groupby = $opt::groupby; + if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) { + # Group by column name + # (Yes, this will also wrongly match a perlexpr like: chop) + my($read,$char,@line); + # Read a full line, but nothing more + # (the rest must be read by the child) + # $Global::header used to prepend block to each job + do { + $read = sysread($fh,$char,1); + push @line, $char; + } while($read and $char ne "\n"); + 
$Global::header = join "", @line; + } + $opt::colsep ||= "\t"; + ($group_by::col, $group_by::perlexpr, $group_by::subref) = + column_perlexpr($groupby, $Global::header, $opt::colsep); + # Numbered 0..n-1 due to being used by $F[n] + if($group_by::col) { $group_by::col--; } + + my $loop = ::spacefree(0,q{ + BEGIN{ $last = "RECSEP"; } + { + local $_=COLVALUE; + PERLEXPR; + if(($last) ne $_) { + print "RECSEP"; + $last = $_; + } + } + }); + if(defined $group_by::col) { + $loop =~ s/COLVALUE/\$F[$group_by::col]/g; + } else { + $loop =~ s/COLVALUE/\$_/g; + } + $loop =~ s/PERLEXPR/$group_by::perlexpr/g; + $loop =~ s/RECSEP/$recsep/g; + return $loop; +} + +sub pipe_group_by_setup() { + # Record separator with 119 bit random value + $opt::recend = ''; + $opt::recstart = + join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20); + $opt::remove_rec_sep = 1; + my @filter; + push @filter, "perl"; + if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) { + # This is column number/name + # Use -a (auto-split) + push @filter, "-a"; + $opt::colsep ||= "\t"; + my $sep = $opt::colsep; + $sep =~ s/\t/\\t/g; + $sep =~ s/\"/\\"/g; + # man perlrun: -Fpattern [...] You can't use literal whitespace + $sep =~ s/ /\\040/g; + push @filter, "-F$sep"; + } + push @filter, "-pe"; + push @filter, group_by_loop(*STDIN,$opt::recstart); + ::debug("init", "@filter\n"); + open(STDIN, '-|', @filter) || die ("Cannot start @filter"); + if(which("mbuffer")) { + # You get a speed up of 30% by going through mbuffer + open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") || + die ("Cannot start mbuffer"); + } +} + +sub spreadstdin() { + # read a record + # Spawn a job and print the record to it. 
+ # Uses: + # $Global::blocksize + # STDIN + # $opt::r + # $Global::max_lines + # $Global::max_number_of_args + # $opt::regexp + # $Global::start_no_new_jobs + # $opt::roundrobin + # %Global::running + # Returns: N/A + + my $buf = ""; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $chunk_number = 1; + my $one_time_through; + my $two_gb = 2**31-1; + my $blocksize = int($Global::blocksize); + my $in = *STDIN; + my $timeout = $Global::blocktimeout; + my @parts; + my $everything_read; + + if($opt::skip_first_line) { + my $newline; + # Read a full line one byte at a time + while(sysread($in,$newline,1,0)) { + $newline eq "\n" and last; + } + } + my $header = find_header(\$buf,$in); + my $anything_written; + my $eof; + my $garbage_read; + + sub read_with_alarm($) { + my ($readsize) = @_; + my ($nread,$alarm,$read_everything); + if($readsize < 0) { + $readsize = -$readsize; + $read_everything = 1; + } + # possibly interrupted by --blocktimeout + eval { + local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required + # --blocktimeout (or 0 if not set) + alarm $timeout; + if($] >= 5.026) { + do { + $nread = sysread $in, $buf, $readsize, length $buf; + if(not $read_everything) { $readsize -= $nread; } + } while($readsize and $nread); + } else { + # Less efficient reading, but 32-bit sysread compatible + do { + $nread = sysread($in,substr($buf,length $buf,0),$readsize,0); + if(not $read_everything) { $readsize -= $nread; } + } while($readsize and $nread); + } + alarm 0; + }; + if ($@) { + die unless $@ eq "alarm\n"; # propagate unexpected errors + $alarm = 1; + } else { + $alarm = 0; + } + # Is this EOF? 
+ return not ($nread or $alarm); + } + + sub read_block_per_jobslot() { + if(not $everything_read) { + # Read everything in readsize of 2^17 + # 21 = 20s + # 20 = 16s + # 19 = 16s,15s + # 18 = 15s + # 17 = 14s + # 16 = 14s + # 15 = 14s + $eof = read_with_alarm(-2**17); + if($eof) { + $everything_read = 1; + # Trick the rest of the code to think we are not done reading blocks yet. + $eof = 0; + # Chop into parts + my $total_size = length($buf); + my $jobslots = $Global::max_jobs_running; + my $parts = -$Global::blocksize * $jobslots; + my $part_size = int($total_size / $parts); + for my $i (0 .. $parts - 1) { + my $start = $i * $part_size; + my $end = ($i == $parts - 1) ? $total_size : $start + $part_size; + # Extract the chunk from buffer + push @parts, substr($buf, $start, $end - $start); + } + $buf=""; + } else { + # What do we do here? Caused by alarm. + ::die_bug("Read block from pipe failed"); + } + } + if(@parts) { + # pop part and return that + $buf .= shift @parts; + } else { + # All parts are done: Let the rest of the code know we are EOF + $eof = 1; + } + } + + sub read_block() { + # Read a --blocksize from STDIN + if($Global::blocksize < 0) { + read_block_per_jobslot(); + } else { + # Add up to the next full block + my $readsize = $blocksize - (length $buf) % $blocksize; + $eof = read_with_alarm($readsize); + } + } + + sub pass_n_line_records() { + # Pass records of N lines + my $n_lines = $buf =~ tr/\n/\n/; + my $last_newline_pos = rindex64(\$buf,"\n"); + # Go backwards until there are full n-line records + while($n_lines % $Global::max_lines) { + $n_lines--; + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + } + # Chop at $last_newline_pos as that is where n-line record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } + + sub pass_n_regexps() { + # Pass records of N regexps + # -N => (start..*?end){n} + # -L -N => 
(start..*?end){n*l} + if(not $garbage_read) { + $garbage_read = 1; + if($buf !~ /^$recstart/o) { + # Buf does not start with $recstart => There is garbage. + # Make a single record of the garbage + if($buf =~ + /(?s:^)( + (?:(?:(?!$recend$recstart)(?s:.))*?$recend) + ) + # Followed by recstart + (?=$recstart)/mox and length $1 > 0) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + } + + my $n_records = + $Global::max_number_of_args * ($Global::max_lines || 1); + # (?!negative lookahead) is needed to avoid backtracking + # See: https://unix.stackexchange.com/questions/439356/ + # (?s:.) = (.|[\n]) but faster + while($buf =~ + /(?s:^)( + # n more times recstart.*recend + (?:$recstart(?:(?!$recend$recstart)(?s:.))*?$recend){$n_records} + ) + # Followed by recstart + (?=$recstart)/mox and length $1 > 0) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + + sub pass_regexp() { + # Find the last recend-recstart in $buf + $eof and return; + # (?s:.) 
= (.|[\n]) but faster + if($buf =~ /^((?s:.)*$recend)$recstart(?s:.)*?$/mox) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + + sub pass_csv_record() { + # Pass CSV record + # We define a CSV record as an even number of " + end of line + # This works if you use " as quoting character + my $last_newline_pos = length $buf; + # Go backwards from the last \n and search for a position + # where there is an even number of " + do { + # find last EOL + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + # While uneven " + } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2 + and $last_newline_pos >= 0); + # Chop at $last_newline_pos as that is where CSV record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } + + sub pass_n() { + # Pass n records of --recend/--recstart + # -N => (start..*?end){n} + my $i = 0; + my $read_n_lines = + $Global::max_number_of_args * ($Global::max_lines || 1); + while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1 + and + length $buf) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + + sub pass() { + # Pass records of --recend/--recstart + # Split record at fixed string + # Find the last recend+recstart in $buf + $eof and return; + my $i = rindex64(\$buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + + sub increase_blocksize_maybe() { + if(not $anything_written + and not $opt::blocktimeout + and not $Global::no_autoexpand_block) { + # Nothing was written - maybe the block size < 
record size? + # Increase blocksize exponentially up to 2GB-1 (2GB causes problems) + if($blocksize < $two_gb) { + my $old_blocksize = $blocksize; + $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb); + ::warning("A record was longer than $old_blocksize. " . + "Increasing to --blocksize $blocksize."); + } + } + } + + while(1) { + $anything_written = 0; + read_block(); + if($opt::r) { + # Remove empty lines + $buf =~ s/^\s*\n//gm; + if(length $buf == 0) { + if($eof) { + last; + } else { + next; + } + } + } + if($Global::max_lines and not $Global::max_number_of_args) { + # Pass n-line records + pass_n_line_records(); + } elsif($opt::csv) { + # Pass a full CSV record + pass_csv_record(); + } elsif($opt::regexp) { + # Split record at regexp + if($Global::max_number_of_args) { + pass_n_regexps(); + } else { + pass_regexp(); + } + } else { + # Pass normal --recend/--recstart record + if($Global::max_number_of_args) { + pass_n(); + } else { + pass(); + } + } + $eof and last; + increase_blocksize_maybe(); + ::debug("init", "Round\n"); + } + ::debug("init", "Done reading input\n"); + + # If there is anything left in the buffer write it + write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart, + $recend, length $buf); + + if($opt::retries) { + $Global::no_more_input = 1; + # We need to start no more jobs: At most we need to retry some + # of the already running. + my @running = values %Global::running; + # Stop any virgins. 
+ for my $job (@running) { + if(defined $job and $job->virgin()) { + close $job->fh(0,"w"); + } + } + # Wait for running jobs to be done + my $sleep = 1; + while($Global::total_running > 0) { + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + } + } + $Global::start_no_new_jobs ||= 1; + if($opt::roundrobin) { + # Flush blocks to roundrobin procs + my $sleep = 1; + while(%Global::running) { + my $something_written = 0; + for my $job (values %Global::running) { + if($job->block_length()) { + $something_written += $job->non_blocking_write(); + } else { + close $job->fh(0,"w"); + } + } + if($something_written) { + $sleep = $sleep/2+0.001; + } + $sleep = ::reap_usleep($sleep); + } + } +} + +sub recstartrecend() { + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # $recstart,$recend with default values and regexp conversion + my($recstart,$recend); + if(defined($opt::recstart) and defined($opt::recend)) { + # If both --recstart and --recend is given then both must match + $recstart = $opt::recstart; + $recend = $opt::recend; + } elsif(defined($opt::recstart)) { + # If --recstart is given it must match start of record + $recstart = $opt::recstart; + $recend = ""; + } elsif(defined($opt::recend)) { + # If --recend is given then it must match end of record + $recstart = ""; + $recend = $opt::recend; + if($opt::regexp and $recend eq '') { + # --regexp --recend '' + $recend = '(?s:.)'; + } + } + + if($opt::regexp) { + # Do not allow /x comments - to avoid having to quote space + $recstart = "(?-x:".$recstart.")"; + $recend = "(?-x:".$recend.")"; + # If $recstart/$recend contains '|' + # the | should only apply to the regexp + $recstart = "(?:".$recstart.")"; + $recend = "(?:".$recend.")"; + } else { + # $recstart/$recend = printf strings (\n) + $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + } + return ($recstart,$recend); +} + +sub nindex($$) { + # See if string is in buffer N times + # Returns: + # the 
position where the Nth copy is found + my ($buf_ref, $str, $n) = @_; + my $i = 0; + for(1..$n) { + $i = index64($buf_ref,$str,$i+1); + if($i == -1) { last } + } + return $i; +} + +{ + my @robin_queue; + my $sleep = 1; + + sub round_robin_write($$$$$) { + # Input: + # $header_ref = ref to $header string + # $block_ref = ref to $block to be written + # $recstart = record start string + # $recend = record end string + # $endpos = end position of $block + # Uses: + # %Global::running + # Returns: + # $something_written = amount of bytes written + my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_; + my $written = 0; + my $block_passed = 0; + while(not $block_passed) { + # Continue flushing existing buffers + # until one is empty and a new block is passed + if(@robin_queue) { + # Rotate queue once so new blocks get a fair chance + # to be given to another slot + push @robin_queue, shift @robin_queue; + } else { + # Make a queue to spread the blocks evenly + push @robin_queue, (sort { $a->seq() <=> $b->seq() } + values %Global::running); + } + do { + $written = 0; + for my $job (@robin_queue) { + if($job->block_length() > 0) { + $written += $job->non_blocking_write(); + } else { + $job->set_block($header_ref, $buffer_ref, + $endpos, $recstart, $recend); + $block_passed = 1; + $written += $job->non_blocking_write(); + last; + } + } + if($written) { + $sleep = $sleep/1.5+0.001; + } + # Don't sleep if something is written + } while($written and not $block_passed); + $sleep = ::reap_usleep($sleep); + } + return $written; + } +} + +sub index64($$$) { + # Do index on strings > 2GB. + # index in Perl < v5.22 does not work for > 2GB + # Input: + # as index except STR which must be passed as a reference + # Output: + # as index + my $ref = shift; + my $match = shift; + my $pos = shift || 0; + my $max2gb = 2**31-1; + my $strlen = length($$ref); + # No point in doing extra work if we don't need to. 
+ if($strlen < $max2gb or $] > 5.022) { + return index($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos; + while($offset < $strlen) { + $ret = index( + substr($$ref, $offset, $max2gb), + $match, $pos-$offset); + if($ret != -1) { + return $ret + $offset; + } + $offset += ($max2gb - $matchlen - 1); + } + return -1; +} + +sub rindex64($@) { + # Do rindex on strings > 2GB. + # rindex in Perl < v5.22 does not work for > 2GB + # Input: + # as rindex except STR which must be passed as a reference + # Output: + # as rindex + my $ref = shift; + my $match = shift; + my $pos = shift; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # Default: search from end + $pos = defined $pos ? $pos : $strlen; + # No point in doing extra work if we don't need to. + if($strlen < $block_size or $] > 5.022) { + return rindex($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos - $block_size + $matchlen; + if($offset < 0) { + # The offset is less than a $block_size + # Set the $offset to 0 and + # Adjust block_size accordingly + $block_size = $block_size + $offset; + $offset = 0; + } + while($offset >= 0) { + $ret = rindex( + substr($$ref, $offset, $block_size), + $match); + if($ret != -1) { + return $ret + $offset; + } + $offset -= ($block_size - $matchlen - 1); + } + return -1; +} + +sub shorten($$) { + # Do: substr($buf,0,$i) = ""; + # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks + # Input: + # $buf_ref = \$buf + # $i = position to shorten to + # Returns: N/A + my ($buf_ref, $i) = @_; + my $two_gb = 2**31-1; + while($i > $two_gb) { + substr($$buf_ref,0,$two_gb) = ""; + $i -= $two_gb; + } + substr($$buf_ref,0,$i) = ""; +} + +sub write_record_to_pipe($$$$$$) { + # Fork then + # Write record from pos 0 .. 
$endpos to pipe + # Input: + # $chunk_number = sequence number - to see if already run + # $header_ref = reference to header string to prepend + # $buffer_ref = reference to record to write + # $recstart = start string of record + # $recend = end string of record + # $endpos = position in $buffer_ref where record ends + # Uses: + # $Global::job_already_run + # $opt::roundrobin + # @Global::virgin_jobs + # Returns: + # Number of chunks written (0 or 1) + my ($chunk_number, $header_ref, $buffer_ref, + $recstart, $recend, $endpos) = @_; + if($endpos == 0) { return 0; } + if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } + if($opt::roundrobin) { + # Write the block to one of the already running jobs + return round_robin_write($header_ref, $buffer_ref, + $recstart, $recend, $endpos); + } + # If no virgin found, backoff + my $sleep = 0.0001; # 0.01 ms - better performance on highend + while(not @Global::virgin_jobs) { + ::debug("pipe", "No virgin jobs"); + $sleep = ::reap_usleep($sleep); + # Jobs may not be started because of loadavg + # or too little time between each ssh login + # or retrying failed jobs. + start_more_jobs(); + } + my $job = shift @Global::virgin_jobs; + $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend); + $job->write_block(); + return 1; +} + + +sub __SEM_MODE__() {} + + +sub acquire_semaphore() { + # Acquires semaphore. 
If needed: spawns to the background + # Uses: + # @Global::host + # Returns: + # The semaphore to be released when jobs is complete + $Global::host{':'} = SSHLogin->new(":"); + my $sem = Semaphore->new($Semaphore::name, + $Global::host{':'}->max_jobs_running()); + $sem->acquire(); + if($Semaphore::fg) { + # skip + } else { + if(fork()) { + exit(0); + } else { + # If run in the background, the PID will change + $sem->pid_change(); + } + } + return $sem; +} + + +sub __PARSE_OPTIONS__() {} + +sub shell_completion() { + if($opt::shellcompletion eq "zsh") { + # if shell == zsh + zsh_competion(); + } elsif($opt::shellcompletion eq "bash") { + # if shell == bash + bash_competion(); + } elsif($opt::shellcompletion eq "auto") { + if($Global::shell =~ m:/zsh$|^zsh$:) { + # if shell == zsh + zsh_competion(); + } elsif($Global::shell =~ m:/bash$|^bash$:) { + # if shell == bash + bash_competion(); + } else { + ::error("--shellcompletion is not implemented for ". + "'$Global::shell'."); + wait_and_exit(255); + } + } else { + ::error("--shellcompletion is not implemented for ". + "'$opt::shellcompletion'."); + wait_and_exit(255); + } +} + +sub bash_competion() { + # Print: + # complete -F _comp_parallel parallel; + # _comp_parallel() { + # COMPREPLY=($(compgen -W "--options" -- + # "${COMP_WORDS[$COMP_CWORD]}")); + # }; + my @bash_completion = + ("complete -F _comp_parallel parallel;", + '_comp_parallel() { COMPREPLY=($(compgen -W "'); + my @och = options_completion_hash(); + while(@och) { + $_ = shift @och; + # Split input like: + # "joblog|jl=s[Logfile for executed jobs]:logfile:_files" + if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) { + my $opt = $1; + my $desc = $2; + my $argdesc = $3; + my $func = $4; + # opt=s => opt + $opt =~ s/[:=].$//; + if($opt =~ /^_/) { + # internal options start with --_ + # skip + } else { + push @bash_completion, + (map { (length $_ == 1) ? 
"-$_ " : "--$_ " } + split /\|/, $opt); + } + } + shift @och; + } + push @bash_completion,'" -- "${COMP_WORDS[$COMP_CWORD]}")); };'."\n"; + print @bash_completion; +} + +sub zsh_competion() { + # Print code used for completion in zsh + my @zsh_completion = + ("compdef _comp_parallel parallel; ", + "setopt localoptions extended_glob; ", + "_comp_parallel() { ", + "_arguments "); + my @och = options_completion_hash(); + while(@och) { + $_ = shift @och; + # Split input like: + # "joblog|jl=s[Logfile for executed jobs]:logfile:_files" + if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) { + my $opt = $1; + my $desc = $2; + my $argdesc = $3; + my $func = $4; + # opt=s => opt + $opt =~ s/[:=].$//; + if($opt =~ /^_/) { + # internal options start with --_ + # skip + } else { + # {-o,--option} + my $zsh_opt = join(",", + (map { (length $_ == 1) ? "-$_" : "--$_" } + split /\|/, $opt)); + if($zsh_opt =~ /,/) { $zsh_opt = "{$zsh_opt}"; } + $desc =~ s/'/'"'"'/g; + $argdesc =~ s/'/'"'"'/g; + $func =~ s/'/'"'"'/g; + push @zsh_completion, $zsh_opt."'".$desc.$argdesc.$func."' "; + } + } + shift @och; + } + push @zsh_completion, + q{'(-)1:command:{_command_names -e}' }, + q{'*::arguments:_normal'}, + "};\n"; + print @zsh_completion; +} + +sub options_hash() { + # Returns: + # %hash = for GetOptions + my %och = options_completion_hash(); + my %oh; + my ($k,$v); + while(($k,$v) = each %och) { + # Remove description + $k =~ s/\[.*//; + $oh{$k} = $v; + } + return %oh; +} + +sub options_completion_hash() { + # Returns: + # %hash = for GetOptions and shell completion + return + ("debug|D=s" => \$opt::D, + "xargs[Insert as many arguments as the command line length permits]" + => \$opt::xargs, + "m[Multiple arguments]" => \$opt::m, + ("X[Insert as many arguments with context as the command line ". + "length permits]" + => \$opt::X), + "v[Verbose]" => \@opt::v, + "sql=s[Use --sql-master instead (obsolete)]:DBURL" => \$opt::retired, + ("sql-master|sqlmaster=s". + "[Submit jobs via SQL server. 
DBURL must point to a table, which ". + "will contain --joblog, the values, and output]:DBURL" + => \$opt::sqlmaster), + ("sql-worker|sqlworker=s". + "[Execute jobs via SQL server. Read the input sources variables ". + "from the table pointed to by DBURL.]:DBURL" + => \$opt::sqlworker), + ("sql-and-worker|sqlandworker=s". + "[--sql-master DBURL --sql-worker DBURL]:DBURL" + => \$opt::sqlandworker), + ("joblog|jl=s[Logfile for executed jobs]:logfile:_files" + => \$opt::joblog), + ("results|result|res=s[Save the output into files]:name:_files" + => \$opt::results), + "resume[Resumes from the last unfinished job]" => \$opt::resume, + ("resume-failed|resumefailed". + "[Retry all failed and resume from the last unfinished job]" + => \$opt::resume_failed), + ("retry-failed|retryfailed[Retry all failed jobs in joblog]" + => \$opt::retry_failed), + "silent[Silent]" => \$opt::silent, + ("keep-order|keeporder|k". + "[Keep sequence of output same as the order of input]" + => \$opt::keeporder), + ("no-keep-order|nokeeporder|nok|no-k". + "[Overrides an earlier --keep-order (e.g. if set in ". + "~/.parallel/config)]" + => \$opt::nokeeporder), + "group[Group output]" => \$opt::group, + "g" => \$opt::retired, + ("ungroup|u". + "[Output is printed as soon as possible and bypasses GNU parallel ". + "internal processing]" + => \$opt::ungroup), + ("latest-line|latestline|ll". + "[Print latest line of each job]" + => \$opt::latestline), + ("line-buffer|line-buffered|linebuffer|linebuffered|lb". + "[Buffer output on line basis]" + => \$opt::linebuffer), + ("tmux". + "[Use tmux for output. Start a tmux session and run each job in a ". + "window in that session. No other output will be produced]" + => \$opt::tmux), + ("tmux-pane|tmuxpane". + "[Use tmux for output but put output into panes in the first ". + "window. Useful if you want to monitor the progress of less than ". 
+ "100 concurrent jobs]" + => \$opt::tmuxpane), + "null|0[Use NUL as delimiter]" => \$opt::null, + "quote|q[Quote command]" => \$opt::quote, + # Replacement strings + ("parens=s[Use parensstring instead of {==}]:parensstring" + => \$opt::parens), + ('rpl=s[Define replacement string]:"tag perl expression"' + => \@opt::rpl), + "plus[Add more replacement strings]" => \$opt::plus, + ("I=s". + "[Use the replacement string replace-str instead of {}]:replace-str" + => \$opt::I), + ("extensionreplace|er=s". + "[Use the replacement string replace-str instead of {.} for input ". + "line without extension]:replace-str" + => \$opt::U), + "U=s" => \$opt::retired, + ("basenamereplace|bnr=s". + "[Use the replacement string replace-str instead of {/} for ". + "basename of input line]:replace-str" + => \$opt::basenamereplace), + ("dirnamereplace|dnr=s". + "[Use the replacement string replace-str instead of {//} for ". + "dirname of input line]:replace-str" + => \$opt::dirnamereplace), + ("basenameextensionreplace|bner=s". + "[Use the replacement string replace-str instead of {/.} for ". + "basename of input line without extension]:replace-str" + => \$opt::basenameextensionreplace), + ("seqreplace=s". + "[Use the replacement string replace-str instead of {#} for job ". + "sequence number]:replace-str" + => \$opt::seqreplace), + ("slotreplace=s". + "[Use the replacement string replace-str instead of {%} for job ". + "slot number]:replace-str" + => \$opt::slotreplace), + ("delay=s". + "[Delay starting next job by duration]:duration" => \$opt::delay), + ("ssh-delay|sshdelay=f". + "[Delay starting next ssh by duration]:duration" + => \$opt::sshdelay), + ("load=s". + "[Only start jobs if load is less than max-load]:max-load" + => \$opt::load), + "noswap[Do not start job is computer is swapping]" => \$opt::noswap, + ("max-line-length-allowed|maxlinelengthallowed". + "[Print maximal command line length]" + => \$opt::max_line_length_allowed), + ("number-of-cpus|numberofcpus". 
+ "[Print the number of physical CPU cores and exit (obsolete)]" + => \$opt::number_of_cpus), + ("number-of-sockets|numberofsockets". + "[Print the number of CPU sockets and exit]" + => \$opt::number_of_sockets), + ("number-of-cores|numberofcores". + "[Print the number of physical CPU cores and exit]" + => \$opt::number_of_cores), + ("number-of-threads|numberofthreads". + "[Print the number of hyperthreaded CPU cores and exit]" + => \$opt::number_of_threads), + ("use-sockets-instead-of-threads|usesocketsinsteadofthreads". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_sockets_instead_of_threads), + ("use-cores-instead-of-threads|usecoresinsteadofthreads". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_cores_instead_of_threads), + ("use-cpus-instead-of-cores|usecpusinsteadofcores". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_cpus_instead_of_cores), + ("shell-quote|shellquote|shell_quote". + "[Does not run the command but quotes it. Useful for making ". + "quoted composed commands for GNU parallel]" + => \@opt::shellquote), + ('nice=i[Run the command at this niceness]:niceness:($(seq -20 19))' + => \$opt::nice), + "tag[Tag lines with arguments]" => \$opt::tag, + ("tag-string|tagstring=s". + "[Tag lines with a string]:str" => \$opt::tagstring), + "ctag[Color tag]:str" => \$opt::ctag, + "ctag-string|ctagstring=s[Colour tagstring]:str" => \$opt::ctagstring, + "color|colour[Colourize output]" => \$opt::color, + ("color-failed|colour-failed|colorfailed|colourfailed|". + "color-fail|colour-fail|colorfail|colourfail|cf". + "[Colour failed jobs red]" + => \$opt::colorfailed), + ("onall[Run all the jobs on all computers given with --sshlogin]" + => \$opt::onall), + "nonall[--onall with no arguments]" => \$opt::nonall, + ("filter-hosts|filterhosts|filter-host[Remove down hosts]" + => \$opt::filter_hosts), + ('sshlogin|S=s'. + '[Distribute jobs to remote computers]'. 
+ ':[@hostgroups/][ncpus/]sshlogin'. + '[,[@hostgroups/][ncpus/]sshlogin[,...]] or @hostgroup'. + ':_users') => \@opt::sshlogin, + ("sshloginfile|slf=s". + "[File with sshlogins on separate lines. Lines starting with '#' ". + "are ignored.]:filename:_files" + => \@opt::sshloginfile), + ("controlmaster|M". + "[Use ssh's ControlMaster to make ssh connections faster]" + => \$opt::controlmaster), + ("ssh=s". + "[Use this command instead of ssh for remote access]:sshcommand" + => \$opt::ssh), + ("transfer-file|transferfile|transfer-files|transferfiles|tf=s". + "[Transfer filename to remote computers]:filename:_files" + => \@opt::transfer_files), + ("return=s[Transfer files from remote computers]:filename:_files" + => \@opt::return), + ("trc=s[--transfer --return filename --cleanup]:filename:_files" + => \@opt::trc), + "transfer[Transfer files to remote computers]" => \$opt::transfer, + "cleanup[Remove transferred files]" => \$opt::cleanup, + ("basefile|bf=s". + "[Transfer file to each sshlogin before first job is started]". + ":file:_files" + => \@opt::basefile), + ("template|tmpl=s". + "[Replace replacement strings in file and save it in repl]". + ":file=repl:_files" + => \%opt::template), + "B=s" => \$opt::retired, + "ctrl-c|ctrlc" => \$opt::retired, + "no-ctrl-c|no-ctrlc|noctrlc" => \$opt::retired, + ("work-dir|workdir|wd=s". + "[Jobs will be run in the dir mydir. (default: the current dir ". + "for the local machine, the login dir for remote computers)]". + ":mydir:_cd" + => \$opt::workdir), + "W=s" => \$opt::retired, + ("rsync-opts|rsyncopts=s[Options to pass on to rsync]:options" + => \$opt::rsync_opts), + ("tmpdir|tempdir=s[Directory for temporary files]:dirname:_cd" + => \$opt::tmpdir), + ("use-compress-program|compress-program|". + "usecompressprogram|compressprogram=s". + "[Use prg for compressing temporary files]:prg:_commands" + => \$opt::compress_program), + ("use-decompress-program|decompress-program|". + "usedecompressprogram|decompressprogram=s". 
+ "[Use prg for decompressing temporary files]:prg:_commands" + => \$opt::decompress_program), + "compress[Compress temporary files]" => \$opt::compress, + "open-tty|o[Open terminal tty]" => \$opt::open_tty, + "tty[Open terminal tty]" => \$opt::tty, + "T" => \$opt::retired, + "H=i" => \$opt::retired, + ("dry-run|dryrun|dr". + "[Print the job to run on stdout (standard output), but do not ". + "run the job]" + => \$opt::dryrun), + "progress[Show progress of computations]" => \$opt::progress, + ("eta[Show the estimated number of seconds before finishing]" + => \$opt::eta), + "bar[Show progress as a progress bar]" => \$opt::bar, + ("total-jobs|totaljobs|total=s". + "[Set total number of jobs]" => \$opt::totaljobs), + "shuf[Shuffle jobs]" => \$opt::shuf, + ("arg-sep|argsep=s". + "[Use sep-str instead of ::: as separator string]:sep-str" + => \$opt::arg_sep), + ("arg-file-sep|argfilesep=s". + "[Use sep-str instead of :::: as separator string ". + "between command and argument files]:sep-str" + => \$opt::arg_file_sep), + ('trim=s[Trim white space in input]:trim_method:'. + '((n\:"No trim" l\:"Left\ trim" r\:"Right trim" '. + 'lr\:"Both trim" rl\:"Both trim"))' + => \$opt::trim), + "env=s[Copy environment variable var]:var:_vars" => \@opt::env, + "recordenv|record-env[Record environment]" => \$opt::record_env, + ('session'. + '[Record names in current environment in $PARALLEL_IGNORED_NAMES '. + 'and exit. Only used with env_parallel. '. + 'Aliases, functions, and variables with names i]' + => \$opt::session), + ('plain[Ignore --profile, $PARALLEL, and ~/.parallel/config]' + => \$opt::plain), + ("profile|J=s". 
+ "[Use profile profilename for options]:profilename:_files" + => \@opt::profile), + "tollef" => \$opt::tollef, + "gnu[Behave like GNU parallel]" => \$opt::gnu, + "link|xapply[Link input sources]" => \$opt::link, + "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource, + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall-of-shame by removing + # these lines + ("bibtex|citation". + "[Print the citation notice and BibTeX entry for GNU parallel, ". + "silence citation notice for all future runs, and exit. ". + "It will not run any commands]" + => \$opt::citation), + "will-cite|willcite|nn|nonotice|no-notice" => \$opt::willcite, + # Termination and retries + ('halt-on-error|haltonerror|halt=s'. + '[When should GNU parallel terminate]'. + ':when:((now\:"kill all running jobs and halt immediately" '. + 'soon\:"wait for all running jobs to complete, start no new jobs"))' + => \$opt::halt), + 'limit=s[Dynamic job limit]:"command args"' => \$opt::limit, + ("memfree=s". + "[Minimum memory free when starting another job]:size" + => \$opt::memfree), + ("memsuspend=s". + "[Suspend jobs when there is less memory available]:size" + => \$opt::memsuspend), + "retries=s[Try failing jobs n times]:n" => \$opt::retries, + ("timeout=s". + "[Time out for command. If the command runs for longer than ". + "duration seconds it will get killed as per --term-seq]:duration" + => \$opt::timeout), + ("term-seq|termseq=s". + "[Termination sequence]:sequence" => \$opt::termseq), + # xargs-compatibility - implemented, man, testsuite + ("max-procs|maxprocs|P|jobs|j=s". + "[Add N to/Subtract N from/Multiply N% with/ the number of CPU ". 
+ "threads or read parameter from file]:+N/-N/N%/N/procfile:_files" + => \$opt::jobs), + ("delimiter|d=s[Input items are terminated by delim]:delim" + => \$opt::d), + ("max-chars|maxchars|s=s[Limit length of command]:max-chars" + => \$opt::max_chars), + ("arg-file|argfile|a=s". + "[Use input-file as input source]:input-file:_files" => \@opt::a), + "no-run-if-empty|norunifempty|r[Do not run empty input]" => \$opt::r, + ("replace|i:s". + "[This option is deprecated; use -I instead]:replace-str" + => \$opt::i), + "E=s" => \$opt::eof, + ("eof|e:s[Set the end of file string to eof-str]:eof-str" + => \$opt::eof), + ("process-slot-var|processslotvar=s". + "[Set this variable to job slot number]:varname" + => \$opt::process_slot_var), + ("max-args|maxargs|n=s". + "[Use at most max-args arguments per command line]:max-args" + => \$opt::max_args), + ("max-replace-args|maxreplaceargs|N=s". + "[Use at most max-args arguments per command line]:max-args" + => \$opt::max_replace_args), + "col-sep|colsep|C=s[Column separator]:regexp" => \$opt::colsep, + "match=s[Matching regexp]:regexp" => \@opt::match, + "csv[Treat input as CSV-format]"=> \$opt::csv, + ("help|h[Print a summary of the options to GNU parallel and exit]" + => \$opt::help), + ("L=s[When used with --pipe: Read records of recsize]:recsize" + => \$opt::L), + ("max-lines|maxlines|l:f". + "[When used with --pipe: Read records of recsize lines]:recsize" + => \$opt::max_lines), + "interactive|p[Ask user before running a job]" => \$opt::interactive, + ("verbose|t[Print the job to be run on stderr (standard error)]" + => \$opt::verbose), + ("version|V[Print the version GNU parallel and exit]" + => \$opt::version), + ('min-version|minversion=i'. + '[Print the version GNU parallel and exit]'. + ':version:($(parallel --minversion 0))' + => \$opt::minversion), + ("show-limits|showlimits". 
+ "[Display limits given by the operating system]" + => \$opt::show_limits), + ("exit|x[Exit if the size (see the -s option) is exceeded]" + => \$opt::x), + # Semaphore + "semaphore[Work as a counting semaphore]" => \$opt::semaphore, + ("semaphore-timeout|semaphoretimeout|st=s". + "[If secs > 0: If the semaphore is not released within secs ". + "seconds, take it anyway]:secs" + => \$opt::semaphoretimeout), + ("semaphore-name|semaphorename|id=s". + "[Use name as the name of the semaphore]:name" + => \$opt::semaphorename), + "fg[Run command in foreground]" => \$opt::fg, + "bg[Run command in background]" => \$opt::bg, + "wait[Wait for all commands to complete]" => \$opt::wait, + # Shebang #!/usr/bin/parallel --shebang + ("shebang|hashbang". + "[GNU parallel can be called as a shebang (#!) command as the ". + "first line of a script. The content of the file will be treated ". + "as inputsource]" + => \$opt::shebang), + ("_pipe-means-argfiles[Internal: Called by --shebang-wrap]" + => \$opt::_pipe_means_argfiles), + "Y" => \$opt::retired, + ("skip-first-line|skipfirstline". + "[Do not use the first line of input]" + => \$opt::skip_first_line), + "_bug" => \$opt::_bug, + "_unsafe" => \$opt::_unsafe, + # --pipe + ("pipe|spreadstdin". + "[Spread input to jobs on stdin (standard input)]" => \$opt::pipe), + ("round-robin|roundrobin|round". + "[Distribute chunks of standard input in a round robin fashion]" + => \$opt::roundrobin), + "recstart=s" => \$opt::recstart, + ("recend=s". + "[Split record between endstring and startstring]:endstring" + => \$opt::recend), + ("regexp|regex". + "[Interpret --recstart and --recend as regular expressions]" + => \$opt::regexp), + ("remove-rec-sep|removerecsep|rrs". + "[Remove record separator]" => \$opt::remove_rec_sep), + ("output-as-files|outputasfiles|files[Save output to files]" + => \$opt::files), + ("output-as-files0|outputasfiles0|files0". 
+ "[Save output to files separated by NUL]" + => \$opt::files0), + ("block-size|blocksize|block=s". + "[Size of block in bytes to read at a time]:size" + => \$opt::blocksize), + ("block-timeout|blocktimeout|bt=s". + "[Timeout for reading block when using --pipe]:duration" + => \$opt::blocktimeout), + "header=s[Use regexp as header]:regexp" => \$opt::header, + "cat[Create a temporary file with content]" => \$opt::cat, + "fifo[Create a temporary fifo with content]" => \$opt::fifo, + ("pipe-part|pipepart[Pipe parts of a physical file]" + => \$opt::pipepart), + "tee[Pipe all data to all jobs]" => \$opt::tee, + ("shard=s". + "[Use shardexpr as shard key and shard input to the jobs]:shardexpr" + => \$opt::shard), + ("bin=s". + "[Use binexpr as binning key and bin input to the jobs]:binexpr" + => \$opt::bin), + "group-by|groupby=s[Group input by value]:val" => \$opt::groupby, + # + ("hgrp|hostgrp|hostgroup|hostgroups[Enable hostgroups on arguments]" + => \$opt::hostgroups), + "embed[Embed GNU parallel in a shell script]" => \$opt::embed, + ("filter=s[Only run jobs where filter is true]:filter" + => \@opt::filter), + "combineexec|combine-exec|combineexecutable|combine-executable=s". + "[Embed GNU parallel in a shell script]" => \$opt::combineexec, + ("filter=s[Only run jobs where filter is true]:filter" + => \@opt::filter), + "fast[Run commands fast]" => \$opt::fast, + "_parset=s[Internal: Generate shell code for parset]" => \$opt::_parset, + ("shell-completion|shellcompletion=s". + "[Generate shell code for shell completion]:shell:(bash zsh)" + => \$opt::shellcompletion), + # Parameter for testing optimal values + "_test=s" => \$opt::_test, + ); +} + +sub get_options_from_array($@) { + # Run GetOptions on @array + # Input: + # $array_ref = ref to @ARGV to parse + # @keep_only = Keep only these options (e.g. 
--profile) + # Uses: + # @ARGV + # Returns: + # true if parsing worked + # false if parsing failed + # @$array_ref is changed + my ($array_ref, @keep_only) = @_; + if(not @$array_ref) { + # Empty array: No need to look more at that + return 1; + } + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + # If @keep_only set: Ignore all values except @keep_only + my %options = options_hash(); + if(@keep_only) { + my (%keep,@dummy); + @keep{@keep_only} = @keep_only; + for my $k (grep { not $keep{$_} } keys %options) { + # Store the value of the option in @dummy + $options{$k} = \@dummy; + } + } + my $retval = GetOptions(%options); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return $retval; +} + +sub parse_parset() { + $Global::progname = "parset"; + @Global::parset_vars = split /[ ,]/, $opt::_parset; + my $var_or_assoc = shift @Global::parset_vars; + # Legal names: var _v2ar arrayentry[2] + my @illegal = (grep { not /^[a-zA-Z_][a-zA-Z_0-9]*(\[\d+\])?$/ } + @Global::parset_vars); + if(@illegal) { + ::error + ("@illegal is an invalid variable name.", + "Variable names must be letter followed by letters or digits.", + "Usage:", + " parset varname GNU Parallel options and command"); + wait_and_exit(255); + } + if($var_or_assoc eq "assoc") { + my $var = shift @Global::parset_vars; + print "$var=("; + $Global::parset = "assoc"; + $Global::parset_endstring=")\n"; + } elsif($var_or_assoc eq "var") { + if($#Global::parset_vars > 0) { + $Global::parset = "var"; + } else { + my $var = shift @Global::parset_vars; + print "$var=("; + $Global::parset = "array"; + $Global::parset_endstring=")\n"; + } + } else { + ::die_bug("parset: unknown '$opt::_parset'"); + } +} + +sub parse_options(@) { + # Returns: N/A + init_globals(); + my @argv_before = @ARGV; + @ARGV = 
read_options(); + # Before changing these line, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be added to a public hall-of-shame by removing the lines + if(defined $opt::citation) { + citation(\@argv_before,\@ARGV); + wait_and_exit(0); + } + # no-* overrides * + if($opt::nokeeporder) { $opt::keeporder = undef; } + + if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 + if($opt::_bug) { ::die_bug("test-bug"); } + $Global::debug = $opt::D; + # + ## Shell + # + if($opt::fast) { + $Global::shell = $ENV{'PARALLEL_SHELL'} || which("dash") || "/bin/sh"; + } else { + $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) + || $ENV{'SHELL'} || "/bin/sh"; + } + if(not -x $Global::shell and not which($Global::shell)) { + ::error("Shell '$Global::shell' not found."); + wait_and_exit(255); + } + ::debug("init","Global::shell $Global::shell\n"); + $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:; + $Global::fish = $Global::shell =~ m:(/[-a-z]*)?fish:; + if(defined $opt::_parset) { parse_parset(); } + if(defined $opt::X) { $Global::ContextReplace = 1; } + if(defined $opt::silent) { $Global::verbose = 0; } + if(defined $opt::null) { $/ = "\0"; } + if(defined $opt::files) { $Global::files = 1; $Global::files_sep = "\n"; } + if(defined $opt::files0) { $Global::files = 1; $Global::files_sep = "\0"; } + if(defined $opt::d) { $/ = unquote_printf($opt::d) } + parse_replacement_string_options(); + $opt::tag ||= $opt::ctag; + $opt::tagstring ||= $opt::ctagstring; + if(defined $opt::ctag or defined $opt::ctagstring + or defined $opt::color) { + $Global::color = 1; + } + if($opt::linebuffer or $opt::latestline) { + $Global::linebuffer = 1; + Job::latestline_init(); + } + if(defined $opt::tag and not defined $opt::tagstring) { + # Default = {} + $opt::tagstring = $Global::parensleft.$Global::parensright; + } + 
if(defined $opt::tagstring) { + $opt::tagstring = unquote_printf($opt::tagstring); + if($opt::tagstring =~ + /\Q$Global::parensleft\E.*\S+.*\Q$Global::parensright\E/ + and + $Global::linebuffer) { + # --tagstring contains {= ... =} and --linebuffer => + # recompute replacement string for each use (do not cache) + $Global::cache_replacement_eval = 0; + } + } + if(defined $opt::interactive) { $Global::interactive = $opt::interactive; } + if(defined $opt::quote) { $Global::quoting = 1; } + if(defined $opt::r) { $Global::ignore_empty = 1; } + if(defined $opt::verbose) { $Global::stderr_verbose = 1; } + if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } + if(defined $opt::max_args) { + $opt::max_args = multiply_binary_prefix($opt::max_args); + $Global::max_number_of_args = $opt::max_args; + if($opt::pipepart and $opt::groupby) { $Global::max_number_of_args = 1; } + } + if(defined $opt::blocktimeout) { + $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout)); + if($Global::blocktimeout < 1) { + ::error("--block-timeout must be at least 1"); + wait_and_exit(255); + } + } + if(defined $opt::timeout) { + $Global::timeoutq = TimeoutQueue->new($opt::timeout); + } + if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } + $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts || + $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR'; + # Default: Same nice level as GNU Parallel is started at + $opt::nice ||= eval { getpriority(0,0) } || 0; + if(defined $opt::help) { usage(); exit(0); } + if(defined $opt::shellcompletion) { shell_completion(); exit(0); } + if(defined $opt::embed) { embed(); exit(0); } + if(defined $opt::sqlandworker) { + $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker; + } + if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; } + if(defined $opt::colsep) { $Global::trim = 'lr'; } + if(defined $opt::csv) { + if(not $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;") { + ::error("The perl module Text::CSV is not installed."); + 
::error("Try installing libtext-csv-perl or perl-Text-CSV."); + wait_and_exit(255); + } + $opt::colsep = defined $opt::colsep ? $opt::colsep : ","; + my $csv_setting = { binary => 1, sep_char => $opt::colsep }; + my $sep = $csv_setting->{sep_char}; + $Global::csv = Text::CSV->new($csv_setting) + or die "Cannot use CSV: ".Text::CSV->error_diag (); + } + if(defined $opt::header) { + $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; + } + if(defined $opt::trim) { $Global::trim = $opt::trim; } + if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } + if(defined $opt::arg_file_sep) { + $Global::arg_file_sep = $opt::arg_file_sep; + } + if(not defined $opt::process_slot_var) { + $opt::process_slot_var = 'PARALLEL_JOBSLOT0'; + } + if(defined $opt::number_of_sockets) { + print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_cpus) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_cores) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_threads) { + print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_line_length_allowed) { + print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_chars) { + $opt::max_chars = multiply_binary_prefix($opt::max_chars); + } + if(defined $opt::version) { version(); wait_and_exit(0); } + if(defined $opt::record_env) { record_env(); wait_and_exit(0); } + if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } + if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } + if(@opt::return) { push @Global::ret_files, @opt::return; } + if($opt::transfer) { + push @Global::transfer_files, $opt::i || $opt::I || "{}"; + } + push @Global::transfer_files, @opt::transfer_files; + if(%opt::template) { + while (my ($source, $template_name) = each %opt::template) { + push @Global::template_names, $template_name; + push @Global::template_contents, 
slurp_or_exit($source); + } + } + if(not defined $opt::recstart and + not defined $opt::recend) { $opt::recend = "\n"; } + $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M"); + if($Global::blocksize > 2**31-1 and not $opt::pipepart) { + warning("--blocksize >= 2G causes problems. Using 2G-1."); + $Global::blocksize = 2**31-1; + } + if($^O eq "cygwin" and + ($opt::pipe or $opt::pipepart or $opt::roundrobin) + and $Global::blocksize > 65535) { + warning("--blocksize >= 64K causes problems on Cygwin."); + } + $opt::memfree = multiply_binary_prefix($opt::memfree); + $opt::memsuspend = multiply_binary_prefix($opt::memsuspend); + $Global::memlimit = $opt::memsuspend + $opt::memfree; + check_invalid_option_combinations(); + if((defined $opt::fifo or defined $opt::cat) and not $opt::pipepart) { + $opt::pipe = 1; + } + if(defined $opt::minversion) { + print $Global::version,"\n"; + if($Global::version < $opt::minversion) { + wait_and_exit(255); + } else { + wait_and_exit(0); + } + } + if(not defined $opt::delay) { + # Set --delay to --sshdelay if not set + $opt::delay = $opt::sshdelay; + } + $Global::sshdelayauto = $opt::sshdelay =~ s/auto$//; + $opt::sshdelay = multiply_time_units($opt::sshdelay); + $Global::delayauto = $opt::delay =~ s/auto$//; + $opt::delay = multiply_time_units($opt::delay); + if($opt::compress_program) { + $opt::compress = 1; + $opt::decompress_program ||= $opt::compress_program." -dc"; + } + + if(defined $opt::results) { + # Is the output a dir or CSV-file? 
+ if($opt::results =~ /\.csv$/i) { + # CSV with , as separator + $Global::csvsep = ","; + $Global::membuffer ||= 1; + } elsif($opt::results =~ /\.tsv$/i) { + # CSV with TAB as separator + $Global::csvsep = "\t"; + $Global::membuffer ||= 1; + } elsif($opt::results =~ /\.json$/i) { + # JSON output + $Global::jsonout ||= 1; + $Global::membuffer ||= 1; + } + } + if($opt::compress) { + my ($compress, $decompress) = find_compression_program(); + $opt::compress_program ||= $compress; + $opt::decompress_program ||= $decompress; + if(($opt::results and not $Global::csvsep) or $Global::files) { + # No need for decompressing + $opt::decompress_program = "cat >/dev/null"; + } + } + if(defined $opt::dryrun) { + # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks + $opt::ungroup = 0; + $opt::group = 1; + } + if(defined $opt::nonall) { + # Append a dummy empty argument if there are no arguments + # on the command line to avoid reading from STDIN. + # arg_sep = random 50 char + # \0noarg => nothing (not the empty string) + $Global::arg_sep = join "", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50); + push @ARGV, $Global::arg_sep, "\0noarg"; + } + if(defined $opt::tee) { + if(not defined $opt::jobs) { + $opt::jobs = 0; + } + } + if(defined $opt::tty) { + # Defaults for --tty: -j1 -u + # Can be overridden with -jXXX -g + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if(not defined $opt::group) { + $opt::ungroup = 1; + } + } + if(@opt::trc) { + push @Global::ret_files, @opt::trc; + if(not @Global::transfer_files) { + # Defaults to --transferfile {} + push @Global::transfer_files, $opt::i || $opt::I || "{}"; + } + $opt::cleanup = 1; + } + if(defined $opt::max_lines) { + if($opt::max_lines eq "-0") { + # -l -0 (swallowed -0) + $opt::max_lines = 1; + $opt::null = 1; + $/ = "\0"; + } else { + $opt::max_lines = multiply_binary_prefix($opt::max_lines); + if ($opt::max_lines == 0) { + # If not given (or if 0 is given) => 1 + $opt::max_lines = 1; + } + } + + 
$Global::max_lines = $opt::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + + # Read more than one arg at a time (-L, -N) + if(defined $opt::L) { + $opt::L = multiply_binary_prefix($opt::L); + $Global::max_lines = $opt::L; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + if(defined $opt::max_replace_args) { + $opt::max_replace_args = + multiply_binary_prefix($opt::max_replace_args); + $Global::max_number_of_args = $opt::max_replace_args; + $Global::ContextReplace = 1; + } + if((defined $opt::L or defined $opt::max_replace_args) + and + not ($opt::xargs or $opt::m)) { + $Global::ContextReplace = 1; + } + # Deal with ::: :::+ :::: ::::+ and -a +file + my @ARGV_with_argsep = @ARGV; + @ARGV = read_args_from_command_line(); + if(defined $opt::combineexec) { + pack_combined_executable(\@argv_before,\@ARGV_with_argsep,\@ARGV); + exit(0); + } + parse_semaphore(); + + if(defined $opt::eta) { $opt::progress = $opt::eta; } + if(defined $opt::bar) { $opt::progress = $opt::bar; } + if(defined $opt::bar or defined $opt::latestline) { + my $fh = $Global::status_fd || *STDERR; + # Activate decode_utf8 + eval q{ + # Enable utf8 if possible + use utf8; + binmode $fh, "encoding(utf8)"; + *decode_utf8 = \&Encode::decode_utf8; + }; + if(eval { decode_utf8("x") }) { + # Great: decode works + } else { + # UTF8-decode not supported: Dummy decode + eval q{sub decode_utf8($;$) { $_[0]; }}; + } + # Activate decode_utf8 + eval q{ + # Enable utf8 if possible + use utf8; + use Encode qw( encode_utf8 ); + use Text::CharWidth qw( mbswidth ); + use Unicode::Normalize qw( NFC NFD ); + }; + if(eval { mbswidth("ヌー平行") }) { + # Great: mbswidth works + } else { + # mbswidth not supported: Dummy mbswidth + eval q{ sub mbswidth { return length @_; } }; + } + } + + # If you want GNU Parallel to be 
maintained in the future you + # should keep this. + # _YOU_ will be harming free software by removing the notice. + # + # Funding a free software project is hard. GNU Parallel is no + # exception. On top of that it seems the less visible a project + # is, the harder it is to get funding. And the nature of GNU + # Parallel is that it will never be seen by "the guy with the + # checkbook", but only by the people doing the actual work. + # + # This problem has been covered by others - though no solution has + # been found: + # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer + # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/ + # + # The FAQ tells you why the citation notice exists: + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # + # If you want GNU Parallel to be maintained in the future, and not + # just wither away like so many other free software tools, you + # need to help finance the development. + # + # The citation notice is a simple way of doing so, as citations + # makes it possible to me to get a job where I can maintain GNU + # Parallel as part of the job. + # + # This means you can help financing development + # + # WITHOUT PAYING A SINGLE CENT! + # + # Before implementing the citation notice it was discussed with + # the users: + # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html + # + # Having to spend 10 seconds on running 'parallel --citation' once + # is no doubt not an ideal solution, but no one has so far come up + # with an ideal solution - neither for funding GNU Parallel nor + # other free software. + # + # If you believe you have the perfect solution, you should try it + # out, and if it works, you should post it on the email + # list. Ideas that will cost work and which have not been tested + # are, however, unlikely to be prioritized. + # + # _YOU_ will be harming free software by removing the notice. 
You + # accept to be added to a public hall of shame by removing the + # line. That includes you, George and Andreas. + # + # Please note that GPL version 3 gives you the right to fork GNU + # Parallel under a new name, but it does not give you the right to + # distribute modified copies with the citation notice disabled in + # a way where the software can be confused with GNU Parallel. To + # do that you need to be the owner of the GNU Parallel + # trademark. The xt:Commerce case shows this. + # + # Description of the xt:Commerce case in OLG Duesseldorf + # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx + # + # The verdict in German + # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09 + # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09 + # + # Other free software limiting derivates by the same name: + # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects + # https://tm.joomla.org/trademark-faq.html + # https://www.mozilla.org/en-US/foundation/trademarks/faq/ + # + # Running 'parallel --citation' one single time takes less than 10 + # seconds, and will silence the citation notice for future + # runs. If that is too much trouble for you, why not use one of + # the alternatives instead? + # See a list in: 'man parallel_alternatives' + # + # If you want GNU Parallel to be maintained in the future, you + # should keep this line: + citation_notice(); + # This is because _YOU_ actively make it harder to justify + # spending time developing GNU Parallel by removing it. 
+ + # If you disagree, please read (especially 77-): + # https://www.fordfoundation.org/media/2976/roads-and-bridges-the-unseen-labor-behind-our-digital-infrastructure.pdf + parse_halt(); + + if($ENV{'PARALLEL_ENV'}) { + # Read environment and set $Global::parallel_env + # Must be done before is_acceptable_command_line_length() + my $penv = $ENV{'PARALLEL_ENV'}; + # unset $PARALLEL_ENV: It should not be given to children + # because it takes up a lot of env space + delete $ENV{'PARALLEL_ENV'}; + if(-e $penv) { + # This is a file/fifo: Replace envvar with content of file + $penv = slurp_or_exit($penv); + } + # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV + $penv =~ s/\001/\n/g; + if($penv =~ /\0/) { + ::warning('\0 (NUL) in environment is not supported'); + } + $Global::parallel_env = $penv; + } + + parse_sshlogin(); + if(defined $opt::show_limits) { show_limits(); } + + if(remote_hosts() and + (defined $opt::X or defined $opt::m or defined $opt::xargs)) { + # As we do not know the max line length on the remote machine + # long commands generated by xargs may fail + # If $opt::max_replace_args is set, it is probably safe + ::warning("Using -X or -m with --sshlogin may fail."); + } + + if(not defined $opt::jobs) { $opt::jobs = "100%"; } + open_joblog(); + open_json_csv(); + if(defined $opt::sqlmaster or defined $opt::sqlworker) { + $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker); + } + if(defined $opt::sqlworker) { $Global::membuffer ||= 1; } + # The sqlmaster groups the arguments, so the should just read one + if(defined $opt::sqlworker and not defined $opt::sqlmaster) { + $Global::max_number_of_args = 1; + } + if(defined $Global::color or defined $opt::colorfailed) { + Job::init_color(); + } +} + +sub check_invalid_option_combinations() { + if(defined $opt::timeout and + $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) { + ::error("--timeout must be seconds or percentage."); + wait_and_exit(255); + } + if(defined $opt::fifo 
and defined $opt::cat) { + ::error("--fifo cannot be combined with --cat."); + ::wait_and_exit(255); + } + if(defined $opt::retries and defined $opt::roundrobin) { + ::error("--retries cannot be combined with --roundrobin."); + ::wait_and_exit(255); + } + if(defined $opt::pipepart and + (defined $opt::L or defined $opt::max_lines + or defined $opt::max_replace_args)) { + ::error("--pipepart is incompatible with --max-replace-args, ". + "--max-lines, and -L."); + wait_and_exit(255); + } + if(defined $opt::group and defined $opt::ungroup) { + ::error("--group cannot be combined with --ungroup."); + ::wait_and_exit(255); + } + if(defined $opt::group and defined $opt::linebuffer) { + ::error("--group cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::ungroup and defined $opt::linebuffer) { + ::error("--ungroup cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::tollef and not defined $opt::gnu) { + ::error("--tollef has been retired.", + "Remove --tollef or use --gnu to override --tollef."); + ::wait_and_exit(255); + } + if(defined $opt::retired) { + ::error("-g has been retired. Use --group.", + "-B has been retired. Use --bf.", + "-T has been retired. Use --tty.", + "-U has been retired. Use --er.", + "-W has been retired. Use --wd.", + "-Y has been retired. Use --shebang.", + "-H has been retired. Use --halt.", + "--sql has been retired. 
Use --sqlmaster.", + "--ctrlc has been retired.", + "--noctrlc has been retired."); + ::wait_and_exit(255); + } + if(defined $opt::groupby) { + if(not defined $opt::pipe and not defined $opt::pipepart) { + $opt::pipe = 1; + } + if(defined $opt::remove_rec_sep) { + ::error("--remove-rec-sep is not compatible with --groupby"); + ::wait_and_exit(255); + } + if(defined $opt::recstart) { + ::error("--recstart is not compatible with --groupby"); + ::wait_and_exit(255); + } + if($opt::recend ne "\n") { + ::error("--recend is not compatible with --groupby"); + ::wait_and_exit(255); + } + } + sub unsafe_warn { + # use --_unsafe to only generate a warning + if($opt::_unsafe) { ::warning(@_); } else { ::error(@_); exit(255); } + } + if(defined $opt::results) { + if($opt::nonall or $opt::onall) { + unsafe_warn("--(n)onall + --results not supported (yet)."); + } + } + sub test_safe_chars { + my $var = shift; + if($ENV{$var} =~ m{^[-a-z0-9_+,.%:/= ]*$}is) { + # OK + } else { + unsafe_warn("\$$var can only contain [-a-z0-9_+,.%:/= ]."); + } + } + if($ENV{'TMPDIR'} =~ /\n/) { + if(defined $opt::files) { + ::warning("Use --files0 when \$TMPDIR contains newline."); + } elsif($Global::cshell + and + (defined $opt::cat or defined $opt::fifo)) { + ::warning("--cat/--fifo fails under csh ". 
+ "if \$TMPDIR contains newline."); + } + } elsif($ENV{'TMPDIR'} =~ /\177/) { + unsafe_warn("\$TMPDIR with \\177 (\177) is not supported."); + } else{ + test_safe_chars('TMPDIR'); + } + map { test_safe_chars($_); } qw(PARALLEL_HOME XDG_CONFIG_DIRS + PARALLEL_REMOTE_TMPDIR XDG_CACHE_HOME); +} + +sub init_globals() { + # Defaults: + $Global::version = 20241222; + $Global::progname = 'parallel'; + $::name = "GNU Parallel"; + $Global::infinity = 2**31; + $Global::debug = 0; + $Global::verbose = 0; + # Don't quote every part of the command line + $Global::quoting = 0; + # Quote replacement strings + $Global::quote_replace = 1; + $Global::total_completed = 0; + $Global::cache_replacement_eval = 1; + # Read only table with default --rpl values + %Global::replace = + ( + '{}' => '', + '{#}' => '1 $_=$job->seq()', + '{%}' => '1 $_=$job->slot()', + '{/}' => 's:.*/::', + '{//}' => + ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '. + '$_ = dirname($_);'), + '{/.}' => 's:.*/::; s:\.[^/.]*$::;', + '{.}' => 's:\.[^/.]*$::', + ); + %Global::plus = + ( + # {} = {+/}/{/} + # = {.}.{+.} = {+/}/{/.}.{+.} + # = {..}.{+..} = {+/}/{/..}.{+..} + # = {...}.{+...} = {+/}/{/...}.{+...} + '{+/}' => 's:/[^/]*$:: || s:.*$::', + # a.b => b; a => '' + '{+.}' => 's:.*\.:: || s:.*$::', + # a.b.c => b.c; a.b => ''; a => '' + '{+..}' => 's:.*\.([^/.]*\.[^/.]*)$:$1: || s:.*$::', + '{+...}' => 's:.*\.([^/.]*\.[^/.]*\.[^/.]*)$:$1: || s:.*$::', + '{..}' => 's:\.[^/.]*\.[^/.]*$::', + '{...}' => 's:\.[^/.]*\.[^/.]*\.[^/.]*$::', + '{/..}' => 's:.*/::; s:\.[^/.]*\.[^/.]*$::', + '{/...}' => 's:.*/::; s:\.[^/.]*\.[^/.]*\.[^/.]*$::', + # n choose k = Binomial coefficient + '{choose_k}' => ('for $t (2..$#arg)'. 
+ '{ if($arg[$t-1] ge $arg[$t]) { skip() } }'), + # unique values: Skip job if any args are the same + '{uniq}' => 'if(::uniq(@arg) != @arg) { skip(); }', + # {##} = number of jobs + '{##}' => '1 $_=total_jobs()', + # {0#} = 0-padded seq + '{0#}' => ('1 $f=1+int((log(total_jobs())/log(10)));'. + '$_=sprintf("%0${f}d",seq())'), + # {0%} = 0-padded jobslot + '{0%}' => ('1 $f=1+int((log($Global::max_jobs_running||1)/log(10)));'. + '$_=sprintf("%0${f}d",slot())'), + # {seq-1} = seq-1 = counting from 0 + '{seq(.*?)}' => '$_=eval q{$job->seq()}.qq{$$1}', + # {seq-1} = jobslot-1 = counting from 0 + '{slot(.*?)}' => '$_=eval q{$job->slot()}.qq{$$1}', + + ## Bash inspired replacement strings + # Bash ${a:-myval} + '{:-([^}]+?)}' => '$_ ||= $$1', + # Bash ${a:2} + '{:(\d+?)}' => 'substr($_,0,$$1) = ""', + # Bash ${a:2:3} + '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);', + # echo {#z.*z.} ::: z.z.z.foo => z.foo + # echo {##z.*z.} ::: z.z.z.foo => foo + # Bash ${a#bc} + '{#([^#}][^}]*?)}' => + '$nongreedy=::make_regexp_ungreedy($$1);s/^$nongreedy(.*)/$1/;', + # Bash ${a##bc} + '{##([^#}][^}]*?)}' => 's/^$$1//;', + # echo {%.z.*z} ::: foo.z.z.z => foo.z + # echo {%%.z.*z} ::: foo.z.z.z => foo + # Bash ${a%def} + '{%([^}]+?)}' => + '$nongreedy=::make_regexp_ungreedy($$1);s/(.*)$nongreedy$/$1/;', + # Bash ${a%%def} + '{%%([^}]+?)}' => 's/$$1$//;', + # Bash ${a/def/ghi} ${a/def/} + '{/([^#%}/]+?)/([^}]*?)}' => 's/$$1/$$2/;', + # Bash ${a/#def/ghi} ${a/#def/} + '{/#([^}]+?)/([^}]*?)}' => 's/^$$1/$$2/g;', + # Bash ${a/%def/ghi} ${a/%def/} + '{/%([^}]+?)/([^}]*?)}' => 's/$$1$/$$2/g;', + # Bash ${a//def/ghi} ${a//def/} + '{//([^}]+?)/([^}]*?)}' => 's/$$1/$$2/g;', + # Bash ${a^a} + '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;', + # Bash ${a^^a} + '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;', + # Bash ${a,A} + '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;', + # Bash ${a,,A} + '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;', + + # {slot} = $PARALLEL_JOBSLOT + '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()', + # {host} 
= ssh host + '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()', + # {sshlogin} = sshlogin + '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()', + # {hgrp} = hostgroups of the host + '{hgrp}' => '1 $_="\${PARALLEL_HOSTGROUPS}";uq()', + # {agrp} = hostgroups of the argument + '{agrp}' => '1 $_="\${PARALLEL_ARGHOSTGROUPS}";uq()', + ); + # Modifiable copy of %Global::replace + %Global::rpl = %Global::replace; + $/ = "\n"; + $Global::ignore_empty = 0; + $Global::interactive = 0; + $Global::stderr_verbose = 0; + $Global::default_simultaneous_sshlogins = 9; + $Global::exitstatus = 0; + $Global::arg_sep = ":::"; + $Global::arg_file_sep = "::::"; + $Global::trim = 'n'; + $Global::max_jobs_running = 0; + $Global::job_already_run = ''; + $ENV{'TMPDIR'} ||= "/tmp"; + $ENV{'PARALLEL_REMOTE_TMPDIR'} ||= "/tmp"; + # bug #55398: set $OLDPWD when using --wd + $ENV{'OLDPWD'} = $ENV{'PWD'}; + if(not $ENV{HOME}) { + # $ENV{HOME} is sometimes not set if called from PHP + ::warning("\$HOME not set. Using /tmp."); + $ENV{HOME} = "/tmp"; + } + # no warnings to allow for undefined $XDG_* + no warnings 'uninitialized'; + # If $PARALLEL_HOME is set, but does not exist, try making it. + if(defined $ENV{'PARALLEL_HOME'}) { + eval { File::Path::mkpath($ENV{'PARALLEL_HOME'}); }; + } + # $xdg_config_home is needed to make env_parallel.fish stop complaining + my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'} || $ENV{'HOME'} . "/.config"; + # Use the first config dir that exists from: + # $PARALLEL_HOME + # $XDG_CONFIG_HOME/parallel + # $(each XDG_CONFIG_DIRS)/parallel + # $HOME/.parallel + # + # Keep only dirs that exist + @Global::config_dirs = + (grep { -d $_ } + $ENV{'PARALLEL_HOME'}, + (map { "$_/parallel" } + $xdg_config_home, + split /:/, $ENV{'XDG_CONFIG_DIRS'}), + $ENV{'HOME'} . "/.parallel"); + # Use first dir as config dir + $Global::config_dir = $Global::config_dirs[0] || + $ENV{'HOME'} . 
"/.parallel"; + if($ENV{'PARALLEL_HOME'} =~ /./ and not -d $ENV{'PARALLEL_HOME'}) { + ::warning("\$PARALLEL_HOME ($ENV{'PARALLEL_HOME'}) does not exist."); + ::warning("Using $Global::config_dir"); + } + # Use the first cache dir that exists from: + # $PARALLEL_HOME + # $XDG_CACHE_HOME/parallel + # Keep only dirs that exist + my $xdg_cache_home = $ENV{'XDG_CACHE_HOME'} || $ENV{'HOME'} . "/.cache"; + @Global::cache_dirs = (grep { -d $_ } + $ENV{'PARALLEL_HOME'}, + $xdg_cache_home."/parallel"); + $Global::cache_dir = $Global::cache_dirs[0] || $ENV{'HOME'} . "/.parallel"; + Job::init_color(); +} + +sub parse_halt() { + # $opt::halt flavours + # Uses: + # $opt::halt + # $Global::halt_when + # $Global::halt_fail + # $Global::halt_success + # $Global::halt_pct + # $Global::halt_count + if(defined $opt::halt) { + my %halt_expansion = ( + "0" => "never", + "1" => "soon,fail=1", + "2" => "now,fail=1", + "-1" => "soon,success=1", + "-2" => "now,success=1", + ); + # Expand -2,-1,0,1,2 into long form + $opt::halt = $halt_expansion{$opt::halt} || $opt::halt; + # --halt 5% == --halt soon,fail=5% + $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/; + # Split: soon,fail=5% + my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt; + if(not grep { $when eq $_ } qw(never soon now)) { + ::error("--halt must have 'never', 'soon', or 'now'."); + ::wait_and_exit(255); + } + $Global::halt_when = $when; + if($when ne "never") { + if($fail_success eq "fail") { + $Global::halt_fail = 1; + } elsif($fail_success eq "success") { + $Global::halt_success = 1; + } elsif($fail_success eq "done") { + $Global::halt_done = 1; + } else { + ::error("--halt $when must be followed by ,success or ,fail."); + ::wait_and_exit(255); + } + if($pct_count =~ /^(\d+)%$/) { + $Global::halt_pct = $1/100; + } elsif($pct_count =~ /^(\d+)$/) { + $Global::halt_count = $1; + } else { + ::error("--halt $when,$fail_success ". 
+ "must be followed by ,number or ,percent%."); + ::wait_and_exit(255); + } + } + } +} + +sub parse_replacement_string_options() { + # Deal with --rpl + # Uses: + # %Global::rpl + # $Global::parensleft + # $Global::parensright + # $opt::parens + # $Global::parensleft + # $Global::parensright + # $opt::plus + # %Global::plus + # $opt::I + # $opt::U + # $opt::i + # $opt::basenamereplace + # $opt::dirnamereplace + # $opt::seqreplace + # $opt::slotreplace + # $opt::basenameextensionreplace + + sub rpl($$) { + # Modify %Global::rpl + # Replace $old with $new + my ($old,$new) = @_; + if($old ne $new) { + $Global::rpl{$new} = $Global::rpl{$old}; + delete $Global::rpl{$old}; + } + } + my $parens = "{==}"; + if(defined $opt::parens) { $parens = $opt::parens; } + my $parenslen = 0.5*length $parens; + $Global::parensleft = substr($parens,0,$parenslen); + $Global::parensright = substr($parens,$parenslen); + if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } + if(defined $opt::I) { rpl('{}',$opt::I); } + if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } + if(defined $opt::U) { rpl('{.}',$opt::U); } + if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } + if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } + if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } + if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } + if(defined $opt::basenameextensionreplace) { + rpl('{/.}',$opt::basenameextensionreplace); + } + for(@opt::rpl) { + # Create $Global::rpl entries for --rpl options + # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" + my ($shorthand,$long) = split/\s/,$_,2; + $Global::rpl{$shorthand} = $long; + } +} + +sub parse_semaphore() { + # Semaphore defaults + # Must be done before computing number of processes and max_line_length + # because when running as a semaphore GNU Parallel does not read args + # Uses: + # $opt::semaphore + # $Global::semaphore + # $opt::semaphoretimeout + # $Semaphore::timeout + 
# $opt::semaphorename + # $Semaphore::name + # $opt::fg + # $Semaphore::fg + # $opt::wait + # $Semaphore::wait + # $opt::bg + # @opt::a + # @Global::unget_argv + # $Global::default_simultaneous_sshlogins + # $opt::jobs + # $Global::interactive + $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' + if(defined $opt::semaphore) { $Global::semaphore = 1; } + if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } + if(defined $opt::semaphorename) { $Global::semaphore = 1; } + if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) { + $Global::semaphore = 1; + } + if(defined $opt::bg) { $Global::semaphore = 1; } + if(defined $opt::wait and not $opt::sqlmaster) { + $Global::semaphore = 1; @ARGV = "true"; + } + if($Global::semaphore) { + if(@opt::a) { + # Assign the first -a to STDIN + open(STDIN,"<",shift @opt::a); + if(@opt::a) { + # We currently have no way of dealing with more -a + ::error("A semaphore cannot take input from more files\n"); + ::wait_and_exit(255); + } + } + @opt::a = ("/dev/null"); + # Append a dummy empty argument + # \0 => nothing (not the empty string) + push(@Global::unget_argv, [Arg->new("\0noarg")]); + $Semaphore::timeout = int(multiply_time_units($opt::semaphoretimeout)) + || 0; + if(defined $opt::semaphorename) { + $Semaphore::name = $opt::semaphorename; + } else { + local $/ = "\n"; + $Semaphore::name = `tty`; + chomp $Semaphore::name; + } + $Semaphore::fg = $opt::fg; + $Semaphore::wait = $opt::wait; + $Global::default_simultaneous_sshlogins = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if($Global::interactive and $opt::bg) { + ::error("Jobs running in the ". + "background cannot be interactive."); + ::wait_and_exit(255); + } + } +} + +sub record_env() { + # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars + # Returns: N/A + my $ignore_filename = $Global::config_dir . 
"/ignored_vars"; + write_or_exit($ignore_filename,map { $_,"\n" } keys %ENV); +} + +sub open_joblog() { + # Open joblog as specified by --joblog + # Uses: + # $opt::resume + # $opt::resume_failed + # $opt::joblog + # $opt::results + # $Global::job_already_run + # %Global::fh + my $append = 0; + if(($opt::resume or $opt::resume_failed) + and + not ($opt::joblog or $opt::results)) { + ::error("--resume and --resume-failed require --joblog or --results."); + ::wait_and_exit(255); + } + if(defined $opt::joblog and $opt::joblog =~ s/^\+//) { + # --joblog +filename = append to filename + $append = 1; + } + if($opt::joblog + and + ($opt::sqlmaster + or + not $opt::sqlworker)) { + # Do not log if --sqlworker + if($opt::resume || $opt::resume_failed || $opt::retry_failed) { + if(open(my $joblog_fh, "<", $opt::joblog)) { + # Enable utf8 if possible + eval q{ binmode $joblog_fh, "encoding(utf8)"; }; + # Read the joblog + # Override $/ with \n because -d might be set + local $/ = "\n"; + # If there is a header: Open as append later + $append = <$joblog_fh>; + my $joblog_regexp; + if($opt::retry_failed) { + # Make a regexp that matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + my @group; + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + $group[$1-1] = "true"; + } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) { + # Grab out the command + $group[$1-1] = $3; + } else { + chomp; + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + if(@group) { + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + if(grep /\0/, @group) { + # force --null to deal with \n in commandlines + ::warning("Command lines contain newline. ". 
+ "Forcing --null."); + $opt::null = 1; + $/ = "\0"; + } + # Replace \0 with '\n' as used in print_joblog() + print $outfh (map { s/\0/\n/g; $_,$/ } + map { $_ } @group); + seek $outfh, 0, 0; + exit_if_disk_full(); + # Set filehandle to -a + @opt::a = ($outfh); + } + # Remove $command (so -a is run) + @ARGV = (); + } + if($opt::resume || $opt::resume_failed) { + if($opt::resume_failed) { + # Make a regexp that matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + } else { + # Just match the job number + $joblog_regexp='^(\d+)'; + } + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) { + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + } + close $joblog_fh; + } + # $opt::null may be set if the commands contain \n + if($opt::null) { $/ = "\0"; } + } + if($opt::dryrun) { + # Do not write to joblog in a dry-run + + } elsif($append) { + # Append to joblog + $Global::joblog = open_or_exit(">>", $opt::joblog); + } else { + if($opt::joblog eq "-") { + # Use STDOUT as joblog + $Global::joblog = $Global::fh{1}; + } else { + # Overwrite the joblog + $Global::joblog = open_or_exit(">", $opt::joblog); + } + print $Global::joblog + join("\t", "Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command" + ). 
"\n"; + } + } +} + +sub open_json_csv() { + if($opt::results) { + # Output as JSON/CSV/TSV + if($opt::results eq "-.csv" + or + $opt::results eq "-.tsv" + or + $opt::results eq "-.json") { + # Output as JSON/CSV/TSV on stdout + open $Global::csv_fh, ">&", "STDOUT" or + ::die_bug("Can't dup STDOUT in csv: $!"); + # Do not print any other output to STDOUT + # by forcing all other output to /dev/null + open my $fd, ">", "/dev/null" or + ::die_bug("Can't >/dev/null in csv: $!"); + $Global::fh{1} = $fd; + $Global::fh{2} = $fd; + } elsif($Global::csvsep or $Global::jsonout) { + $Global::csv_fh = open_or_exit(">",$opt::results); + } + } +} + +sub find_compression_program() { + # Find a fast compression program + # Returns: + # $compress_program = compress program with options + # $decompress_program = decompress program with options + + # Search for these. Sorted by speed on 128 core + + # seq 120000000|shuf > 1gb & + # apt-get update + # apt install make g++ htop + # wget -O - pi.dk/3 | bash + # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz + # git clone https://github.com/facebook/zstd.git + # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin) + # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz + # chmod +x /usr/local/bin/lrz + # wait + # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2" + # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz" + # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread + # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread + # sort -nk4 jl-? 
+ + # 1-core: + # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip + # 4-cores: + # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip + # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2 + # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip + # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip + # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip + + my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip + lrz pxz bzip2 lzma xz clzip); + for my $p (@prg) { + if(which($p)) { + return ("$p -c -1","$p -dc"); + } + } + # Fall back to cat + return ("cat","cat"); +} + +sub read_options() { + # Read options from command line, profile and $PARALLEL + # Uses: + # $opt::shebang_wrap + # $opt::shebang + # @ARGV + # $opt::plain + # @opt::profile + # $ENV{'HOME'} + # $ENV{'PARALLEL'} + # Returns: + # @ARGV_no_opt = @ARGV without --options + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or + $ARGV[0] =~ /^--shebang-?wrap/ or + $ARGV[0] =~ /^--hashbang/)) { + # Program is called from #! line in script + # remove --shebang-wrap if it is set + $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); + # remove --shebang if it is set + $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); + # remove --hashbang if it is set + $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); + if($opt::shebang) { + my $argfile = Q(pop @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --skip-first-line -a $argfile @ARGV"; + } + if($opt::shebang_wrap) { + my @options; + my @parser; + if ($^O eq 'freebsd') { + # FreeBSD's #! 
puts different values in @ARGV than Linux' does + my @nooptions = @ARGV; + get_options_from_array(\@nooptions); + while($#ARGV > $#nooptions) { + push @options, shift @ARGV; + } + while(@ARGV and $ARGV[0] ne ":::") { + push @parser, shift @ARGV; + } + if(@ARGV and $ARGV[0] eq ":::") { + shift @ARGV; + } + } else { + @options = shift @ARGV; + } + my $script = Q(Q(shift @ARGV)); # TODO - test if script = " " + my @args = map{ Q($_) } @ARGV; + # exec myself to split $ARGV[0] into separate fields + exec "$0 --_pipe-means-argfiles @options @parser $script ". + "::: @args"; + } + } + if($ARGV[0] =~ / --shebang(-?wrap)? /) { + ::warning("--shebang and --shebang-wrap must be the first ". + "argument.\n"); + } + + Getopt::Long::Configure("bundling","require_order"); + my @ARGV_copy = @ARGV; + my @ARGV_orig = @ARGV; + # Check if there is a --profile to set @opt::profile + get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); + my @ARGV_profile = (); + my @ARGV_env = (); + if(not $opt::plain) { + # Add options from $PARALLEL_HOME/config and other profiles + my @config_profiles = ( + "/etc/parallel/config", + (map { "$_/config" } @Global::config_dirs), + $ENV{'HOME'}."/.parallelrc"); + my @profiles = @config_profiles; + if(@opt::profile) { + # --profile overrides default profiles + @profiles = (); + for my $profile (@opt::profile) { + if($profile =~ m:^\./|^/:) { + # Look for ./profile in . 
+ # Look for /profile in / + push @profiles, grep { -r $_ } $profile; + } else { + # Look for the $profile in @Global::config_dirs + push @profiles, grep { -r $_ } + map { "$_/$profile" } @Global::config_dirs; + } + } + } + for my $profile (@profiles) { + if(-r $profile) { + ::debug("init","Read $profile\n"); + local $/ = "\n"; + open (my $in_fh, "<", $profile) || + ::die_bug("read-profile: $profile"); + while(<$in_fh>) { + /^\s*\#/ and next; + chomp; + push @ARGV_profile, shell_words($_); + } + close $in_fh; + } else { + if(grep /^\Q$profile\E$/, @config_profiles) { + # config file is not required to exist + } else { + ::error("$profile not readable."); + wait_and_exit(255); + } + } + } + # Add options from shell variable $PARALLEL + if($ENV{'PARALLEL'}) { + push @ARGV_env, shell_words($ENV{'PARALLEL'}); + } + # Add options from env_parallel.csh via $PARALLEL_CSH + if($ENV{'PARALLEL_CSH'}) { + push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'}); + } + } + Getopt::Long::Configure("bundling","require_order"); + get_options_from_array(\@ARGV_profile) || die_usage(); + get_options_from_array(\@ARGV_env) || die_usage(); + get_options_from_array(\@ARGV) || die_usage(); + # What were the options given on the command line? 
+ # Used to start --sqlworker + my $ai = arrayindex(\@ARGV_orig, \@ARGV); + @Global::options_in_argv = @ARGV_orig[0..$ai-1]; + # Prepend non-options to @ARGV (such as commands like 'nice') + unshift @ARGV, @ARGV_profile, @ARGV_env; + return @ARGV; +} + +sub arrayindex($$) { + # Similar to Perl's index function, but for arrays + # Input: + # $arr_ref1 = ref to @array1 to search in + # $arr_ref2 = ref to @array2 to search for + # Returns: + # $pos = position of @array1 in @array2, -1 if not found + my ($arr_ref1,$arr_ref2) = @_; + my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1; + my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2; + my $i = index($array1_as_string,$array2_as_string,0); + if($i == -1) { return -1 } + my @before = split /\0/, substr($array1_as_string,0,$i); + return $#before; +} + +sub read_args_from_command_line() { + # Arguments given on the command line after: + # ::: ($Global::arg_sep) + # :::: ($Global::arg_file_sep) + # :::+ ($Global::arg_sep with --link) + # ::::+ ($Global::arg_file_sep with --link) + # Removes the arguments from @ARGV and: + # - puts filenames into -a + # - puts arguments into files and add the files to -a + # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+ + # Input: + # @::ARGV = command option ::: arg arg arg :::: argfiles + # Uses: + # $Global::arg_sep + # $Global::arg_file_sep + # $opt::_pipe_means_argfiles + # $opt::pipe + # @opt::a + # Returns: + # @argv_no_argsep = @::ARGV without ::: and :::: and following args + my %group_sep = ($Global::arg_sep => ":::", + $Global::arg_sep."+" => ":::+", + $Global::arg_file_sep => "::::", + $Global::arg_file_sep."+" => "::::+"); + sub is_linked($) { + # file is linked if file starts with + + local $_ = shift; + if(/^\+(.*)/) { + my $noplus = $1; + if(-e $_ and -e $noplus) { + ::error("It is unclear whether you mean +./$noplus or ./+$noplus"); + wait_and_exit(255); + } elsif(-e $_ and not -e $noplus) { + # This is ./+file = this is not linked 
+ return 0; + } elsif(not -e $_ and -e $noplus) { + # This is +./file = this is linked + return 1; + } elsif(not -e $_ and not -e $noplus) { + # File does not exist, maybe it is stdin? + if($_ eq "-") { + # This is - = this is not linked + return 0; + } elsif($_ eq "+-") { + # This is +- = this is linked + return 1; + } else { + ::error("File not found: $_"); + wait_and_exit(255); + } + } else { + ::die_bug("noplus: $noplus $_"); + } + } + # not linked + return 0; + } + sub cmd_template() { + # remove command template from @ARGV + # keep ::: / :::: in @ARGV if any + my @cmd_template; + while(@ARGV) { + my $arg = shift @ARGV; + if($group_sep{$arg}) { + # Found separator: push it back and exit loop + unshift @ARGV, $arg; + last; + } + push @cmd_template, $arg; + } + return @cmd_template; + } + sub divide_into_groups() { + # Split arguments from @ARGV into groups: + # ::: 1 2 3 :::: a b c ::::+ d e f + # => + # [ ::: 1 2 3 ], [ :::: a b c ], [ ::::+ d e f ] + my @g; + my @grp; + while(@ARGV) { + my $arg = shift @ARGV; + if($group_sep{$arg}) { + # start a new group + push @grp, [@g]; + @g = ($group_sep{$arg}); + } else { + push @g, $arg; + } + } + push @grp, [@g]; + shift @grp; # The first will always be empty + return @grp; + } + sub save_to_file(@) { + # Put args into a file, return open file handle of file + # Create argfile + my ($fh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + print $fh map { $_,$/ } @_; + seek $fh, 0, 0; + exit_if_disk_full(); + return $fh; + } + my @cmd = cmd_template(); + # The rest of @ARGV is ::: / :::: args + # If there are any -a: Rewrite them to use :::: + if(@opt::a) { unshift @ARGV, $Global::arg_file_sep, @opt::a; } + @opt::a = (); + # Convert ::: and :::: into (linked) files and put those into @opt::a + for my $g_ref (divide_into_groups()) { + my $group_sep = shift @$g_ref; + if($group_sep eq ":::" or $group_sep eq ":::+") { + # Group starts with ::: / :::+ + if($opt::_pipe_means_argfiles and 
$#$g_ref < 0) { + # TODO + # Deal with --shebang-wrap and ::: on the shebang line + } else { + push @opt::a, save_to_file(@$g_ref); + # if $group_sep == ":::+": it is linked + push @opt::linkinputsource, ($group_sep eq ":::+"); + } + } elsif($group_sep eq "::::" or $group_sep eq "::::+") { + # Group starts with :::: / ::::+ + for my $f (@$g_ref) { + if($group_sep eq "::::+") { + # Linking forced + push @opt::a, $f; + push @opt::linkinputsource, 1; + } elsif($group_sep eq "::::") { + # Auto detect linking + if(is_linked($f)) { + # +file + push @opt::linkinputsource, 1; + $f =~ s/^\+//; + } else { + # file (no plus) + push @opt::linkinputsource, 0; + } + push @opt::a, $f; + } else { + ::die_bug("arg link error"); + } + } + } else { + ::die_bug("arg link error"); + } + } + # Output: command to run with options + return @cmd; +} + +sub cleanup() { + # Returns: N/A + unlink keys %Global::unlink; + map { rmdir $_ } keys %Global::unlink; + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + for(keys %Global::sshmaster) { + # If 'ssh -M's are running: kill them + kill "TERM", $_; + } +} + + +sub __QUOTING_ARGUMENTS_FOR_SHELL__() {} + +sub shell_quote(@) { + # Input: + # @strings = strings to be quoted + # Returns: + # @shell_quoted_strings = string quoted as needed by the shell + return wantarray ? 
(map { Q($_) } @_) : (join" ",map { Q($_) } @_); +} + +sub shell_quote_scalar_rc($) { + # Quote for the rc-shell + my $a = $_[0]; + if(defined $a) { + if(($a =~ s/'/''/g) + + + ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_csh($) { + # Quote for (t)csh + my $a = $_[0]; + if(defined $a) { + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + + + # quote newline in csh as \\\n + ($a =~ s/[\n]/"\\\n"/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_default($) { + # Quote for other shells (Bourne compatibles) + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + local $_ = $_[0]; + if(/[^-_.+a-z0-9\/]/i) { + s/'+/'"$&"'/g; # "-quote '-quotes: ''' => "'''" + $_ = "'$_'"; # '-quote entire string + s/^''//; # Remove unneeded '' at ends + s/''$//; # (faster than s/^''|''$//g) + return $_; + } elsif ($_ eq "") { + return "''"; + } else { + # No quoting needed + return $_; + } +} + +sub shell_quote_scalar($) { + # Quote the string so the shell will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + + # Speed optimization: Choose the correct shell_quote_scalar_* + # and call that directly from now on + no warnings 'redefine'; + if($Global::cshell) { + # (t)csh + *shell_quote_scalar = \&shell_quote_scalar_csh; + } elsif($Global::shell =~ m:(^|/)rc$:) { + # rc-shell + 
*shell_quote_scalar = \&shell_quote_scalar_rc; + } else { + # other shells + *shell_quote_scalar = \&shell_quote_scalar_default; + } + # The sub is now redefined. Call it + return shell_quote_scalar($_[0]); +} + +sub Q($) { + # Q alias for ::shell_quote_scalar + my $ret = shell_quote_scalar($_[0]); + no warnings 'redefine'; + *Q = \&::shell_quote_scalar; + return $ret; +} + +sub shell_quote_file($) { + # Quote the string so shell will not expand any special chars + # and prepend ./ if needed + # Input: + # $filename = filename to be shell quoted + # Returns: + # $quoted_filename = filename quoted with \ and ./ if needed + my $a = shift; + if(defined $a) { + if($a =~ m:^/: or $a =~ m:^\./:) { + # /abs/path or ./rel/path => skip + } else { + # rel/path => ./rel/path + $a = "./".$a; + } + } + return Q($a); +} + +sub shell_words(@) { + # Input: + # $string = shell line + # Returns: + # @shell_words = $string split into words as shell would do + $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; + return Text::ParseWords::shellwords(@_); +} + +sub perl_quote_scalar($) { + # Quote the string so perl's eval will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $perl_quoted = string quoted with \ as needed by perl's eval + my $a = $_[0]; + if(defined $a) { + $a =~ s/[\\\"\$\@]/\\$&/go; + } + return $a; +} + +# -w complains about prototype +sub pQ($) { + # pQ alias for ::perl_quote_scalar + my $ret = perl_quote_scalar($_[0]); + *pQ = \&::perl_quote_scalar; + return $ret; +} + +sub unquote_printf() { + # Convert \t \n \r \xFF \000 \0 + # Inputs: + # $string = string with \t \n \r \num \0 + # Returns: + # $replaced = string with TAB NEWLINE CR NUL + $_ = shift; + s/\\t/\t/g; + s/\\n/\n/g; + s/\\r/\r/g; + # Hex: \xFF + s/\\x([0-9a-fA-F]+)/eval 'sprintf "\\x'.$1.'"'/ge; + # Octal: \007 + s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge; + # Single digit octal: \7 + s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge; + return $_; +} + + 
+sub __FILEHANDLES__() {} + + +sub save_stdin_stdout_stderr() { + # Remember the original STDIN, STDOUT and STDERR + # and file descriptors opened by the shell (e.g. 3>/tmp/foo) + # Uses: + # %Global::fh + # $Global::original_stderr + # $Global::original_stdin + # Returns: N/A + + # TODO Disabled until we have an open3 that will take n filehandles + # for my $fdno (1..61) { + # # /dev/fd/62 and above are used by bash for <(cmd) + # # Find file descriptors that are already opened (by the shell) + # Only focus on stdout+stderr for now + for my $fdno (1..2) { + my $fh; + # 2-argument-open is used to be compatible with old perl 5.8.0 + # bug #43570: Perl 5.8.0 creates 61 files + if(open($fh,">&=$fdno")) { + $Global::fh{$fdno}=$fh; + } + } + open $Global::original_stderr, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::status_fd, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::original_stdin, "<&", "STDIN" or + ::die_bug("Can't dup STDIN: $!"); +} + +sub enough_file_handles() { + # Check that we have enough filehandles available for starting + # another job + # Uses: + # $opt::ungroup + # %Global::fh + # Returns: + # 1 if ungrouped (thus not needing extra filehandles) + # 0 if too few filehandles + # 1 if enough filehandles + if(not $opt::ungroup) { + my %fh; + my $enough_filehandles = 1; + # perl uses 7 filehandles for something? 
+ # open3 uses 2 extra filehandles temporarily + # We need a filehandle for each redirected file descriptor + # (normally just STDOUT and STDERR) + for my $i (1..(7+2+keys %Global::fh)) { + $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); + } + for (values %fh) { close $_; } + return $enough_filehandles; + } else { + # Ungrouped does not need extra file handles + return 1; + } +} + +sub open_or_exit($$) { + # Open a file name or exit if the file cannot be opened + # Inputs: + # $mode = read:"<" write:">" + # $file = filehandle or filename to open + # Uses: + # $Global::original_stdin + # Returns: + # $fh = file handle to opened file + my $mode = shift; + my $file = shift; + if($file eq "-") { + if($mode eq "<") { + return ($Global::original_stdin || *STDIN); + } else { + return ($Global::original_stderr || *STDERR); + } + } + if(ref $file eq "GLOB") { + # This is an open filehandle + return $file; + } + my $fh = gensym; + if(not open($fh, $mode, $file)) { + ::error("Cannot open `$file': $!"); + wait_and_exit(255); + } + return $fh; +} + +sub slurp_or_exit($) { + # Read content of a file or exit if the file cannot be opened + # Inputs: + # $file = filehandle or filename to open + # Returns: + # $content = content as scalar + my $fh = open_or_exit("<",shift); + # $/ = undef => slurp whole file + local $/; + my $content = <$fh>; + close $fh; + return $content; +} + +sub write_or_exit(@) { + # Write content to a file or exit if the file cannot be opened + # Inputs: + # $file = filehandle or filename to open + # @content = content to be written + # Returns: + # N/A + my $file = shift; + sub failed { + error("Cannot write to `$file': $!"); + wait_and_exit(255); + } + my $fh = open_or_exit(">",$file); + print($fh @_) or failed(); + close($fh) or failed(); +} + +sub set_fh_blocking($) { + # Set filehandle as blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT 
:flock); 1;"; + my $flags; + # Get the current flags on the filehandle + fcntl($fh, &F_GETFL, $flags) || die $!; + # Remove non-blocking from the flags + $flags &= ~&O_NONBLOCK; + # Set the flags on the filehandle + fcntl($fh, &F_SETFL, $flags) || die $!; +} + +sub set_fh_non_blocking($) { + # Set filehandle as non-blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $flags; + # Get the current flags on the filehandle + fcntl($fh, &F_GETFL, $flags) || die $!; + # Add non-blocking to the flags + $flags |= &O_NONBLOCK; + # Set the flags on the filehandle + fcntl($fh, &F_SETFL, $flags) || die $!; +} + + +sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {} + + +# Variable structure: +# +# $Global::running{$pid} = Pointer to Job-object +# @Global::virgin_jobs = Pointer to Job-object that have received no input +# $Global::host{$sshlogin} = Pointer to SSHLogin-object +# $Global::total_running = total number of running jobs +# $Global::total_started = total jobs started +# $Global::max_procs_file = filename if --jobs is given a filename +# $Global::JobQueue = JobQueue object for the queue of jobs +# $Global::timeoutq = queue of times where jobs timeout +# $Global::newest_job = Job object of the most recent job started +# $Global::newest_starttime = timestamp of $Global::newest_job +# @Global::sshlogin +# $Global::minimal_command_line_length = min len supported by all sshlogins +# $Global::start_no_new_jobs = should more jobs be started? +# $Global::original_stderr = file handle for STDERR when the program started +# $Global::total_started = total number of jobs started +# $Global::joblog = filehandle of joblog +# $Global::debug = Is debugging on? 
+# $Global::exitstatus = status code of GNU Parallel +# $Global::quoting = quote the command to run + +sub init_run_jobs() { + # Set Global variables and progress signal handlers + # Do the copying of basefiles + # Returns: N/A + $Global::total_running = 0; + $Global::total_started = 0; + $SIG{USR1} = \&list_running_jobs; + $SIG{USR2} = \&toggle_progress; + if(@opt::basefile) { setup_basefile(); } +} + +{ + my $last_time; + my %last_mtime; + my $max_procs_file_last_mod; + + sub changed_procs_file { + # If --jobs is a file and it is modfied: + # Force recomputing of max_jobs_running for each $sshlogin + # Uses: + # $Global::max_procs_file + # %Global::host + # Returns: N/A + if($Global::max_procs_file) { + # --jobs filename + my $mtime = (stat($Global::max_procs_file))[9]; + $max_procs_file_last_mod ||= 0; + if($mtime > $max_procs_file_last_mod) { + # file changed: Force re-computing max_jobs_running + $max_procs_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_jobs_running(undef); + } + } + } + } + + sub changed_sshloginfile { + # If --slf is changed: + # reload --slf + # filter_hosts + # setup_basefile + # Uses: + # @opt::sshloginfile + # @Global::sshlogin + # %Global::host + # $opt::filter_hosts + # Returns: N/A + if(@opt::sshloginfile) { + # Is --sshloginfile changed? + for my $slf (@opt::sshloginfile) { + my $actual_file = expand_slf_shorthand($slf); + my $mtime = (stat($actual_file))[9]; + $last_mtime{$actual_file} ||= $mtime; + if($mtime - $last_mtime{$actual_file} > 1) { + ::debug("run", + "--sshloginfile $actual_file changed. 
reload\n"); + $last_mtime{$actual_file} = $mtime; + # Reload $slf + # Empty sshlogins + @Global::sshlogin = (); + for (values %Global::host) { + # Don't start new jobs on any host + # except the ones added back later + $_->set_max_jobs_running(0); + } + # This will set max_jobs_running on the SSHlogins + read_sshloginfile($actual_file); + parse_sshlogin(); + $opt::filter_hosts and filter_hosts(); + setup_basefile(); + } + } + } + } + + sub start_more_jobs { + # Run start_another_job() but only if: + # * not $Global::start_no_new_jobs set + # * not JobQueue is empty + # * not load on server is too high + # * not server swapping + # * not too short time since last remote login + # Uses: + # %Global::host + # $Global::start_no_new_jobs + # $Global::JobQueue + # $opt::pipe + # $opt::load + # $opt::noswap + # $opt::delay + # $Global::newest_starttime + # Returns: + # $jobs_started = number of jobs started + my $jobs_started = 0; + if($Global::start_no_new_jobs) { + return $jobs_started; + } + if(time - ($last_time||0) > 1) { + # At most do this every second + $last_time = time; + changed_procs_file(); + changed_sshloginfile(); + } + # This will start 1 job on each --sshlogin (if possible) + # thus distribute the jobs on the --sshlogins round robin + for my $sshlogin (values %Global::host) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more jobs in the queue + last; + } + debug("run", "Running jobs before on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), "\n"); + if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { + if($opt::delay + and + $opt::delay-0.008 > ::now()-$Global::newest_starttime) { + # It has been too short since last start + next; + } + if($opt::load and $sshlogin->loadavg_too_high()) { + # The load is too high or unknown + next; + } + if($opt::noswap and $sshlogin->swapping()) { + # The server is swapping + next; + } + if($opt::limit and $sshlogin->limit()) { + # Over limit + next; + } + if(($opt::memfree or 
$opt::memsuspend) + and + $sshlogin->memfree() < $Global::memlimit) { + # The server has not enough mem free + ::debug("mem", "Not starting job: not enough mem\n"); + next; + } + if($sshlogin->too_fast_remote_login()) { + # It has been too short since last login + next; + } + debug("run", $sshlogin->string(), + " has ", $sshlogin->jobs_running(), + " out of ", $sshlogin->max_jobs_running(), + " jobs running. Start another.\n"); + if(start_another_job($sshlogin) == 0) { + # No more jobs to start on this $sshlogin + debug("run","No jobs started on ", + $sshlogin->string(), "\n"); + next; + } + $sshlogin->inc_jobs_running(); + $sshlogin->set_last_login_at(::now()); + $jobs_started++; + } + debug("run","Running jobs after on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), " of ", + $sshlogin->max_jobs_running(), "\n"); + } + + return $jobs_started; + } +} + +{ + my $no_more_file_handles_warned; + + sub start_another_job() { + # If there are enough filehandles + # and JobQueue not empty + # and not $job is in joblog + # Then grab a job from Global::JobQueue, + # start it at sshlogin + # mark it as virgin_job + # Inputs: + # $sshlogin = the SSHLogin to start the job on + # Uses: + # $Global::JobQueue + # $opt::pipe + # $opt::results + # $opt::resume + # @Global::virgin_jobs + # Returns: + # 1 if another jobs was started + # 0 otherwise + my $sshlogin = shift; + # Do we have enough file handles to start another job? 
+ if(enough_file_handles()) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more commands to run + debug("start", "Not starting: JobQueue empty\n"); + return 0; + } else { + my $job; + # Skip jobs already in job log + # Skip jobs already in results + do { + $job = get_job_with_sshlogin($sshlogin); + if(not defined $job) { + # No command available for that sshlogin + debug("start", "Not starting: no jobs available for ", + $sshlogin->string(), "\n"); + return 0; + } + if($job->is_already_in_joblog()) { + $job->free_slot(); + } + } while ($job->is_already_in_joblog() + or + ($opt::results and $opt::resume + and $job->is_already_in_results())); + debug("start", "Command to run on '", + $job->sshlogin()->string(), "': '", + $job->replaced(),"'\n"); + if($job->start()) { + if($opt::pipe) { + if($job->virgin()) { + push(@Global::virgin_jobs,$job); + } else { + # Block already set: This is a retry + $job->write_block(); + } + } + debug("start", "Started as seq ", $job->seq(), + " pid:", $job->pid(), "\n"); + return 1; + } else { + # Not enough processes to run the job. + # Put it back on the queue. + $Global::JobQueue->unget($job); + # Count down the number of jobs to run for this SSHLogin. + my $max = $sshlogin->max_jobs_running(); + if($max > 1) { $max--; } else { + my @arg; + for my $record (@{$job->{'commandline'}{'arg_list'}}) { + push @arg, map { $_->orig() } @$record; + } + ::error("No more processes: cannot run a single job. ". + "Something is wrong at @arg."); + ::wait_and_exit(255); + } + $sshlogin->set_max_jobs_running($max); + # Sleep up to 300 ms to give other processes time to die + ::usleep(rand()*300); + ::warning("No more processes: ". 
+ "Decreasing number of running jobs to $max.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + return 0; + } + } + } else { + # No more file handles + $no_more_file_handles_warned++ or + ::warning("No more file handles. ", + "Try running 'parallel -j0 -N 100 --pipe parallel -j0'", + "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)", + "or increasing 'nofile' in /etc/security/limits.conf", + "or increasing /proc/sys/fs/file-max"); + debug("start", "No more file handles. "); + return 0; + } + } +} + +sub init_progress() { + # Uses: + # $opt::bar + # Returns: + # list of computers for progress output + $|=1; + if($opt::bar) { + return("",""); + } + my $progress = progress(); + my $cpu_units = $opt::use_sockets_instead_of_threads ? "CPU sockets" : + ($opt::use_cores_instead_of_threads ? "CPU cores" : "CPU threads"); + return ("\nComputers / $cpu_units / Max jobs to run\n", + $progress->{'workerlist'},"\n",$progress->{'header'}); +} + +sub drain_job_queue(@) { + # Uses: + # $opt::progress + # $Global::total_running + # $Global::max_jobs_running + # %Global::running + # $Global::JobQueue + # %Global::host + # $Global::start_no_new_jobs + # Returns: N/A + my @command = @_; + my $sleep = 0.2; + my $sleepsum = 0; + do { + while($Global::total_running > 0) { + debug("init",$Global::total_running, "==", scalar + keys %Global::running," slots: ", $Global::max_jobs_running); + if($opt::pipe) { + # When using --pipe sometimes file handles are not + # closed properly + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + } + if($opt::progress) { + my $progress = progress(); + ::status_no_nl("\r",$progress->{'status'}); + } + if($Global::total_running < $Global::max_jobs_running + and not $Global::JobQueue->empty()) { + # These jobs may not be started because of loadavg + # or too little time between each ssh login. 
+ if(start_more_jobs() > 0) { + # Exponential back-on if jobs were started + $sleep = $sleep/2+0.001; + } + } + # Exponential back-off sleeping + $sleep = ::reap_usleep($sleep); + $sleepsum += $sleep; + if($sleepsum >= 1000) { + # At most do this every second + $sleepsum = 0; + changed_procs_file(); + changed_sshloginfile(); + start_more_jobs(); + } + } + if(not $Global::JobQueue->empty()) { + # These jobs may not be started: + # * because there the --filter-hosts has removed all + if(not %Global::host) { + ::error("There are no hosts left to run on."); + ::wait_and_exit(255); + } + # * because of loadavg + # * because of too little time between each ssh login. + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + if($Global::max_jobs_running == 0) { + ::warning("There are no job slots available. Increase --jobs."); + } + } + while($opt::sqlmaster and not $Global::sql->finished()) { + # SQL master + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + if($Global::start_sqlworker) { + # Start an SQL worker as we are now sure there is work to do + $Global::start_sqlworker = 0; + if(my $pid = fork()) { + $Global::unkilled_sqlworker = $pid; + } else { + # Replace --sql/--sqlandworker with --sqlworker + my @ARGV = (map { s/^--sql(andworker)?$/--sqlworker/; $_ } + @Global::options_in_argv); + # exec the --sqlworker + exec($0,@ARGV,@command); + } + } + } + } while ($Global::total_running > 0 + or + not $Global::start_no_new_jobs and not $Global::JobQueue->empty() + or + $opt::sqlmaster and not $Global::sql->finished()); + $Global::all_jobs_done = 1; + if($opt::progress) { + my $progress = progress(); + ::status("\r".$progress->{'status'}); + } +} + +sub toggle_progress() { + # Turn on/off progress view + # Uses: + # $opt::progress + # Returns: N/A + $opt::progress = not $opt::progress; + if($opt::progress) { + ::status_no_nl(init_progress()); + } +} + +{ + my $last_header; + my $eol; + + sub progress() { + # Uses: + # $opt::bar + # $opt::eta + # %Global::host + # 
$Global::total_started + # Returns: + # $workerlist = list of workers + # $header = that will fit on the screen + # $status = message that will fit on the screen + if($opt::bar) { + return {"workerlist" => "", "header" => "", "status" => bar()}; + } + my $eta = ""; + my ($status,$header)=("",""); + if($opt::eta) { + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + } + my $termcols = terminal_columns(); + my @workers = sort keys %Global::host; + my $workerno = 1; + my %wrk; + for my $w (@workers) { + my %i; + $i{'sshlogin'} = $w eq ":" ? "local" : $w; + $i{'no'} = $workerno++; + $i{'ncpu'} = ($Global::host{$w}->ncpus() || "-"); + $i{'jobslots'} = $Global::host{$w}->max_jobs_running(); + $i{'completed'} = ($Global::host{$w}->jobs_completed() || 0); + $i{'running'} = $Global::host{$w}->jobs_running(); + $i{'pct'} = $Global::total_started ? + (($i{'running'}+$i{'completed'})*100 / + $Global::total_started) : 0; + $i{'time'} = $i{'completed'} ? (time-$^T)/($i{'completed'}) : 0; + $wrk{$w} = \%i; + } + + my $workerlist = ""; + for my $w (@workers) { + $workerlist .= + $wrk{$w}{'no'}.":".$wrk{$w}{'sshlogin'} ." / ". + $wrk{$w}{'ncpu'}." / ". + $wrk{$w}{'jobslots'}."\n"; + } + # Force $status to select one of the below formats + $status = "c"x($termcols+1); + # Select an output format that will fit on a single line + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX%/XX.Xs s2:XX/XX/XX%/XX.Xs s3:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/". + "%of started jobs/Average seconds to complete"; + $status = $eta . join(" ",map { + sprintf("%s:%d/%d/%d%%/%.1fs ", + @{$wrk{$_}} + {'sshlogin','running','completed','pct','time'} + ); } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX%/X.Xs 2:XX/XX/XX%/X.Xs 3:XX/XX/XX%/X.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . 
join(" ",map { + sprintf("%s:%d/%d/%d%%/%.1fs ", + @{$wrk{$_}} + {'no','running','completed','pct','time'} + ); } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . join(" ",map { + sprintf("%s:%d/%d/%d%%", + @{$wrk{$_}} + {'sshlogin','running','completed','pct'} + ); } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . join(" ",map { + sprintf("%s:%d/%d/%d%%", + @{$wrk{$_}} + {'no','running','completed','pct'} + ); } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . join(" ", map { + sprintf("%s:%d/%d", + @{$wrk{$_}} + {'sshlogin','running','completed'} + ); } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . join(" ", map { + sprintf("%s:%d/%d", + @{$wrk{$_}} + {'no','running','completed'} + ); } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX + $header = "Computer:jobs completed"; + $status = $eta . join(" ", map { + sprintf("%s:%d", + @{$wrk{$_}} + {'sshlogin','completed'} + ); } @workers); + } + if(length $status > $termcols) { + # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX + $header = "Computer:jobs completed"; + $status = $eta . 
join(" ", map { + sprintf("%s:%d", + @{$wrk{$_}} + {'no','completed'} + ); } @workers); + } + if($last_header ne $header) { + $header .= "\n"; + $last_header = $header; + } else { + $header = ""; + } + if(not $eol) { + $eol = `sh -c "tput el /dev/null`; + chomp($eol); + if($eol eq "") { $eol = "\033[K"; } + } + + return {"workerlist" => $workerlist, "header" => $header, + "status" => $status.$eol}; + } +} + +{ + + my ($first_completed, $smoothed_avg_time, $last_eta); + + sub compute_eta { + # Calculate important numbers for ETA + # Returns: + # $total = number of jobs in total + # $completed = number of jobs completed + # $left = number of jobs left + # $pctcomplete = percent of jobs completed + # $avgtime = averaged time + # $eta = smoothed eta + my $completed = $Global::total_completed; + # In rare cases with -X will $completed > total_jobs() + my $total = ::max($Global::JobQueue->total_jobs(),$completed); + my $left = $total - $completed; + if(not $completed) { + return($total, $completed, $left, 0, 0, 0); + } + my $pctcomplete = ::min($completed / $total,100); + $first_completed ||= time; + my $timepassed = (time - $first_completed); + my $avgtime = $timepassed / $completed; + $smoothed_avg_time ||= $avgtime; + # Smooth the eta so it does not jump wildly + $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + + $pctcomplete * $avgtime; + my $eta = int($left * $smoothed_avg_time); + if($eta*0.90 < $last_eta and $last_eta < $eta) { + # Eta jumped less that 10% up: Keep the last eta instead + $eta = $last_eta; + } else { + $last_eta = $eta; + } + return($total, $completed, $left, $pctcomplete, $avgtime, $eta); + } +} + +{ + my ($rev,$reset); + + sub bar() { + # Return: + # $status = bar with eta, completed jobs, arg and pct + $rev ||= "\033[7m"; + $reset ||= "\033[0m"; + my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = + compute_eta(); + if($Global::all_jobs_done) { $eta = now()-$Global::start_time; } + my $arg = $Global::newest_job ? 
+ $Global::newest_job->{'commandline'}-> + replace_placeholders(["\177<\177>"],0,0) : ""; + $arg = decode_utf8($arg); + my $eta_dhms = ::seconds_to_time_units($eta); + my $bar_text = + sprintf("%d%% %d:%d=%s %s", + $pctcomplete*100, $completed, $left, $eta_dhms, $arg); + my $terminal_width = terminal_columns(); + my $s = sprintf("%-${terminal_width}s", + substr($bar_text." "x$terminal_width, + 0,$terminal_width)); + my $width = int($terminal_width * $pctcomplete); + substr($s,$width,0) = $reset; + my $zenity = sprintf("%-${terminal_width}s", + substr("# $eta sec $arg", + 0,$terminal_width)); + # Prefix with zenity header + $s = "\r" . $zenity . "\r" . $pctcomplete*100 . + "\r" . $rev . $s . $reset; + return $s; + } +} + +{ + my ($rows,$columns,$last_update_time); + + sub compute_terminal_size() { + # && true is to force spawning a shell and not just exec'ing + my @tput = qx{ tput lines cols /dev/null && true }; + $rows = 0 + $tput[0]; + $columns = 0 + $tput[1]; + if(not ($rows && $columns)) { + # && true is to force spawning a shell and not just exec'ing + my $stty = qx{ stty -a /dev/null && true }; + # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS + # MacOSX/IRIX/AIX/Tru64 + $stty =~ /(\d+) columns/ and do { $columns = $1; }; + $stty =~ /(\d+) rows/ and do { $rows = $1; }; + # GNU/Linux/Solaris + $stty =~ /columns (\d+)/ and do { $columns = $1; }; + $stty =~ /rows (\d+)/ and do { $rows = $1; }; + # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana + $stty =~ /columns = (\d+)/ and do { $columns = $1; }; + $stty =~ /rows = (\d+)/ and do { $rows = $1; }; + # QNX + $stty =~ /rows=(\d+),(\d+)/ and do { ($rows,$columns) = ($1,$2); }; + } + if(not ($rows && $columns)) { + # && true is to force spawning a shell and not just exec'ing + my $resize = qx{ resize 2>/dev/null && true }; + $resize =~ /COLUMNS=(\d+);/ and do { $columns ||= $1; }; + $resize =~ /LINES=(\d+);/ and do { $rows ||= $1; }; + } + $rows ||= 24; + $columns ||= 80; + } + + sub update_terminal_size() { + # Only update 
once per second. + if($last_update_time < time) { + $last_update_time = time; + compute_terminal_size(); + # Set signal WINdow CHange to force recompute + $SIG{WINCH} = \&compute_terminal_size; + } + } + + sub terminal_rows() { + # Get the number of rows of the terminal. + # Returns: + # number of rows of the screen + update_terminal_size(); + return $rows; + } + + sub terminal_columns() { + # Get the number of columns of the terminal. + # Returns: + # number of columns of the screen + update_terminal_size(); + return $columns; + } +} + +sub untabify($) { + # Convert \t into spaces + my @out; + my ($src); + # Deal with multi-byte characters + for my $src (split("\t",$_[0])) { + push @out, $src. " "x(8-mbswidth($src)%8); + } + return join "",@out; +} + +# Prototype forwarding +sub get_job_with_sshlogin($); +sub get_job_with_sshlogin($) { + # Input: + # $sshlogin = which host should the job be run on? + # Uses: + # $opt::hostgroups + # $Global::JobQueue + # Returns: + # $job = next job object for $sshlogin if any available + my $sshlogin = shift; + my $job; + + if ($opt::hostgroups) { + my @other_hostgroup_jobs = (); + + while($job = $Global::JobQueue->get()) { + if($sshlogin->in_hostgroups($job->hostgroups())) { + # Found a job to be run on a hostgroup of this + # $sshlogin + last; + } else { + # This job was not in the hostgroups of $sshlogin + push @other_hostgroup_jobs, $job; + } + } + $Global::JobQueue->unget(@other_hostgroup_jobs); + if(not defined $job) { + # No more jobs + return undef; + } + } else { + $job = $Global::JobQueue->get(); + if(not defined $job) { + # No more jobs + ::debug("start", "No more jobs: JobQueue empty\n"); + return undef; + } + } + if(not $job->suspended()) { + $job->set_sshlogin($sshlogin); + } + if(defined $opt::retries and $job->failed_here()) { + # This command with these args failed for this sshlogin + my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); + # Only look at the Global::host that have > 0 jobslots + 
if($no_of_failed_sshlogins == + grep { $_->max_jobs_running() > 0 } values %Global::host + and $job->failed_here() == $min_failures) { + # It failed the same or more times on another host: + # run it on this host + } else { + # If it failed fewer times on another host: + # Find another job to run + my $nextjob; + if(not $Global::JobQueue->empty()) { + # This can potentially recurse for all args + no warnings 'recursion'; + $nextjob = get_job_with_sshlogin($sshlogin); + } + # Push the command back on the queue + $Global::JobQueue->unget($job); + return $nextjob; + } + } + return $job; +} + + +sub __REMOTE_SSH__() {} + + +sub read_sshloginfiles(@) { + # Read a list of --slf's + # Input: + # @files = files or symbolic file names to read + # Returns: N/A + for my $s (@_) { + read_sshloginfile(expand_slf_shorthand($s)); + } +} + +sub expand_slf_shorthand($) { + # Expand --slf shorthand into a read file name + # Input: + # $file = file or symbolic file name to read + # Returns: + # $file = actual file name to read + my $file = shift; + if($file eq "-") { + # skip: It is stdin + } elsif($file eq "..") { + $file = $Global::config_dir."/sshloginfile"; + } elsif($file eq ".") { + $file = "/etc/parallel/sshloginfile"; + } elsif(not -r $file) { + for(@Global::config_dirs) { + if(not -r $_."/".$file) { + # Try prepending $PARALLEL_HOME + ::error("Cannot open $file."); + ::wait_and_exit(255); + } else { + $file = $_."/".$file; + last; + } + } + } + return $file; +} + +sub read_sshloginfile($) { + # Read sshloginfile into @Global::sshlogin + # Input: + # $file = file to read + # Uses: + # @Global::sshlogin + # Returns: N/A + local $/ = "\n"; + my $file = shift; + my $close = 1; + my $in_fh; + ::debug("init","--slf ",$file); + if($file eq "-") { + $in_fh = *STDIN; + $close = 0; + } else { + $in_fh = open_or_exit("<", $file); + } + while(<$in_fh>) { + chomp; + /^\s*#/ and next; + /^\s*$/ and next; + push @Global::sshlogin, $_; + } + if($close) { + close $in_fh; + } +} + +sub 
parse_sshlogin() { + # Parse @Global::sshlogin into %Global::host. + # Keep only hosts that are in one of the given ssh hostgroups. + # Uses: + # @Global::sshlogin + # $Global::minimal_command_line_length + # %Global::host + # $opt::transfer + # @opt::return + # $opt::cleanup + # @opt::basefile + # @opt::trc + # Returns: N/A + sub expand_range($) { + # Expand host[9-11,15]a[09-11]b + # [9-11,15] => 9 10 11 15 + # [09-11] => 09 10 11 + my ($in) = @_; + my ($prefix, $range, $suffix); + if(($prefix, $range, $suffix) = $in =~ /^(.*?)\[([-0-9,]*)\](.*)$/) { + my @res; + while(length $range) { + if($range =~ s/^,//) { + # skip + } elsif($range =~ s/^(\d+)-(\d+)//) { + my ($start, $end) = ($1, $2); + push @res, map { $prefix . $_ . $suffix } $start..$end; + } elsif($range =~ s/^(\d+)//) { + push @res, map { $prefix . $_ . $suffix } $1; + } else { + die "Cannot parse $in (at $range)"; + } + } + return map { expand_range($_) } @res; + } else { + return $in; + } + } + my @login; + if(not @Global::sshlogin) { @Global::sshlogin = (":"); } + for my $sshlogin (@Global::sshlogin) { + # Split up -S sshlogin,sshlogin + # Parse ,, and \, as , but do not split on that + # -S "ssh -J jump1,,jump2 host1,host2" => + # ssh -J jump1,jump2 host1 + # host2 + # Protect \, and ,, as \0 + $sshlogin =~ s/\\,|,,/\0/g; + # Protect , in ranges: [___,___] => [___\0___] + while($sshlogin =~ s/(\[[-0-9\0]*),(.*\])/$1\0$2/g) {} + for my $s (split /,|\n/, $sshlogin) { + # Replace \0 => , + $s =~ s/\0/,/g; + if ($s eq ".." 
or $s eq "-") { + # This may add to @Global::sshlogin - possibly bug + read_sshloginfile(expand_slf_shorthand($s)); + } else { + $s =~ s/\s*$//; + # Expand host[1-12,15]a[01-10]b + push @login, expand_range($s); + } + } + } + $Global::minimal_command_line_length = 100_000_000; + my @allowed_hostgroups; + for my $ncpu_sshlogin_string (::uniq(@login)) { + my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); + my $sshlogin_string = $sshlogin->string(); + if($sshlogin_string eq "") { + # This is an ssh group: -S @webservers + push @allowed_hostgroups, $sshlogin->hostgroups(); + next; + } + if($Global::host{$sshlogin_string}) { + # This sshlogin has already been added: + # It is probably a host that has come back + # Set the max_jobs_running back to the original + debug("run","Already seen $sshlogin_string\n"); + if($sshlogin->{'ncpus'}) { + # If ncpus set by '#/' of the sshlogin, overwrite it: + $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); + } + $Global::host{$sshlogin_string}->set_max_jobs_running(undef); + next; + } + $sshlogin->set_maxlength(Limits::Command::max_length()); + + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); + $Global::host{$sshlogin_string} = $sshlogin; + } + $Global::usable_command_line_length = + # Usable len = maxlen - 3000 for wrapping, div 2 for hexing + int(($Global::minimal_command_line_length - 3000)/2); + if($opt::max_chars) { + if($opt::max_chars <= $Global::usable_command_line_length) { + $Global::usable_command_line_length = $opt::max_chars; + } else { + ::warning("Value for option -s should be < ". 
+ $Global::usable_command_line_length."."); + } + } + if(@allowed_hostgroups) { + # Remove hosts that are not in these groups + while (my ($string, $sshlogin) = each %Global::host) { + if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { + delete $Global::host{$string}; + } + } + } + + # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); + if(@Global::transfer_files or @opt::return + or $opt::cleanup or @opt::basefile) { + if(not remote_hosts()) { + # There are no remote hosts + if(@opt::trc) { + ::warning("--trc ignored as there are no remote --sshlogin."); + } elsif (defined $opt::transfer) { + ::warning("--transfer ignored as there are ". + "no remote --sshlogin."); + } elsif (@opt::transfer_files) { + ::warning("--transferfile ignored as there ". + "are no remote --sshlogin."); + } elsif (@opt::return) { + ::warning("--return ignored as there are no remote --sshlogin."); + } elsif (defined $opt::cleanup and not %opt::template) { + ::warning("--cleanup ignored as there ". + "are no remote --sshlogin."); + } elsif (@opt::basefile) { + ::warning("--basefile ignored as there ". + "are no remote --sshlogin."); + } + } + } +} + +sub remote_hosts() { + # Return sshlogins that are not ':' + # Uses: + # %Global::host + # Returns: + # list of sshlogins with ':' removed + return grep !/^:$/, keys %Global::host; +} + +sub setup_basefile() { + # Transfer basefiles to each $sshlogin + # This needs to be done before first jobs on $sshlogin is run + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $rsync_destdir; + my $workdir; + for my $sshlogin (values %Global::host) { + if($sshlogin->local()) { next } + for my $file (@opt::basefile) { + if($file !~ m:^/: and $opt::workdir eq "...") { + ::error("Work dir '...' 
will not work with relative basefiles."); + ::wait_and_exit(255); + } + if(not $workdir) { + my $dummycmdline = + CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir); + } + } + debug("init", "basesetup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Copying of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub cleanup_basefile() { + # Remove the basefiles transferred + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $workdir; + if(not $workdir) { + my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + for my $sshlogin (values %Global::host) { + if($sshlogin->local()) { next } + for my $file (@opt::basefile) { + push @cmd, $sshlogin->cleanup_cmd($file,$workdir); + } + } + debug("init", "basecleanup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Cleanup of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub run_gnu_parallel() { + my ($stdin,@args) = @_; + my $cmd = join "",map { " $_ & " } split /\n/, $stdin; + print $Global::original_stderr ` $cmd wait` ; + return 0 +} + +sub _run_gnu_parallel() { + # Run GNU Parallel + # This should ideally just fork an internal copy + # and not start it through a shell + # Input: + # $stdin = data to provide on stdin for GNU Parallel + # @args = command line arguments + # Returns: + # $exitstatus = exitcode of GNU Parallel run + # \@stdout = standard output + # \@stderr = standard error + my ($stdin,@args) = 
@_; + my ($exitstatus,@stdout,@stderr); + my ($stdin_fh,$stdout_fh)=(gensym(),gensym()); + my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par"); + unlink $stderrname; + + my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh, + $0,qw(--plain --shell /bin/sh --will-cite), @args); + if(my $writerpid = fork()) { + close $stdin_fh; + @stdout = <$stdout_fh>; + # Now stdout is closed: + # These pids should be dead or die very soon + while(kill 0, $writerpid) { ::usleep(1); } + die; +# reap $writerpid; +# while(kill 0, $pid) { ::usleep(1); } +# reap $writerpid; + $exitstatus = $?; + seek $stderr_fh, 0, 0; + @stderr = <$stderr_fh>; + close $stdout_fh; + close $stderr_fh; + } else { + close $stdout_fh; + close $stderr_fh; + print $stdin_fh $stdin; + close $stdin_fh; + exit(0); + } + return ($exitstatus,\@stdout,\@stderr); +} + +sub filter_hosts() { + # Remove down --sshlogins from active duty. + # Find ncpus, ncores, maxlen, time-to-login for each host. + # Uses: + # %Global::host + # $Global::minimal_command_line_length + # $opt::use_sockets_instead_of_threads + # $opt::use_cores_instead_of_threads + # $opt::use_cpus_instead_of_cores + # Returns: N/A + + my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref, + $maxlen_ref, $echo_ref, $down_hosts_ref) = + parse_host_filtering(parallelized_host_filtering()); + + delete @Global::host{@$down_hosts_ref}; + @$down_hosts_ref and ::warning("Removed @$down_hosts_ref."); + + $Global::minimal_command_line_length = 100_000_000; + while (my ($string, $sshlogin) = each %Global::host) { + if($sshlogin->local()) { next } + my ($nsockets,$ncores,$nthreads,$time_to_login,$maxlen) = + ($nsockets_ref->{$string},$ncores_ref->{$string}, + $nthreads_ref->{$string},$time_to_login_ref->{$string}, + $maxlen_ref->{$string}); + defined $nsockets or ::die_bug("nsockets missing: $string"); + defined $ncores or ::die_bug("ncores missing: $string"); + defined $nthreads or ::die_bug("nthreads missing: $string"); + defined $time_to_login or 
::die_bug("time_to_login missing: $string"); + defined $maxlen or ::die_bug("maxlen missing: $string"); + # ncpus may be set by 4/hostname or may be undefined yet + my $ncpus = $sshlogin->{'ncpus'}; + # $nthreads may be 0 if GNU Parallel is not installed remotely + $ncpus = $nthreads || $ncpus || $sshlogin->ncpus(); + if($opt::use_cpus_instead_of_cores) { + $ncpus = $ncores || $ncpus; + } elsif($opt::use_sockets_instead_of_threads) { + $ncpus = $nsockets || $ncpus; + } elsif($opt::use_cores_instead_of_threads) { + $ncpus = $ncores || $ncpus; + } + $sshlogin->set_ncpus($ncpus); + $sshlogin->set_time_to_login($time_to_login); + $maxlen = $maxlen || Limits::Command::max_length(); + $sshlogin->set_maxlength($maxlen); + ::debug("init", "Timing from -S:$string ", + " ncpus:", $ncpus, + " nsockets:",$nsockets, + " ncores:", $ncores, + " nthreads:",$nthreads, + " time_to_login:", $time_to_login, + " maxlen:", $maxlen, + " min_max_len:", $Global::minimal_command_line_length,"\n"); + } +} + +sub parse_host_filtering() { + # Input: + # @lines = output from parallelized_host_filtering() + # Returns: + # \%nsockets = number of sockets of {host} + # \%ncores = number of cores of {host} + # \%nthreads = number of hyperthreaded cores of {host} + # \%time_to_login = time_to_login on {host} + # \%maxlen = max command len on {host} + # \%echo = echo received from {host} + # \@down_hosts = list of hosts with no answer + local $/ = "\n"; + my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo, + @down_hosts); + for (@_) { + ::debug("init","Read: ",$_); + chomp; + my @col = split /\t/, $_; + if($col[0] =~ /^parallel: Warning:/) { + # Timed out job: Ignore it + next; + } elsif(defined $col[6]) { + # This is a line from --joblog + # seq host time spent sent received exit signal command + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores + if($col[0] eq "Seq" and $col[1] eq "Host" and + $col[2] eq "Starttime") { + # Header => skip + next; + } 
+ # Get server from: eval true server\; + $col[8] =~ /eval .?true.?\s([^\;]+);/ or + ::die_bug("col8 does not contain host: $col[8] in $_"); + my $host = $1; + $host =~ tr/\\//d; + $Global::host{$host} or next; + if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") { + # exit == 255 or exit == timeout (-1): ssh failed/timedout + # exit == 1: lsh failed + # Remove sshlogin + ::debug("init", "--filtered $host\n"); + push(@down_hosts, $host); + } elsif($col[6] eq "127") { + # signal == 127: parallel not installed remote + # Set nsockets, ncores, nthreads = 1 + ::warning("Could not figure out ". + "number of cpus on $host. Using 1."); + $nsockets{$host} = 1; + $ncores{$host} = 1; + $nthreads{$host} = 1; + $maxlen{$host} = Limits::Command::max_length(); + } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { + # Remember how log it took to log in + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo + $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); + } else { + ::die_bug("host check unmatched long jobline: $_"); + } + } elsif($Global::host{$col[0]}) { + # This output from --number-of-cores, --number-of-cpus, + # --max-line-length-allowed + # ncores: server 8 + # ncpus: server 2 + # maxlen: server 131071 + if(/parallel: Warning: Cannot figure out number of/) { + next; + } + if(/\t(perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed|Disconnected from|Received disconnect from)/ + or + /\tWarning: / + or + /\t(Host key fingerprint is|\+-.*-\+|\|.*\|)/ + or + /\t\S+: Undefined variable./ + ) { + # Skip these (from perl): + # perl: warning: Setting locale failed. + # perl: warning: Please check that your locale settings: + # LANGUAGE = (unset), + # LC_ALL = (unset), + # LANG = "en_US.UTF-8" + # are supported and installed on your system. + # perl: warning: Falling back to the standard locale ("C"). + # Disconnected from 127.0.0.1 port 22 + # + # Skip these (from ssh): + # Warning: Permanently added * to the list of known hosts. 
+ # Warning: Identity file * not accessible: * + # (VisualHostKey=yes) + # Host key fingerprint is SHA256:... + # +--[ED25519 256]--+ + # | o | + # +----[SHA256]-----+ + # + # Skip these (from csh): + # MANPATH: Undefined variable. + } elsif(not defined $nsockets{$col[0]}) { + $nsockets{$col[0]} = $col[1]; + } elsif(not defined $ncores{$col[0]}) { + $ncores{$col[0]} = $col[1]; + } elsif(not defined $nthreads{$col[0]}) { + $nthreads{$col[0]} = $col[1]; + } elsif(not defined $maxlen{$col[0]}) { + $maxlen{$col[0]} = $col[1]; + } elsif(not defined $echo{$col[0]}) { + $echo{$col[0]} = $col[1]; + } else { + ::die_bug("host check too many col0: $_"); + } + } else { + ::die_bug("host check unmatched short jobline ($col[0]): $_"); + } + } + @down_hosts = uniq(@down_hosts); + return(\%nsockets, \%ncores, \%nthreads, \%time_to_login, + \%maxlen, \%echo, \@down_hosts); +} + +sub parallelized_host_filtering() { + # Uses: + # %Global::host + # Returns: + # text entries with: + # * joblog line + # * hostname \t number of cores + # * hostname \t number of cpus + # * hostname \t max-line-length-allowed + # * hostname \t empty + + sub sshwrapped { + # Wrap with ssh and --env + # Return $default_value if command fails + my $sshlogin = shift; + my $command = shift; + # wrapper that returns output "0\n" if the command fails + # E.g. parallel not installed => "0\n" + my $wcmd = q(perl -e '$a=`).$command.q(`; print $? ? "0".v010 : $a'); + my $commandline = CommandLine->new(1,[$wcmd],{},0,0,[],[],[],[],{},{}); + my $job = Job->new($commandline); + $job->set_sshlogin($sshlogin); + $job->wrapped(); + return($job->{'wrapped'}); + } + + my(@sockets, @cores, @threads, @maxline, @echo); + while (my ($host, $sshlogin) = each %Global::host) { + if($host eq ":") { next } + # The 'true' is used to get the $host out later + push(@sockets, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-sockets")."\n\0"); + push(@cores, $host."\t"."true $host; ". 
+ sshwrapped($sshlogin,"parallel --number-of-cores")."\n\0"); + push(@threads, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-threads")."\n\0"); + push(@maxline, $host."\t"."true $host; ". + sshwrapped($sshlogin, + "parallel --max-line-length-allowed")."\n\0"); + # 'echo' is used to get the fastest possible ssh login time + push(@echo, $host."\t"."true $host; ". + $sshlogin->wrap("echo $host")."\n\0"); + } + # --timeout 10: Setting up an SSH connection and running a simple + # command should never take > 10 sec. + # --delay 0.1: If multiple sshlogins use the same proxy the delay + # will make it less likely to overload the ssh daemon. + # --retries 3: If the ssh daemon is overloaded, try 3 times + my $cmd = + "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ". + "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true "; + $cmd = $Global::shell." -c ".Q($cmd); + ::debug("init", $cmd, "\n"); + my @out; + my $prepend = ""; + + my ($host_fh,$in,$err); + open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd"); + ::debug("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo); + + if(not fork()) { + # Give the commands to run to the $cmd + close $host_fh; + print $in @sockets, @cores, @threads, @maxline, @echo; + close $in; + exit(); + } + close $in; + # If -0: $/ must be \n + local $/ = "\n"; + for(<$host_fh>) { + # TODO incompatible with '-quoting. Needs to be fixed differently + #if(/\'$/) { + # # if last char = ' then append next line + # # This may be due to quoting of \n in environment var + # $prepend .= $_; + # next; + #} + $_ = $prepend . $_; + $prepend = ""; + push @out, $_; + } + close $host_fh; + return @out; +} + +sub onall($@) { + # Runs @command on all hosts. + # Uses parallel to run @command on each host. + # --jobs = number of hosts to run on simultaneously. + # For each host a parallel command with the args will be running. 
+ # Uses: + # $Global::debug + # $Global::exitstatus + # $Global::joblog + # $Global::quoting + # $opt::D + # $opt::arg_file_sep + # $opt::arg_sep + # $opt::colsep + # $opt::files + # $opt::files0 + # $opt::group + # $opt::joblog + # $opt::jobs + # $opt::keeporder + # $opt::linebuffer + # $opt::max_chars + # $opt::plain + # $opt::retries + # $opt::tag + # $opt::tee + # $opt::timeout + # $opt::ungroup + # %Global::host + # @opt::basefile + # @opt::env + # @opt::v + # Input: + # @command = command to run on all hosts + # Returns: N/A + sub tmp_joblog { + # Input: + # $joblog = filename of joblog - undef if none + # Returns: + # $tmpfile = temp file for joblog - undef if none + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + my ($input_source_fh_ref,@command) = @_; + if($Global::quoting) { + @command = shell_quote(@command); + } + + # Copy all @input_source_fh (-a and :::) into tempfiles + my @argfiles = (); + for my $fh (@$input_source_fh_ref) { + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""), + ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""), + ((defined $opt::memsuspend) ? "--memfree ".$opt::memsuspend : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::group) ? "--group" : ""), + ((defined $opt::jobs) ? "-P $opt::jobs" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? 
"--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + (($opt::ungroup == 1) ? "-u" : ""), + ((defined $opt::tee) ? "--tee" : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), + ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::files0) ? "--files0" : ""), + ((defined $opt::group) ? "--group" : ""), + ((defined $opt::cleanup) ? "--cleanup" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::plus) ? "--plus" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), + (($opt::ungroup == 1) ? "-u" : ""), + ((defined $opt::ssh) ? "--ssh '".$opt::ssh."'" : ""), + ((defined $opt::tee) ? "--tee" : ""), + ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""), + (@Global::transfer_files ? map { "--tf ".Q($_) } + @Global::transfer_files : ""), + (@Global::ret_files ? map { "--return ".Q($_) } + @Global::ret_files : ""), + (@opt::env ? map { "--env ".Q($_) } @opt::env : ""), + (map { "-v" } @opt::v), + ); + ::debug("init", "| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $qsshlogin = Q($sshlogin->string()); + my $qsshloginpw = Q($sshlogin->pwstring()); + if($qsshloginpw ne $qsshlogin) { + ::warning_once("Using password or SSHPASS with --(n)onall ". 
+ "exposes the password", + "on the command line, ". + "making it visible to local users via `ps`."); + } + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog ".::Q($joblog); + } + my $quad = $opt::arg_file_sep || "::::"; + # If PARALLEL_ENV is set: Pass it on + my $penv=$Global::parallel_env ? + "PARALLEL_ENV=".Q($Global::parallel_env) : ''; + my $results; + if(defined $opt::results) { + $results = Q($opt::results) . $qsshlogin; + } + ::debug("init", "$penv $0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? "--tagstring ".$qsshlogin : ""), + ((defined $opt::results) ? "--results ".$results : ""), + " -S $qsshloginpw ", + join(" ",shell_quote(@command,$quad,@argfiles)),"\n"); + print $parallel_fh "$penv $0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? "--tagstring ".$qsshlogin : ""), + ((defined $opt::results) ? "--results ".$results : ""), + " -S $qsshloginpw ", + join(" ",shell_quote(@command,$quad,@argfiles)),"\0"; + } + close $parallel_fh; + $Global::exitstatus = $? 
>> 8; + debug("init", "--onall exitvalue ", $?); + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + $Global::debug or unlink(@argfiles); + my %seen; + for my $joblog (@joblogs) { + # Append to $joblog + my $fh = open_or_exit("<", $joblog); + # Skip first line (header); + <$fh>; + print $Global::joblog (<$fh>); + close $fh; + unlink($joblog); + } +} + + +sub __SIGNAL_HANDLING__() {} + + +sub sigtstp() { + # Send TSTP signal (Ctrl-Z) to all children process groups + # Uses: + # %SIG + # Returns: N/A + signal_children("TSTP"); +} + +sub sigpipe() { + # Send SIGPIPE signal to all children process groups + # Uses: + # %SIG + # Returns: N/A + signal_children("PIPE"); +} + +sub signal_children() { + # Send signal to all children process groups + # and GNU Parallel itself + # Uses: + # %SIG + # Returns: N/A + my $signal = shift; + debug("run", "Sending $signal "); + kill $signal, map { -$_ } keys %Global::running; + # Use default signal handler for GNU Parallel itself + $SIG{$signal} = undef; + kill $signal, $$; +} + +sub save_original_signal_handler() { + # Remember the original signal handler + # Uses: + # %Global::original_sig + # Returns: N/A + $SIG{INT} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + $SIG{TERM} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + %Global::original_sig = %SIG; + $SIG{TERM} = sub {}; # Dummy until jobs really start + $SIG{ALRM} = 'IGNORE'; + # Allow Ctrl-Z to suspend and `fg` to continue + $SIG{TSTP} = \&sigtstp; + $SIG{PIPE} = \&sigpipe; + $SIG{CONT} = sub { + # Set $SIG{TSTP} again (it is undef'ed in sigtstp() ) + $SIG{TSTP} = \&sigtstp; + for my $job (values %Global::running) { + if($job->suspended()) { + # Force jobs to suspend, if they are marked as suspended. + # --memsupspend can suspend a job that will be resumed + # if the user presses CTRL-Z followed by `fg`. 
+ $job->suspend(); + } else { + # Resume the rest of the jobs + $job->resume(); + } + } + }; +} + +sub list_running_jobs() { + # Print running jobs on tty + # Uses: + # %Global::running + # Returns: N/A + for my $job (values %Global::running) { + ::status("$Global::progname: ".$job->replaced()); + } +} + +sub start_no_new_jobs() { + # Start no more jobs + # Uses: + # %Global::original_sig + # %Global::unlink + # $Global::start_no_new_jobs + # Returns: N/A + unlink keys %Global::unlink; + ::status + ("$Global::progname: SIGHUP received. No new jobs will be started.", + "$Global::progname: Waiting for these ".(keys %Global::running). + " jobs to finish. Send SIGTERM to stop now."); + list_running_jobs(); + $Global::start_no_new_jobs ||= 1; +} + +sub reapers() { + # Run reaper until there are no more left + # Returns: + # @pids_reaped = pids of reaped processes + my @pids_reaped; + my $pid; + while($pid = reaper()) { + push @pids_reaped, $pid; + } + return @pids_reaped; +} + +sub reaper() { + # A job finished: + # * Set exitstatus, exitsignal, endtime. + # * Free ressources for new job + # * Update median runtime + # * Print output + # * If --halt = now: Kill children + # * Print progress + # Uses: + # %Global::running + # $opt::timeout + # $Global::timeoutq + # $opt::keeporder + # $Global::total_running + # Returns: + # $stiff = PID of child finished + my $stiff; + debug("run", "Reaper "); + if(($stiff = waitpid(-1, &WNOHANG)) <= 0) { + # No jobs waiting to be reaped + return 0; + } + + # $stiff = pid of dead process + my $job = $Global::running{$stiff}; + + # '-a <(seq 10)' will give us a pid not in %Global::running + # The same will one of the ssh -M: ignore + $job or return 0; + delete $Global::running{$stiff}; + $Global::total_running--; + if($job->{'commandline'}{'skip'}) { + # $job->skip() was called + $job->set_exitstatus(-2); + $job->set_exitsignal(0); + } else { + $job->set_exitsignal($? 
& 127); + if($job->exitstatus()) { + # Exit status already set - probably by --timeout + } elsif($? & 127) { + # Killed by signal. Many shells return: 128 | $signal + $job->set_exitstatus(128 | $?); + } else { + # Normal exit + $job->set_exitstatus($? >> 8); + } + } + + debug("run", "\nseq ",$job->seq()," died (", $job->exitstatus(), ")"); + if($Global::delayauto or $Global::sshdelayauto) { + if($job->exitstatus()) { + # Job failed: Increase delay (if $opt::(ssh)delay set) + $opt::delay &&= $opt::delay * 1.3; + $opt::sshdelay &&= $opt::sshdelay * 1.3; + } else { + # Job succeeded: Decrease delay (if $opt::(ssh)delay set) + $opt::delay &&= $opt::delay * 0.9; + $opt::sshdelay &&= $opt::sshdelay * 0.9; + } + debug("run", "delay:$opt::delay ssh:$opt::sshdelay "); + } + $job->set_endtime(::now()); + my $sshlogin = $job->sshlogin(); + $sshlogin->dec_jobs_running(); + if($job->should_be_retried()) { + # Free up file handles + $job->free_ressources(); + } else { + # The job is done + $sshlogin->inc_jobs_completed(); + # Free the jobslot + $job->free_slot(); + if($opt::timeout and not $job->exitstatus()) { + # Update average runtime for timeout only for successful jobs + $Global::timeoutq->update_median_runtime($job->runtime()); + } + if($opt::keeporder and not $opt::latestline) { + # --latestline fixes --keeporder in Job::row() + $job->print_earlier_jobs(); + } else { + $job->print(); + } + if($job->should_we_halt() eq "now") { + # Kill children + ::kill_sleep_seq($job->pid()); + ::killall(); + ::wait_and_exit($Global::halt_exitstatus); + } + } + $job->cleanup(); + + if($opt::progress) { + my $progress = progress(); + ::status_no_nl("\r",$progress->{'status'}); + } + + debug("run", "jobdone \n"); + return $stiff; +} + + +sub __USAGE__() {} + + +sub killall() { + # Kill all jobs by killing their process groups + # Uses: + # $Global::start_no_new_jobs = we are stopping + # $Global::killall = Flag to not run reaper + $Global::start_no_new_jobs ||= 1; + # Do not reap killed 
children: Ignore them instead + $Global::killall ||= 1; + kill_sleep_seq(keys %Global::running); +} + +sub kill_sleep_seq(@) { + # Send jobs TERM,TERM,KILL to processgroups + # Input: + # @pids = list of pids that are also processgroups + # Convert pids to process groups ($processgroup = -$pid) + my @pgrps = map { -$_ } @_; + my @term_seq = split/,/,$opt::termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + # for each signal+waittime: kill process groups still not dead + while(@term_seq) { + @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps); + } +} + +sub kill_sleep() { + # Kill pids with a signal and wait a while for them to die + # Input: + # $signal = signal to send to @pids + # $sleep_max = number of ms to sleep at most before returning + # @pids = pids to kill (actually process groups) + # Uses: + # $Global::killall = set by killall() to avoid calling reaper + # Returns: + # @pids = pids still alive + my ($signal, $sleep_max, @pids) = @_; + ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n"); + kill $signal, @pids; + my $sleepsum = 0; + my $sleep = 0.001; + + while(@pids and $sleepsum < $sleep_max) { + if($Global::killall) { + # Killall => don't run reaper + while(waitpid(-1, &WNOHANG) > 0) { + $sleep = $sleep/2+0.001; + } + } elsif(reapers()) { + $sleep = $sleep/2+0.001; + } + $sleep *= 1.1; + ::usleep($sleep); + $sleepsum += $sleep; + # Keep only living children + @pids = grep { kill(0, $_) } @pids; + } + return @pids; +} + +sub wait_and_exit($) { + # If we do not wait, we sometimes get segfault + # Returns: N/A + my $error = shift; + unlink keys %Global::unlink; + if($error) { + # Kill all jobs without printing + killall(); + } + for (keys %Global::unkilled_children) { + # Kill any (non-jobs) children (e.g. 
reserved processes) + kill 9, $_; + waitpid($_,0); + delete $Global::unkilled_children{$_}; + } + if($Global::unkilled_sqlworker) { + waitpid($Global::unkilled_sqlworker,0); + } + # Avoid: Warning: unable to close filehandle properly: No space + # left on device during global destruction. + $SIG{__WARN__} = sub {}; + if($opt::_parset) { + # Make the shell script return $error + print "$Global::parset_endstring\nreturn $error"; + } + exit($error); +} + +sub die_usage() { + # Returns: N/A + usage(); + wait_and_exit(255); +} + +sub usage() { + # Returns: N/A + print join + ("\n", + "Usage:", + "", + "$Global::progname [options] [command [arguments]] < list_of_arguments", + "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", + "cat ... | $Global::progname --pipe [options] [command [arguments]]", + "", + "-j n Run n jobs in parallel", + "-k Keep same order", + "-X Multiple arguments with context replace", + "--colsep regexp Split input on regexp for positional replacements", + "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", + "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", + "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", + " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", + "", + "-S sshlogin Example: foo\@server.example.com", + "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", + "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", + "--onall Run the given command with argument on all sshlogins", + "--nonall Run the given command with no arguments on all sshlogins", + "", + "--pipe Split stdin (standard input) to multiple jobs.", + "--recend str Record end separator for --pipe.", + "--recstart str Record start separator for --pipe.", + "", + "GNU Parallel can do much more. 
See 'man $Global::progname' for details", + "", + "Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + " Tange, O. (2024, December 22). GNU Parallel 20241222 ('Bashar').", + " Zenodo. https://doi.org/10.5281/zenodo.14550073", + "", + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "",); +} + +sub citation_notice() { + # if --will-cite or --plain: do nothing + # if stderr redirected: do nothing + # if $PARALLEL_HOME/will-cite: do nothing + # else: print citation notice to stderr + if($opt::willcite + or + $opt::plain + or + not -t $Global::original_stderr + or + grep { -e "$_/will-cite" } @Global::config_dirs) { + # skip + } else { + ::status + ("Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + " Tange, O. (2024, December 22). GNU Parallel 20241222 ('Bashar').", + " Zenodo. 
https://doi.org/10.5281/zenodo.14550073", + "", + # Before changing these line, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing these lines + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "More about funding GNU Parallel and the citation notice:", + "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice", + "", + "To silence this citation notice: run 'parallel --citation' once.", + "" + ); + mkdir $Global::config_dir; + # Number of times the user has run GNU Parallel without showing + # willingness to cite + my $runs = 0; + if(open (my $fh, "<", $Global::config_dir. + "/runs-without-willing-to-cite")) { + $runs = <$fh>; + close $fh; + } + $runs++; + if(open (my $fh, ">", $Global::config_dir. + "/runs-without-willing-to-cite")) { + print $fh $runs; + close $fh; + if($runs >= 10) { + ::status("Come on: You have run parallel $runs times. ". + "Isn't it about time ", + "you run 'parallel --citation' once to silence ". 
+ "the citation notice?", + ""); + } + } + } +} + +sub status(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh map { ($_, "\n") } @w; + flush $fh; +} + +sub status_no_nl(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh @w; + flush $fh; +} + +sub warning(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); +} + +{ + my %warnings; + sub warning_once(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + $warnings{@w}++ or + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); + } +} + +sub error(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + status(map { ($prog.": Error: ". $_); } @w); +} + +sub die_bug($) { + my $bugid = shift; + print STDERR + ("$Global::progname: This should not happen. You have found a bug. ", + "Please follow\n", + "https://www.gnu.org/software/parallel/man.html#reporting-bugs\n", + "\n", + "Include this in the report:\n", + "* The version number: $Global::version\n", + "* The bugid: $bugid\n", + "* The command line being run\n", + "* The files being read (put the files on a webserver if they are big)\n", + "\n", + "If you get the error on smaller/fewer files, please include those instead.\n"); + ::wait_and_exit(255); +} + +sub version() { + # Returns: N/A + print join + ("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2007-2024 Ole Tange, http://ole.tange.dk and Free Software", + "Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later ", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: https://www.gnu.org/software/${Global::progname}\n", + "When using programs that use GNU Parallel to process data for publication", + "please cite as described in 'parallel --citation'.\n", + ); +} + +sub citation() { + # Returns: N/A + my ($all_argv_ref,$argv_options_removed_ref) = 
@_; + my $all_argv = "@$all_argv_ref"; + my $no_opts = "@$argv_options_removed_ref"; + $all_argv=~s/--citation//; + if($all_argv ne $no_opts) { + ::warning("--citation ignores all other options and arguments."); + ::status(""); + } + + ::status( + "Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + "\@software{tange_2024_14550073,", + " author = {Tange, Ole},", + " title = {GNU Parallel 20241222 ('Bashar')},", + " month = Dec,", + " year = 2024,", + " note = {{GNU Parallel is a general parallelizer to run", + " multiple serial command line programs in parallel", + " without changing them.}},", + " publisher = {Zenodo},", + " doi = {10.5281/zenodo.14550073},", + " url = {https://doi.org/10.5281/zenodo.14550073}", + "}", + "", + "(Feel free to use \\nocite{tange_2024_14550073})", + "", + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "More about funding GNU Parallel and the citation notice:", + "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html", + "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice", + "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt", + "" + ); + while(not grep { -e "$_/will-cite" } @Global::config_dirs) { + print "\nType: 'will cite' and press enter.\n> "; + my $input = <STDIN>; + if(not defined $input) { + exit(255); + } + if($input =~ /will cite/i) { + if(mkdir $Global::config_dir) { + # Recompute @Global::config_dirs so we can 
break out of the loop. + init_globals(); + } + if(open (my $fh, ">", $Global::config_dir."/will-cite")) { + close $fh; + ::status( + "", + "Thank you for your support: You are the reason why there is funding to", + "continue maintaining GNU Parallel. On behalf of future versions of", + "GNU Parallel, which would not exist without your support:", + "", + " THANK YOU SO MUCH", + "", + "It is really appreciated. The citation notice is now silenced.", + ""); + } else { + ::status( + "", + "Thank you for your support. It is much appreciated. The citation", + "cannot permanently be silenced. Use '--will-cite' instead.", + "", + "If you use '--will-cite' in scripts to be run by others you are making", + "it harder for others to see the citation notice. The development of", + "GNU Parallel is indirectly financed through citations, so if users", + "do not know they should cite then you are making it harder to finance", + "development. However, if you pay 10000 EUR, you should feel free to", + "use '--will-cite' in scripts.", + ""); + last; + } + } + } +} + +sub show_limits() { + # Returns: N/A + print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", + "Maximal usable size of command: ", + $Global::usable_command_line_length,"\n", + "\n", + "Execution will continue now, ", + "and it will try to read its input\n", + "and run commands; if this is not ", + "what you wanted to happen, please\n", + "press CTRL-D or CTRL-C\n"); +} + +sub embed() { + # Give an embeddable version of GNU Parallel + # Tested with: bash, zsh, ksh, ash, dash, sh + my $randomstring = "cut-here-".join"", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20); + if(not -f $0 or not -r $0) { + ::error("--embed only works if parallel is a readable file"); + exit(255); + } + # Read the source from $0 + my $source = slurp_or_exit($0); + my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER}; + my $env_parallel_source; + my $shell = $Global::shell; + $shell =~ s:.*/::; + 
for(which("env_parallel.$shell")) { + -r $_ or next; + # Read the source of env_parallel.shellname + $env_parallel_source .= slurp_or_exit($_); + last; + } + print "#!$Global::shell + +# Copyright (C) 2007-2024 $user, Ole Tange, http://ole.tange.dk +# and Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +"; + + print q! +# Embedded GNU Parallel created with --embed +parallel() { + # Start GNU Parallel without leaving temporary files + # + # Not all shells support 'perl <(cat ...)' + # This is a complex way of doing: + # perl <(cat <<'cut-here' + # [...] + # ) "$@" + # and also avoiding: + # [1]+ Done cat + + # Make a temporary fifo that perl can read from + _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo); + do { + $f = "/tmp/parallel-".join"", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $f); + mkfifo($f,0600); + print $f;'` + # Put source code into temporary file + # so it is easy to copy to the fifo + _file_with_GNU_Parallel_source=`mktemp`; +!, + "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n", + $source, + $randomstring,"\n", + q! 
+ # Copy the source code from the file to the fifo + # and remove the file and fifo ASAP + # 'sh -c' is needed to avoid + # [1]+ Done cat + sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &" + + # Read the source from the fifo + perl $_fifo_with_GNU_Parallel_source "$@" +} +!, + $env_parallel_source, + q! + +# This will call the functions above +parallel -k echo ::: Put your code here +env_parallel --session +env_parallel -k echo ::: Put your code here +parset p,y,c,h -k echo ::: Put your code here +echo $p $y $c $h +echo You can also activate GNU Parallel for interactive use by: +echo . "$0" +!; + ::status("Redirect the output to a file and add your changes at the end:", + " $0 --embed > new_script"); +} + +sub pack_combined_executable { + my ($before_ref,$with_argsep_ref,$argv_ref) = @_; + my @parallelopts; + my $skip_next; + # Remove '--combine-exec file' from options + for(@{$before_ref}[0..(arrayindex($before_ref,$with_argsep_ref))-1]) { + if (/^--combine-?exec(utable)?$/ || $skip_next) { + # Also skip the filename given to --combine-exec + $skip_next = !$skip_next; + next; + } + push @parallelopts, $_; + } + # From ::: and to end + my @argsep = @{$with_argsep_ref}[($#ARGV+1)..$#$with_argsep_ref]; + # The executable is now the first in @ARGV + my $execname = shift @ARGV; + # The rest of @ARGV are options for $execname + my @execopts = @ARGV; + debug("combine", + "Parallel opts: @parallelopts ", + "Executable: $execname ", + "Execopts: @execopts ", + "Argsep: @argsep\n"); + # Read the the executable + my $exec = slurp_or_exit(which($execname)); + # Read the source of GNU Parallel and the executable + my $parallel = slurp_or_exit($0); + # Remove possibly __END__ from GNU Parallel + $parallel =~ s/^__END__.*//s; + if(-t $Global::original_stderr) { + ::status( + "Please be aware that combining GNU Parallel and '$execname'", + "into a combined executable will 
make the whole executable", + "licensed under GPLv3 (section 5.c).", + "", + "If the license of '$execname' is incompatible with GPLv3,", + "you cannot legally convey copies of the combined executable", + "to others. You can, however, still run them yourself.", + "", + "The combined executable will not have a citation notice,", + "so it is your resposibilty to advice that academic tradition", + "requires the users to cite GNU Parallel.", + "" + ); + my $input; + do { + ::status_no_nl("\nType: 'I agree' and press enter.\n> "); + $input = <STDIN>; + if(not defined $input) { + exit(255); + } + } until($input =~ /I agree/i); + } + write_or_exit($opt::combineexec, + $parallel, + "\n__END__\n", + (map { "$_\0\n" } @parallelopts), "\0\0\n", + $execname, "\0\0\n", + (map { "$_\0\n" } @execopts), "\0\0\n", + (map { "$_\0\n" } @argsep), "\0\0\n", + $exec); + # Set +x permission + chmod 0700, $opt::combineexec; + exit(0); +} + +sub unpack_combined_executable { + # If the script is a combined executable, + # it will have stuff in <DATA> (I.e. 
after __END__) + my $combine_exec = join("",<DATA>); + if(length $combine_exec) { + # Parse the <DATA> + # + # __END__ + # Option for GNU Parallel\0\n + # Option for GNU Parallel\0\n + # \0\0\n + # Name of executable\0\0\n + # Option for executable\0\n + # Option for executable\0\n + # \0\0\n + # argsep + args if any\0\n + # argsep + args if any\0\n + # \0\0\n + # <> + # + # parallel --combine --pipe -j10% --recend '' myscript --myopt myval + # __END__ + # --pipe\0\n --pipe + # -j10%\0\n -j10% + # --recend\0\n --recend + # \0\n '' + # \0\0\n end-of-parallel-options + # myscript\0\0\n myscript + # --myopt\0\n --myopt + # myval\0\n myval + # \0\0\n end-of-myscript-options + # \0\0\n no argsep + # <> + # + # parallel --combine -j10% myscript ::: + # __END__ + # -j10%\0\n + # \0\0\n end-of-parallel-options + # myscript\0\0\n + # \0\0\n end-of-myscript-options + # :::\0\n + # \0\0\n + # <> + + my ($opts,$execname,$execopts,$argsep,$exec) = + split /\0\0\n/,$combine_exec,5; + # Make a tmpdir with a file called $execname + local %ENV; + $ENV{TMPDIR} ||= "/tmp"; + my $dir = File::Temp::tempdir($ENV{'TMPDIR'} . 
"/parXXXXX", CLEANUP => 1); + my $script = $dir."/".$execname; + write_or_exit($script,$exec); + # Set +x permission + chmod 0700, $script; + # Mark it for unlinking later + $Global::unlink{$script}++; + $Global::unlink{$dir}++; + # pass the options for GNU Parallel + my @opts = split /\0\n/, $opts; + my @execopts = split /\0\n/, $execopts; + if(length $argsep) { + # Only add argsep if set + unshift(@ARGV, split(/\0\n/,$argsep)); + } + unshift(@ARGV,@opts,$script,@execopts); + } +} + + +sub __GENERIC_COMMON_FUNCTION__() {} + + +sub mkdir_or_die($) { + # If dir is not executable: die + my $dir = shift; + # The eval is needed to catch exception from mkdir + eval { File::Path::mkpath($dir); }; + if(not -x $dir) { + ::error("Cannot change into non-executable dir $dir: $!"); + ::wait_and_exit(255); + } +} + +sub tmpfile(@) { + # Create tempfile as $TMPDIR/parXXXXX + # Returns: + # $filehandle = opened file handle + # $filename = file name created + my($filehandle,$filename) = + ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); + if(wantarray) { + return($filehandle,$filename); + } else { + # Separate unlink due to NFS dealing badly with File::Temp + unlink $filename; + return $filehandle; + } +} + +sub tmpname($) { + # Select a name that does not exist + # Do not create the file as it may be used for creating a socket (by tmux) + # Remember the name in $Global::unlink to avoid hitting the same name twice + my $name = shift; + my($tmpname); + if(not -w $ENV{'TMPDIR'}) { + my $qtmp = ::Q($ENV{'TMPDIR'}); + if(not -e $ENV{'TMPDIR'}) { + ::error("Tmpdir $qtmp does not exist.","Try: mkdir -p $qtmp"); + } else { + ::error("Tmpdir $qtmp is not writable.","Try: chmod +w $qtmp"); + } + ::wait_and_exit(255); + } + do { + $tmpname = $ENV{'TMPDIR'}."/".$name. 
+ join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $tmpname or $Global::unlink{$tmpname}++); + return $tmpname; +} + +sub tmpfifo() { + # Find an unused name and mkfifo on it + my $tmpfifo = tmpname("fif"); + mkfifo($tmpfifo,0600); + return $tmpfifo; +} + +sub rm(@) { + # Remove file and remove it from %Global::unlink + # Uses: + # %Global::unlink + delete @Global::unlink{@_}; + unlink @_; +} + +sub size_of_block_dev() { + # Like -s but for block devices + # Input: + # $blockdev = file name of block device + # Returns: + # $size = in bytes, undef if error + my $blockdev = shift; + my $fh = open_or_exit("<", $blockdev); + seek($fh,0,2) || ::die_bug("cannot seek $blockdev"); + my $size = tell($fh); + close $fh; + return $size; +} + +sub qqx(@) { + # Like qx but with clean environment (except for @keep) + # and STDERR ignored + # This is needed if the environment contains functions + # that /bin/sh does not understand + my %env; + # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID + # ssh with Kerberos needs KRB5CCNAME + # sshpass needs SSHPASS + # tmux needs LC_CTYPE + # lsh needs HOME LOGNAME + my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE + HOME LOGNAME SSHPASS); + @env{@keep} = @ENV{@keep}; + local %ENV; + %ENV = %env; + if($Global::debug) { + # && true is to force spawning a shell and not just exec'ing + return qx{ @_ && true }; + } else { + # CygWin does not respect 2>/dev/null + # so we do that by hand + # This trick does not work: + # https://stackoverflow.com/q/13833088/363028 + # local *STDERR; + # open(STDERR, ">", "/dev/null"); + open(local *CHILD_STDIN, '<', '/dev/null') or die $!; + open(local *CHILD_STDERR, '>', '/dev/null') or die $!; + my $out; + # eval is needed if open3 fails (e.g. 
command line too long) + eval { + my $pid = open3( + '<&CHILD_STDIN', + $out, + '>&CHILD_STDERR', + # && true is to force spawning a shell and not just exec'ing + "@_ && true"); + my @arr = <$out>; + close $out; + # Make sure $? is set + waitpid($pid, 0); + return wantarray ? @arr : join "",@arr; + } or do { + # If eval fails, force $?=false + `false`; + }; + } +} + +sub uniq(@) { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + +sub min(@) { + # Returns: + # Minimum value of array + my $min; + for (@_) { + # Skip undefs + defined $_ or next; + defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef + $min = ($min < $_) ? $min : $_; + } + return $min; +} + +sub max(@) { + # Returns: + # Maximum value of array + my $max; + for (@_) { + # Skip undefs + defined $_ or next; + defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef + $max = ($max > $_) ? $max : $_; + } + return $max; +} + +sub sum(@) { + # Returns: + # Sum of values of array + my @args = @_; + my $sum = 0; + for (@args) { + # Skip undefs + $_ and do { $sum += $_; } + } + return $sum; +} + +sub undef_as_zero($) { + my $a = shift; + return $a ? $a : 0; +} + +sub undef_as_empty($) { + my $a = shift; + return $a ? 
$a : ""; +} + +sub undef_if_empty($) { + if(defined($_[0]) and $_[0] eq "") { + return undef; + } + return $_[0]; +} + +sub multiply_binary_prefix(@) { + # Evalualte numbers with binary prefix + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 + # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # 13G = 13*1024*1024*1024 = 13958643712 + # Input: + # $s = string with prefixes + # Returns: + # $value = int with prefixes multiplied + my @v = @_; + for(@v) { + defined $_ or next; + s/ki/*1024/gi; + s/mi/*1024*1024/gi; + s/gi/*1024*1024*1024/gi; + s/ti/*1024*1024*1024*1024/gi; + s/pi/*1024*1024*1024*1024*1024/gi; + s/ei/*1024*1024*1024*1024*1024*1024/gi; + s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; + s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + + s/K/*1024/g; + s/M/*1024*1024/g; + s/G/*1024*1024*1024/g; + s/T/*1024*1024*1024*1024/g; + s/P/*1024*1024*1024*1024*1024/g; + s/E/*1024*1024*1024*1024*1024*1024/g; + s/Z/*1024*1024*1024*1024*1024*1024*1024/g; + s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; + s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; + + s/k/*1000/g; + s/m/*1000*1000/g; + s/g/*1000*1000*1000/g; + s/t/*1000*1000*1000*1000/g; + s/p/*1000*1000*1000*1000*1000/g; + s/e/*1000*1000*1000*1000*1000*1000/g; + s/z/*1000*1000*1000*1000*1000*1000*1000/g; + s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $_ = eval $_; + } + return wantarray ? 
@v : $v[0]; +} + +sub multiply_time_units($) { + # Evalualte numbers with time units + # s=1, m=60, h=3600, d=86400 + # Input: + # $s = string time units + # Returns: + # $value = int in seconds + my @v = @_; + for(@v) { + defined $_ or next; + if(/[dhms]/i) { + s/s/*1+/gi; + s/m/*60+/gi; + s/h/*3600+/gi; + s/d/*86400+/gi; + # 1m/3 => 1*60+/3 => 1*60/3 + s/\+(\D)/$1/gi; + } + $_ = eval $_."-0"; + } + return wantarray ? @v : $v[0]; +} + +sub seconds_to_time_units() { + # Convert seconds into ??d??h??m??s + # s=1, m=60, h=3600, d=86400 + # Input: + # $s = int in seconds + # Returns: + # $str = string time units + my $s = shift; + my $str; + my $d = int($s/86400); + $s -= $d * 86400; + my $h = int($s/3600); + $s -= $h * 3600; + my $m = int($s/60); + $s -= $m * 60; + if($d) { + $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s); + } elsif($h) { + $str = sprintf("%dh%02dm%02ds",$h,$m,$s); + } elsif($m) { + $str = sprintf("%dm%02ds",$m,$s); + } else { + $str = sprintf("%ds",$s); + } + return $str; +} + +{ + my ($disk_full_fh, $b8193, $error_printed); + sub exit_if_disk_full() { + # Checks if $TMPDIR is full by writing 8kb to a tmpfile + # If the disk is full: Exit immediately. 
+ # Returns: + # N/A + if(not $disk_full_fh) { + $disk_full_fh = ::tmpfile(SUFFIX => ".df"); + $b8193 = "b"x8193; + } + # Linux does not discover if a disk is full if writing <= 8192 + # Tested on: + # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos + # ntfs reiserfs tmpfs ubifs vfat xfs + # TODO this should be tested on different OS similar to this: + # + # doit() { + # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop + # seq 100000 | parallel --tmpdir /mnt/loop/ true & + # seq 6900000 > /mnt/loop/i && echo seq OK + # seq 6980868 > /mnt/loop/i + # seq 10000 > /mnt/loop/ii + # sleep 3 + # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ + # echo >&2 + # } + print $disk_full_fh $b8193; + if(not $disk_full_fh + or + tell $disk_full_fh != 8193) { + # On raspbian the disk can be full except for 10 chars. + if(not $error_printed) { + ::error("Output is incomplete.", + "Cannot append to buffer file in $ENV{'TMPDIR'}.", + "Is the disk full?", + "Change \$TMPDIR with --tmpdir or use --compress."); + $error_printed = 1; + } + ::wait_and_exit(255); + } + truncate $disk_full_fh, 0; + seek($disk_full_fh, 0, 0) || die; + } +} + +sub spacefree($$) { + # Remove comments and spaces + # Inputs: + # $spaces = keep 1 space? 
+ # $s = string to remove spaces from + # Returns: + # $s = with spaces removed + my $spaces = shift; + my $s = shift; + $s =~ s/#.*//mg; + if(1 == $spaces) { + $s =~ s/\s+/ /mg; + } elsif(2 == $spaces) { + # Keep newlines + $s =~ s/\n\n+/\n/sg; + $s =~ s/[ \t]+/ /mg; + } elsif(3 == $spaces) { + # Keep perl code required space + $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg; + $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg; + } else { + $s =~ s/\s//mg; + } + return $s; +} + +{ + my $hostname; + sub hostname() { + local $/ = "\n"; + if(not $hostname) { + $hostname = `hostname`; + chomp($hostname); + $hostname ||= "nohostname"; + } + return $hostname; + } +} + +sub which(@) { + # Input: + # @programs = programs to find the path to + # Returns: + # @full_path = full paths to @programs. Nothing if not found + my @which; + for my $prg (@_) { + push(@which, grep { not -d $_ and -x $_ } + map { $_."/".$prg } split(":",$ENV{'PATH'})); + if($prg =~ m:/:) { + # Test if program with full path exists + push(@which, grep { not -d $_ and -x $_ } $prg); + } + } + ::debug("which", "$which[0] in $ENV{'PATH'}\n"); + return wantarray ? 
@which : $which[0]; +} + +{ + my ($regexp,$shell,%fakename); + + sub parent_shell { + # Input: + # $pid = pid to see if (grand)*parent is a shell + # Returns: + # $shellpath = path to shell - undef if no shell found + my $pid = shift; + ::debug("init","Parent of $pid\n"); + if(not $regexp) { + # All shells known to mankind + # + # ash bash csh dash fdsh fish fizsh ion ksh ksh93 mksh pdksh + # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh + + my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ion ksh + ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh + static-sh tcsh yash zsh -sh -csh -bash), + '-sh (sh)' # sh on FreeBSD + ); + # Can be formatted as: + # [sh] -sh sh busybox sh -sh (sh) + # /bin/sh /sbin/sh /opt/csw/sh + # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh + $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")"; + $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|\S*busybox |\S*rosetta )'. + '(-?)('. $shell. '))( *$| [^(])'; + %fakename = ( + # sh disguises itself as -sh (sh) on FreeBSD + "-sh (sh)" => ["sh"], + # csh and tcsh disguise themselves as -sh/-csh + # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh + # but sh also disguises itself as -sh + # (TODO When does that happen?) 
+ "-sh" => ["sh"], + "-csh" => ["tcsh", "csh"], + # ash disguises itself as -ash + "-ash" => ["ash", "dash", "sh"], + # dash disguises itself as -dash + "-dash" => ["dash", "ash", "sh"], + # bash disguises itself as -bash + "-bash" => ["bash", "sh"], + # ksh disguises itself as -ksh + "-ksh" => ["ksh", "sh"], + # zsh disguises itself as -zsh + "-zsh" => ["zsh", "sh"], + ); + } + if($^O eq "linux") { + # Optimized for GNU/Linux + my $testpid = $pid; + my $shellpath; + my $shellline; + while($testpid) { + if(open(my $fd, "<", "/proc/$testpid/cmdline")) { + local $/="\0"; + chomp($shellline = <$fd>); + if($shellline =~ /busybox$|rosetta$/) { + # Possibly: busybox \0 sh or .../rosetta \0 /bin/bash + # Skip busybox/rosetta + chomp($shellline = <$fd>); + } + if($shellline =~ /$regexp/o) { + my $shellname = $4 || $8; + my $dash = $3 || $7; + if($shellname eq "sh" and $dash) { + # -sh => csh or sh + if($shellpath = readlink "/proc/$testpid/exe") { + ::debug("init","procpath $shellpath\n"); + if($shellpath =~ m:/$shell$:o) { + ::debug("init", + "proc which ".$shellpath." => "); + return $shellpath; + } + } + } + ::debug("init", "which ".$shellname." 
=> "); + $shellpath = (which($shellname, + @{$fakename{$shellname}}))[0]; + ::debug("init", "shell path $shellpath\n"); + return $shellpath; + } + } + # Get parent pid + if(open(my $fd, "<", "/proc/$testpid/stat")) { + my $line = <$fd>; + close $fd; + # Parent pid is field 4 + $testpid = (split /\s+/, $line)[3]; + } else { + # Something is wrong: fall back to old method + last; + } + } + } + # if -sh or -csh try readlink /proc/$$/exe + my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); + my $shellpath; + my $testpid = $pid; + while($testpid) { + if($name_of_ref->{$testpid} =~ /$regexp/o) { + my $shellname = $4 || $8; + my $dash = $3 || $7; + if($shellname eq "sh" and $dash) { + # -sh => csh or sh + if($shellpath = readlink "/proc/$testpid/exe") { + ::debug("init","procpath $shellpath\n"); + if($shellpath =~ m:/$shell$:o) { + ::debug("init", "proc which ".$shellpath." => "); + return $shellpath; + } + } + } + ::debug("init", "which ".$shellname." => "); + $shellpath = (which($shellname,@{$fakename{$shellname}}))[0]; + ::debug("init", "shell path $shellpath\n"); + $shellpath and last; + } + if($testpid == $parent_of_ref->{$testpid}) { + # In Solaris zones, the PPID of the zsched process is itself + last; + } + $testpid = $parent_of_ref->{$testpid}; + } + return $shellpath; + } +} + +{ + my %pid_parentpid_cmd; + + sub pid_table() { + # Returns: + # %children_of = { pid -> children of pid } + # %parent_of = { pid -> pid of parent } + # %name_of = { pid -> commandname } + + if(not %pid_parentpid_cmd) { + # Filter for SysV-style `ps` + my $sysv = q( ps -ef |). + q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}//; print "@F[1,2] $_"' ); + # Minix uses cols 2,3 and can have newlines in the command + # so lines not having numbers in cols 2,3 must be ignored + my $minix = q( ps -ef |). + q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). 
+ q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' ); + # BSD-style `ps` + my $bsd = q(ps -o pid,ppid,command -ax); + %pid_parentpid_cmd = + ( + 'aix' => $sysv, + 'android' => $sysv, + 'cygwin' => $sysv, + 'darwin' => $bsd, + 'dec_osf' => $sysv, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $sysv, + 'hpux' => $sysv, + 'linux' => $sysv, + 'mirbsd' => $bsd, + 'minix' => $minix, + 'msys' => $sysv, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $sysv, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $sysv, + 'syllable' => "echo ps not supported", + ); + } + $pid_parentpid_cmd{$^O} or + ::die_bug("pid_parentpid_cmd for $^O missing"); + + my (@pidtable,%parent_of,%children_of,%name_of); + # Table with pid -> children of pid + @pidtable = `$pid_parentpid_cmd{$^O}`; + my $p=$$; + for (@pidtable) { + # must match: 24436 21224 busybox ash + # must match: 24436 21224 <> + # must match: 24436 21224 <> + # or: perl -e 'while($0=" "){}' + if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/ + or + /^\s*(\S+)\s+(\S+)\s+()$/) { + $parent_of{$1} = $2; + push @{$children_of{$2}}, $1; + $name_of{$1} = $3; + } else { + ::die_bug("pidtable format: $_"); + } + } + return(\%children_of, \%parent_of, \%name_of); + } +} + +sub now() { + # Returns time since epoch as in seconds with 3 decimals + # Uses: + # @Global::use + # Returns: + # $time = time now with millisecond accuracy + if(not $Global::use{"Time::HiRes"}) { + if(eval "use Time::HiRes qw ( time );") { + eval "sub TimeHiRestime { return Time::HiRes::time };"; + } else { + eval "sub TimeHiRestime { return time() };"; + } + $Global::use{"Time::HiRes"} = 1; + } + + return (int(TimeHiRestime()*1000))/1000; +} + +sub usleep($) { + # Sleep this many milliseconds. 
+ # Input: + # $ms = milliseconds to sleep + my $ms = shift; + ::debug("timing",int($ms),"ms "); + select(undef, undef, undef, $ms/1000); +} + +sub make_regexp_ungreedy { + my $regexp = shift; + my $class_state = 0; + my $escape_state = 0; + my $found = 0; + my $ungreedy = ""; + my $c; + + for $c (split (//, $regexp)) { + if ($found) { + if($c ne "?") { $ungreedy .= "?"; } + $found = 0; + } + $ungreedy .= $c; + + if ($escape_state) { $escape_state = 0; next; } + if ($c eq "\\") { $escape_state = 1; next; } + if ($c eq '[') { $class_state = 1; next; } + if ($class_state) { + if($c eq ']') { $class_state = 0; } + next; + } + # Quantifiers: + * {...} + if ($c =~ /[*}+]/) { $found = 1; } + } + if($found) { $ungreedy .= '?'; } + return $ungreedy; +} + + +sub __KILLER_REAPER__() {} + +sub reap_usleep() { + # Reap dead children. + # If no dead children: Sleep specified amount with exponential backoff + # Input: + # $ms = milliseconds to sleep + # Returns: + # $ms/2+0.001 if children reaped + # $ms*1.1 if no children reaped + my $ms = shift; + if(reapers()) { + if(not $Global::total_completed % 100) { + if($opt::timeout) { + # Force cleaning the timeout queue for every 100 jobs + # Fixes potential memleak + $Global::timeoutq->process_timeouts(); + } + } + # Sleep exponentially shorter (1/2^n) if a job finished + return $ms/2+0.001; + } else { + if($opt::timeout) { + $Global::timeoutq->process_timeouts(); + } + if($opt::memfree) { + kill_youngster_if_not_enough_mem($opt::memfree*0.5); + } + if($opt::memsuspend) { + suspend_young_if_not_enough_mem($opt::memsuspend); + } + if($opt::limit) { + kill_youngest_if_over_limit(); + } + exit_if_disk_full(); + if($Global::linebuffer) { + my $something_printed = 0; + if($opt::keeporder and not $opt::latestline) { + for my $job (values %Global::running) { + $something_printed += $job->print_earlier_jobs(); + } + } else { + for my $job (values %Global::running) { + $something_printed += $job->print(); + } + } + if($something_printed) { 
$ms = $ms/2+0.001; } + } + if($ms > 0.002) { + # When a child dies, wake up from sleep (or select(,,,)) + $SIG{CHLD} = sub { kill "ALRM", $$ }; + if($opt::delay and not $Global::linebuffer) { + # The 0.004s is approximately the time it takes for one round + my $next_earliest_start = + $Global::newest_starttime + $opt::delay - 0.004; + my $remaining_ms = 1000 * ($next_earliest_start - ::now()); + # The next job can only start at $next_earliest_start + # so sleep until then (but sleep at least $ms) + usleep(::max($ms,$remaining_ms)); + } else { + usleep($ms); + } + # --compress needs $SIG{CHLD} unset + $SIG{CHLD} = 'DEFAULT'; + } + # Sleep exponentially longer (1.1^n) if a job did not finish, + # though at most 1000 ms. + return (($ms < 1000) ? ($ms * 1.1) : ($ms)); + } +} + +sub kill_youngest_if_over_limit() { + # Check each $sshlogin we are over limit + # If over limit: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}) { + if($sshlogin->limit() == 2) { + $job->kill(); + last; + } + } + } +} + +sub suspend_young_if_not_enough_mem() { + # Check each $sshlogin if there is enough mem. 
+ # If less than $limit free mem: suspend some of the young children + # Else: Resume all jobs + # Uses: + # %Global::running + my $limit = shift; + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + my $free = $sshlogin->memfree(); + if($free < 2*$limit) { + # Suspend all jobs (resume some of them later) + map { $_->suspended() or $_->suspend(); } @{$jobs_of{$sshlogin}}; + my @jobs = (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}); + # how many should be running? + # limit*1 => 1; + # limit*1.5 => 2; + # limit*1.75 => 4; + # free < limit*(2-1/2^n); + # => + # 1/(2-free/limit) < 2^n; + my $run = int(1/(2-$free/$limit)); + $run = ::min($run,$#jobs); + # Resume the oldest running + for my $job ((sort { $a->seq() <=> $b->seq() } @jobs)[0..$run]) { + ::debug("mem","\nResume ",$run+1, " jobs. Seq ", + $job->seq(), " resumed ", + $sshlogin->memfree()," < ",2*$limit); + $job->resume(); + } + } else { + for my $job (@{$jobs_of{$sshlogin}}) { + if($job->suspended()) { + $job->resume(); + ::debug("mem","\nResume ",$#{$jobs_of{$sshlogin}}+1, + " jobs. Seq ", $job->seq(), " resumed ", + $sshlogin->memfree()," > ",2*$limit); + last; + } + } + } + } +} + +sub kill_youngster_if_not_enough_mem() { + # Check each $sshlogin if there is enough mem. + # If less than 50% enough free mem: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my $limit = shift; + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}) { + if($sshlogin->memfree() < $limit) { + ::debug("mem","\n",map { $_->seq()." 
" } + (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}})); + ::debug("mem","\n", $job->seq(), "killed ", + $sshlogin->memfree()," < ",$limit); + $job->kill(); + $job->set_killreason("mem"); + $sshlogin->memfree_recompute(); + } else { + last; + } + } + ::debug("mem","Free mem OK? ", + $sshlogin->memfree()," > ",$limit); + } +} + + +sub __DEBUGGING__() {} + + +sub debug(@) { + # Uses: + # $Global::debug + # %Global::fh + # Returns: N/A + $Global::debug or return; + @_ = grep { defined $_ ? $_ : "" } @_; + if($Global::debug eq "all" or $Global::debug eq $_[0]) { + if($Global::fh{2}) { + # Original stderr was saved + my $stderr = $Global::fh{2}; + print $stderr @_[1..$#_]; + } else { + print STDERR @_[1..$#_]; + } + } +} + +sub my_memory_usage() { + # Returns: + # memory usage if found + # 0 otherwise + use strict; + use FileHandle; + + local $/ = "\n"; + my $pid = $$; + if(-e "/proc/$pid/stat") { + my $fh = FileHandle->new("</proc/$pid/stat"); + my $data = <$fh>; + chomp $data; + $fh->close; + + my @procinfo = split(/\s+/,$data); + + return undef_as_zero($procinfo[22]); + } else { + return 0; + } +} + +sub my_size() { + # Returns: + # $size = size of object if Devel::Size is installed + # -1 otherwise + my @size_this = (@_); + eval "use Devel::Size qw(size total_size)"; + if ($@) { + return -1; + } else { + return total_size(@_); + } +} + +sub my_dump(@) { + # Returns: + # ascii expression of object if Data::Dump(er) is installed + # error code otherwise + my @dump_this = (@_); + eval "use Data::Dump qw(dump);"; + if ($@) { + # Data::Dump not installed + eval "use Data::Dumper;"; + if ($@) { + my $err = "Neither Data::Dump nor Data::Dumper is installed\n". 
+ "Not dumping output\n"; + ::status($err); + return $err; + } else { + return Dumper(@dump_this); + } + } else { + # Create a dummy Data::Dump:dump as Hans Schou sometimes has + # it undefined + eval "sub Data::Dump:dump {}"; + eval "use Data::Dump qw(dump);"; + return (Data::Dump::dump(@dump_this)); + } +} + +sub my_croak(@) { + eval "use Carp; 1"; + $Carp::Verbose = 1; + croak(@_); +} + +sub my_carp() { + eval "use Carp; 1"; + $Carp::Verbose = 1; + carp(@_); +} + + +sub __OBJECT_ORIENTED_PARTS__() {} + + +package SSHLogin; + +sub new($$) { + my $class = shift; + my $s = shift; + my $origs = $s; + my %hostgroups; + my $ncpus; + my $sshcommand; + my $user; + my $password; + my $host; + my $port; + my $local; + my $string; + # SSHLogins can have these formats: + # @grp1+grp2/ => use only servers in @grp1+grp2 + # [@grp+grp/][ncpu/][ssh command ][[user][:password]@][host[:port]] + # + # Here most combinations are generated: + # grp=grp1+grp2 + # ncpu=4 + # ssh=/usr/bin/ssh + # user=user + # pass=pass + # host=host + # port=port + # parallel -k echo {1}{2}{3}{4}{5}{=1'$_ = ($arg[4]||$arg[5]) ? 
"\@" : ""' =}$host{6} ::: '' @$grp/ ::: '' $ncpu/ ::: '' $ssh' ' ::: '' $user ::: '' :$pass ::: '' :$port + # host + # host:port + # :pass@host + # :pass@host:port + # user@host + # user@host:port + # user:pass@host + # user:pass@host:port + # /usr/bin/ssh host + # /usr/bin/ssh host:port + # /usr/bin/ssh :pass@host + # /usr/bin/ssh :pass@host:port + # /usr/bin/ssh user@host + # /usr/bin/ssh user@host:port + # /usr/bin/ssh user:pass@host + # /usr/bin/ssh user:pass@host:port + # ncpu/host + # ncpu/host:port + # ncpu/:pass@host + # ncpu/:pass@host:port + # ncpu/user@host + # ncpu/user@host:port + # ncpu/user:pass@host + # ncpu/user:pass@host:port + # ncpu//usr/bin/ssh host + # ncpu//usr/bin/ssh host:port + # ncpu//usr/bin/ssh :pass@host + # ncpu//usr/bin/ssh :pass@host:port + # ncpu//usr/bin/ssh user@host + # ncpu//usr/bin/ssh user@host:port + # ncpu//usr/bin/ssh user:pass@host + # ncpu//usr/bin/ssh user:pass@host:port + # @grp1+grp2/host + # @grp1+grp2/host:port + # @grp1+grp2/:pass@host + # @grp1+grp2/:pass@host:port + # @grp1+grp2/user@host + # @grp1+grp2/user@host:port + # @grp1+grp2/user:pass@host + # @grp1+grp2/user:pass@host:port + # @grp1+grp2//usr/bin/ssh host + # @grp1+grp2//usr/bin/ssh host:port + # @grp1+grp2//usr/bin/ssh :pass@host + # @grp1+grp2//usr/bin/ssh :pass@host:port + # @grp1+grp2//usr/bin/ssh user@host + # @grp1+grp2//usr/bin/ssh user@host:port + # @grp1+grp2//usr/bin/ssh user:pass@host + # @grp1+grp2//usr/bin/ssh user:pass@host:port + # @grp1+grp2/ncpu/host + # @grp1+grp2/ncpu/host:port + # @grp1+grp2/ncpu/:pass@host + # @grp1+grp2/ncpu/:pass@host:port + # @grp1+grp2/ncpu/user@host + # @grp1+grp2/ncpu/user@host:port + # @grp1+grp2/ncpu/user:pass@host + # @grp1+grp2/ncpu/user:pass@host:port + # @grp1+grp2/ncpu//usr/bin/ssh host + # @grp1+grp2/ncpu//usr/bin/ssh host:port + # @grp1+grp2/ncpu//usr/bin/ssh :pass@host + # @grp1+grp2/ncpu//usr/bin/ssh :pass@host:port + # @grp1+grp2/ncpu//usr/bin/ssh user@host + # @grp1+grp2/ncpu//usr/bin/ssh 
user@host:port + # @grp1+grp2/ncpu//usr/bin/ssh user:pass@host + # @grp1+grp2/ncpu//usr/bin/ssh user:pass@host:port + + # [@grp+grp/][ncpu/][ssh command ][[user][:password]@][host[:port]] + if($s =~ s:^\@([^/]+)/::) { + # Look for SSHLogin hostgroups + %hostgroups = map { $_ => 1 } split(/\+|,/, $1); + } + + # [ncpu/][ssh command ][[user][:password]@][host[:port]] + if ($s =~ s:^(\d+)/::) { $ncpus = $1; } + + # Why disallow space in password? + # Example: + # C:/bin/ssh user:C:/bin/ssh@host + # Should this parse as: + # user 'C' with password '/bin/ssh user:C:/bin/ssh' + # or + # cmd 'C:/bin/ssh' user 'user' with password 'C:/bin/ssh' + # This is impossible to determine. + # With space forbidden in password it uniquely parses as the 2nd. + # [ssh command ][[user][:password]@][host[:port]] + if($s =~ s/^(.*) //) { $sshcommand = $1; } + + # [[user][:password]@][host[:port]] + # An SSHLogin is always in the hostgroup of its "user:pass@host:port" + $hostgroups{$s} = 1; + if($s =~ s/^(.*)@//) { + my $userpw = $1; + # user[:pass] + if($userpw =~ s/:(.*)//) { + $password = $1; + if($password eq "") { $password = $ENV{'SSHPASS'} } + if(not ::which("sshpass")) { + ::error("--sshlogin with password requires sshpass installed"); + ::wait_and_exit(255); + } + } + # This might be empty - that is OK + $user = $userpw; + } + # [host[:port]] + # host can have these formats: + # hostname (: as local is dealt with later) + # IPv4 i.p.n.o + # with port + # * :22 + # * :ssh + # IPv6: + # * \[b11010000011101] + # * \[o64072/14] + # * \[xd074/14] + # * \[208.116.0.0/14] + # with port + # * .22 + # * p22 + # * #22 + if(not $s =~ /:.*:/ + and + $s =~ s/^([-a-z0-9._]+)//i) { + # Not IPv6 (IPv6 has 2 or more ':') + $host = $1; + } elsif($s =~ s/^(\\[\[\]box0-9a-f.]+)//i) { + # RFC2673 allows for: + # \[b11010000011101] \[o64072/14] \[xd074/14] \[208.116.0.0/14] + $host = $1; + } elsif($s =~ s/^\[([0-9a-f:]+)\]//i + or + $s =~ s/^([0-9a-f:]+)//i) { + # RFC5952 + # [2001:db8::1]:80 + # 
2001:db8::1.80 + # 2001:db8::1p80 + # 2001:db8::1#80 + # 2001:db8::1:80 - not supported + # 2001:db8::1 port 80 - not supported + $host = $1; + } + # [:port] + if($s =~ s/^:(\w+)//i) { + $port = $1; + } elsif($s =~ s/^[p\.\#](\w+)//i) { + # RFC5952 + # 2001:db8::1.80 + # 2001:db8::1p80 + # 2001:db8::1#80 + $port = $1; + } + + if($s and $s ne ':') { + ::die_bug("SSHLogin parser failed on '$origs' => '$s'"); + } + + $string = + # Only include the sshcommand in $string if it is set by user + ($sshcommand && $sshcommand." "). + ($user && $user."@"). + ($host && $host). + ($port && ":$port"); + my $userpassword = ($user && $user).($password && ":".$password); + my $pwstring = + # Only include the sshcommand in $string if it is set by user + ($sshcommand && $sshcommand." "). + ($userpassword && $userpassword."@"). + ($host && $host). + ($port && ":$port"); + if($host eq ':') { + $local = 1; + $string = ":"; + } else { + $sshcommand ||= $opt::ssh || $ENV{'PARALLEL_SSH'} || "ssh"; + } + # An SSHLogin is always in the hostgroup of its $string-name + $hostgroups{$string} = 1; + @Global::hostgroups{keys %hostgroups} = values %hostgroups; + # Used for file names for loadavg + my $no_slash_string = $string; + $no_slash_string =~ s/[^-a-z0-9:]/_/gi; + return bless { + 'string' => $string, + 'pwstring' => $pwstring, + 'jobs_running' => 0, + 'jobs_completed' => 0, + 'maxlength' => undef, + 'max_jobs_running' => undef, + 'orig_max_jobs_running' => undef, + 'ncpus' => $ncpus, + 'sshcommand' => $sshcommand, + 'user' => $user, + 'password' => $password, + 'host' => $host, + 'port' => $port, + 'hostgroups' => \%hostgroups, + 'local' => $local, + 'control_path_dir' => undef, + 'control_path' => undef, + 'time_to_login' => undef, + 'last_login_at' => undef, + 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" . + $no_slash_string . "/loadavg", + 'loadavg' => undef, + 'last_loadavg_update' => 0, + 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" . + $no_slash_string . 
"/swap_activity", + 'swap_activity' => undef, + }, ref($class) || $class; +} + +sub DESTROY($) { + my $self = shift; + # Remove temporary files if they are created. + ::rm($self->{'loadavg_file'}); + ::rm($self->{'swap_activity_file'}); +} + +sub string($) { + my $self = shift; + return $self->{'string'}; +} + +sub pwstring($) { + my $self = shift; + return $self->{'pwstring'}; +} + +sub host($) { + my $self = shift; + return $self->{'host'}; +} + +sub sshcmd($) { + # Give the ssh command without hostname + # Returns: + # "sshpass -e ssh -p port -l user" + my $self = shift; + my @local; + # [sshpass -e] ssh -p port -l user + if($self->{'password'}) { push @local, "sshpass -e"; } + # [ssh] -p port -l user + # TODO sshpass + space + push @local, $self->{'sshcommand'}; + # [-p port] -l user + if($self->{'port'}) { push @local, '-p',$self->{'port'}; } + # [-l user] + if($self->{'user'}) { push @local, '-l',$self->{'user'}; } + if($opt::controlmaster) { + # Use control_path to make ssh faster + my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; + + if(not $self->{'control_path'}{$control_path}++) { + # Master is not running for this control_path + # Start it + my $pid = fork(); + if($pid) { + $Global::sshmaster{$pid} ||= 1; + } else { + push @local, "-S", $control_path; + $SIG{'TERM'} = undef; + # Run a sleep that outputs data, so it will discover + # if the ssh connection closes. 
+ my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}'); + # Ignore the 'foo' being printed + open(STDOUT,">","/dev/null"); + # STDERR >/dev/null to ignore + open(STDERR,">","/dev/null"); + open(STDIN,"<","/dev/null"); + exec(@local, "-MT", $self->{'host'}, "--", + "perl", "-e", $sleep); + } + } + push @local, "-S", ::Q($control_path); + } + return "@local"; +} + +sub wrap($@) { + # Input: + # @cmd = shell command to run on remote + # Returns: + # $sshwrapped = ssh remote @cmd + my $self = shift; + my @remote = @_; + return(join " ", + $self->sshcmd(), $self->{'host'}, "--", "exec", @remote); +} + +sub hexwrap($@) { + # Input: + # @cmd = perl expresion to eval + # Returns: + # $hexencoded = perl command that decodes hex and evals @cmd + my $self = shift; + my $cmd = join("",@_); + + # "#" is needed because Perl on MacOS X adds NULs + # when running pack q/H10000000/ + my $hex = unpack "H*", $cmd."#"; + # csh does not deal well with > 1000 chars in one word + # Insert space every 1000 char + $hex =~ s/\G.{1000}\K/ /sg; + # Explanation: + # Write this without special chars: eval pack 'H*', join '',@ARGV + # GNU_Parallel_worker = String so people can see this is from GNU Parallel + # eval+ = way to write 'eval ' without space (gives warning) + # pack+ = way to write 'pack ' without space + # q/H10000000/, = almost the same as "H*" but does not use * + # join+q//, = join '', + return('perl -X -e '. + 'GNU_Parallel_worker,eval+pack+q/H10000000/,join+q//,@ARGV '. 
+ $hex); +} + +sub jobs_running($) { + my $self = shift; + return ($self->{'jobs_running'} || "0"); +} + +sub inc_jobs_running($) { + my $self = shift; + $self->{'jobs_running'}++; +} + +sub dec_jobs_running($) { + my $self = shift; + $self->{'jobs_running'}--; +} + +sub set_maxlength($$) { + my $self = shift; + $self->{'maxlength'} = shift; +} + +sub maxlength($) { + my $self = shift; + return $self->{'maxlength'}; +} + +sub jobs_completed() { + my $self = shift; + return $self->{'jobs_completed'}; +} + +sub in_hostgroups() { + # Input: + # @hostgroups = the hostgroups to look for + # Returns: + # true if intersection of @hostgroups and the hostgroups of this + # SSHLogin is non-empty + my $self = shift; + return grep { defined $self->{'hostgroups'}{$_} } @_; +} + +sub hostgroups() { + my $self = shift; + return keys %{$self->{'hostgroups'}}; +} + +sub inc_jobs_completed($) { + my $self = shift; + $self->{'jobs_completed'}++; + $Global::total_completed++; +} + +sub set_max_jobs_running($$) { + my $self = shift; + if(defined $self->{'max_jobs_running'}) { + $Global::max_jobs_running -= $self->{'max_jobs_running'}; + } + $self->{'max_jobs_running'} = shift; + + if(defined $self->{'max_jobs_running'}) { + # max_jobs_running could be resat if -j is a changed file + $Global::max_jobs_running += $self->{'max_jobs_running'}; + } + # Initialize orig to the first non-zero value that comes around + $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; +} + +sub memfree() { + # Returns: + # $memfree in bytes + my $self = shift; + $self->memfree_recompute(); + # Return 1 if not defined. 
+ return (not defined $self->{'memfree'} or $self->{'memfree'}) +} + +sub memfree_recompute() { + my $self = shift; + my $script = memfreescript(); + + # TODO add sshlogin and backgrounding + # Run the script twice if it gives 0 (typically intermittent error) + $self->{'memfree'} = ::qqx($script) || ::qqx($script); + if(not $self->{'memfree'}) { + ::die_bug("Less than 1 byte memory free"); + } + #::debug("mem","New free:",$self->{'memfree'}," "); +} + +{ + my $script; + + sub memfreescript() { + # Returns: + # shellscript for giving available memory in bytes + if(not $script) { + my %script_of = ( + # /proc/meminfo + # MemFree: 7012 kB + # Buffers: 19876 kB + # Cached: 431192 kB + # SwapCached: 0 kB + "linux" => ( + q{ + print 1024 * qx{ + awk '/^((Swap)?Cached|MemFree|Buffers):/ + { sum += \$2} END { print sum }' + /proc/meminfo } + }), + # Android uses same code as GNU/Linux + "android" => ( + q{ + print 1024 * qx{ + awk '/^((Swap)?Cached|MemFree|Buffers):/ + { sum += \$2} END { print sum }' + /proc/meminfo } + }), + # $ vmstat 1 1 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99 + "hpux" => ( + q{ + print (((reverse `vmstat 1 1`)[0] + =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) + }), + # $ vmstat 1 2 + # kthr memory page disk faults cpu + # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97 + # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92 + # + # The second free value is correct + "solaris" => ( + q{ + print (((reverse `vmstat 1 2`)[0] + =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) + }), + # hw.pagesize: 4096 + # vm.stats.vm.v_cache_count: 0 + # vm.stats.vm.v_inactive_count: 79574 + # vm.stats.vm.v_free_count: 4507 + "freebsd" => ( + q{ + for(qx{/sbin/sysctl -a}) { + if (/^([^:]+):\s+(.+)\s*$/s) { + $sysctl->{$1} = $2; + } + } + print $sysctl->{"hw.pagesize"} * + 
($sysctl->{"vm.stats.vm.v_cache_count"} + + $sysctl->{"vm.stats.vm.v_inactive_count"} + + $sysctl->{"vm.stats.vm.v_free_count"}); + }), + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # Pages free: 198061. + # Pages active: 159701. + # Pages inactive: 47378. + # Pages speculative: 29707. + # Pages wired down: 89231. + # "Translation faults": 928901425. + # Pages copy-on-write: 156988239. + # Pages zero filled: 271267894. + # Pages reactivated: 48895. + # Pageins: 1798068. + # Pageouts: 257. + # Object cache: 6603 hits of 1713223 lookups (0% hit rate) + 'darwin' => ( + q{ + $vm = `vm_stat`; + print (($vm =~ /page size of (\d+)/)[0] * + (($vm =~ /Pages free:\s+(\d+)/)[0] + + ($vm =~ /Pages inactive:\s+(\d+)/)[0])); + }), + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate command + for my $os (keys %script_of) { + $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}'; + } + $script = "perl -e " . ::Q(::spacefree(1,$perlscript)); + } + return $script; + } +} + +sub limit($) { + # Returns: + # 0 = Below limit. Start another job. + # 1 = Over limit. Start no jobs. + # 2 = Kill youngest job + my $self = shift; + + if(not defined $self->{'limitscript'}) { + my %limitscripts = + ("io" => q! + io() { + limit=$1; + io_file=$2; + # Do the measurement in the background + ((tmp=$(mktemp || tempfile); + LANG=C iostat -x 1 2 > $tmp; + mv $tmp $io_file) /dev/null & ); + perl -e '-e $ARGV[0] or exit(1); + for(reverse <>) { + /Device/ and last; + /(\S+)$/ and $max = $max > $1 ? $max : $1; } + exit ('$limit' < $max)' $io_file; + }; + io %s %s + !, + "mem" => q! + mem() { + limit=$1; + awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2} + END { + if (sum*1024 < '$limit'/2) { exit 2; } + else { exit (sum*1024 < '$limit') } + }' /proc/meminfo; + }; + mem %s; + !, + "load" => q! 
+ load() { + limit=$1; + ps ax -o state,command | + grep -E '^[DOR].[^[]' | + wc -l | + perl -ne 'exit ('$limit' < $_)'; + }; + load %s + !, + ); + my ($cmd,@args) = split /\s+/,$opt::limit; + if($limitscripts{$cmd}) { + my $tmpfile = ::tmpname("parlmt"); + ++$Global::unlink{$tmpfile}; + $self->{'limitscript'} = + ::spacefree(1, sprintf($limitscripts{$cmd}, + ::multiply_binary_prefix(@args),$tmpfile)); + } else { + $self->{'limitscript'} = $opt::limit; + } + } + + my %env = %ENV; + local %ENV = %env; + $ENV{'SSHLOGIN'} = $self->string(); + system($Global::shell,"-c",$self->{'limitscript'}); + ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n"); + return $?>>8; +} + + +sub swapping($) { + my $self = shift; + my $swapping = $self->swap_activity(); + return (not defined $swapping or $swapping) +} + +sub swap_activity($) { + # If the currently known swap activity is too old: + # Recompute a new one in the background + # Returns: + # last swap activity computed + my $self = shift; + # Should we update the swap_activity file? 
+ my $update_swap_activity_file = 0; + # Test with (on 64 core machine): + # seq 100 | parallel --lb -j100 'seq 1000 | parallel --noswap -j 1 true' + if(open(my $swap_fh, "<", $self->{'swap_activity_file'})) { + my $swap_out = <$swap_fh>; + close $swap_fh; + if($swap_out =~ /^(\d+)$/) { + $self->{'swap_activity'} = $1; + ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); + } + ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); + if(time - $self->{'last_swap_activity_update'} > 10) { + # last swap activity update was started 10 seconds ago + ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); + $update_swap_activity_file = 1; + } + } else { + ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); + $self->{'swap_activity'} = undef; + $update_swap_activity_file = 1; + } + if($update_swap_activity_file) { + ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); + $self->{'last_swap_activity_update'} = time; + my $dir = ::dirname($self->{'swap_activity_file'}); + -d $dir or eval { File::Path::mkpath($dir); }; + my $swap_activity; + $swap_activity = swapactivityscript(); + if(not $self->local()) { + $swap_activity = $self->wrap($swap_activity); + } + # Run swap_activity measuring. 
+ # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + my $file = $self->{'swap_activity_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); + ::debug("swap", "\n", $swap_activity, "\n"); + my $qtmp = ::Q($tmpfile); + my $qfile = ::Q($file); + ::qqx("($swap_activity > $qtmp && mv $qtmp $qfile || rm $qtmp &)"); + } + return $self->{'swap_activity'}; +} + +{ + my $script; + + sub swapactivityscript() { + # Returns: + # shellscript for detecting swap activity + # + # arguments for vmstat are OS dependant + # swap_in and swap_out are in different columns depending on OS + # + if(not $script) { + my %vmstat = ( + # linux: $7*$8 + # $ vmstat 1 2 + # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- + # r b swpd free buff cache si so bi bo in cs us sy id wa + # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 + # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 + 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # solaris: $6*$7 + # $ vmstat -S 1 2 + # kthr memory page disk faults cpu + # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 + # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 + 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], + + # darwin (macosx): $21*$22 + # $ vm_stat -c 2 1 + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts + # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 + # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 + 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], + + # ultrix: $12*$13 + # $ vmstat -S 1 2 
+ # procs faults cpu memory page disk + # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 + # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 + # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 + 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], + + # aix: $6*$7 + # $ vmstat 1 2 + # System configuration: lcpu=1 mem=2048MB + # + # kthr memory page faults cpu + # ----- ----------- ------------------------ ------------ ----------- + # r b avm fre re pi po fr sr cy in sy cs us sy id wa + # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 + # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 + 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], + + # freebsd: $8*$9 + # $ vmstat -H 1 2 + # procs memory page disks faults cpu + # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id + # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 + # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 + 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], + + # mirbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id + # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 + # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 + 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # netbsd: $7*$8 + # $ vmstat 1 2 + # procs memory page disks faults cpu + # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id + # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 + # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 + 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # openbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id + # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 + # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 + 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # hpux: $8*$9 + # $ vmstat 1 2 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 
247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 + # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 + 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # dec_osf (tru64): $11*$12 + # $ vmstat 1 2 + # Virtual Memory Statistics: (pagesize = 8192) + # procs memory pages intr cpu + # r w u act free wire fault cow zero react pin pout in sy cs us sy id + # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 + # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 + 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], + + # gnu (hurd): $7*$8 + # $ vmstat -k 1 2 + # (pagesize: 4, size: 512288, swap size: 894972) + # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree + # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 + # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 + 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], + + # -nto (qnx has no swap) + #-irix + #-svr5 (scosysv) + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate vmstat command + for my $os (keys %vmstat) { + $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ + $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . + $vmstat{$os}[1] . '}"` }'; + } + $script = "perl -e " . ::Q($perlscript); + } + return $script; + } +} + +sub too_fast_remote_login($) { + my $self = shift; + if($self->{'last_login_at'} and $self->{'time_to_login'}) { + # sshd normally allows 10 simultaneous logins + # A login takes time_to_login + # So time_to_login/5 should be safe + # If now <= last_login + time_to_login/5: Then it is too soon. + my $too_fast = (::now() <= $self->{'last_login_at'} + + $self->{'time_to_login'}/5); + ::debug("run", "Too fast? 
$too_fast "); + return $too_fast; + } else { + # No logins so far (or time_to_login not computed): it is not too fast + return 0; + } +} + +sub last_login_at($) { + my $self = shift; + return $self->{'last_login_at'}; +} + +sub set_last_login_at($$) { + my $self = shift; + $self->{'last_login_at'} = shift; +} + +sub loadavg_too_high($) { + my $self = shift; + my $loadavg = $self->loadavg(); + if(defined $loadavg) { + ::debug("load", "Load $loadavg > ",$self->max_loadavg()); + return $loadavg >= $self->max_loadavg(); + } else { + # Unknown load: Assume load is too high + return 1; + } +} + + + +sub loadavg($) { + # If the currently know loadavg is too old: + # Recompute a new one in the background + # The load average is computed as the number of processes waiting + # for disk or CPU right now. So it is the server load this instant + # and not averaged over several minutes. This is needed so GNU + # Parallel will at most start one job that will push the load over + # the limit. + # + # Returns: + # $last_loadavg = last load average computed (undef if none) + + my $self = shift; + sub loadavg_cmd() { + if(not $Global::loadavg_cmd) { + # aix => "ps -ae -o state,command" # state wrong + # bsd => "ps ax -o state,command" + # sysv => "ps -ef -o s -o comm" + # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \ + # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + # awk '{print $2,$1}' + # dec_osf => bsd + # dragonfly => bsd + # freebsd => bsd + # gnu => bsd + # hpux => ps -el|awk '{print $2,$14,$15}' + # irix => ps -ef -o state -o comm + # linux => bsd + # minix => ps el|awk '{print \$1,\$11}' + # mirbsd => bsd + # netbsd => bsd + # openbsd => bsd + # solaris => sysv + # svr5 => sysv + # ultrix => ps -ax | awk '{print $3,$5}' + # unixware => ps -el|awk '{print $2,$14,$15}' + my $ps = ::spacefree(1,q{ + $sysv="ps -ef -o s -o comm"; + $sysv2="ps -ef -o state -o comm"; + $bsd="ps ax -o state,command"; + # Treat threads as processes + $bsd2="ps axH 
-o state,command"; + $psel="ps -el|awk '{ print \$2,\$14,\$15 }'"; + $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n"; + /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + awk '{print $2,$1}' }; + $dummy="echo S COMMAND;echo R dummy"; + %ps=( + # TODO Find better code for AIX/Android + 'aix' => "uptime", + 'android' => "uptime", + 'cygwin' => $cygwin, + 'darwin' => $bsd, + 'dec_osf' => $sysv2, + 'dragonfly' => $bsd, + 'freebsd' => $bsd2, + 'gnu' => $bsd, + 'hpux' => $psel, + 'irix' => $sysv2, + 'linux' => $bsd2, + 'minix' => "ps el|awk '{print \$1,\$11}'", + 'mirbsd' => $bsd, + 'msys' => $cygwin, + 'netbsd' => $bsd, + 'nto' => $dummy, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $psel, + 'ultrix' => "ps -ax | awk '{print \$3,\$5}'", + 'MSWin32' => $sysv, + ); + print `$ps{$^O}`; + }); + # The command is too long for csh, so base64_wrap the command + $Global::loadavg_cmd = $self->hexwrap($ps); + } + return $Global::loadavg_cmd; + } + # Should we update the loadavg file? + my $update_loadavg_file = 0; + if(open(my $load_fh, "<", $self->{'loadavg_file'})) { + local $/; # $/ = undef => slurp whole file + my $load_out = <$load_fh>; + close $load_fh; + if($load_out =~ /\S/) { + # Content can be empty if ~/ is on NFS + # due to reading being non-atomic. + # + # Count lines starting with D,O,R but command does not start with [ + my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm); + if($load > 0) { + # load is overestimated by 1 + $self->{'loadavg'} = $load - 1; + ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n"); + } elsif ($load_out=~/average: (\d+.\d+)/) { + # AIX does not support instant load average + # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55 + $self->{'loadavg'} = $1; + } else { + ::die_bug("loadavg_invalid_content: " . + $self->{'loadavg_file'} . 
"\n$load_out"); + } + } + $update_loadavg_file = 1; + } else { + ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); + $self->{'loadavg'} = undef; + $update_loadavg_file = 1; + } + if($update_loadavg_file) { + ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); + $self->{'last_loadavg_update'} = time; + my $dir = ::dirname($self->{'swap_activity_file'}); + -d $dir or eval { File::Path::mkpath($dir); }; + -w $dir or ::die_bug("Cannot write to $dir"); + my $cmd = ""; + if($self->{'string'} ne ":") { + $cmd = $self->wrap(loadavg_cmd()); + } else { + $cmd .= loadavg_cmd(); + } + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + ::debug("load", "Update load\n"); + my $file = ::Q($self->{'loadavg_file'}); + # tmpfile on same filesystem as $file + my $tmpfile = $file.$$; + $ENV{'SSHPASS'} = $self->{'password'}; + ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )"); + } + return $self->{'loadavg'}; +} + +sub max_loadavg($) { + my $self = shift; + # If --load is a file it might be changed + if($Global::max_load_file) { + my $mtime = (stat($Global::max_load_file))[9]; + if($mtime > $Global::max_load_file_last_mod) { + $Global::max_load_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_loadavg(undef); + } + } + } + if(not defined $self->{'max_loadavg'}) { + $self->{'max_loadavg'} = + $self->compute_max_loadavg($opt::load); + } + ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); + return $self->{'max_loadavg'}; +} + +sub set_max_loadavg($$) { + my $self = shift; + $self->{'max_loadavg'} = shift; +} + +sub compute_max_loadavg($) { + # Parse the max loadaverage that the user asked for using --load + # Returns: + # max loadaverage + my $self = shift; + my $loadspec = shift; + my $load; + if(defined $loadspec) { + if($loadspec =~ /^\+(\d+)$/) { + # E.g. 
--load +2 + my $j = $1; + $load = + $self->ncpus() + $j; + } elsif ($loadspec =~ /^-(\d+)$/) { + # E.g. --load -2 + my $j = $1; + $load = + $self->ncpus() - $j; + } elsif ($loadspec =~ /^(\d+)\%$/) { + my $j = $1; + $load = + $self->ncpus() * $j / 100; + } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { + $load = $1; + } elsif (-f $loadspec) { + $Global::max_load_file = $loadspec; + $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; + $load = $self->compute_max_loadavg( + ::slurp_or_exit($Global::max_load_file) + ); + } else { + ::error("Parsing of --load failed."); + ::die_usage(); + } + if($load < 0.01) { + $load = 0.01; + } + } + return $load; +} + +sub time_to_login($) { + my $self = shift; + return $self->{'time_to_login'}; +} + +sub set_time_to_login($$) { + my $self = shift; + $self->{'time_to_login'} = shift; +} + +sub max_jobs_running($) { + my $self = shift; + if(not defined $self->{'max_jobs_running'}) { + my $nproc = $self->compute_number_of_processes($opt::jobs); + $self->set_max_jobs_running($nproc); + } + return $self->{'max_jobs_running'}; +} + +sub orig_max_jobs_running($) { + my $self = shift; + return $self->{'orig_max_jobs_running'}; +} + +sub compute_number_of_processes($) { + # Number of processes wanted and limited by system resources + # Returns: + # Number of processes + my $self = shift; + my $opt_P = shift; + my $wanted_processes = $self->user_requested_processes($opt_P); + if(not defined $wanted_processes) { + $wanted_processes = $Global::default_simultaneous_sshlogins; + } + ::debug("load", "Wanted procs: $wanted_processes\n"); + my $system_limit = + $self->processes_available_by_system_limit($wanted_processes); + ::debug("load", "Limited to procs: $system_limit\n"); + return $system_limit; +} + +{ + my @children; + my $max_system_proc_reached; + my $more_filehandles; + my %fh; + my $tmpfhname; + my $count_jobs_already_read; + my @jobs; + my $job; + my @args; + my $arg; + + sub reserve_filehandles($) { + # Reserves filehandle 
+ my $n = shift; + for (1..$n) { + $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null"); + } + } + + sub reserve_process() { + # Spawn a dummy process + my $child; + if($child = fork()) { + push @children, $child; + $Global::unkilled_children{$child} = 1; + } elsif(defined $child) { + # This is the child + # The child takes one process slot + # It will be killed later + $SIG{'TERM'} = $Global::original_sig{'TERM'}; + if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") { + # The exec does not work on Cygwin and QNX + sleep 10101010; + } else { + # 'exec sleep' takes less RAM than sleeping in perl + exec 'sleep', 10101; + } + exit(0); + } else { + # Failed to spawn + $max_system_proc_reached = 1; + } + } + + sub get_args_or_jobs() { + # Get an arg or a job (depending on mode) + if($Global::semaphore or ($opt::pipe and not $opt::tee)) { + # Skip: No need to get args + return 1; + } elsif(defined $opt::retries and $count_jobs_already_read) { + # For retries we may need to run all jobs on this sshlogin + # so include the already read jobs for this sshlogin + $count_jobs_already_read--; + return 1; + } else { + if($opt::X or $opt::m) { + # The arguments may have to be re-spread over several jobslots + # So pessimistically only read one arg per jobslot + # instead of a full commandline + if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } else { + $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + push(@args, $arg); + return 1; + } + } else { + # If there are no more command lines, then we have a process + # per command line, so no need to go further + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + # Replacement must happen here due to seq() + $job and $job->replaced(); + push(@jobs, $job); + return 1; + } + } + } + } + + sub cleanup() { + # 
Cleanup: Close the files + for (values %fh) { close $_ } + # Cleanup: Kill the children + for my $pid (@children) { + kill 9, $pid; + waitpid($pid,0); + delete $Global::unkilled_children{$pid}; + } + # Cleanup: Unget the command_lines or the @args + $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(@args); + @args = (); + $Global::JobQueue->unget(@jobs); + @jobs = (); + } + + sub processes_available_by_system_limit($) { + # If the wanted number of processes is bigger than the system limits: + # Limit them to the system limits + # Limits are: File handles, number of input lines, processes, + # and taking > 1 second to spawn 10 extra processes + # Returns: + # Number of processes + my $self = shift; + my $wanted_processes = shift; + my $system_limit = 0; + my $slow_spawning_warning_printed = 0; + my $time = time; + $more_filehandles = 1; + $tmpfhname = "TmpFhNamE"; + + # perl uses 7 filehandles for something? + # parallel uses 1 for memory_usage + # parallel uses 4 for ? + reserve_filehandles(12); + # Two processes for load avg and ? + reserve_process(); + reserve_process(); + + # For --retries count also jobs already run + $count_jobs_already_read = $Global::JobQueue->next_seq(); + my $wait_time_for_getting_args = 0; + my $start_time = time; + if($wanted_processes < $Global::infinity) { + $Global::dummy_jobs = 1; + } + while(1) { + $system_limit >= $wanted_processes and last; + not $more_filehandles and last; + $max_system_proc_reached and last; + + my $before_getting_arg = time; + if(!$Global::dummy_jobs) { + get_args_or_jobs() or last; + } + $wait_time_for_getting_args += time - $before_getting_arg; + $system_limit++; + + # Every simultaneous process uses 2 filehandles to write to + # and 2 filehandles to read from + reserve_filehandles(4); + + # System process limit + reserve_process(); + + my $forktime = time - $time - $wait_time_for_getting_args; + ::debug("run", "Time to fork $system_limit procs: ". 
+ $wait_time_for_getting_args, " ", $forktime, + " (processes so far: ", $system_limit,")\n"); + if($system_limit > 10 and + $forktime > 1 and + $forktime > $system_limit * 0.01) { + # It took more than 0.01 second to fork a processes on avg. + # Give the user a warning. He can press Ctrl-C if this + # sucks. + ::warning_once( + "Starting $system_limit processes took > $forktime sec.", + "Consider adjusting -j. Press CTRL-C to stop."); + } + } + cleanup(); + + if($system_limit < $wanted_processes) { + # The system_limit is less than the wanted_processes + if($system_limit < 1 and not $Global::JobQueue->empty()) { + ::warning("Cannot spawn any jobs.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + ::wait_and_exit(255); + } + if(not $more_filehandles) { + ::warning("Only enough file handles to run ". + $system_limit. " jobs in parallel.", + "Try running 'parallel -j0 -N $system_limit --pipe parallel -j0'", + "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)", + "or increasing 'nofile' in /etc/security/limits.conf", + "or increasing /proc/sys/fs/file-max"); + } + if($max_system_proc_reached) { + ::warning("Only enough available processes to run ". + $system_limit. 
" jobs in parallel.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + } + } + if($] == 5.008008 and $system_limit > 1000) { + # https://savannah.gnu.org/bugs/?36942 + $system_limit = 1000; + } + if($Global::JobQueue->empty()) { + $system_limit ||= 1; + } + if($self->string() ne ":" and + $system_limit > $Global::default_simultaneous_sshlogins) { + $system_limit = + $self->simultaneous_sshlogin_limit($system_limit); + } + return $system_limit; + } +} + +sub simultaneous_sshlogin_limit($) { + # Test by logging in wanted number of times simultaneously + # Returns: + # min($wanted_processes,$working_simultaneous_ssh_logins-1) + my $self = shift; + my $wanted_processes = shift; + if($self->{'time_to_login'}) { + return $wanted_processes; + } + + # Try twice because it guesses wrong sometimes + # Choose the minimal + my $ssh_limit = + ::min($self->simultaneous_sshlogin($wanted_processes), + $self->simultaneous_sshlogin($wanted_processes)); + if($ssh_limit < $wanted_processes) { + my $serverlogin = $self->string(); + ::warning("ssh to $serverlogin only allows ". + "for $ssh_limit simultaneous logins.", + "You may raise this by changing", + "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.", + "You can also try --sshdelay 0.1", + "Using only ".($ssh_limit-1)." connections ". + "to avoid race conditions."); + # Race condition can cause problem if using all sshs. + if($ssh_limit > 1) { $ssh_limit -= 1; } + } + return $ssh_limit; +} + +sub simultaneous_sshlogin($) { + # Using $sshlogin try to see if we can do $wanted_processes + # simultaneous logins + # (ssh host echo simul-login & ssh host echo simul-login & ...) 
| + # grep simul|wc -l + # Input: + # $wanted_processes = Try for this many logins in parallel + # Returns: + # $ssh_limit = Number of succesful parallel logins + local $/ = "\n"; + my $self = shift; + my $wanted_processes = shift; + my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; + # TODO sh -c wrapper to work for csh + my $cmd = ($sshdelay.$self->wrap("echo simultaneouslogin"). + "&1 &")x$wanted_processes; + ::debug("init","Trying $wanted_processes logins at ".$self->string()."\n"); + open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or + ::die_bug("simultaneouslogin"); + my $ssh_limit = <$simul_fh>; + close $simul_fh; + chomp $ssh_limit; + return $ssh_limit; +} + +sub set_ncpus($$) { + my $self = shift; + $self->{'ncpus'} = shift; +} + +sub user_requested_processes($) { + # Parse the number of processes that the user asked for using -j + # Input: + # $opt_P = string formatted as for -P + # Returns: + # $processes = the number of processes to run on this sshlogin + my $self = shift; + my $opt_P = shift; + my $processes; + if(defined $opt_P) { + if (-f $opt_P and not $opt_P =~ /^[-+]?\d+%?$/) { + # This is a file. Ignore files called +10% 4 -3 + $Global::max_procs_file = $opt_P; + my $opt_P_file = ::slurp_or_exit($Global::max_procs_file); + if($opt_P_file !~ /\S/) { + ::warning_once("$Global::max_procs_file is empty. ". 
+ "Treated as 100%"); + $opt_P_file = "100%"; + } + $processes = $self->user_requested_processes($opt_P_file); + } else { + if($opt_P eq "0") { + # -P 0 = infinity (or at least close) + $processes = $Global::infinity; + } else { + # -P +3 and -P -1 + $opt_P =~ s/^([-+])/\$self->ncpus()$1/; + # -P 40% + $opt_P =~ s:%$:*\$self->ncpus()/100:; + $processes = eval $opt_P; + if($processes <= 0) { + # Do not go below 1 + $processes = 1; + } + } + } + $processes = ::ceil($processes); + } + return $processes; +} + +sub ncpus($) { + # Number of CPU threads + # --use_sockets_instead_of_threads = count socket instead + # --use_cores_instead_of_threads = count physical cores instead + # Returns: + # $ncpus = number of cpu (threads) on this sshlogin + local $/ = "\n"; + my $self = shift; + if(not defined $self->{'ncpus'}) { + if($self->local()) { + if($opt::use_sockets_instead_of_threads) { + $self->{'ncpus'} = socket_core_thread()->{'sockets'}; + } elsif($opt::use_cores_instead_of_threads) { + $self->{'ncpus'} = socket_core_thread()->{'cores'}; + } else { + $self->{'ncpus'} = socket_core_thread()->{'threads'}; + } + } else { + my $ncpu; + $ENV{'SSHPASS'} = $self->{'password'}; + ::debug("init",("echo | ".$self->wrap("parallel --number-of-sockets"))); + if($opt::use_sockets_instead_of_threads + or + $opt::use_cpus_instead_of_cores) { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-sockets")); + } elsif($opt::use_cores_instead_of_threads) { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-cores")); + } else { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-threads")); + } + chomp $ncpu; + if($ncpu =~ /^\s*[0-9]+\s*$/s) { + $self->{'ncpus'} = $ncpu; + } else { + ::warning("Could not figure out ". + "number of cpus on ".$self->string." ($ncpu). 
Using 1."); + $self->{'ncpus'} = 1; + } + } + } + return $self->{'ncpus'}; +} + + +sub nproc() { + # Returns: + # Number of threads using `nproc` + my $no_of_threads = ::qqx("nproc"); + chomp $no_of_threads; + return $no_of_threads; +} + +sub no_of_sockets() { + return socket_core_thread()->{'sockets'}; +} + +sub no_of_cores() { + return socket_core_thread()->{'cores'}; +} + +sub no_of_threads() { + return socket_core_thread()->{'threads'}; +} + +sub socket_core_thread() { + # Returns: + # { + # 'sockets' => #sockets = number of socket with CPU present + # 'cores' => #cores = number of physical cores + # 'threads' => #threads = number of compute cores (hyperthreading) + # 'active' => #taskset_threads = number of taskset limited cores + # } + my $cpu; + if ($^O eq 'linux') { + $cpu = sct_gnu_linux($cpu); + } elsif ($^O eq 'android') { + $cpu = sct_android($cpu); + } elsif ($^O eq 'freebsd') { + $cpu = sct_freebsd($cpu); + } elsif ($^O eq 'netbsd') { + $cpu = sct_netbsd($cpu); + } elsif ($^O eq 'openbsd') { + $cpu = sct_openbsd($cpu); + } elsif ($^O eq 'gnu') { + $cpu = sct_hurd($cpu); + } elsif ($^O eq 'darwin') { + $cpu = sct_darwin($cpu); + } elsif ($^O eq 'solaris') { + $cpu = sct_solaris($cpu); + } elsif ($^O eq 'aix') { + $cpu = sct_aix($cpu); + } elsif ($^O eq 'hpux') { + $cpu = sct_hpux($cpu); + } elsif ($^O eq 'nto') { + $cpu = sct_qnx($cpu); + } elsif ($^O eq 'svr5') { + $cpu = sct_openserver($cpu); + } elsif ($^O eq 'irix') { + $cpu = sct_irix($cpu); + } elsif ($^O eq 'dec_osf') { + $cpu = sct_tru64($cpu); + } else { + # Try all methods until we find something that works + $cpu = (sct_gnu_linux($cpu) + || sct_android($cpu) + || sct_freebsd($cpu) + || sct_netbsd($cpu) + || sct_openbsd($cpu) + || sct_hurd($cpu) + || sct_darwin($cpu) + || sct_solaris($cpu) + || sct_aix($cpu) + || sct_hpux($cpu) + || sct_qnx($cpu) + || sct_openserver($cpu) + || sct_irix($cpu) + || sct_tru64($cpu) + ); + } + if(not $cpu) { + # Fall back: Set all to nproc + my $nproc = nproc(); 
+ if($nproc) { + $cpu->{'sockets'} = + $cpu->{'cores'} = + $cpu->{'threads'} = + $cpu->{'active'} = + $nproc; + } + } + if(not $cpu) { + ::warning("Cannot figure out number of cpus. Using 1."); + $cpu->{'sockets'} = + $cpu->{'cores'} = + $cpu->{'threads'} = + $cpu->{'active'} = + 1 + } + $cpu->{'sockets'} ||= 1; + $cpu->{'threads'} ||= $cpu->{'cores'}; + $cpu->{'active'} ||= $cpu->{'threads'}; + chomp($cpu->{'sockets'}, + $cpu->{'cores'}, + $cpu->{'threads'}, + $cpu->{'active'}); + # Choose minimum of active and actual + my $mincpu; + $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'}); + $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'}); + $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'}); + return $mincpu; +} + +sub sct_gnu_linux($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + my $cpu = shift; + + sub read_topology($) { + my $prefix = shift; + my %sibiling; + my %socket; + my $thread; + for($thread = 0; + -r "$prefix/cpu$thread/topology/physical_package_id"; + $thread++) { + $socket{::slurp_or_exit( + "$prefix/cpu$thread/topology/physical_package_id")}++; + } + for($thread = 0; + -r "$prefix/cpu$thread/topology/thread_siblings"; + $thread++) { + $sibiling{::slurp_or_exit( + "$prefix/cpu$thread/topology/thread_siblings")}++; + } + $cpu->{'sockets'} = keys %socket; + $cpu->{'cores'} = keys %sibiling; + $cpu->{'threads'} = $thread; + } + + sub read_cpuinfo(@) { + my @cpuinfo = @_; + $cpu->{'sockets'} = 0; + $cpu->{'cores'} = 0; + $cpu->{'threads'} = 0; + my %seen; + my %phy_seen; + my $physicalid; + for(@cpuinfo) { + # physical id : 0 + if(/^physical id.*[:](.*)/) { + $physicalid = $1; + if(not $phy_seen{$1}++) { + $cpu->{'sockets'}++; + } + } + # core id : 3 + if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) { + $cpu->{'cores'}++; + } + # processor : 2 + /^processor.*[:]\s*\d/i and $cpu->{'threads'}++; + } + $cpu->{'cores'} ||= 
$cpu->{'threads'}; + $cpu->{'cpus'} ||= $cpu->{'threads'}; + $cpu->{'sockets'} ||= 1; + } + + sub read_lscpu(@) { + my @lscpu = @_; + my $threads_per_core; + my $cores_per_socket; + for(@lscpu) { + # lscpu + /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1; + /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1; + /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1; + /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2; + # lscpu --all --extended + # 3 0 0 1 1:1:1:0 yes 2900.0000 800.0000 1995.6210 + # 1 0 0 1 1:1:1 yes 1300.0000 800.0000 800.0000 + /^\s+(\d+)\s+\d+\s+(\d+)\s+(\d+)\s+\d+:\d+:\S+\s/ and do { + # Really only the last line matters + $cpu->{'threads'} = $1+1; + $cpu->{'sockets'} = $2+1; + $cpu->{'cores'} = $3+1; + }; + } + if($cores_per_socket and $cpu->{'sockets'}) { + $cpu->{'cores'} = $cores_per_socket * $cpu->{'sockets'}; + } + if($threads_per_core and $cpu->{'cores'}) { + $cpu->{'threads'} = $threads_per_core * $cpu->{'cores'}; + } + if($threads_per_core and $cpu->{'threads'}) { + $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core; + } + $cpu->{'cpus'} ||= $cpu->{'threads'}; + } + + local $/ = "\n"; # If delimiter is set, then $/ will be wrong + my @cpuinfo; + my @lscpu; + if($ENV{'PARALLEL_CPUINFO'}) { + # Use CPUINFO from environment - used for testing only + read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'}); + } elsif($ENV{'PARALLEL_LSCPU'}) { + # Use LSCPU from environment - used for testing only + read_lscpu(split/\n/,$ENV{'PARALLEL_LSCPU'}); + } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") { + # Use CPUPREFIX from environment - used for testing only + read_topology($ENV{'PARALLEL_CPUPREFIX'}); + } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) { + # Skip /proc/cpuinfo - already set + } else { + # Not debugging: Look at this computer + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + open(my $in_fh, "-|", "lscpu --all --extended")) { + # Parse output 
from lscpu + read_lscpu(<$in_fh>); + close $in_fh; + } + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") { + read_topology("/sys/devices/system/cpu"); + } + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + open(my $in_fh, "<", "/proc/cpuinfo")) { + # Read /proc/cpuinfo + read_cpuinfo(<$in_fh>); + close $in_fh; + } + } + if(-e "/proc/self/status" + and not $ENV{'PARALLEL_CPUINFO'} + and not $ENV{'PARALLEL_LSCPU'}) { + # if 'taskset' is used to limit number of threads + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return $cpu; +} + +sub sct_android($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + # Use GNU/Linux + return sct_gnu_linux($_[0]); +} + +sub sct_freebsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }) + or + ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })); + $cpu->{'threads'} ||= + (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }) + or + ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })); + return $cpu; +} + +sub sct_netbsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu"); + return $cpu; +} + +sub sct_openbsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => 
#taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu'); + return $cpu; +} + +sub sct_hurd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("nproc"); + return $cpu; +} + +sub sct_darwin($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + (::qqx('sysctl -n hw.physicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' })); + $cpu->{'threads'} ||= + (::qqx('sysctl -n hw.logicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' })); + return $cpu; +} + +sub sct_solaris($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/bin/kstat") { + my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id"); + if($#chip_id >= 0) { + $cpu->{'sockets'} ||= $#chip_id +1; + } + my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq"); + if($#core_id >= 0) { + $cpu->{'cores'} ||= $#core_id +1; + } + } + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo -p"); + if($#psrinfo >= 0) { + $cpu->{'sockets'} ||= $psrinfo[0]; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance"); + if($#prtconf >= 0) { + $cpu->{'cores'} ||= $#prtconf +1; + } + } + } + return $cpu; +} + +sub sct_aix($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/sbin/lscfg") { + if(open(my $in_fh, "-|", + "/usr/sbin/lscfg -vs |grep 
proc | wc -l|tr -d ' '")) { + $cpu->{'cores'} = <$in_fh>; + close $in_fh; + } + } + } + if(not $cpu->{'threads'}) { + if(-x "/usr/bin/vmstat") { + if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) { + while(<$in_fh>) { + /lcpu=([0-9]*) / and $cpu->{'threads'} = $1; + } + close $in_fh; + } + } + } + return $cpu; +} + +sub sct_hpux($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'}); + $cpu->{'threads'} ||= + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'}); + return $cpu; +} + +sub sct_qnx($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + # BUG: It is not known how to calculate this. + + return $cpu; +} + +sub sct_openserver($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo"); + if($#psrinfo >= 0) { + $cpu->{'cores'} = $#psrinfo +1; + } + } + } + $cpu->{'sockets'} ||= $cpu->{'cores'}; + return $cpu; +} + +sub sct_irix($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' }); + return $cpu; +} + +sub sct_tru64($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("sizer -pr"); + $cpu->{'sockets'} ||= $cpu->{'cores'}; + $cpu->{'threads'} 
||= $cpu->{'cores'}; + + return $cpu; +} + +sub sshcommand($) { + # Returns: + # $sshcommand = the command (incl options) to run when using ssh + my $self = shift; + if (not defined $self->{'sshcommand'}) { + ::die_bug("sshcommand not set"); + } + return $self->{'sshcommand'}; +} + +sub local($) { + my $self = shift; + return $self->{'local'}; +} + +sub control_path_dir($) { + # Returns: + # $control_path_dir = dir of control path (for -M) + my $self = shift; + if(not defined $self->{'control_path_dir'}) { + $self->{'control_path_dir'} = + # Use $ENV{'TMPDIR'} as that is typically not + # NFS mounted. + # The file system must support UNIX domain sockets + File::Temp::tempdir($ENV{'TMPDIR'} + . "/ctrlpath-XXXX", + CLEANUP => 1); + } + return $self->{'control_path_dir'}; +} + +sub rsync_transfer_cmd($) { + # Command to run to transfer a file + # Input: + # $file = filename of file to transfer + # $workdir = destination dir + # Returns: + # $cmd = rsync command to run to transfer $file ("" if unreadable) + my $self = shift; + my $file = shift; + my $workdir = shift; + if(not -r $file) { + ::warning($file. " is not readable and will not be transferred."); + return "true"; + } + my $rsync_destdir; + my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./? + if($relpath) { + $rsync_destdir = ::shell_quote_file($workdir); + } else { + # rsync /foo/bar / + $rsync_destdir = "/"; + } + $file = ::shell_quote_file($file); + # Make dir if it does not exist + return($self->wrap("mkdir -p $rsync_destdir") . " && " . + $self->rsync()." $file ".$self->{'host'}.":$rsync_destdir"); +} + +{ + my $rsync_fix; + my $rsync_version; + + sub rsync($) { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. 
+ # If the version >= 3.1.0: downgrade to protocol 30 + # rsync 3.2.4 introduces a quoting bug: Add --old-args for that + # Returns: + # $rsync = "rsync" or "rsync --protocol 30 --old-args" + sub rsync_version { + if(not $rsync_version) { + my @out = `rsync --version`; + if(not @out) { + if(::which("rsync")) { + ::die_bug("'rsync --version' gave no output."); + } else { + ::error("'rsync' is not in \$PATH."); + ::wait_and_exit(255); + } + } + for (@out) { + # rsync version 3.1.3 protocol version 31 + # rsync version v3.2.3 protocol version 31 + if(/version v?(\d+)\.(\d+)(\.(\d+))?/) { + # 3.2.27 => 03.0227 + $rsync_version = sprintf "%02d.%02d%02d",$1,$2,$4; + } + } + $rsync_version or + ::die_bug("Cannot figure out version of rsync: @out"); + } + } + + sub rsync_fixup { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. + # If the version >= 3.1.0: downgrade to protocol 30 + # Returns: + # $rsync = "rsync" or "rsync --protocol 30" + if(not $rsync_fix) { + rsync_version(); + if($rsync_version >= 3.01) { + # Version 3.1.0 or later: Downgrade to protocol 30 + $rsync_fix .= " --protocol 30"; + } + if($rsync_version >= 3.0204) { + # Version 3.2.4 .. 3.2.8: --old-args + $rsync_fix .= " --old-args"; + } + } + return $rsync_fix; + } + my $self = shift; + + return "rsync".rsync_fixup()." ".$ENV{'PARALLEL_RSYNC_OPTS'}. 
+ " -e".::Q($self->sshcmd()); + } +} + +sub cleanup_cmd($$$) { + # Command to run to remove the remote file + # Input: + # $file = filename to remove + # $workdir = destination dir + # Returns: + # $cmd = ssh command to run to remove $file and empty parent dirs + my $self = shift; + my $file = shift; + my $workdir = shift; + my $f = $file; + if($f =~ m:/\./:) { + # foo/bar/./baz/quux => workdir/baz/quux + # /foo/bar/./baz/quux => workdir/baz/quux + $f =~ s:.*/\./:$workdir/:; + } elsif($f =~ m:^[^/]:) { + # foo/bar => workdir/foo/bar + $f = $workdir."/".$f; + } + my @subdirs = split m:/:, ::dirname($f); + my @rmdir; + my $dir = ""; + for(@subdirs) { + $dir .= $_."/"; + unshift @rmdir, ::shell_quote_file($dir); + } + my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : ""; + if(defined $opt::workdir and $opt::workdir eq "...") { + $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; + } + my $rmf = "sh -c ". + ::Q("rm -f ".::shell_quote_file($f)." 2>/dev/null;".$rmdir); + return $self->wrap(::Q($rmf)); +} + +package JobQueue; + +sub new($) { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my $commandlinequeue = CommandLineQueue->new + ($commandref, $read_from, $context_replace, $max_number_of_args, + $transfer_files, $return_files, $template_names, $template_contents); + my @unget = (); + return bless { + 'unget' => \@unget, + 'commandlinequeue' => $commandlinequeue, + 'this_job_no' => 0, + 'total_jobs' => undef, + }, ref($class) || $class; +} + +sub get($) { + my $self = shift; + + $self->{'this_job_no'}++; + if(@{$self->{'unget'}}) { + my $job = shift @{$self->{'unget'}}; + # {%} may have changed, so flush computed values + $job && $job->flush_cache(); + return $job; + } else { + my $commandline = $self->{'commandlinequeue'}->get(); + if(defined 
$commandline) { + return Job->new($commandline); + } else { + $self->{'this_job_no'}--; + return undef; + } + } +} + +sub unget($) { + my $self = shift; + unshift @{$self->{'unget'}}, @_; + $self->{'this_job_no'} -= @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'commandlinequeue'}->empty(); + ::debug("run", "JobQueue->empty $empty "); + return $empty; +} + +sub total_jobs($) { + my $self = shift; + if(not defined $self->{'total_jobs'}) { + if($opt::pipe and not $opt::tee) { + ::error("--pipe is incompatible with --eta/--bar/--shuf"); + ::wait_and_exit(255); + } + if($opt::totaljobs) { + $self->{'total_jobs'} = $opt::totaljobs; + } elsif($opt::sqlworker) { + $self->{'total_jobs'} = $Global::sql->total_jobs(); + } else { + my $record; + my @arg_records; + my $record_queue = $self->{'commandlinequeue'}{'arg_queue'}; + my $start = time; + while($record = $record_queue->get()) { + push @arg_records, $record; + if(time - $start > 10) { + ::warning("Reading ".scalar(@arg_records). + " arguments took longer than 10 seconds."); + $opt::eta && ::warning("Consider removing --eta."); + $opt::bar && ::warning("Consider removing --bar."); + $opt::shuf && ::warning("Consider removing --shuf."); + last; + } + } + while($record = $record_queue->get()) { + push @arg_records, $record; + } + if($opt::shuf and @arg_records) { + my $i = @arg_records; + while (--$i) { + my $j = int rand($i+1); + @arg_records[$i,$j] = @arg_records[$j,$i]; + } + } + $record_queue->unget(@arg_records); + # $#arg_records = number of args - 1 + # We have read one @arg_record for this job (so add 1 more) + my $num_args = $#arg_records + 2; + # This jobs is not started so -1 + my $started_jobs = $self->{'this_job_no'} - 1; + my $max_args = ::max($Global::max_number_of_args,1); + $self->{'total_jobs'} = ::ceil($num_args / $max_args) + + $started_jobs; + ::debug("init","Total jobs: ".$self->{'total_jobs'}. 
+ " ($num_args/$max_args + $started_jobs)\n"); + } + } + return $self->{'total_jobs'}; +} + +sub flush_total_jobs($) { + # Unset total_jobs to force recomputing + my $self = shift; + ::debug("init","flush Total jobs: "); + $self->{'total_jobs'} = undef; +} + +sub next_seq($) { + my $self = shift; + + return $self->{'commandlinequeue'}->seq(); +} + +sub quote_args($) { + my $self = shift; + return $self->{'commandlinequeue'}->quote_args(); +} + + +package Job; + +sub new($) { + my $class = shift; + my $commandlineref = shift; + return bless { + 'commandline' => $commandlineref, # CommandLine object + 'workdir' => undef, # --workdir + # filehandle for stdin (used for --pipe) + # filename for writing stdout to (used for --files) + # remaining data not sent to stdin (used for --pipe) + # tmpfiles to cleanup when job is done + 'unlink' => [], + # amount of data sent via stdin (used for --pipe) + 'transfersize' => 0, # size of files using --transfer + 'returnsize' => 0, # size of files using --return + 'pid' => undef, + # hash of { SSHLogins => number of times the command failed there } + 'failed' => undef, + 'sshlogin' => undef, + # The commandline wrapped with rsync and ssh + 'sshlogin_wrap' => undef, + 'exitstatus' => undef, + 'exitsignal' => undef, + # Timestamp for timeout if any + 'timeout' => undef, + 'virgin' => 1, + # Output used for SQL and CSV-output + 'output' => { 1 => [], 2 => [] }, + 'halfline' => { 1 => [], 2 => [] }, + }, ref($class) || $class; +} + +sub flush_cache($) { + my $self = shift; + $self->{'commandline'}->flush_cache(); +} + +sub replaced($) { + my $self = shift; + $self->{'commandline'} or ::die_bug("commandline empty"); + return $self->{'commandline'}->replaced(); +} + +{ + my $next_available_row; + + sub row($) { + my $self = shift; + if(not defined $self->{'row'}) { + if($opt::keeporder) { + $self->{'row'} = $self->seq(); + } else { + $self->{'row'} = ++$next_available_row; + } + } + return $self->{'row'}; + } +} + +sub seq($) { + my $self 
= shift; + return $self->{'commandline'}->seq(); +} + +sub set_seq($$) { + my $self = shift; + return $self->{'commandline'}->set_seq(shift); +} + +sub slot($) { + my $self = shift; + return $self->{'commandline'}->slot(); +} + +sub free_slot($) { + my $self = shift; + push @Global::slots, $self->slot(); +} + +{ + my($cattail); + + sub cattail() { + # Returns: + # $cattail = perl program for: + # cattail "decomp-prg" wpid [file_stdin] [file_to_unlink] + # decomp-prg = decompress program + # wpid = pid of writer program + # file_stdin = file_to_decompress + # file_to_unlink = unlink this file + if(not $cattail) { + $cattail = q{ + # cat followed by tail (possibly with rm as soon at the file is opened) + # If $writerpid dead: finish after this round + use Fcntl; + $|=1; + + my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV; + if($read_file) { + open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); + } else { + *IN = *STDIN; + } + while(! -s $comfile) { + # Writer has not opened the buffer file, so we cannot remove it yet + $sleep = ($sleep < 30) ? 
($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + # The writer and we have both opened the file, so it is safe to unlink it + unlink $unlink_file; + unlink $comfile; + + my $first_round = 1; + my $flags; + fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= O_NONBLOCK; # Add non-blocking to the flags + fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle + + while(1) { + # clear EOF + seek(IN,0,1); + my $writer_running = kill 0, $writerpid; + $read = sysread(IN,$buf,131072); + if($read) { + if($first_round) { + # Only start the command if there any input to process + $first_round = 0; + open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); + } + + # Blocking print + while($buf) { + my $bytes_written = syswrite(OUT,$buf); + # syswrite may be interrupted by SIGHUP + substr($buf,0,$bytes_written) = ""; + } + # Something printed: Wait less next time + $sleep /= 2; + } else { + if(eof(IN) and not $writer_running) { + # Writer dead: There will never be sent more to the decompressor + close OUT; + exit; + } + # TODO This could probably be done more efficiently using select(2) + # Nothing read: Wait longer before next read + # Up to 100 milliseconds + $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + } + + sub usleep { + # Sleep this many milliseconds. 
+ my $secs = shift; + select(undef, undef, undef, $secs/1000); + } + }; + $cattail =~ s/#.*//mg; + $cattail =~ s/\s+/ /g; + } + return $cattail; + } +} + +sub openoutputfiles($) { + # Open files for STDOUT and STDERR + # Set file handles in $self->fh + my $self = shift; + my ($outfhw, $errfhw, $outname, $errname); + + if($opt::latestline) { + # Do not save to files: Use non-blocking pipe + my ($outfhr, $errfhr); + pipe($outfhr, $outfhw) || die; + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$outfhw); + $self->set_fh(1,'r',$outfhr); + $self->set_fh(2,'r',$outfhr); + # Make it possible to read non-blocking from the pipe + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + # Return immediately because we do not need setting filenames + return; + } elsif($Global::linebuffer and not + ($opt::keeporder or $Global::files or $opt::results or + $opt::compress or $opt::compress_program or + $opt::decompress_program)) { + # Do not save to files: Use non-blocking pipe + my ($outfhr, $errfhr); + pipe($outfhr, $outfhw) || die; + pipe($errfhr, $errfhw) || die; + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'r',$outfhr); + $self->set_fh(2,'r',$errfhr); + # Make it possible to read non-blocking from the pipe + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + # Return immediately because we do not need setting filenames + return; + } elsif($opt::results and not $Global::csvsep and not $Global::jsonout) { + # If --results, but not --results *.csv/*.tsv + my $out = $self->{'commandline'}->results_out(); + my $seqname; + if($out eq $opt::results or $out =~ m:/$:) { + # $opt::results = simple string or ending in / + # => $out is a dir/ + # prefix/name1/val1/name2/val2/seq + $seqname = $out."seq"; + # prefix/name1/val1/name2/val2/stdout + $outname = $out."stdout"; + # prefix/name1/val1/name2/val2/stderr + $errname = $out."stderr"; + } else { + # $opt::results = replacement string not ending in / + # 
=> $out is a file + $outname = $out; + $errname = "$out.err"; + $seqname = "$out.seq"; + } + ::write_or_exit($seqname, $self->seq()); + $outfhw = ::open_or_exit("+>", $outname); + $errfhw = ::open_or_exit("+>", $errname); + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",""); + if($opt::sqlworker) { + # Save the filenames in SQL table + $Global::sql->update("SET Stdout = ?, Stderr = ? ". + "WHERE Seq = ". $self->seq(), + $outname, $errname); + } + } elsif(not $opt::ungroup) { + # To group we create temporary files for STDOUT and STDERR + # To avoid the cleanup unlink the files immediately (but keep them open) + if($Global::files) { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + # --files => only remove stderr + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",$errname); + } else { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + } else { + # --ungroup + open($outfhw,">&",$Global::fh{1}) || die; + open($errfhw,">&",$Global::fh{2}) || die; + # File name must be empty as it will otherwise be printed + $outname = ""; + $errname = ""; + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + # Set writing FD + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'name',$outname); + $self->set_fh(2,'name',$errname); + if($opt::compress) { + $self->filter_through_compress(); + } elsif(not $opt::ungroup) { + $self->grouped(); + } + if($Global::linebuffer) { + # Make it possible to read non-blocking from + # the buffer files + # Used for --linebuffer with -k, --files, --res, --compress* + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + } +} + +sub print_verbose_dryrun($) { + # If -v set: print command to stdout (possibly buffered) + # This must be done before starting the command + my 
$self = shift; + if($Global::verbose or $opt::dryrun) { + my $fh = $self->fh(1,"w"); + if($Global::verbose <= 1) { + print $fh $self->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print $fh $self->wrapped(),"\n"; + } + } + if($opt::sqlworker) { + $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(), + $self->replaced()); + } +} + +sub add_rm($) { + # Files to remove when job is done + my $self = shift; + push @{$self->{'unlink'}}, @_; +} + +sub get_rm($) { + # Files to remove when job is done + my $self = shift; + return @{$self->{'unlink'}}; +} + +sub cleanup($) { + # Remove files when job is done + my $self = shift; + unlink $self->get_rm(); + delete @Global::unlink{$self->get_rm()}; +} + +sub grouped($) { + my $self = shift; + # Set reading FD if using --group (--ungroup does not need) + for my $fdno (1,2) { + # Re-open the file for reading + # so fdw can be closed seperately + # and fdr can be seeked seperately (for --line-buffer) + my $fdr = ::open_or_exit("<", $self->fh($fdno,'name')); + $self->set_fh($fdno,'r',$fdr); + # Unlink if not debugging + $Global::debug or ::rm($self->fh($fdno,"unlink")); + } +} + +sub empty_input_wrapper($) { + # If no input: exit(0) + # If some input: Pass input as input to command on STDIN + # This avoids starting the command if there is no input. + # Input: + # $command = command to pipe data to + # Returns: + # $wrapped_command = the wrapped command + my $command = shift; + # The optimal block size differs + # It has been measured on: + # AMD 6376: 59000 + # /dev/null'; + my $script = + ::spacefree(0,q{ + if(sysread(STDIN, $buf, 1)) { + open($fh, "|-", @ARGV) || die; + syswrite($fh, $buf); + while($read = sysread(STDIN, $buf, 59000)) { + syswrite($fh, $buf); + } + close $fh; + exit ($?&127 ? 
128+($?&127) : 1+$?>>8) + } + }); + ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n"); + if($Global::cshell + and + length $command > 499) { + # csh does not like words longer than 1000 (499 quoted) + # $command = "perl -e '".base64_zip_eval()."' ". + # join" ",string_zip_base64( + # 'exec "'.::perl_quote_scalar($command).'"'); + return 'perl -e '.::Q($script)." ". + base64_wrap("exec \"$Global::shell\",'-c',\"". + ::perl_quote_scalar($command).'"'); + } else { + return 'perl -e '.::Q($script)." ". + $Global::shell." -c ".::Q($command); + } +} + +sub filter_through_compress($) { + my $self = shift; + # Send stdout to stdin for $opt::compress_program(1) + # Send stderr to stdin for $opt::compress_program(2) + # cattail get pid: $pid = $self->fh($fdno,'rpid'); + my $cattail = cattail(); + + for my $fdno (1,2) { + # Make a communication file. + my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac"); + close $fh; + # Compressor: (echo > $comfile; compress pipe) > output + # When the echo is written to $comfile, + # it is known that output file is opened, + # thus output file can then be removed by the decompressor. + # empty_input_wrapper is needed for plzip + my $qcom = ::Q($comfile); + my $wpid = open(my $fdw,"|-", "(echo > $qcom; ". + empty_input_wrapper($opt::compress_program).") >". 
+ ::Q($self->fh($fdno,'name'))) || die $?; + $self->set_fh($fdno,'w',$fdw); + $self->set_fh($fdno,'wpid',$wpid); + # Decompressor: open output; -s $comfile > 0: rm $comfile output; + # decompress output > stdout + my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile, + $opt::decompress_program, $wpid, + $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) + || die $?; + $self->set_fh($fdno,'r',$fdr); + $self->set_fh($fdno,'rpid',$rpid); + } +} + +sub set_fh($$$$) { + # Set file handle + my ($self, $fd_no, $key, $fh) = @_; + $self->{'fd'}{$fd_no,$key} = $fh; +} + +sub fh($) { + # Get file handle + my ($self, $fd_no, $key) = @_; + return $self->{'fd'}{$fd_no,$key}; +} + +sub write_block($) { + my $self = shift; + my $stdin_fh = $self->fh(0,"w"); + if(fork()) { + # Close in parent + close $stdin_fh; + } else { + # If writing is to a closed pipe: + # Do not call signal handler, but let nothing be written + local $SIG{PIPE} = undef; + + for my $part ( + grep { defined $_ } + $self->{'header'},$self->{'block'}) { + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + while($written = syswrite($stdin_fh,$$part)) { + substr($$part,0,$written) = ""; + } + } + close $stdin_fh; + exit(0); + } +} + +sub write($) { + my $self = shift; + my $remaining_ref = shift; + my $stdin_fh = $self->fh(0,"w"); + + my $len = length $$remaining_ref; + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + + # If writing is to a closed pipe: + # Do not call signal handler, but let nothing be written + local $SIG{PIPE} = undef; + while($written = syswrite($stdin_fh,$$remaining_ref)){ + substr($$remaining_ref,0,$written) = ""; + } +} + +sub set_block($$$$$$) { + # Copy stdin buffer from $block_ref up to $endpos + # Prepend with $header_ref if virgin (i.e. 
not --roundrobin) + # Remove $recstart and $recend if needed + # Input: + # $header_ref = ref to $header to prepend + # $buffer_ref = ref to $buffer containing the block + # $endpos = length of $block to pass on + # $recstart = --recstart regexp + # $recend = --recend regexp + # Returns: + # N/A + my $self = shift; + my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_; + $self->{'header'} = $header_ref; + if($opt::roundrobin or $opt::remove_rec_sep or defined $opt::retries) { + my $a = ""; + if(($opt::roundrobin or defined $opt::retries) and $self->virgin()) { + $a .= $$header_ref; + } + # Job is no longer virgin + $self->set_virgin(0); + # Make a full copy because $buffer will change + $a .= substr($$buffer_ref,0,$endpos); + $self->{'block'} = \$a; + if($opt::remove_rec_sep) { + remove_rec_sep($self->{'block'},$recstart,$recend); + } + $self->{'block_length'} = length ${$self->{'block'}}; + } else { + $self->set_virgin(0); + for(substr($$buffer_ref,0,$endpos)) { + $self->{'block'} = \$_; + } + $self->{'block_length'} = $endpos + length ${$self->{'header'}}; + } + $self->{'block_pos'} = 0; + $self->add_transfersize($self->{'block_length'}); +} + +sub block_ref($) { + my $self = shift; + return $self->{'block'}; +} + +sub block_length($) { + my $self = shift; + return $self->{'block_length'}; +} + +sub remove_rec_sep($) { + # Remove --recstart and --recend from $block + # Input: + # $block_ref = reference to $block to be modified + # $recstart = --recstart + # $recend = --recend + # Uses: + # $opt::regexp = Are --recstart/--recend regexp? 
+ # Returns: + # N/A + my ($block_ref,$recstart,$recend) = @_; + # Remove record separator + if($opt::regexp) { + $$block_ref =~ s/$recend$recstart//gom; + $$block_ref =~ s/^$recstart//os; + $$block_ref =~ s/$recend$//os; + } else { + $$block_ref =~ s/\Q$recend$recstart\E//gom; + $$block_ref =~ s/^\Q$recstart\E//os; + $$block_ref =~ s/\Q$recend\E$//os; + } +} + +sub non_blocking_write($) { + my $self = shift; + my $something_written = 0; + + my $in = $self->fh(0,"w"); + my $rv = syswrite($in, + substr(${$self->{'block'}},$self->{'block_pos'})); + if (!defined($rv) && $! == ::EAGAIN()) { + # would block - but would have written + $something_written = 0; + # avoid triggering auto expanding block size + $Global::no_autoexpand_block ||= 1; + } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) { + # incomplete write + # Remove the written part + $self->{'block_pos'} += $rv; + $something_written = $rv; + } else { + # successfully wrote everything + # Empty block to free memory + my $a = ""; + $self->set_block(\$a,\$a,0,"",""); + $something_written = $rv; + } + ::debug("pipe", "Non-block: ", $something_written); + return $something_written; +} + + +sub virgin($) { + my $self = shift; + return $self->{'virgin'}; +} + +sub set_virgin($$) { + my $self = shift; + $self->{'virgin'} = shift; +} + +sub pid($) { + my $self = shift; + return $self->{'pid'}; +} + +sub set_pid($$) { + my $self = shift; + $self->{'pid'} = shift; +} + +sub starttime($) { + # Returns: + # UNIX-timestamp this job started + my $self = shift; + return sprintf("%.3f",$self->{'starttime'}); +} + +sub set_starttime($@) { + my $self = shift; + my $starttime = shift || ::now(); + $self->{'starttime'} = $starttime; + $opt::sqlworker and + $Global::sql->update("SET Starttime = ? 
WHERE Seq = ".$self->seq(), + $starttime); +} + +sub runtime($) { + # Returns: + # Run time in seconds with 3 decimals + my $self = shift; + return sprintf("%.3f", + int(($self->endtime() - $self->starttime())*1000)/1000); +} + +sub endtime($) { + # Returns: + # UNIX-timestamp this job ended + # 0 if not ended yet + my $self = shift; + return ($self->{'endtime'} || 0); +} + +sub set_endtime($$) { + my $self = shift; + my $endtime = shift; + $self->{'endtime'} = $endtime; + $opt::sqlworker and + $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(), + $self->runtime()); +} + +sub is_timedout($) { + # Is the job timedout? + # Input: + # $delta_time = time that the job may run + # Returns: + # True or false + my $self = shift; + my $delta_time = shift; + return time > $self->{'starttime'} + $delta_time; +} + +sub kill($) { + my $self = shift; + $self->set_exitstatus(-1); + ::kill_sleep_seq($self->pid()); +} + +sub killreason($) { + my $self = shift; + return $self->{'killreason'}; +} + +sub set_killreason($) { + my $self = shift; + $self->{'killreason'} = shift; +} + +sub suspend($) { + my $self = shift; + my @pgrps = map { -$_ } $self->pid(); + kill "STOP", @pgrps; + $self->set_suspended(1); +} + +sub set_suspended($$) { + my $self = shift; + $self->{'suspended'} = shift; +} + +sub suspended($) { + my $self = shift; + return $self->{'suspended'}; +} + +sub resume($) { + my $self = shift; + my @pgrps = map { -$_ } $self->pid(); + kill "CONT", @pgrps; + $self->set_suspended(0); +} + +sub failed($) { + # return number of times failed for this $sshlogin + # Input: + # $sshlogin + # Returns: + # Number of times failed for $sshlogin + my $self = shift; + my $sshlogin = shift; + return $self->{'failed'}{$sshlogin}; +} + +sub failed_here($) { + # return number of times failed for the current $sshlogin + # Returns: + # Number of times failed for this sshlogin + my $self = shift; + return $self->{'failed'}{$self->sshlogin()}; +} + +sub add_failed($) { + # 
increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + $self->{'failed'}{$sshlogin}++; +} + +sub add_failed_here($) { + # increase the number of times failed for the current $sshlogin + my $self = shift; + $self->{'failed'}{$self->sshlogin()}++; +} + +sub reset_failed($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + delete $self->{'failed'}{$sshlogin}; +} + +sub reset_failed_here($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + delete $self->{'failed'}{$self->sshlogin()}; +} + +sub min_failed($) { + # Returns: + # the number of sshlogins this command has failed on + # the minimal number of times this command has failed + my $self = shift; + my $min_failures = + ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); + my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; + return ($number_of_sshlogins_failed_on,$min_failures); +} + +sub total_failed($) { + # Returns: + # $total_failures = the number of times this command has failed + my $self = shift; + my $total_failures = 0; + for (values %{$self->{'failed'}}) { + $total_failures += $_; + } + return $total_failures; +} + +{ + my $script; + + sub postpone_exit_and_cleanup { + # Command to remove files and dirs (given as args) without + # affecting the exit value in $?/$status. + if(not $script) { + $script = "perl -e '". + ::spacefree(0,q{ + $bash=shift; + $csh=shift; + for(@ARGV){ + unlink; + rmdir; + } + if($bash=~s/(\d+)h/$1/) { + exit $bash; + } + exit $csh; + }). + # `echo \$?h` is needed to make fish not complain + "' ".'"`echo \\\\\\\\\$?h`" "$status" '; + } + return $script + } +} + +{ + my $script; + + sub fifo_wrap() { + # Script to create a fifo, run a command on the fifo + # while copying STDIN to the fifo, and finally + # remove the fifo and return the exit code of the command. 
+ if(not $script) { + # {} == $PARALLEL_TMP for --fifo + # To make it csh compatible a wrapper needs to: + # * mkfifo + # * spawn $command & + # * cat > fifo + # * waitpid to get the exit code from $command + # * be less than 1000 chars long + + # The optimal block size differs + # It has been measured on: + # AMD 6376: 4095 + # ppar -a big --pipepart --block -1 --test $1 --fifo 'cat {} >/dev/null'; + $script = "perl -e '". + (::spacefree + (0, q{ + ($s,$c,$f) = @ARGV; + # mkfifo $PARALLEL_TMP + system "mkfifo", $f; + # spawn $shell -c $command & + $pid = fork || exec $s, "-c", $c; + open($o,">",$f) || die $!; + # cat > $PARALLEL_TMP + while(sysread(STDIN,$buf,4095)){ + syswrite $o, $buf; + } + close $o; + # waitpid to get the exit code from $command + waitpid $pid,0; + # Cleanup + unlink $f; + exit $?/256; + }))."'"; + } + return $script; + } +} + +sub wrapped($) { + # Wrap command with: + # * --shellquote + # * --nice + # * --cat + # * --fifo + # * --sshlogin + # * --pipepart (@Global::cat_prepends) + # * --tee (@Global::cat_prepends) + # * --pipe + # * --tmux + # The ordering of the wrapping is important: + # * --nice/--cat/--fifo should be done on the remote machine + # * --pipepart/--pipe should be done on the local machine inside --tmux + # Uses: + # @opt::shellquote + # $opt::nice + # $Global::shell + # $opt::cat + # $opt::fifo + # @Global::cat_prepends + # $opt::pipe + # $opt::tmux + # Returns: + # $self->{'wrapped'} = the command wrapped with the above + my $self = shift; + if(not defined $self->{'wrapped'}) { + my $command = $self->replaced(); + # Bug in Bash and Ksh when running multiline aliases + # This will force them to run correctly, but will fail in + # tcsh so we do not do it. 
+ # $command .= "\n\n"; + if(@opt::shellquote) { + # Quote one time for each --shellquote + my $c = $command; + for(@opt::shellquote) { + $c = ::Q($c); + } + # Prepend "echo" (it is written in perl because + # quoting '-e' causes problem in some versions and + # csh's version does something wrong) + $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c); + } + if($Global::parallel_env) { + # If $PARALLEL_ENV set, put that in front of the command + # Used for env_parallel.* + if($Global::shell =~ /zsh/) { + # The extra 'eval' will make aliases work, too + $command = $Global::parallel_env."\n". + "eval ".::Q($command); + } else { + $command = $Global::parallel_env."\n".$command; + } + } + if($opt::cat) { + # In '--cat' and '--fifo' {} == $PARALLEL_TMP. + # This is to make it possible to compute $PARALLEL_TMP on + # the fly when running remotely. + # $ENV{PARALLEL_TMP} is set in the remote wrapper before + # the command is run. + # + # Prepend 'cat > $PARALLEL_TMP;' + # Append 'unlink $PARALLEL_TMP without affecting $?' + $command = + 'cat > "$PARALLEL_TMP";'. + $command.";". postpone_exit_and_cleanup(). + '"$PARALLEL_TMP"'; + } elsif($opt::fifo) { + # Prepend fifo-wrapper. In essence: + # mkfifo {} + # ( $command ) & + # # $command must read {}, otherwise this 'cat' will block + # cat > {}; + # wait; rm {} + # without affecting $? + $command = fifo_wrap(). " ". + $Global::shell. " ". ::Q($command). ' "$PARALLEL_TMP"'. ';'; + } + # Wrap with ssh + tranferring of files + $command = $self->sshlogin_wrap($command); + if(@Global::cat_prepends) { + # --pipepart: prepend: + # < /tmp/foo perl -e 'while(@ARGV) { + # sysseek(STDIN,shift,0) || die; $left = shift; + # while($read = sysread(STDIN,$buf, ($left > 60800 ? 60800 : $left))){ + # $left -= $read; syswrite(STDOUT,$buf); + # } + # }' 0 0 0 11 | + # + # --pipepart --tee: prepend: + # < dash-a-file + # + # --pipe --tee: wrap: + # (rm fifo; ... ) < fifo + # + # --pipe --shard X: + # (rm fifo; ... 
) < fifo + $command = (shift @Global::cat_prepends). "($command)". + (shift @Global::cat_appends); + } elsif($opt::pipe and not $opt::roundrobin) { + # Wrap with EOF-detector to avoid starting $command if EOF. + $command = empty_input_wrapper($command); + } + if($opt::tmux) { + # Wrap command with 'tmux' + $command = $self->tmux_wrap($command); + } + if($Global::cshell + and + length $command > 499) { + # csh does not like words longer than 1000 (499 quoted) + # $command = "perl -e '".base64_zip_eval()."' ". + # join" ",string_zip_base64( + # 'exec "'.::perl_quote_scalar($command).'"'); + $command = base64_wrap("exec \"$Global::shell\",'-c',\"". + ::perl_quote_scalar($command).'"'); + } + $self->{'wrapped'} = $command; + } + return $self->{'wrapped'}; +} + +sub set_sshlogin($$) { + my $self = shift; + my $sshlogin = shift; + $self->{'sshlogin'} = $sshlogin; + delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong + delete $self->{'wrapped'}; + + if($opt::sqlworker) { + # Identify worker as --sqlworker often runs on different machines + # If local: Use hostname + my $host = $sshlogin->local() ? ::hostname() : $sshlogin->host(); + $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host); + } +} + +sub sshlogin($) { + my $self = shift; + return $self->{'sshlogin'}; +} + +sub string_base64($) { + # Base64 encode strings into 1000 byte blocks. + # 1000 bytes is the largest word size csh supports + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + my @base64 = unpack("(A1000)*",encode_base64((join"",@_),"")); + return @base64; +} + +sub string_zip_base64($) { + # Pipe string through 'bzip2 -9' and base64 encode it into 1000 + # byte blocks. 
+ # 1000 bytes is the largest word size csh supports + # Zipping will make exporting big environments work, too + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + my($zipin_fh, $zipout_fh,@base64); + ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9"); + if(fork) { + close $zipin_fh; + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + # Split base64 encoded into 1000 byte blocks + @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),"")); + close $zipout_fh; + } else { + close $zipout_fh; + print $zipin_fh @_; + close $zipin_fh; + exit; + } + ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n"); + return @base64; +} + +sub base64_zip_eval() { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * pipes through 'bzip2 -dc' + # * evals the result + # Reverse of string_zip_base64 + eval + # Will be wrapped in ' so single quote is forbidden + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64"; + eval"@GNU_Parallel"; + $chld = $SIG{CHLD}; + $SIG{CHLD} = "IGNORE"; + # Search for bzip2. 
Not found => use default path + my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2"; + # $in = stdin on $zip, $out = stdout from $zip + # Forget my() to save chars for csh + # my($in, $out,$eval); + open3($in,$out,">&STDERR",$zip,"-dc"); + if(my $perlpid = fork) { + close $in; + $eval = join "", <$out>; + close $out; + } else { + close $out; + # Pipe decoded base64 into 'bzip2 -dc' + print $in (decode_base64(join"",@ARGV)); + close $in; + exit; + } + wait; + $SIG{CHLD} = $chld; + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub base64_wrap($) { + # base64 encode Perl code + # Split it into chunks of < 1000 bytes + # Prepend it with a decoder that eval's it + # Input: + # $eval_string = Perl code to run + # Returns: + # $shell_command = shell command that runs $eval_string + my $eval_string = shift; + return + "perl -e ". + ::Q(base64_zip_eval())." ". + join" ",::shell_quote(string_zip_base64($eval_string)); +} + +sub base64_eval($) { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * evals the result + # Reverse of string_base64 + eval + # Will be wrapped in ' so single quote is forbidden. + # Spaces are stripped so spaces cannot be significant. + # The funny 'use IPC::Open3'-syntax is to avoid spaces and + # to make it clear that this is a GNU Parallel command + # when looking at the process table. 
+ # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); + eval "@GNU_Parallel"; + my $eval = decode_base64(join"",@ARGV); + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub sshlogin_wrap($) { + # Wrap the command with the commands needed to run remotely + # Input: + # $command = command to run + # Returns: + # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands + sub monitor_parent_sshd_script { + # This script is to solve the problem of + # * not mixing STDERR and STDOUT + # * terminating with ctrl-c + # If its parent is ssh: all good + # If its parent is init(1): ssh died, so kill children + my $monitor_parent_sshd_script; + + if(not $monitor_parent_sshd_script) { + $monitor_parent_sshd_script = + # This will be packed in ', so only use " + ::spacefree + (0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'. + '$tmpdir = $ENV{"TMPDIR"} || "'. + ::perl_quote_scalar($ENV{'PARALLEL_REMOTE_TMPDIR'}).'";'. + '$nice = '.$opt::nice.';'. + '$termseq = "'.$opt::termseq.'";'. + # } + q{ + # Check that $tmpdir is writable + -w $tmpdir || + die("$tmpdir\040is\040not\040writable.". + "\040Set\040PARALLEL_REMOTE_TMPDIR"); + # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR + do { + $ENV{PARALLEL_TMP} = $tmpdir."/par". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $ENV{PARALLEL_TMP}); + # Set $script to a non-existent file name in $TMPDIR + do { + $script = $tmpdir."/par-job-$ENV{PARALLEL_SEQ}_". 
+ join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $script); + # Create a script from the hex code + # that removes itself and runs the commands + open($fh,">",$script) || die; + # \040 = space - but we remove spaces in the script + # ' needed due to rc-shell + print($fh("rm\040\'$script\'\n",$bashfunc.$cmd)); + close $fh; + my $parent = getppid; + my $done = 0; + $SIG{CHLD} = sub { $done = 1; }; + $pid = fork; + unless($pid) { + # Make own process group to be able to kill HUP it later + eval { setpgrp }; + # Set nice value + eval { setpriority(0,0,$nice) }; + # Run the script + exec($shell,$script); + die("exec\040failed: $!"); + } + while((not $done) and (getppid == $parent)) { + # Parent pid is not changed, so sshd is alive + # Exponential sleep up to 1 sec + $s = $s < 1 ? 0.001 + $s * 1.03 : $s; + select(undef, undef, undef, $s); + } + if(not $done) { + # sshd is dead: User pressed Ctrl-C + # Kill as per --termseq + my @term_seq = split/,/,$termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + while(@term_seq && kill(0,-$pid)) { + kill(shift @term_seq, -$pid); + select(undef, undef, undef, (shift @term_seq)/1000); + } + } + wait; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + }); + } + return $monitor_parent_sshd_script; + } + + sub vars_to_export { + # Uses: + # @opt::env + my @vars = ("parallel_bash_environment"); + for my $varstring (@opt::env) { + # Split up --env VAR1,VAR2 + push @vars, split /,/, $varstring; + } + for (@vars) { + if(-r $_ and not -d) { + # Read as environment definition bug #44041 + # TODO parse this + $Global::envdef = ::slurp_or_exit($_); + } + } + if(grep { /^_$/ } @vars) { + local $/ = "\n"; + # --env _ + # Include all vars that are not in a clean environment + if(open(my $vars_fh, "<", $Global::config_dir . 
"/ignored_vars")) { + my @ignore = <$vars_fh>; + chomp @ignore; + my %ignore; + @ignore{@ignore} = @ignore; + close $vars_fh; + push @vars, grep { not defined $ignore{$_} } keys %ENV; + @vars = grep { not /^_$/ } @vars; + } else { + ::error("Run '$Global::progname --record-env' ". + "in a clean environment first."); + ::wait_and_exit(255); + } + } + # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2) + # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%% + + push(@vars, "PARALLEL_PID", "PARALLEL_SEQ", + "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST", + "PARALLEL_HOSTGROUPS", "PARALLEL_ARGHOSTGROUPS", + "PARALLEL_JOBSLOT", $opt::process_slot_var, + map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars); + # Keep only defined variables + return grep { defined($ENV{$_}) } @vars; + } + + sub env_as_eval { + # Returns: + # $eval = '$ENV{"..."}=...; ...' + my @vars = vars_to_export(); + my $csh_friendly = not grep { /\n/ } @ENV{@vars}; + my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; + my @non_functions = (grep { !/PARALLEL_ENV/ } + grep { substr($ENV{$_},0,4) ne "() {" } @vars); + + # eval of @envset will set %ENV + my $envset = join"", map { + '$ENV{"'.::perl_quote_scalar($_).'"}="'. + ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions; + + # running @bashfunc on the command line, will set the functions + my @bashfunc = map { + my $v=$_; + s/BASH_FUNC_(.*)(\(\)|%%)/$1/; + "$_$ENV{$v};\nexport -f $_ 2> /dev/null;\n" } @bash_functions; + # eval $bashfuncset will set $bashfunc + my $bashfuncset; + if(@bashfunc) { + # Functions are not supported for all shells + if($Global::shell !~ m:(^|/)(ash|bash|rbash|zsh|rzsh|dash|ksh):) { + ::warning("Shell functions may not be supported in $Global::shell."); + } + $bashfuncset = + '@bash_functions=qw('."@bash_functions".");". 
+ ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{ + if($shell=~/csh/) { + print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n"; + exec "false"; + } + }). + "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";'; + } else { + $bashfuncset = '$bashfunc = "";' + } + if($ENV{'parallel_bash_environment'}) { + $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";'; + } + ::debug("base64",$envset,$bashfuncset,"\n"); + return $csh_friendly,$envset,$bashfuncset; + } + + my $self = shift; + my $command = shift; + # TODO test that *sh -c 'parallel --env' use *sh + if(not defined $self->{'sshlogin_wrap'}{$command}) { + my $sshlogin = $self->sshlogin(); + $ENV{'PARALLEL_SEQ'} = $self->seq(); + $ENV{$opt::process_slot_var} = -1 + + ($ENV{'PARALLEL_JOBSLOT'} = $self->slot()); + $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string(); + $ENV{'PARALLEL_SSHHOST'} = $sshlogin->host(); + if ($opt::hostgroups) { + $ENV{'PARALLEL_HOSTGROUPS'} = join '+', $sshlogin->hostgroups(); + $ENV{'PARALLEL_ARGHOSTGROUPS'} = join '+', $self->hostgroups(); + } + $ENV{'PARALLEL_PID'} = $$; + if($sshlogin->local()) { + if($opt::workdir) { + # Create workdir if needed. Then cd to it. + my $wd = $self->workdir(); + if($opt::workdir eq "." or $opt::workdir eq "...") { + # If $wd does not start with '/': Prepend $HOME + $wd =~ s:^([^/]):$ENV{'HOME'}/$1:; + } + ::mkdir_or_die($wd); + my $post = ""; + if($opt::workdir eq "...") { + $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";"); + + } + $command = "cd ".::Q($wd)." || exit 255; " . + $command . $post;; + } + if(@opt::env) { + # Prepend with environment setter, which sets functions in zsh + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $perl_code = $envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. 
+ "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;"; + if(length $perl_code > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code); + } else { + $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code); + } + } else { + $self->{'sshlogin_wrap'}{$command} = $command; + } + } else { + my $pwd = ""; + if($opt::workdir) { + # Create remote workdir if needed. Then cd to it. + my $wd = ::pQ($self->workdir()); + $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}. + qq{print(STDERR "parallel: Cannot chdir to $wd\\n") &&}. + qq{exit 255;}; + } + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $cmd = $command; + # q// does not quote \, so we must do that + $cmd =~ s/\\/\\\\/g; + + my $remote_command = $sshlogin->hexwrap + ($pwd.$envset.$bashfuncset.'$cmd='."q\0".$cmd."\0;". + monitor_parent_sshd_script()); + my ($pre,$post,$cleanup)=("","",""); + # --transfer + $pre .= $self->sshtransfer(); + # --return + $post .= $self->sshreturn(); + # --cleanup + $post .= $self->sshcleanup(); + if($post) { + # We need to save the exit status of the job + $post = exitstatuswrapper($post); + } + $self->{'sshlogin_wrap'}{$command} = + ($pre + . $sshlogin->wrap($remote_command) + . ";" + . 
$post); + } + } + return $self->{'sshlogin_wrap'}{$command}; +} + +sub fill_templates($) { + # Replace replacement strings in template(s) + # Returns: + # @templates - File names of replaced templates + my $self = shift; + + if(%opt::template) { + my @template_name = + map { $self->{'commandline'}->replace_placeholders([$_],0,0) } + @{$self->{'commandline'}{'template_names'}}; + ::debug("tmpl","Names: @template_name\n"); + for(my $i = 0; $i <= $#template_name; $i++) { + ::write_or_exit + ($template_name[$i], + $self->{'commandline'}-> + replace_placeholders([$self->{'commandline'} + {'template_contents'}[$i]],0,0)); + } + if($opt::cleanup) { + $self->add_rm(@template_name); + } + } +} + +sub filter($) { + # Replace replacement strings in filter(s) and evaluate them + # Returns: + # $run - 1=yes, undef=no + my $self = shift; + my $run = 1; + if(@opt::filter) { + for my $eval ($self->{'commandline'}-> + replace_placeholders(\@opt::filter,0,0)) { + $run &&= eval $eval; + } + $self->{'commandline'}{'skip'} ||= not $run; + } + return $run; +} + +sub transfer($) { + # Files to transfer + # Non-quoted and with {...} substituted + # Returns: + # @transfer - File names of files to transfer + my $self = shift; + + my $transfersize = 0; + my @transfer = $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'transfer_files'},0,0); + for(@transfer) { + # filesize + if(-e $_) { + $transfersize += (stat($_))[7]; + } + } + $self->add_transfersize($transfersize); + return @transfer; +} + +sub transfersize($) { + my $self = shift; + return $self->{'transfersize'}; +} + +sub add_transfersize($) { + my $self = shift; + my $transfersize = shift; + $self->{'transfersize'} += $transfersize; + $opt::sqlworker and + $Global::sql->update("SET Send = ? 
WHERE Seq = ".$self->seq(), + $self->{'transfersize'}); +} + +sub sshtransfer($) { + # Returns for each transfer file: + # rsync $file remote:$workdir + my $self = shift; + my @pre; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + for my $file ($self->transfer()) { + push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; + } + return join("",@pre); +} + +sub return($) { + # Files to return + # Non-quoted and with {...} substituted + # Returns: + # @non_quoted_filenames + my $self = shift; + return $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'return_files'},0,0); +} + +sub returnsize($) { + # This is called after the job has finished + # Returns: + # $number_of_bytes transferred in return + my $self = shift; + for my $file ($self->return()) { + if(-e $file) { + $self->{'returnsize'} += (stat($file))[7]; + } + } + return $self->{'returnsize'}; +} + +sub add_returnsize($) { + my $self = shift; + my $returnsize = shift; + $self->{'returnsize'} += $returnsize; + $opt::sqlworker and + $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(), + $self->{'returnsize'}); +} + +sub sshreturn($) { + # Returns for each return-file: + # rsync remote:$workdir/$file . + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $pre = ""; + for my $file ($self->return()) { + $file =~ s:^\./::g; # Remove ./ if any + my $relpath = ($file !~ m:^/:) || + ($file =~ m:/\./:); # Is the path relative or /./? + my $cd = ""; + my $wd = ""; + if($relpath) { + # rsync -avR /foo/./bar/baz.c remote:/tmp/ + # == (on old systems) + # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ + $wd = ::shell_quote_file($self->workdir()."/"); + } + # Only load File::Basename if actually needed + $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; + # dir/./file means relative to dir, so remove dir on remote + $file =~ m:(.*)/\./:; + my $basedir = $1 ? 
::shell_quote_file($1."/") : ""; + my $nobasedir = $file; + $nobasedir =~ s:.*/\./::; + $cd = ::shell_quote_file(::dirname($nobasedir)); + my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync"); + my $basename = ::Q(::shell_quote_file(::basename($file))); + # --return + # mkdir -p /home/tange/dir/subdir/; + # rsync (--protocol 30) -rlDzR + # --rsync-path="cd /home/tange/dir/subdir/; rsync" + # server:file.gz /home/tange/dir/subdir/ + $pre .= "mkdir -p $basedir$cd" . " && " . + $sshlogin->rsync(). " $rsync_cd -- ".$sshlogin->host().':'. + $basename . " ".$basedir.$cd.";"; + } + return $pre; +} + +sub sshcleanup($) { + # Return the sshcommand needed to remove the file + # Returns: + # ssh command needed to remove files from sshlogin + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + my $cleancmd = ""; + + for my $file ($self->remote_cleanup()) { + my @subworkdirs = parentdirs_of($file); + $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; + } + if(defined $opt::workdir and $opt::workdir eq "...") { + $cleancmd .= $sshlogin->wrap("rm -rf " . ::Q($workdir).';'); + } + return $cleancmd; +} + +sub remote_cleanup($) { + # Returns: + # Files to remove at cleanup + my $self = shift; + if($opt::cleanup) { + my @transfer = $self->transfer(); + my @return = $self->return(); + return (@transfer,@return); + } else { + return (); + } +} + +sub exitstatuswrapper(@) { + # Input: + # @shellcode = shell code to execute + # Returns: + # shell script that returns current status after executing @shellcode + if($Global::cshell) { + return ('set _EXIT_status=$status; ' . + join(" ",@_). + 'exit $_EXIT_status;'); + } elsif($Global::fish) { + return ('export _EXIT_status=$status; ' . + join(" ",@_). + 'exit $_EXIT_status;'); + } else { + return ('_EXIT_status=$?; ' . + join(" ",@_). 
+ 'exit $_EXIT_status;'); + } +} + +sub workdir($) { + # Returns: + # the workdir on a remote machine + my $self = shift; + if(not defined $self->{'workdir'}) { + my $workdir; + if(defined $opt::workdir) { + if($opt::workdir eq ".") { + # . means current dir + my $home = $ENV{'HOME'}; + eval 'use Cwd'; + my $cwd = cwd(); + $workdir = $cwd; + if($home) { + # If homedir exists: remove the homedir from + # workdir if cwd starts with homedir + # E.g. /home/foo/my/dir => my/dir + # E.g. /tmp/my/dir => /tmp/my/dir + my ($home_dev, $home_ino) = (stat($home))[0,1]; + my $parent = ""; + my @dir_parts = split(m:/:,$cwd); + my $part; + while(defined ($part = shift @dir_parts)) { + $part eq "" and next; + $parent .= "/".$part; + my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; + if($parent_dev == $home_dev and $parent_ino == $home_ino) { + # dev and ino is the same: We found the homedir. + $workdir = join("/",@dir_parts); + last; + } + } + } + if($workdir eq "") { + $workdir = "."; + } + } elsif($opt::workdir eq "...") { + $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ + . "-" . $self->seq(); + } else { + $workdir = $self->{'commandline'}-> + replace_placeholders([$opt::workdir],0,0); + #$workdir = $opt::workdir; + # Rsync treats /./ special. We dont want that + $workdir =~ s:/\./:/:g; # Remove /./ + $workdir =~ s:(.)/+$:$1:; # Remove ending / if any + $workdir =~ s:^\./::g; # Remove starting ./ if any + } + } else { + $workdir = "."; + } + $self->{'workdir'} = $workdir; + } + return $self->{'workdir'}; +} + +sub parentdirs_of($) { + # Return: + # all parentdirs except . of this dir or file - sorted desc by length + my $d = shift; + my @parents = (); + while($d =~ s:/[^/]+$::) { + if($d ne ".") { + push @parents, $d; + } + } + return @parents; +} + +sub start($) { + # Setup STDOUT and STDERR for a job and start it. 
+ # Returns: + # job-object or undef if job not to run + + sub open3_setpgrp_internal { + # Run open3+setpgrp followed by the command + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + my $pid; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + # The eval is needed to catch exception from open3 + eval { + if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) { + # Each child gets its own process group to make it safe to killall + eval{ setpgrp(0,0) }; + eval{ setpriority(0,0,$opt::nice) }; + exec($Global::shell,"-c",$command) + || ::die_bug("open3-$stdin_fh ".substr($command,0,200)); + } + }; + return $pid; + } + + sub open3_setpgrp_external { + # Run open3 on $command wrapped with a perl script doing setpgrp + # Works on systems that do not support open3(,,,"-") + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + + my $pid; + my @setpgrp_wrap = + ('perl','-e', + "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;". 
+ "exec '$Global::shell', '-c', \@ARGV"); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command) + || ::die_bug("open3-$stdin_fh"); + 1; + }; + return $pid; + } + + sub redefine_open3_setpgrp { + my $setgprp_cache = shift; + # Select and run open3_setpgrp_internal/open3_setpgrp_external + no warnings 'redefine'; + my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst"); + # Test to see if open3(x,x,x,"-") is fully supported + # Can an exported bash function be called via open3? + my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '. + 'else { exec("bash","-c","testfun && true"); }'; + my $bash = + ::shell_quote_scalar_default( + "testfun() { rm $name; }; export -f testfun; ". + "perl -MIPC::Open3 -e ". + ::Q(::Q($script)) + ); + my $redefine_eval; + # Redirect STDERR temporarily, + # so errors on MacOS X are ignored. + open my $saveerr, ">&STDERR"; + open STDERR, '>', "/dev/null"; + # Run the test + ::debug("init",qq{bash -c $bash 2>/dev/null}); + qx{ bash -c $bash 2>/dev/null }; + open STDERR, ">&", $saveerr; + + if(-e $name) { + # Does not support open3(x,x,x,"-") + # or does not have bash: + # Use (slow) external version + unlink($name); + $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external'; + ::debug("init","open3_setpgrp_external chosen\n"); + } else { + # Supports open3(x,x,x,"-") + # This is 0.5 ms faster to run + $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal'; + ::debug("init","open3_setpgrp_internal chosen\n"); + } + if(open(my $fh, ">", $setgprp_cache)) { + print $fh $redefine_eval; + close $fh; + } else { + ::debug("init","Cannot write to $setgprp_cache"); + } + eval $redefine_eval; + } + + sub open3_setpgrp { + my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" . + ::hostname() . 
"/setpgrp_func"; + sub read_cache() { + -e $setgprp_cache || return 0; + local $/ = undef; + open(my $fh, "<", $setgprp_cache) || return 0; + eval <$fh> || return 0; + close $fh; + return 1; + } + if(not read_cache()) { + redefine_open3_setpgrp($setgprp_cache); + } + # The sub is now redefined. Call it + return open3_setpgrp(@_); + } + + my $job = shift; + # Get the shell command to be executed (possibly with ssh infront). + my $command = $job->wrapped(); + my $pid; + + if($Global::interactive or $Global::stderr_verbose) { + $job->interactive_start(); + } + $job->openoutputfiles(); + # Must be run after $job->interactive_start(): + # $job->interactive_start() may call $job->skip() + if($job->{'commandline'}{'skip'} + or + not $job->filter()) { + # $job->skip() was called or job filtered + $command = "true"; + } else { + $job->print_verbose_dryrun(); + } + my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); + if($opt::dryrun or $opt::sqlmaster) { $command = "true"; } + $ENV{'PARALLEL_SEQ'} = $job->seq(); + $ENV{'PARALLEL_PID'} = $$; + $ENV{$opt::process_slot_var} = -1 + + ($ENV{'PARALLEL_JOBSLOT'} = $job->slot()); + $ENV{'PARALLEL_TMP'} = ::tmpname("par"); + $job->add_rm($ENV{'PARALLEL_TMP'}); + $job->fill_templates(); + $ENV{'SSHPASS'} = $job->{'sshlogin'}->{'password'}; + ::debug("run", $Global::total_running, " processes . 
Starting (", + $job->seq(), "): $command\n"); + if($opt::pipe) { + my ($stdin_fh) = ::gensym(); + $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command); + if($opt::roundrobin and not $opt::keeporder) { + # --keep-order will make sure the order will be reproducible + ::set_fh_non_blocking($stdin_fh); + } + $job->set_fh(0,"w",$stdin_fh); + if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); } + } elsif(($opt::tty or $opt::open_tty) and -c "/dev/tty" and + open(my $devtty_fh, "<", "/dev/tty")) { + # Give /dev/tty to the command if no one else is using it + # The eval is needed to catch exception from open3 + local (*IN,*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + *IN = $devtty_fh; + # The eval is needed to catch exception from open3 + my @wrap = ('perl','-e', + "eval\{setpriority\(0,0,$opt::nice\)\}\;". + "exec '$Global::shell', '-c', \@ARGV"); + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command) + || ::die_bug("open3-/dev/tty"); + 1; + }; + close $devtty_fh; + $job->set_virgin(0); + } elsif($Global::semaphore) { + # Allow sem to read from stdin + $pid = open3_setpgrp("<&STDIN",$stdout_fh,$stderr_fh,$command); + $job->set_virgin(0); + } else { + $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command); + $job->set_virgin(0); + } + if($pid) { + # A job was started + $Global::total_running++; + $Global::total_started++; + $job->set_pid($pid); + $job->set_starttime(); + $Global::running{$job->pid()} = $job; + if($opt::timeout) { + $Global::timeoutq->insert($job); + } + $Global::newest_job = $job; + $Global::newest_starttime = ::now(); + return $job; + } else { + # No more processes + ::debug("run", "Cannot spawn more jobs.\n"); + return undef; + } +} + +sub interactive_start($) { + my $self = shift; + my $command = $self->wrapped(); + if($Global::interactive) { + my $answer; + ::status_no_nl("$command ?..."); + do{ + my 
$tty_fh = ::open_or_exit("<","/dev/tty"); + $answer = <$tty_fh>; + close $tty_fh; + # Sometime we get an empty string (not even \n) + # Do not know why, so let us just ignore it and try again + } while(length $answer < 1); + if (not ($answer =~ /^\s*y/i)) { + $self->{'commandline'}->skip(); + } + } else { + print $Global::original_stderr "$command\n"; + } +} + +{ + my $tmuxsocket; + my $qsocket; + + sub tmux_wrap($) { + # Wrap command with tmux for session pPID + # Input: + # $actual_command = the actual command being run (incl ssh wrap) + my $self = shift; + my $actual_command = shift; + # Temporary file name. Used for fifo to communicate exit val + my $tmpfifo = ::tmpname("tmx"); + $self->add_rm($tmpfifo); + if(length($tmpfifo) >=100) { + ::error("tmux does not support sockets with path > 100."); + ::wait_and_exit(255); + } + if($opt::tmuxpane) { + # Move the command into a pane in window 0 + $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '. + $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '. + $actual_command; + } + my $visual_command = $self->replaced(); + my $title = $visual_command; + if($visual_command =~ /\0/) { + ::error("Command line contains NUL. 
tmux is confused by NUL."); + ::wait_and_exit(255); + } + # ; causes problems + # ascii 194-245 annoys tmux + $title =~ tr/[\011-\016;\302-\365]/ /s; + $title = ::Q($title); + + my $l_act = length($actual_command); + my $l_tit = length($title); + my $l_fifo = length($tmpfifo); + # The line to run contains a 118 chars extra code + the title 2x + my $l_tot = 2 * $l_tit + $l_act + $l_fifo; + + my $quoted_space75 = ::Q(" ")x75; + while($l_tit < 1000 and + ( + (890 < $l_tot and $l_tot < 1350) + or + (9250 < $l_tot and $l_tot < 9800) + )) { + # tmux blocks for certain lengths: + # 900 < title + command < 1200 + # 9250 < title + command < 9800 + # but only if title < 1000, so expand the title with 75 spaces + # The measured lengths are: + # 996 < (title + whole command) < 1127 + # 9331 < (title + whole command) < 9636 + $title .= $quoted_space75; + $l_tit = length($title); + $l_tot = 2 * $l_tit + $l_act + $l_fifo; + } + + my $tmux; + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not $tmuxsocket) { + $tmuxsocket = ::tmpname("tms"); + $qsocket = ::Q($tmuxsocket); + ::debug("tmux", "Start: $ENV{'PARALLEL_TMUX'} -S $qsocket attach"); + if($opt::fg) { + if(not fork) { + # Run tmux in the foreground + # Wait for the socket to appear + while (not -e $tmuxsocket) { } + `$ENV{'PARALLEL_TMUX'} -S $qsocket attach`; + exit; + } + } + ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $qsocket attach"); + } + $tmux = "sh -c ".::Q( + $ENV{'PARALLEL_TMUX'}. + " -S $qsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1").";" . + $ENV{'PARALLEL_TMUX'}. + " -S $qsocket new-window -t p$$ -n $title"; + + ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ", + $Limits::Command::line_max_len, " tot ", + $l_tot, "\n"); + return "mkfifo ".::Q($tmpfifo)." && $tmux ". + # Run in tmux + ::Q + ( + "(".$actual_command.');'. + # The triple print is needed - otherwise the testsuite fails + q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ]. + ::Q($tmpfifo)."&". 
+ "echo $title; echo \007Job finished at: `date`;sleep 10" + ). + # Run outside tmux + # Read a / separated line: 0h/2 for csh, 2/0 for bash. + # If csh the first will be 0h, so use the second as exit value. + # Otherwise just use the first value as exit value. + q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; }. + q{/(\d+)h/ and exit($1);exit$c' }.::Q($tmpfifo); + } +} + +sub is_already_in_results($) { + # Do we already have results for this job? + # Returns: + # $job_already_run = bool whether there is output for this or not + my $job = $_[0]; + if($Global::csvsep) { + if($opt::joblog) { + # OK: You can look for job run in joblog + return 0 + } else { + ::warning_once( + "--resume --results .csv/.tsv/.json is not supported yet\n"); + # TODO read and parse the file + return 0 + } + } + my $out = $job->{'commandline'}->results_out(); + ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n"); + return(-e $out."stdout" or -f $out); +} + +sub is_already_in_joblog($) { + my $job = shift; + return vec($Global::job_already_run,$job->seq(),1); +} + +sub set_job_in_joblog($) { + my $job = shift; + vec($Global::job_already_run,$job->seq(),1) = 1; +} + +sub retry() { + # This command should be retried + my $self = shift; + + $self->set_endtime(undef); + $self->reset_exitstatus(); + $self->set_killreason(undef); + $Global::JobQueue->unget($self); + ::debug("run", "Retry ", $self->seq(), "\n"); + return 1; +} + +sub should_be_retried($) { + # Should this job be retried? + # Returns + # 0 - do not retry + # 1 - job queued for retry + my $self = shift; + if($opt::memfree and $self->killreason() eq "mem") { + # Job was killed due to memfree => retry + return $self->retry(); + } + if (not defined $opt::retries) { return 0; } + if(not $self->exitstatus() and not $self->exitsignal()) { + # Completed with success. If there is a recorded failure: forget it + $self->reset_failed_here(); + return 0; + } else { + # The job failed. Should it be retried? 
+ $self->add_failed_here(); + my $retries = $self->{'commandline'}-> + replace_placeholders([$opt::retries],0,0); + # 0 = Inf + if($retries == 0) { $retries = 2**31; } + # Ignore files already unlinked to avoid memory leak + $self->{'unlink'} = [ grep { -e $_ } @{$self->{'unlink'}} ]; + map { -e $_ or delete $Global::unlink{$_} } keys %Global::unlink; + if($self->total_failed() == $retries) { + # This has been retried enough + return 0; + } else { + # This command should be retried + return $self->retry(); + } + } +} + +{ + my (%print_later,$job_seq_to_print); + + sub print_earlier_jobs($) { + # Print jobs whose output is postponed due to --keep-order + # Returns: N/A + my $job = shift; + $print_later{$job->seq()} = $job; + $job_seq_to_print ||= 1; + my $returnsize = 0; + ::debug("run", "Looking for: $job_seq_to_print ", + "This: ", $job->seq(), "\n"); + for(;vec($Global::job_already_run,$job_seq_to_print,1); + $job_seq_to_print++) {} + while(my $j = $print_later{$job_seq_to_print}) { + $returnsize += $j->print(); + if($j->endtime()) { + # Job finished - look at the next + delete $print_later{$job_seq_to_print}; + $job_seq_to_print++; + next; + } else { + # Job not finished yet - look at it again next round + last; + } + } + return $returnsize; + } +} + +sub print($) { + # Print the output of the jobs + # Returns: N/A + my $self = shift; + + ::debug("print", ">>joboutput ", $self->replaced(), "\n"); + if($opt::dryrun) { + # Nothing was printed to this job: + # cleanup tmp files if --files was set + ::rm($self->fh(1,"name")); + } + if($opt::pipe and $self->virgin() and not $opt::tee) { + # Skip --joblog, --dryrun, --verbose + } else { + if($opt::ungroup) { + # NULL returnsize = 0 returnsize + $self->returnsize() or $self->add_returnsize(0); + if($Global::joblog and defined $self->{'exitstatus'}) { + # Add to joblog when finished + $self->print_joblog(); + # Printing is only relevant for grouped/--line-buffer output. 
+ $opt::ungroup and return; + } + } + # Check for disk full + ::exit_if_disk_full(); + } + + my $returnsize = $self->returnsize(); + my @fdno; + if($opt::latestline) { + @fdno = (1); + } else { + @fdno = (sort { $a <=> $b } keys %Global::fh); + } + for my $fdno (@fdno) { + # Sort by file descriptor numerically: 1,2,3,..,9,10,11 + $fdno == 0 and next; + my $out_fh = $Global::fh{$fdno}; + my $in_fh = $self->fh($fdno,"r"); + if(not $in_fh) { + if(not $Job::file_descriptor_warning_printed{$fdno}++) { + # ::warning("File descriptor $fdno not defined\n"); + } + next; + } + ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n"); + if($Global::linebuffer) { + # Line buffered print out + $self->print_linebuffer($fdno,$in_fh,$out_fh); + } elsif($Global::files) { + $self->print_files($fdno,$in_fh,$out_fh); + } elsif($opt::results) { + $self->print_results($fdno,$in_fh,$out_fh); + } else { + $self->print_normal($fdno,$in_fh,$out_fh); + } + flush $out_fh; + } + ::debug("print", "<{'exitstatus'} + and not ($self->virgin() and $opt::pipe)) { + if($Global::joblog and not $opt::sqlworker) { + # Add to joblog when finished + $self->print_joblog(); + } + if($opt::sqlworker and not $opt::results) { + $Global::sql->output($self); + } + if($Global::csvsep) { + # Add output to CSV when finished + $self->print_csv(); + } + if($Global::jsonout) { + $self->print_json(); + } + } + return $returnsize - $self->returnsize(); +} + +{ + my %jsonmap; + + sub print_json($) { + my $self = shift; + sub jsonquote($) { + my $a = shift; + if(not $jsonmap{"\001"}) { + map { $jsonmap{sprintf("%c",$_)} = + sprintf '\u%04x', $_ } 0..31; + } + $a =~ s/\\/\\\\/g; + $a =~ s/\"/\\"/g; + $a =~ s/([\000-\037])/$jsonmap{$1}/g; + return $a; + } + + my $cmd; + if($Global::verbose <= 1) { + $cmd = jsonquote($self->replaced()); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = jsonquote(join " ", @{$self->{'commandline'}}); + } + my $record_ref = 
$self->{'commandline'}{'arg_list_flat_orig'}; + + # Memory optimization: Overwrite with the joined output + $self->{'output'}{1} = join("", @{$self->{'output'}{1}}); + $self->{'output'}{2} = join("", @{$self->{'output'}{2}}); + # { + # "Seq": 12, + # "Host": "/usr/bin/ssh foo@lo", + # "Starttime": 1608344711.743, + # "JobRuntime": 0.01, + # "Send": 0, + # "Receive": 10, + # "Exitval": 0, + # "Signal": 0, + # "Command": "echo 1", + # "V": [ + # "1" + # ], + # "Stdout": "1\n", + # "Stderr": "" + # } + # + printf($Global::csv_fh + q({ "Seq": %s, "Host": "%s", "Starttime": %s, "JobRuntime": %s, ). + q("Send": %s, "Receive": %s, "Exitval": %s, "Signal": %s, ). + q("Command": "%s", "V": [ %s ], "Stdout": "%s", "Stderr": "%s" }). + "\n", + $self->seq(), + jsonquote($self->sshlogin()->string()), + $self->starttime(), sprintf("%0.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd, + (join ",", + map { '"'.jsonquote($_).'"' } @$record_ref[1..$#$record_ref], + ), + jsonquote($self->{'output'}{1}), + jsonquote($self->{'output'}{2}) + ); + } +} + +{ + my $header_printed; + + sub print_csv($) { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = join " ", @{$self->{'commandline'}}; + } + my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'}; + + if(not $header_printed) { + # Variable headers + # Normal => V1..Vn + # --header : => first value from column + my @V; + if($opt::header) { + my $i = 1; + @V = (map { $Global::input_source_header{$i++} } + @$record_ref[1..$#$record_ref]); + } else { + my $V = "V1"; + @V = (map { $V++ } @$record_ref[1..$#$record_ref]); + } + print $Global::csv_fh + (map { $$_ } + combine_ref("Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command", + @V, + "Stdout","Stderr" + )),"\n"; + $header_printed++; + } + # Memory optimization: 
Overwrite with the joined output + $self->{'output'}{1} = join("", @{$self->{'output'}{1}}); + $self->{'output'}{2} = join("", @{$self->{'output'}{2}}); + print $Global::csv_fh + (map { $$_ } + combine_ref + ($self->seq(), + $self->sshlogin()->string(), + $self->starttime(), sprintf("%0.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), \$cmd, + \@$record_ref[1..$#$record_ref], + \$self->{'output'}{1}, + \$self->{'output'}{2})),"\n"; + } +} + +sub combine_ref($) { + # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu) + my @part = @_; + my $sep = $Global::csvsep; + my $quot = '"'; + my @out = (); + + my $must_be_quoted; + for my $column (@part) { + # Memory optimization: Content transferred as reference + if(ref $column ne "SCALAR") { + # Convert all columns to scalar references + my $v = $column; + $column = \$v; + } + if(not defined $$column) { + $$column = ''; + next; + } + + $must_be_quoted = 0; + + if($$column =~ s/$quot/$quot$quot/go){ + # Replace " => "" + $must_be_quoted ||=1; + } + if($$column =~ /[\s\Q$sep\E]/o){ + # Put quotes around if the column contains , + $must_be_quoted ||=1; + } + + $Global::use{"bytes"} ||= eval "use bytes; 1;"; + if ($$column =~ /\0/) { + # Contains \0 => put quotes around + $must_be_quoted ||=1; + } + if($must_be_quoted){ + push @out, \$sep, \$quot, $column, \$quot; + } else { + push @out, \$sep, $column; + } + } + # Remove the first $sep: ,val,"val" => val,"val" + shift @out; + return @out; +} + +sub print_files($) { + # Print the name of the file containing stdout on stdout + # Uses: + # $opt::pipe + # $opt::group = Print when job is done + # $opt::linebuffer = Print ASAP + # Returns: N/A + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." 
failed."); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Kill the decompressor which will not be needed + CORE::kill "TERM", $self->fh($fdno,"rpid"); + } + close $in_fh; + + if($opt::pipe and $self->virgin()) { + # Nothing was printed to this job: + # cleanup unused tmp files because --files was set + for my $fdno (1,2) { + ::rm($self->fh($fdno,"name")); + ::rm($self->fh($fdno,"unlink")); + } + } elsif($fdno == 1 and $self->fh($fdno,"name")) { + print $out_fh $self->tag(),$self->fh($fdno,"name"), $Global::files_sep; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, + $self->tag(), $self->fh($fdno,"name"); + } + $self->add_returnsize(-s $self->fh($fdno,"name")); + # Mark as printed - do not print again + $self->set_fh($fdno,"name",undef); + } +} + + +# Different print types +# (--ll | --ll --bar | --lb | --group | --parset | --sql-worker) +# (--files | --results (.json|.csv|.tsv) ) +# --color-failed +# --color +# --keep-order +# --tag +# --bar +{ + my ($up,$eol,$currow,$maxrow); + my ($minvisible,%print_later,%notvisible); + my (%binmodeset,%tab); + + sub latestline_init() { + # cursor_up cuu1 = up one line + $up = `sh -c "tput cuu1 /dev/null`; + chomp($up); + $eol = `sh -c "tput el /dev/null`; + chomp($eol); + if($eol eq "") { $eol = "\033[K"; } + $currow = 1; + $maxrow = 1; + $minvisible = 1; + for(0..8) { + $tab{$_} = " "x(8-($_%8)); + } + } + + sub mbtrunc($$) { + # Simple mbtrunc to avoid using Text::WideChar::Util + my $str = shift; + my $len = shift; + if(::mbswidth($str) == length($str)) { + $str = substr($str,0,$len); + } else { + # mb chars (ヌー平行) are wider than 1 char on screen + # We need at most $len chars - they may be wide + $str =~ s/(.{$len}).*/$1/; + my $rlen = int((::mbswidth($str) - $len)/2+0.5); + do { + $str =~ s/.{$rlen}$//; + $rlen = int((::mbswidth($str) - $len)/2+0.5); + } while($rlen >= 1); + } + return $str; + } + + sub print_latest_line($) { + my $self = shift; + my $out_fh = shift; + if(not defined 
$self->{$out_fh,'latestline'}) { return; } + my $row = $self->row(); + # Is row visible? + if(not ($minvisible <= $row + and + $row < $minvisible + ::terminal_rows() - 1)) { + return; + } + if(not $binmodeset{$out_fh}++) { + # Enable utf8 if possible + eval q{ binmode $out_fh, "encoding(utf8)"; }; + } + my ($color,$reset_color) = $self->color(); + my $termcol = ::terminal_columns(); + my $untabify_tag = ::decode_utf8($self->untabtag()); + my $untabify_str = + ::untabify(::decode_utf8($self->{$out_fh,'latestline'})); + # -1 to make space for $truncated_str + my $maxtaglen = $termcol - 1; + $untabify_tag = mbtrunc($untabify_tag,$maxtaglen); + my $taglen = ::mbswidth($untabify_tag); + my $maxstrlen = $termcol - $taglen - 1; + $untabify_str = mbtrunc($untabify_str,$maxstrlen); + my $strlen = ::mbswidth($untabify_str); + my $truncated_tag = ""; + my $truncated_str = ""; + if($termcol - $taglen < 2) { + $truncated_tag = ">"; + } else { + if($termcol - $taglen - $strlen <= 2) { + $truncated_str = ">"; + } + } + $maxrow = ($row > $maxrow) ? $row : $maxrow; + printf($out_fh + ("%s%s%s%s". # up down \r eol + "%s%s". # tag trunc_tag + "%s%s%s%s". # color line trunc reset_color + "%s" # down + ), + "$up"x($currow - $row), "\n"x($row - $currow), "\r", $eol, + $untabify_tag,$truncated_tag, + $color, $untabify_str, $truncated_str, $reset_color, + "\n"x($maxrow - $row + 1)); + $currow = $maxrow + 1; + } + + sub print_linebuffer($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + if(defined $self->{'exitstatus'}) { + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($opt::compress) { + if($?) { + ::error($opt::compress_program." 
failed."); + $self->set_exitstatus(255); + } + # Blocked reading in final round + for my $fdno (1,2) { ::set_fh_blocking($self->fh($fdno,'r')); } + } + if($opt::latestline) { $print_later{$self->row()} = $self; } + } + if(not $self->virgin()) { + if($Global::files or ($opt::results and not $Global::csvsep)) { + # Print filename + if($fdno == 1 and not $self->fh($fdno,"printed")) { + print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n"; + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, $self->tag(), + $self->fh($fdno,"name")); + } + $self->set_fh($fdno,"printed",1); + } + # No need for reading $in_fh, as it is from "cat >/dev/null" + } else { + # Read halflines and print full lines + my $outputlength = 0; + my $halfline_ref = $self->{'halfline'}{$fdno}; + my ($buf,$i,$rv); + # 1310720 gives 1.2 GB/s + # 131072 gives 0.9 GB/s + # The optimal block size differs + # It has been measured on: + # AMD 6376: 60800 (>70k is also reasonable) + # Intel i7-3632QM: 52-59k, 170-175k + # seq 64 | ppar --_test $1 --lb \ + # 'yes {} `seq 1000`|head -c 10000000' >/dev/null + while($rv = sysread($in_fh, $buf, 60800)) { + $outputlength += $rv; + # TODO --recend + # Treat both \n and \r as line end + # Only test for \r if there is no \n + # Test: + # perl -e '$a="x"x1000000; + # $b="$a\r$a\n$a\r$a\n"; + # map { print $b,$_ } 1..10' + $i = ((rindex($buf,"\n")+1) || (rindex($buf,"\r")+1)); + if($i) { + if($opt::latestline) { + # Keep the latest full line + my $l = join('', @$halfline_ref, + substr($buf,0,$i-1)); + # "ab\rb\n" = "bb", but we cannot process that correctly. 
+ # Line may be: + # foo \r bar \n + # foo \r bar \r baz \r + # If so: Remove 'foo \r' + $l =~ s/.*\r//g; + my $j = ((rindex($l,"\n")+1) || + (rindex($l,"\r")+1)); + $self->{$out_fh,'latestline'} = substr($l,$j); + # Remove the processed part + # by keeping the unprocessed part + @$halfline_ref = (substr($buf,$i)); + } else { + # One or more complete lines were found + if($Global::color) { + my $print = join("",@$halfline_ref, + substr($buf,0,$i)); + chomp($print); + my ($color,$reset_color) = $self->color(); + my $colortag = $color.$self->tag(); + # \n => reset \n color tag + $print =~ s{([\n\r])(?=.|$)} + {$reset_color$1$colortag}gs; + print($out_fh $colortag, $print, + $reset_color, "\n"); + } elsif($opt::tag or defined $opt::tagstring) { + # Replace ^ with $tag within the full line + if($Global::cache_replacement_eval) { + # Replace with the same value for tag + my $tag = $self->tag(); + unshift @$halfline_ref, $tag; + # TODO --recend that can be partially in + # @$halfline_ref + substr($buf,0,$i-1) =~ + s/([\n\r])(?=.|$)/$1$tag/gs; + } else { + # Replace with freshly computed tag-value + unshift @$halfline_ref, $self->tag(); + substr($buf,0,$i-1) =~ + s/([\n\r])(?=.|$)/$1.$self->tag()/gse; + } + # The length changed, + # so find the new ending pos + $i = ::max((rindex($buf,"\n")+1), + (rindex($buf,"\r")+1)); + # Print the partial line (halfline) + # and the last half + print $out_fh @$halfline_ref, substr($buf,0,$i); + } else { + # Print the partial line (halfline) + # and the last half + print $out_fh @$halfline_ref, substr($buf,0,$i); + } + # Buffer in memory for SQL and CSV-output + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, + @$halfline_ref, substr($buf,0,$i)); + } + # Remove the printed part by keeping the unprinted + @$halfline_ref = (substr($buf,$i)); + } + } else { + # No newline, so append to the halfline + push @$halfline_ref, $buf; + } + } + $self->add_returnsize($outputlength); + if($opt::latestline) { 
$self->print_latest_line($out_fh); } + } + if(defined $self->{'exitstatus'}) { + if($Global::files or ($opt::results and not $Global::csvsep)) { + $self->add_returnsize(-s $self->fh($fdno,"name")); + } else { + if($opt::latestline) { + # Force re-computing color if --colorfailed + if($opt::colorfailed) { delete $self->{'color'}; } + if($self->{$out_fh,'latestline'} ne "") { + $self->print_latest_line($out_fh); + } + if(@{$self->{'halfline'}{$fdno}}) { + my $l = join('', @{$self->{'halfline'}{$fdno}}); + if($l ne "") { + $self->{$out_fh,'latestline'} = $l; + } + } else { + $self->{$out_fh,'latestline'} = undef; + } + # Print latest line from jobs that are already done + while($print_later{$minvisible}) { + $print_later{$minvisible}->print_latest_line($out_fh); + delete $print_later{$minvisible}; + $minvisible++; + } + # Print latest line from jobs that are on screen now + for(my $row = $minvisible; + $row < $minvisible -1 + ::terminal_rows(); + $row++) { + $print_later{$row} and + $print_later{$row}->print_latest_line($out_fh); + } + } else { + # If the job is dead: print the remaining partial line + # read remaining (already done for $opt::latestline) + my $halfline_ref = $self->{'halfline'}{$fdno}; + if(grep /./, @$halfline_ref) { + my $returnsize = 0; + for(@{$self->{'halfline'}{$fdno}}) { + $returnsize += length $_; + } + $self->add_returnsize($returnsize); + if($opt::tag or defined $opt::tagstring) { + # Prepend $tag the the remaining half line + unshift @$halfline_ref, $self->tag(); + } + # Print the partial line (halfline) + print $out_fh @{$self->{'halfline'}{$fdno}}; + # Buffer in memory for SQL and CSV-output + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, @$halfline_ref); + } + @$halfline_ref = (); + } + } + } + if($self->fh($fdno,"rpid") and + CORE::kill 0, $self->fh($fdno,"rpid")) { + # decompress still running + } else { + # decompress done: close fh + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." 
failed."); + $self->set_exitstatus(255); + } + } + } + } + } +} + +sub free_ressources() { + my $self = shift; + if(not $opt::ungroup) { + my $fh; + for my $fdno (sort { $a <=> $b } keys %Global::fh) { + $fh = $self->fh($fdno,"w"); + $fh and close $fh; + $fh = $self->fh($fdno,"r"); + $fh and close $fh; + } + } +} + +sub print_parset($) { + # Wrap output with shell script code to set as variables + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $outputlength = 0; + + ::debug("parset","print $Global::parset"); + if($Global::parset eq "assoc") { + # Start: (done in parse_parset()) + # eval "`echo 'declare -A myassoc; myassoc=( + # Each: (done here) + # [$'a\tb']=$'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # )'`" + print '[',::Q($self->{'commandline'}-> + replace_placeholders(["\177<\177>"],0,0)),']='; + } elsif($Global::parset eq "array") { + # Start: (done in parse_parset()) + # eval "`echo 'myassoc=( + # Each: (done here) + # $'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # )'`" + } elsif($Global::parset eq "var") { + # Start: (done in parse_parset()) + # + # Each: (done here) + # var=$'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # + if(not @Global::parset_vars) { + ::error("Too few named destination variables"); + ::wait_and_exit(255); + } + print shift @Global::parset_vars,"="; + } + local $/ = "\n"; + my $tag = $self->tag(); + my @out; + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs; + push @out, $tag,$_; + } + # Remove last newline + # This often makes it easier to use the output in shell + @out and ${out[$#out]} =~ s/\n$//s; + print ::Q(join("",@out)),"\n"; + return $outputlength; +} + +sub print_normal($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." 
failed."); + $self->set_exitstatus(255); + } + if(not $self->virgin()) { + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $outputlength = 0; + my @output; + + if($Global::parset and $fdno == 1) { + $outputlength += $self->print_parset($fdno,$in_fh,$out_fh); + } elsif(defined $opt::tag or defined $opt::tagstring + or $Global::color or $opt::colorfailed) { + if($Global::color or $opt::colorfailed) { + my ($color,$reset_color) = $self->color(); + my $colortag = $color.$self->tag(); + # Read line by line + local $/ = "\n"; + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + chomp; + s{([\n\r])(?=.|$)}{$reset_color$1$colortag}gs; + print $out_fh $colortag,$_,$reset_color,"\n"; + } + } else { + my $tag = $self->tag(); + my $pretag = 1; + my $s; + while(sysread($in_fh,$buf,32767)) { + $outputlength += length $buf; + $buf =~ s/(?<=[\r\n])(?=.)/$tag/gs; + print $out_fh ($pretag ? $tag : ""),$buf; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, + ($pretag ? $tag : ""),$buf; + } + # Should next print start with a tag? + $s = substr($buf, -1); + # This is faster than ($s eq "\n") || ($s eq "\r") + $pretag = ($s eq "\n") ? 1 : ($s eq "\r"); + } + } + } else { + # Most efficient way of copying data from $in_fh to $out_fh + # Intel i7-3632QM: 25k- + while(sysread($in_fh,$buf,32767)) { + print $out_fh $buf; + $outputlength += length $buf; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, $buf; + } + } + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } +} + +sub print_results($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." 
failed."); + $self->set_exitstatus(255); + } + if(not $self->virgin()) { + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $outputlength = 0; + my @output; + + if($Global::membuffer) { + # Read data into membuffer + if($opt::tag or $opt::tagstring) { + # Read line by line + local $/ = "\n"; + my $tag = $self->tag(); + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs; + push @{$self->{'output'}{$fdno}}, $tag, $_; + } + } else { + # Most efficient way of copying data from $in_fh to $out_fh + while(sysread($in_fh,$buf,60000)) { + $outputlength += length $buf; + push @{$self->{'output'}{$fdno}}, $buf; + } + } + } else { + # Not membuffer: No need to read the file + if($opt::compress) { + $outputlength = -1; + } else { + # Determine $outputlength = file length + seek($in_fh, 0, 2) || ::die_bug("cannot seek result"); + $outputlength = tell($in_fh); + } + } + if($fdno == 1) { $self->add_returnsize($outputlength); } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } +} + +sub print_joblog($) { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = $self->wrapped(); + } + # Newlines make it hard to parse the joblog + $cmd =~ s/\n/\0/g; + print $Global::joblog + join("\t", $self->seq(), $self->sshlogin()->string(), + $self->starttime(), sprintf("%10.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd + ). "\n"; + flush $Global::joblog; + $self->set_job_in_joblog(); +} + +sub tag($) { + my $self = shift; + if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) { + if(defined $opt::tag or defined $opt::tagstring) { + $self->{'tag'} = + ($self->{'commandline'}-> + replace_placeholders([$opt::tagstring],0,0)). 
+ "\t"; + } else { + # No tag + $self->{'tag'} = ""; + } + } + return $self->{'tag'}; +} + +sub untabtag($) { + # tag with \t replaced with spaces + my $self = shift; + my $tag = $self->tag(); + if(not defined $self->{'untab'}{$tag}) { + $self->{'untab'}{$tag} = ::untabify($tag); + } + return $self->{'untab'}{$tag}; +} + +{ + my (@color,$eol,$reset_color,$init); + + sub init_color() { + if(not $init) { + $init = 1; + # color combinations that are readable: black/white text + # on colored background, but not white on yellow + my @color_combinations = + # Force each color code to have the same length in chars + # This will make \t work as expected + ((map { [sprintf("%03d",$_),"000"] } + 6..7,9..11,13..15,40..51,75..87,113..123,147..159, + 171..182,185..231,249..254), + (map { [sprintf("%03d",$_),231] } + 1..9,12..13,16..45,52..81,88..114,124..149, + 160..178,180,182..184,196..214,232..250)); + # reorder list so adjacent colors are dissimilar + # %23 and %7 were found experimentally + my @order = reverse sort { + (($a%23) <=> ($b%23)) + or + (($b%7) <=> ($a%7)); + } 0..$#color_combinations; + @order = @order[54 .. $#color_combinations, 0 .. 53]; + @color = map { + # TODO Can this be done with `tput` codes? 
+ "\033[48;5;".$_->[0].";38;5;".$_->[1]."m" + } @color_combinations[ @order ]; + + # clr_eol el = clear to end of line + $eol = `sh -c "tput el /dev/null`; + chomp($eol); + if($eol eq "") { $eol = "\033[K"; } + # exit_attribute_mode sgr0 = turn off all attributes + $reset_color = `sh -c "tput sgr0 /dev/null`; + chomp($reset_color); + if($reset_color eq "") { $reset_color = "\033[m"; } + } + } + + sub color($) { + my $self = shift; + if(not defined $self->{'color'}) { + if($Global::color) { + # Choose a value based on the seq + $self->{'color'} = $color[$self->seq() % ($#color+1)].$eol; + $self->{'reset_color'} = $reset_color; + } else { + $self->{'color'} = ""; + $self->{'reset_color'} = ""; + } + if($opt::colorfailed) { + if($self->exitstatus()) { + # White on Red + # Can this be done more generally? + $self->{'color'} = + "\033[48;5;"."196".";38;5;"."231"."m".$eol; + $self->{'reset_color'} = $reset_color; + } + } + } + return ($self->{'color'},$self->{'reset_color'}); + } +} + +sub hostgroups($) { + my $self = shift; + if(not defined $self->{'hostgroups'}) { + $self->{'hostgroups'} = + $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; + } + return @{$self->{'hostgroups'}}; +} + +sub exitstatus($) { + my $self = shift; + return $self->{'exitstatus'}; +} + +sub set_exitstatus($$) { + my $self = shift; + my $exitstatus = shift; + if($exitstatus) { + # Overwrite status if non-zero + $self->{'exitstatus'} = $exitstatus; + } else { + # Set status but do not overwrite + # Status may have been set by --timeout + $self->{'exitstatus'} ||= $exitstatus; + } + $opt::sqlworker and + $Global::sql->update("SET Exitval = ? 
WHERE Seq = ".$self->seq(), + $exitstatus); +} + +sub reset_exitstatus($) { + my $self = shift; + undef $self->{'exitstatus'}; +} + +sub exitsignal($) { + my $self = shift; + return $self->{'exitsignal'}; +} + +sub set_exitsignal($$) { + my $self = shift; + my $exitsignal = shift; + $self->{'exitsignal'} = $exitsignal; + $opt::sqlworker and + $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(), + $exitsignal); +} + +{ + my $total_jobs; + + sub should_we_halt { + # Should we halt? Immediately? Gracefully? + # Returns: N/A + my $job = shift; + my $limit; + if($Global::semaphore) { + # Emulate Bash's +128 if there is a signal + $Global::halt_exitstatus = + ($job->exitstatus() + or + $job->exitsignal() ? $job->exitsignal() + 128 : 0); + } + if($job->exitstatus() or $job->exitsignal()) { + # Job failed + $Global::exitstatus++; + $Global::total_failed++; + if($Global::halt_fail) { + ::status("$Global::progname: This job failed:", + $job->replaced()); + $limit = $Global::total_failed; + } + } elsif($Global::halt_success) { + ::status("$Global::progname: This job succeeded:", + $job->replaced()); + $limit = $Global::total_completed - $Global::total_failed; + } + if($Global::halt_done) { + ::status("$Global::progname: This job finished:", + $job->replaced()); + $limit = $Global::total_completed; + } + if(not defined $limit) { + return "" + } + # --halt # => 1..100 (number of jobs failed, 101 means > 100) + # --halt % => 1..100 (pct of jobs failed) + if($Global::halt_pct and not $Global::halt_count) { + $total_jobs ||= $Global::JobQueue->total_jobs(); + # From the pct compute the number of jobs that must fail/succeed + $Global::halt_count = $total_jobs * $Global::halt_pct; + } + if($limit >= $Global::halt_count) { + # At least N jobs have failed/succeded/completed + # or at least N% have failed/succeded/completed + # So we should prepare for exit + if($Global::halt_fail or $Global::halt_done) { + # Set exit status + if(not defined $Global::halt_exitstatus) { + 
if($Global::halt_pct) { + # --halt now,fail=X% or soon,fail=X% + # --halt now,done=X% or soon,done=X% + $Global::halt_exitstatus = + ::ceil($Global::total_failed / $total_jobs * 100); + } elsif($Global::halt_count) { + # --halt now,fail=X or soon,fail=X + # --halt now,done=X or soon,done=X + $Global::halt_exitstatus = + ::min($Global::total_failed,101); + } + if($Global::halt_count and $Global::halt_count == 1) { + # --halt now,fail=1 or soon,fail=1 + # --halt now,done=1 or soon,done=1 + # Emulate Bash's +128 if there is a signal + $Global::halt_exitstatus = + ($job->exitstatus() + or + $job->exitsignal() ? $job->exitsignal() + 128 : 0); + } + } + ::debug("halt","Pct: ",$Global::halt_pct, + " count: ",$Global::halt_count, + " status: ",$Global::halt_exitstatus,"\n"); + } elsif($Global::halt_success) { + $Global::halt_exitstatus = 0; + } + if($Global::halt_when eq "soon") { + $Global::start_no_new_jobs ||= 1; + if(scalar(keys %Global::running) > 0) { + # Only warn if there are more jobs running + ::status + ("$Global::progname: Starting no more jobs. ". + "Waiting for ". (keys %Global::running). 
+ " jobs to finish."); + } + } + return($Global::halt_when); + } + return ""; + } +} + + +package CommandLine; + +sub new($) { + my $class = shift; + my $seq = shift; + my $commandref = shift; + $commandref || die; + my $arg_queue = shift; + my $context_replace = shift; + my $max_number_of_args = shift; # for -N and normal (-n1) + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my $replacecount_ref = shift; + my $len_ref = shift; + my %replacecount = %$replacecount_ref; + my %len = %$len_ref; + for (keys %$replacecount_ref) { + # Total length of this replacement string {} replaced with all args + $len{$_} = 0; + } + return bless { + 'command' => $commandref, + 'seq' => $seq, + 'len' => \%len, + 'arg_list' => [], + 'arg_list_flat' => [], + 'arg_list_flat_orig' => [undef], + 'arg_queue' => $arg_queue, + 'max_number_of_args' => $max_number_of_args, + 'replacecount' => \%replacecount, + 'context_replace' => $context_replace, + 'transfer_files' => $transfer_files, + 'return_files' => $return_files, + 'template_names' => $template_names, + 'template_contents' => $template_contents, + 'replaced' => undef, + }, ref($class) || $class; +} + +sub flush_cache() { + my $self = shift; + for my $arglist (@{$self->{'arg_list'}}) { + for my $arg (@$arglist) { + $arg->flush_cache(); + } + } + $self->{'arg_queue'}->flush_cache(); + $self->{'replaced'} = undef; +} + +sub seq($) { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq($$) { + my $self = shift; + $self->{'seq'} = shift; +} + +sub slot($) { + # Find the number of a free job slot and return it + # Uses: + # @Global::slots - list with free jobslots + # Returns: + # $jobslot = number of jobslot + my $self = shift; + if(not $self->{'slot'}) { + if(not @Global::slots) { + # $max_slot_number will typically be $Global::max_jobs_running + push @Global::slots, ++$Global::max_slot_number; + } + $self->{'slot'} = shift @Global::slots; + } + return 
$self->{'slot'}; +} + +{ + my $already_spread; + my $darwin_max_len; + + sub populate($) { + # Add arguments from arg_queue until the number of arguments or + # max line length is reached + # Uses: + # $Global::usable_command_line_length + # $opt::cat + # $opt::fifo + # $Global::JobQueue + # $opt::m + # $opt::X + # $Global::max_jobs_running + # Returns: N/A + my $self = shift; + my $next_arg; + my $max_len = $Global::usable_command_line_length || die; + if($^O eq "darwin") { + # Darwin's limit is affected by: + # * number of environment names (variables+functions) + # * size of environment + # * the length of arguments: + # a one-char argument lowers the limit by 5 + # To be safe assume all arguments are one-char + # The max_len is cached between runs, but if the size of + # the environment is different we need to recompute the + # usable max length for this run of GNU Parallel + # See https://unix.stackexchange.com/a/604943/2972 + if(not $darwin_max_len) { + my $envc = (keys %ENV); + my $envn = length join"",(keys %ENV); + my $envv = length join"",(values %ENV); + $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10; + ::debug("init", + "length: $darwin_max_len ". 
+ "3+($max_len - $envn - $envv)/5 - $envc*2"); + } + $max_len = $darwin_max_len; + } + if($opt::cat or $opt::fifo) { + # Get the empty arg added by --pipepart (if any) + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + # $PARALLEL_TMP will point to a tempfile that will be used as {} + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> + unget([Arg->new('"$PARALLEL_TMP"')]); + } + while (not $self->{'arg_queue'}->empty()) { + $next_arg = $self->{'arg_queue'}->get(); + if(not defined $next_arg) { + next; + } + $self->push($next_arg); + if($self->len() >= $max_len) { + # Command length is now > max_length + # If there are arguments: remove the last + # If there are no arguments: Error + # TODO stuff about -x opt_x + if($self->number_of_args() > 1) { + # There is something to work on + $self->{'arg_queue'}->unget($self->pop()); + last; + } else { + my $args = join(" ", map { $_->orig() } @$next_arg); + ::error("Command line too long (". + $self->len(). " >= ". + $max_len. + ") at input ". + $self->{'arg_queue'}->arg_number(). + ": ". + ((length $args > 50) ? + (substr($args,0,50))."..." 
: + $args)); + $self->{'arg_queue'}->unget($self->pop()); + ::wait_and_exit(255); + } + } + + if(defined $self->{'max_number_of_args'}) { + if($self->number_of_args() >= $self->{'max_number_of_args'}) { + last; + } + } + } + if(($opt::m or $opt::X) and not $already_spread + and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { + # -m or -X and EOF => Spread the arguments over all jobslots + # (unless they are already spread) + $already_spread ||= 1; + if($self->number_of_args() > 1) { + $self->{'max_number_of_args'} = + ::ceil($self->number_of_args()/$Global::max_jobs_running); + $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = + $self->{'max_number_of_args'}; + $self->{'arg_queue'}->unget($self->pop_all()); + while($self->number_of_args() < $self->{'max_number_of_args'}) { + $self->push($self->{'arg_queue'}->get()); + } + } + $Global::JobQueue->flush_total_jobs(); + } + + if($opt::sqlmaster) { + # Insert the V1..Vn for this $seq in SQL table + # instead of generating one + $Global::sql->insert_records($self->seq(), $self->{'command'}, + $self->{'arg_list_flat_orig'}); + } + } +} + +sub push($) { + # Add one or more records as arguments + # Returns: N/A + my $self = shift; + my $record = shift; + push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record; + push @{$self->{'arg_list_flat'}}, @$record; + push @{$self->{'arg_list'}}, $record; + # Make @arg available for {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + my $col; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(-?\d+)(?:\D.*|)$/) { + # Positional replacement string + # Deal with negative positional replacement string + $col = ($1 < 0) ? 
$1 : $1-1; + if(defined($record->[$col])) { + $self->{'len'}{$perlexpr} += + length $record->[$col]->replace($perlexpr,$quote_arg,$self); + } + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} += + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } +} + +sub pop($) { + # Remove last argument + # Returns: + # the last record + my $self = shift; + my $record = pop @{$self->{'arg_list'}}; + # pop off arguments from @$record + splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1; + splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1; + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(\d+) /) { + # Positional + defined($record->[$1-1]) or next; + $self->{'len'}{$perlexpr} -= + length $record->[$1-1]->replace($perlexpr,$quote_arg,$self); + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} -= + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } + return $record; +} + +sub pop_all($) { + # Remove all arguments and zeros the length of replacement perlexpr + # Returns: + # all records + my $self = shift; + my @popped = @{$self->{'arg_list'}}; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$perlexpr} = 0; + } + $self->{'arg_list'} = []; + $self->{'arg_list_flat_orig'} = [undef]; + $self->{'arg_list_flat'} = []; + return @popped; +} + +sub number_of_args($) { + # The number of records + # Returns: + # number of records + my $self = shift; + # This is really the number of records + return $#{$self->{'arg_list'}}+1; +} + +sub number_of_recargs($) { + # The number of args in records + # Returns: + # number of args records + my $self = shift; + my $sum = 0; + my $nrec = scalar @{$self->{'arg_list'}}; + if($nrec) { + $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); + } + return $sum; +} + +sub args_as_string($) { + # Returns: + # all 
unmodified arguments joined with ' ' (similar to {}) + my $self = shift; + return (join " ", map { $_->orig() } + map { @$_ } @{$self->{'arg_list'}}); +} + +sub results_out($) { + sub max_file_name_length { + # Figure out the max length of a subdir + # TODO and the max total length + # Ext4 = 255,130816 + # Uses: + # $Global::max_file_length is set + # Returns: + # $Global::max_file_length + my $testdir = shift; + + my $upper = 100_000_000; + # Dir length of 8 chars is supported everywhere + my $len = 8; + my $dir = "d"x$len; + do { + rmdir($testdir."/".$dir); + $len *= 16; + $dir = "d"x$len; + } while ($len < $upper and mkdir $testdir."/".$dir); + # Then search for the actual max length between $len/16 and $len + my $min = $len/16; + my $max = $len; + while($max-$min > 5) { + # If we are within 5 chars of the exact value: + # it is not worth the extra time to find the exact value + my $test = int(($min+$max)/2); + $dir = "d"x$test; + if(mkdir $testdir."/".$dir) { + rmdir($testdir."/".$dir); + $min = $test; + } else { + $max = $test; + } + } + $Global::max_file_length = $min; + return $min; + } + + my $self = shift; + my $out = $self->replace_placeholders([$opt::results],0,0); + if($out eq $opt::results) { + # $opt::results simple string: Append args_as_dirname + my $args_as_dirname = $self->args_as_dirname(0); + # Output in: prefix/name1/val1/name2/val2/stdout + $out = $opt::results."/".$args_as_dirname; + if(-d $out or eval{ File::Path::mkpath($out); }) { + # OK + } else { + # mkpath failed: Argument too long or not quoted + # Set $Global::max_file_length, which will keep the individual + # dir names shorter than the max length + max_file_name_length($opt::results); + # Quote dirnames with + + $args_as_dirname = $self->args_as_dirname(1); + # prefix/name1/val1/name2/val2/ + $out = $opt::results."/".$args_as_dirname; + File::Path::mkpath($out); + } + $out .="/"; + } else { + if($out =~ m:/$:s) { + # / = dir + if(-d $out or eval{ File::Path::mkpath($out); }) { + # 
OK + } else { + ::error("Cannot make dir '$out'."); + ::wait_and_exit(255); + } + } else { + $out =~ m:(.*)/:s; + File::Path::mkpath($1); + } + } + return $out; +} + +{ + my %map; + my %stringmap; + my $sep; + + # test: '' . .. a. a.. + ++ 0..255 on fat12 ext4 + sub args_as_dirname($) { + # Returns: + # all arguments joined with '/' (similar to {}) + # Chars that are not safe on all file systems are quoted. + sub init() { + # ext4: / \t \n \0 \\ \r + # fat: 0..31 " * / : < > ? \ | Maybe also: # [ ] ; = , + # exfat: 128..255 + # Other FS: , [ ] { } ( ) ! ; " ' * ? < > | + # + # Quote these as: + # + = ++ + # \0 = +0 + # \t = +t + # \\ = +b (backslash) + # \n = +n + # \r = +r + # / = +z (zlash) + # ? = +y (whY?) + # " = +d (double quote) + # ' = +q (quote) + # * = +a (asterisk) + # < = +l (less than) + # > = +g (greater than) + # : = +k (kolon) + # ! = +x (eXclamation) + # | = +p (pipe) + # # = +h (hash) + # ; = +s (semicolon) + # = = +e (equal) + # , = +c (comma) + # 1..32 128..255 = +XX (hex value) + # [ ] = +e +f + # ( ) = +i +j + # { } = +v +w + # Quote '' as +m (eMpty) + # Quote . as +_ + # Quote .. as +__ + # (Unused: ou) + %map = qw( + + ++ + \0 +0 + \t +t + \\ +b + \n +n + \r +r + / +z + ? +y + " +d + ' +q + * +a + < +l + > +g + : +k + ! +x + | +p + # +h + ; +s + = +e + , +c + [ +e + ( +i + { +v + ] +f + ) +j + } +w + ); + # 1..32 128..255 = +XX (hex value) + map { $map{sprintf "%c",$_} = sprintf "+%02x",$_ } 1..32, 128..255; + # Default value = itself + map { $map{sprintf "%c",$_} ||= sprintf "%c",$_ } 0..255; + # Quote '' as +m (eMpty) + $stringmap{""} = "+m"; + # Quote . as +_ + $stringmap{"."} = "+_"; + # Quote .. 
as +__ + $stringmap{".."} = "+__"; + # Set dir separator + eval 'use File::Spec; $sep = File::Spec->catfile("", "");'; + $sep ||= '/'; + } + # If $Global::max_file_length: Keep subdirs < $Global::max_file_length + my $self = shift; + my $quote = shift; + my @res = (); + if(not $sep) { init(); } + + for my $rec_ref (@{$self->{'arg_list'}}) { + # If headers are used, sort by them. + # Otherwise keep the order from the command line. + my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); + for my $n (@header_indexes_sorted) { + CORE::push(@res, + $Global::input_source_header{$n}, + $quote ? + ( + grep { $_ ne "\0noarg" } map { + my $s = $_; + # Quote + as ++ + $s =~ s/(.)/$map{$1}/gs; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig() + ) : + ( + grep { $_ ne "\0noarg" } map { + my $s = $_; + # Quote / as +z and + as ++ + $s =~ s/($sep|\+)/$map{$1}/gos; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig() + ) + ); + } + } + return join $sep, map { $stringmap{$_} || $_ } @res; + } +} + +sub header_indexes_sorted($) { + # Sort headers first by number then by name. + # E.g.: 1a 1b 11a 11b + # Returns: + # Indexes of %Global::input_source_header sorted + my $max_col = shift; + + no warnings 'numeric'; + for my $col (1 .. $max_col) { + # Make sure the header is defined. If it is not: use column number + if(not defined $Global::input_source_header{$col}) { + $Global::input_source_header{$col} = $col; + } + } + my @header_indexes_sorted = sort { + # Sort headers numerically then asciibetically + $Global::input_source_header{$a} <=> $Global::input_source_header{$b} + or + $Global::input_source_header{$a} cmp $Global::input_source_header{$b} + } 1 .. 
$max_col; + return @header_indexes_sorted; +} + +sub len($) { + # Uses: + # @opt::shellquote + # The length of the command line with args substituted + my $self = shift; + my $len = 0; + # Add length of the original command with no args + # Length of command w/ all replacement args removed + $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; + ::debug("length", "noncontext + command: $len\n"); + # MacOS has an overhead of 8 bytes per argument + my $darwin = ($^O eq "darwin") ? 8 : 0; + my $recargs = $self->number_of_recargs(); + if($self->{'context_replace'}) { + # Context is duplicated for each arg + $len += $recargs * $self->{'len'}{'context'}; + for my $replstring (keys %{$self->{'replacecount'}}) { + # If the replacements string is more than once: mulitply its length + $len += $self->{'len'}{$replstring} * + $self->{'replacecount'}{$replstring}; + ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", + $self->{'replacecount'}{$replstring}, "\n"); + } + # echo 11 22 33 44 55 66 77 88 99 1010 + # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 + # 5 + ctxgrp*arg + ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, + " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); + # Add space between context groups + $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); + if($darwin) { + $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin; + } + } else { + # Each replacement string may occur several times + # Add the length for each time + $len += 1*$self->{'len'}{'context'}; + ::debug("length", "context+noncontext + command: $len\n"); + for my $replstring (keys %{$self->{'replacecount'}}) { + # (space between recargs + length of replacement) + # * number this replacement is used + $len += ($recargs -1 + $self->{'len'}{$replstring}) * + $self->{'replacecount'}{$replstring}; + if($darwin) { + $len += ($recargs * $self->{'replacecount'}{$replstring} + * $darwin); + } + } + } + if(defined $Global::parallel_env) { + # If 
we are using --env, add the prefix for that, too. + $len += length $Global::parallel_env; + } + if($Global::quoting) { + # Pessimistic length if -q is set + # Worse than worst case: ' => "'" + " => '"' + # TODO can we count the number of expanding chars? + # and count them in arguments, too? + $len *= 3; + } + if(@opt::shellquote) { + # Pessimistic length if --shellquote is set + # Worse than worst case: ' => "'" + for(@opt::shellquote) { + $len *= 3; + } + $len *= 5; + } + if(@opt::sshlogin) { + # Pessimistic length if remote + # Worst case is BASE64 encoding 3 bytes -> 4 bytes + $len = int($len*4/3); + } + return $len; +} + +sub replaced($) { + # Uses: + # $Global::quote_replace + # $Global::quoting + # Returns: + # $replaced = command with place holders replaced and prepended + my $self = shift; + if(not defined $self->{'replaced'}) { + # Don't quote arguments if the input is the full command line + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP + $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg; + $self->{'replaced'} = $self-> + replace_placeholders($self->{'command'},$Global::quoting, + $quote_arg); + my $len = length $self->{'replaced'}; + if ($len != $self->len()) { + ::debug("length", $len, " != ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } else { + ::debug("length", $len, " == ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } + } + return $self->{'replaced'}; +} + +sub replace_placeholders($$$$) { + # Replace foo{}bar with fooargbar + # Input: + # $targetref = command as shell words + # $quote = should everything be quoted? + # $quote_arg = should replaced arguments be quoted? 
+ # Uses: + # @Arg::arg = arguments as strings to be use in {= =} + # Returns: + # @target with placeholders replaced + my $self = shift; + my $targetref = shift; + my $quote = shift; + my $quote_arg = shift; + my %replace; + + # Token description: + # \0spc = unquoted space + # \0end = last token element + # \0ign = dummy token to be ignored + # \177<...\177> = replacement expression + # " " = quoted space, that splits -X group + # text = normal text - possibly part of -X group + my $spacer = 0; + my @tokens = grep { length $_ > 0 } map { + if(/^\177<|^ $/) { + # \177<...\177> or space + $_ + } else { + # Split each space/tab into a token + split /(?=\s)|(?<=\s)/ + } + } + # Split \177< ... \177> into own token + map { split /(?=\177<)|(?<=\177>)/ } + # Insert "\0spc" between every element + # This space should never be quoted + map { $spacer++ ? ("\0spc",$_) : $_ } + map { $_ eq "" ? "\0empty" : $_ } + @$targetref; + + if(not @tokens) { + # @tokens is empty: Return empty array + return @tokens; + } + ::debug("replace", "Tokens ".join":",@tokens,"\n"); + # Make it possible to use $arg[2] in {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + # Flat list: + # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] + # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ] + if(not @{$self->{'arg_list_flat'}}) { + @{$self->{'arg_list_flat'}} = Arg->new(""); + } + my $argref = $self->{'arg_list_flat'}; + # Number of arguments - used for positional arguments + my $n = $#$argref+1; + + # $self is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = $self; + # @replaced = tokens with \177< \177> replaced + my @replaced; + if($self->{'context_replace'}) { + my @ctxgroup; + for my $t (@tokens,"\0end") { + # \0end = last token was end of tokens. 
+ if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") { + # Context group complete: Replace in it + if(grep { /^\177} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace + $argref->[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }sgxe) { + # Token is \177<..\177> + } else { + if($Global::escape_string_present) { + # Command line contains \177: + # Unescape it \177\256 => \177 + $a =~ s/\177\176/\177/g; + } + } + $a + } @ctxgroup; + $normal_replace or last; + $space = "\0spc"; + } + } else { + # Context group has no a replacement string: Copy it once + CORE::push @replaced, map { + if($Global::escape_string_present) { + # Command line contains \177: + # Unescape it \177\176 => \177 + $a =~ s/\177\176/\177/g; + } + $_; + } @ctxgroup; + } + # New context group + @ctxgroup=(); + } + if($t eq "\0spc" or $t eq " ") { + CORE::push @replaced,$t; + } else { + CORE::push @ctxgroup,$t; + } + } + } else { + # @group = @token + # Replace in group + # Push output + # repquote = no if {} first on line, no if $quote, yes otherwise + for my $t (@tokens) { + if($t =~ /^\177} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($argref->[$1 > 0 ? $1-1 : $n+$1] ? + # If defined: replace + $argref->[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }sgxe; + CORE::push @replaced, $space, $a; + $normal_replace or last; + $space = "\0spc"; + } + } else { + # No replacement + CORE::push @replaced, map { + $Global::escape_string_present and s/\177\176/\177/g; $_; + } $t; + } + } + } + *Arg::arg = []; + ::debug("replace","Replaced: ".join":",@replaced,"\n"); + + # Put tokens into groups that may be quoted. 
+ my @quotegroup; + my @quoted; + for (map { $_ eq "\0empty" ? "" : $_ } + grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" } + @replaced, "\0end") { + if($_ eq "\0spc" or $_ eq "\0end") { + # \0spc splits quotable groups + if($quote) { + if(@quotegroup) { + CORE::push @quoted, ::Q(join"",@quotegroup);; + } + } else { + CORE::push @quoted, join"",@quotegroup; + } + @quotegroup = (); + } else { + CORE::push @quotegroup, $_; + } + } + ::debug("replace","Quoted: ".join":",@quoted,"\n"); + return wantarray ? @quoted : "@quoted"; +} + +sub skip($) { + # Skip this job + my $self = shift; + $self->{'skip'} = 1; +} + + +package CommandLineQueue; + +sub new($) { + sub merge_rpl_parts(@) { + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + # Input: + # @in = the @command as given by the user + # Uses: + # $Global::parensleft + # $Global::parensright + # Returns: + # @command with parts merged to keep {= and =} as one + my @in = @_; + my @out; + my $l = quotemeta($Global::parensleft); + my $r = quotemeta($Global::parensright); + + while(@in) { + my $s = shift @in; + $_ = $s; + # Remove matching (right most) parens + while(s/(.*)$l.*?$r/$1/os) {} + if(/$l/o) { + # Missing right parens + while(@in) { + $s .= " ".shift @in; + $_ = $s; + while(s/(.*)$l.*?$r/$1/os) {} + if(not /$l/o) { + last; + } + } + } + push @out, $s; + } + return @out; + } + + sub escape_177($) { + # Escape \177 => \177\176 + my $s = shift; + $Global::escape_string_present += $s =~ s/\177/\177\176/g; + return $s; + } + + sub replace_parens($) { + # Needs to match rightmost left parens (Perl defaults to leftmost) + # to deal with: {={==} and {={==}=} + # Replace {= -> \177< and =} -> \177> + # + # Complex way to do: + # s/{=(.*)=}/\177<$1\177>/g + # which would not work + my $s = shift; + $s =~ s[\Q$Global::parensleft\E # Match {= + # Match . unless the next string is {= or =} + # needed to force matching the shortest {= =} + ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?) 
+ \Q$Global::parensright\E ] # Match =} + {\177<$1\177>}gxs; + # Now {= perlexpr =} => \177< perlexpr \177> + return $s; + } + + sub replace_rpl_def($) { + my $s = shift; + # Replace rpl-definitions with the corresponding perl code + for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) { + # Replace long --rpl's before short ones, as a short may be a + # substring of a long: + # --rpl '% s/a/b/' --rpl '%% s/b/a/' + # + # Replace the shorthand string (--rpl) + # with the {= perl expr =} + # + # Avoid searching for shorthand strings inside existing {= perl expr =} + # + # Replace $$1 in {= perl expr =} with groupings in shorthand string + # + # parallel --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2_REPLACE/g;' \ + # echo {/.gz/.lz} ::: UU.tar.gz + # + # {/.gz/.lz} => + # \177< + # $_pAr_gRp1 = ".gz"; + # $_pAr_gRp2 = ".lz"; + # s/${_pAr_gRp1}/${_pAr_gRp2}_REPLACE/g; + # \177> + # {/.gz/.lz:%8.2s} => + # \177< + # $_pAr_gRp1 = ".gz"; + # $_pAr_gRp2 = ".lz"; + # s/${_pAr_gRp1}/${_pAr_gRp2}_REPLACE/g;; + # $_ = sprintf("%8.2f",$_); + # \177> + # + sub replacer_rpl { + my $rpl = shift; + my $rv = $Global::rpl{$rpl}; + return replacer($rv,@_); + } + sub replacer { + my $rv = shift; + my $unchanged = shift; + my $position = shift; + my $grp_regexp = shift; + my $grp_string = shift; + my $formatstring = shift; + $grp_string =~ /^${grp_regexp}$/ or + ::die_bug("Match failed: '$grp_regexp' on $grp_string"); + # Dummy entry to start $grp[n] at 1. + my @grp = (1); + if($] >= 5.010) { + @grp = (1, @{^CAPTURE}); + } else { + for(my $i = 1; defined $grp[$#grp]; $i++) { + push @grp, eval '$'.$i; + } + } + # replace $$1 with ${_pAr_gRp1}, $$2 with ${_pAr_gRp2} + # in the code to be executed + $rv =~ s/\$\$ (\d+)/\$\{_pAr_gRp$1\}/gx; + # prepend with $_pAr_gRp1 = perlquote($1), + my $set_args = ""; + for(my $i = 1;defined $grp[$i]; $i++) { + $set_args .= "\$_pAr_gRp$i = \"" . + ::perl_quote_scalar($grp[$i]) . 
"\";"; + } + # :%8.2f => %8.2f + $formatstring =~ s/^://; + my $formatcode = ""; + if(length $formatstring > 0) { + $formatcode = ";\$_ = sprintf('$formatstring',\$_);"; + } + if($position =~ s/\.(\d+)//) { + # {2.3} => There must be a corresponding --match + my $field = $1; + my $re = $opt::match[$position-1]; + if($re=~/^\+(\d+)/) { + # multiple --match: Reuse --match $1 + $re = $opt::match[$1-1]; + } + if(not defined $re) { + ::error("{$position.$field} requires ". + "corresponding --match"); + ::wait_and_exit(255); + } + if($re =~ /\001/) { + ::error("\\001 is not supported in --match"); + ::wait_and_exit(255); + } + $rv = "m\001$re\001 or ". + "::warning(\"'\$_' did not match '$re'\"); ". + "\$_ = \$$field; $rv"; + } + ::debug("rpl","match: $rv ¤ $unchanged ¤ $position ¤ ". + "$set_args ¤ $rv ¤ $grp_regexp ¤ $formatstring\n"); + return($unchanged . "\177<" . $position . $set_args . + $rv . $formatcode. "\177>"); + } + if($rpl =~ /^\{/) { + my ($prefix,$grp_regexp,$postfix) = + # Ignore { and } + $rpl =~ /^ \{ # { + ( [^(]* ) # Prefix (no '{' ) - e.g. %% + ( \(.*\) )? # Group capture regexp - e.g (.*) + ( [^)]* ) # Postfix (no '}' ) - e.g. end + \} $ # } + /xs; + my $format_regexp = ":%.*?"; + q{ + # Regexp using named captures - kept for documentation + # It is easier to understand than the backward compatible version + # Look for: { position prefix group format postfix } + while($s =~ + s{(? (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + \{ + (? -?\d+(?:\.\d+)? \s*)? + \Q$prefix\E + (?$grp_regexp) + \Q$postfix\E + (? $format_regexp) + \} + } + { + replacer_rpl($rpl, $+{unchanged}, + $+{position}, $grp_regexp, + $+{grp}, $+{format}); + }gsex){}; + # Look for: { position prefix group postfix } + while($s =~ + s{(? (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + \{ + (? -?\d+(?:\.\d+)? \s*)? 
+ \Q$prefix\E + (?$grp_regexp) + \Q$postfix\E + \} + } + { + replacer_rpl($rpl, $+{unchanged}, + $+{position}, $grp_regexp, + $+{grp}); + }gsex){} + }; + { + # This a rewrite of the above to perl 5.8 + # (does not use $+{...} which was introduced in 5.010 + # Look for: { position prefix group format postfix } + while($s =~ + s{( (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + \{ + (-?\d+(?:\.\d+)? \s*)? + \Q$prefix\E + ($grp_regexp) + \Q$postfix\E + ($format_regexp) + \} + } + { + replacer_rpl($rpl, $1, $2, $grp_regexp, $3, $+); + }gsex){} + # Look for: { position prefix group postfix } + # (Same above - except "format") + while($s =~ + s{( (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + \{ + (-?\d+(?:\.\d+)? \s*)? + \Q$prefix\E + ($grp_regexp) + \Q$postfix\E + \} + } + { + replacer_rpl($rpl, $1, $2, $grp_regexp, $3); + }gsex){} + } + } else { + my ($prefix,$grp_regexp,$postfix) = + $rpl =~ /^( [^(]* ) # Prefix - e.g. {%% + ( \(.*\) )? # Group capture regexp - e.g (.*) + ( [^)]* )$ # Postfix - e.g } + /xs; + q { + # Regexp using named captures - kept for documentation + # Look for: prefix group postfix + while($s =~ + s{(? (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + (?) + \Q$prefix\E \s* + (?$grp_regexp) + \Q$postfix\E + } + { + replacer_rpl($rpl, $+{unchanged}, $+{position}, + $grp_regexp, $+{grp}); + }gsex){}; + }; + { + # This a rewrite of the above to perl 5.8 + # (does not use $+{...} which was introduced in 5.010 + # Look for: prefix group postfix + while($s =~ + s{( (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) 
+ () + \Q$prefix\E \s* + ($grp_regexp) + \Q$postfix\E + } + { + replacer_rpl($rpl, $1, $2, $grp_regexp, $3); + }gsex){}; + } + } + } + # Perl positional replacement fields {=1.2 perlexpr =} + while($s =~ + s{ \177< # {= + (-?\d+(?:\.\d+)\s*) # position + (([^\177]+|\177\177)*) # perl expr + \177> # =} + } + { replacer($2, '', $1,''); }gsex){}; + return $s; + } + + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift || 0; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my @unget = (); + my $posrpl; + my ($replacecount_ref, $len_ref); + my @command = @$commandref; + my $seq = 1; + + # Replace replacement strings with {= perl expr =} + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + @command = merge_rpl_parts(@command); + + # Protect matching inside {= perl expr =} + # by replacing {= and =} with \177< and \177> + # in options that can contain replacement strings: + # @command, --transferfile, --return, + # --tagstring, --workdir, --results + for(@command, @$transfer_files, @$return_files, + @$template_names, @$template_contents, + $opt::tagstring, $opt::workdir, $opt::results, $opt::retries, + @opt::filter) { + # Skip if undefined + defined($_) or next; + # Escape \177 => \177\176 + $_ = escape_177($_); + # {= perl expr =} => \177< perl expr \177> + $_ = replace_parens($_); + # Replace rpl-definitions with the corresponding perl code + $_ = replace_rpl_def($_); + } + + # Add {} if no replacement strings in @command + ($replacecount_ref, $len_ref, @command) = + replacement_counts_and_lengths($transfer_files, $return_files, + $template_names, $template_contents, + @command); + if("@command" =~ /^[^ \t\n=]*\177append()) { + $seq = $Global::sql->max_seq() + 1; + } + + return bless { + ('unget' => \@unget, + 'command' => \@command, + 'replacecount' => $replacecount_ref, + 'arg_queue' => 
RecordQueue->new($read_from,$opt::colsep), + 'context_replace' => $context_replace, + 'len' => $len_ref, + 'max_number_of_args' => $max_number_of_args, + 'size' => undef, + 'transfer_files' => $transfer_files, + 'return_files' => $return_files, + 'template_names' => $template_names, + 'template_contents' => $template_contents, + 'seq' => $seq, + ) + }, ref($class) || $class; +} + + + +sub replacement_counts_and_lengths($$@) { + # Count the number of different replacement strings. + # Find the lengths of context for context groups and non-context + # groups. + # If no {} found in @command: add it to @command + # + # Input: + # \@transfer_files = array of filenames to transfer + # \@return_files = array of filenames to return + # \@template_names = array of names to copy to + # \@template_contents = array of contents to write + # @command = command template + # Output: + # \%replacecount, \%len, @command + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my @command = @_; + my (%replacecount,%len); + my $sum = 0; + while($sum == 0) { + # Count how many times each replacement string is used + my @cmd = @command; + my $contextlen = 0; + my $noncontextlen = 0; + my $contextgroups = 0; + for my $c (@cmd) { + while($c =~ s/ \177<( (?: [^\177]*|[\177][^<>] )*?)\177> /\000/xs) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "s/a/b/" => 2 } + $replacecount{$1}++; + $sum++; + } + # Measure the length of the context around the {= perl expr =} + # Use that {=...=} has been replaced with \000 above + # So there is no need to deal with \177< + while($c =~ s/ (\S*\000\S*) //xs) { + my $w = $1; + $w =~ tr/\000//d; # Remove all \000's + $contextlen += length($w); + $contextgroups++; + } + # All {= perl expr =} have been removed: The rest is non-context + $noncontextlen += length $c; + } + for(@$transfer_files, @$return_files, + @$template_names, @$template_contents, + @opt::filter, + 
$opt::tagstring, $opt::workdir, $opt::results, $opt::retries) { + # Options that can contain replacement strings + defined($_) or next; + my $t = $_; + while($t =~ s/ \177<( (?: [^\177]*|[\177][^<>] )* )\177> //xs) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1} ||= 1; + } + } + if($opt::bar) { + # If the command does not contain {} force it to be computed + # as it is being used by --bar + $replacecount{""} ||= 1; + } + + $len{'context'} = 0+$contextlen; + $len{'noncontext'} = $noncontextlen; + $len{'contextgroups'} = $contextgroups; + $len{'noncontextgroups'} = @cmd-$contextgroups; + ::debug("length", "@command Context: ", $len{'context'}, + " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, + " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); + if($sum == 0) { + if(not @command) { + # Default command = {} + @command = ("\177<\177>"); + } elsif(($opt::pipe or $opt::pipepart) + and not $opt::fifo and not $opt::cat) { + # With --pipe / --pipe-part you can have no replacement + last; + } else { + # Append {} to the command if there are no {...}'s and no {=...=} + push @command, ("\177<\177>"); + } + } + } + return(\%replacecount,\%len,@command); +} + +sub get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + my $cmd_line = shift @{$self->{'unget'}}; + return ($cmd_line); + } else { + if($opt::sqlworker) { + # Get the sequence number from the SQL table + $self->set_seq($SQL::next_seq); + # Get the command from the SQL table + $self->{'command'} = $SQL::command_ref; + my @command; + # Recompute replace counts based on the read command + ($self->{'replacecount'}, + $self->{'len'}, @command) = + replacement_counts_and_lengths($self->{'transfer_files'}, + $self->{'return_files'}, + $self->{'template_name'}, + $self->{'template_contents'}, + @$SQL::command_ref); + if("@command" =~ /^[^ \t\n=]*\177new($self->seq(), + $self->{'command'}, + 
$self->{'arg_queue'}, + $self->{'context_replace'}, + $self->{'max_number_of_args'}, + $self->{'transfer_files'}, + $self->{'return_files'}, + $self->{'template_names'}, + $self->{'template_contents'}, + $self->{'replacecount'}, + $self->{'len'}, + ); + $cmd_line->populate(); + ::debug("run","cmd_line->number_of_args ", + $cmd_line->number_of_args(), "\n"); + if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) { + if($cmd_line->replaced() eq "") { + # Empty command - pipe requires a command + ::error("--pipe/--pipepart must have a command to pipe into ". + "(e.g. 'cat')."); + ::wait_and_exit(255); + } + } elsif($cmd_line->number_of_args() == 0) { + # We did not get more args - maybe at EOF string? + return undef; + } + $self->set_seq($self->seq()+1); + return $cmd_line; + } +} + +sub unget($) { + my $self = shift; + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_queue'}->empty(); + ::debug("run", "CommandLineQueue->empty $empty"); + return $empty; +} + +sub seq($) { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq($$) { + my $self = shift; + $self->{'seq'} = shift; +} + +sub quote_args($) { + my $self = shift; + # If there is not command emulate |bash + return $self->{'command'}; +} + + +package Limits::Command; + +# Maximal command line length (for -m and -X) +sub max_length($) { + # Find the max_length of a command line and cache it + # Returns: + # number of chars on the longest command line allowed + if(not $Limits::Command::line_max_len) { + # Disk cache of max command line length + my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() . 
+ "/linelen"; + my $cached_limit; + local $/ = undef; + if(open(my $fh, "<", $len_cache)) { + $cached_limit = <$fh>; + $cached_limit || ::warning("Invalid content in $len_cache"); + close $fh; + } + if(not $cached_limit) { + $cached_limit = real_max_length(); + # If $HOME is write protected: Do not fail + my $dir = ::dirname($len_cache); + -d $dir or eval { File::Path::mkpath($dir); }; + open(my $fh, ">", $len_cache.$$); + print $fh $cached_limit; + close $fh; + rename $len_cache.$$, $len_cache || ::die_bug("rename cache file"); + } + $Limits::Command::line_max_len = tmux_length($cached_limit); + } + return int($Limits::Command::line_max_len); +} + +sub real_max_length() { + # Find the max_length of a command line + # Returns: + # The maximal command line length with 1 byte arguments + # return find_max(" c"); + return find_max("c"); +} + +sub find_max($) { + my $string = shift; + # This is slow on Cygwin, so give Cygwin users a warning + if($^O eq "cygwin" or $^O eq "msys") { + ::warning("Finding the maximal command line length. ". 
+ "This may take up to 1 minute.") + } + # Use an upper bound of 100 MB if the shell allows for infinite + # long lengths + my $upper = 100_000_000; + my $lower; + # 1000 is supported everywhere, so the search can start anywhere 1..999 + # 324 makes the search much faster on Cygwin, so let us use that + my $len = 324; + do { + if($len > $upper) { return $len }; + $lower = $len; + $len *= 4; + ::debug("init", "Maxlen: $lower<$len<$upper(".($upper-$lower)."): "); + } while (is_acceptable_command_line_length($len,$string)); + # Then search for the actual max length between + # last successful length ($len/16) and upper bound + return binary_find_max(int($len/16),$len,$string); +} + + +# Prototype forwarding +sub binary_find_max($$$); +sub binary_find_max($$$) { + # Given a lower and upper bound find the max (length or args) of a + # command line + # Returns: + # number of chars on the longest command line allowed + my ($lower, $upper, $string) = (@_); + if($lower == $upper + or $lower == $upper-1 + or $lower/$upper > 0.99) { + # $lower is +- 1 or within 1%: Don't search more + return $lower; + } + # Unevenly split binary search which is faster for Microsoft Windows. + # Guessing too high is cheap. Guessing too low is expensive. + my $split = ($^O eq "cygwin" or $^O eq "msys") ? 
0.93 : 0.5; + my $middle = int (($upper-$lower)*$split + $lower); + ::debug("init", "Maxlen: $lower<$middle<$upper(".($upper-$lower)."): "); + if (is_acceptable_command_line_length($middle,$string)) { + return binary_find_max($middle,$upper,$string); + } else { + return binary_find_max($lower,$middle,$string); + } +} + +{ + my $prg; + + sub is_acceptable_command_line_length($$) { + # Test if a command line of this length can run + # in the current environment + # If the string is " x" it tests how many args are allowed + # Returns: + # 0 if the command line length is too long + # 1 otherwise + my $len = shift; + my $string = shift; + if($Global::parallel_env) { + $len += length $Global::parallel_env; + } + # Force using non-built-in command + $prg ||= ::which("echo"); + my $l = length ::qqx("$prg ".${string}x(($len-1-length $prg)/length $string)); + if($l < $len/2) { + # The command returned OK, but did not output $len chars + # => this failed (Centos3 does this craziness) + return 0 + } + ::debug("init", "$len=$?\n"); + return not $?; + } +} + +sub tmux_length($) { + # If $opt::tmux set, find the limit for tmux + # tmux 1.8 has a 2kB limit + # tmux 1.9 has a 16kB limit + # tmux 2.0 has a 16kB limit + # tmux 2.1 has a 16kB limit + # tmux 2.2 has a 16kB limit + # Input: + # $len = maximal command line length + # Returns: + # $tmux_len = maximal length runable in tmux + local $/ = "\n"; + my $len = shift; + if($opt::tmux) { + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not ::which($ENV{'PARALLEL_TMUX'})) { + ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH."); + ::wait_and_exit(255); + } + my @out; + for my $l (1, 2020, 16320, 30000, $len) { + my $tmpfile = ::tmpname("tms"); + my $qtmp = ::Q($tmpfile); + my $tmuxcmd = $ENV{'PARALLEL_TMUX'}. + " -S $qtmp new-session -d -n echo $l". + ("t"x$l). 
" && echo $l; rm -f $qtmp"; + push @out, ::qqx($tmuxcmd); + ::rm($tmpfile); + } + ::debug("tmux","tmux-out ",@out); + chomp @out; + # The arguments is given 3 times on the command line + # and the tmux wrapping is around 30 chars + # (29 for tmux1.9, 33 for tmux1.8) + my $tmux_len = ::max(@out); + $len = ::min($len,int($tmux_len/4-33)); + ::debug("tmux","tmux-length ",$len); + } + return $len; +} + + +package RecordQueue; + +sub new($) { + my $class = shift; + my $fhs = shift; + my $colsep = shift; + my @unget = (); + my $arg_sub_queue; + if($opt::sqlworker) { + # Open SQL table + $arg_sub_queue = SQLRecordQueue->new(); + } elsif(defined $colsep) { + # Open one file with colsep or CSV + $arg_sub_queue = RecordColQueue->new($fhs); + } else { + # Open one or more files if multiple -a + $arg_sub_queue = MultifileQueue->new($fhs); + } + return bless { + 'unget' => \@unget, + 'arg_number' => 0, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + $self->{'arg_number'}++; + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + my $ret = shift @{$self->{'unget'}}; + if($ret) { + map { $_->flush_cache() } @$ret; + } + return $ret; + } + my $ret = $self->{'arg_sub_queue'}->get(); + if($ret) { + if(grep { index($_->orig(),"\0") > 0 } @$ret) { + # Allow for \0 in position 0 because GNU Parallel uses "\0noarg" + # to mean no-string + ::warning("A NUL character in the input was replaced with \\0.", + "NUL cannot be passed through in the argument list.", + "Did you mean to use the --null option?"); + for(grep { index($_->orig(),"\0") > 0 } @$ret) { + # Replace \0 with \\0 + my $a = $_->orig(); + $a =~ s/\0/\\0/g; + $_->set_orig($a); + } + } + if(defined $Global::max_number_of_args + and $Global::max_number_of_args == 0) { + ::debug("run", "Read 1 but return 0 args\n"); + # \0noarg => nothing 
(not the empty string) + map { $_->set_orig("\0noarg"); } @$ret; + } + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + map { $_->flush_cache() } @$ret; + } + return $ret; +} + +sub unget($) { + my $self = shift; + ::debug("run", "RecordQueue-unget\n"); + $self->{'arg_number'} -= @_; + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } + $self->{'arg_sub_queue'}->flush_cache(); +} + +sub arg_number($) { + my $self = shift; + return $self->{'arg_number'}; +} + + +package RecordColQueue; + +sub new($) { + my $class = shift; + my $fhs = shift; + my @unget = (); + my $arg_sub_queue = MultifileQueue->new($fhs); + return bless { + 'unget' => \@unget, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + if($self->{'arg_sub_queue'}->empty()) { + return undef; + } + my $in_record = $self->{'arg_sub_queue'}->get(); + if(defined $in_record) { + my @out_record = (); + for my $arg (@$in_record) { + ::debug("run", "RecordColQueue::arg $arg\n"); + my $line = $arg->orig(); + ::debug("run", "line='$line'\n"); + if($line ne "") { + if($opt::csv) { + # Parse CSV and put it into a record + chomp $line; + if(not $Global::csv->parse($line)) { + die "CSV has unexpected format: ^$line^"; + } + for($Global::csv->fields()) { + push @out_record, Arg->new($_); + } + } else { + # Split --colsep into record + for my $s (split /$opt::colsep/o, $line, -1) { + push @out_record, Arg->new($s); + } + } + } else { + push @out_record, Arg->new(""); + } + } + return 
\@out_record; + } else { + return undef; + } +} + +sub unget($) { + my $self = shift; + ::debug("run", "RecordColQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordColQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $arg (@{$self->{'unget'}}) { + $arg->flush_cache(); + } + $self->{'arg_sub_queue'}->flush_cache(); +} + + +package SQLRecordQueue; + +sub new($) { + my $class = shift; + my @unget = (); + return bless { + 'unget' => \@unget, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + return $Global::sql->get_record(); +} + +sub unget($) { + my $self = shift; + ::debug("run", "SQLRecordQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + if(@{$self->{'unget'}}) { return 0; } + my $get = $self->get(); + if(defined $get) { + $self->unget($get); + } + my $empty = not $get; + ::debug("run", "SQLRecordQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } +} + + +package MultifileQueue; + +@Global::unget_argv=(); + +sub new($$) { + my $class = shift; + my $fhs = shift; + for my $fh (@$fhs) { + if(-t $fh and -t ($Global::status_fd || *STDERR)) { + ::warning( + "Input is read from the terminal. You are either an expert", + "(in which case: YOU ARE AWESOME!) or maybe you forgot", + "::: or :::: or -a or to pipe data into parallel. 
If so", + "consider going through the tutorial: man parallel_tutorial", + "Press CTRL-D to exit."); + } + } + return bless { + 'unget' => \@Global::unget_argv, + 'fhs' => $fhs, + 'arg_matrix' => undef, + }, ref($class) || $class; +} + +sub get($) { + my $self = shift; + if($opt::link) { + return $self->link_get(); + } else { + return $self->nest_get(); + } +} + +sub unget($) { + my $self = shift; + ::debug("run", "MultifileQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @Global::unget_argv) && + not @{$self->{'unget'}}; + for my $fh (@{$self->{'fhs'}}) { + $empty &&= eof($fh); + } + ::debug("run", "MultifileQueue->empty $empty "); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}, @{$self->{'arg_matrix'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } +} + +sub link_get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + for my $i (0..$#{$self->{'fhs'}}) { + my $fh = $self->{'fhs'}[$i]; + my $arg = read_arg_from_fh($fh); + if(defined $arg) { + # Record $arg for recycling at end of file + push @{$self->{'arg_matrix'}[$i]}, $arg; + push @record, $arg; + $empty = 0; + } else { + ::debug("run", "EOA "); + # End of file: Recycle arguments + push @{$self->{'arg_matrix'}[$i]}, shift @{$self->{'arg_matrix'}[$i]}; + # return last @{$args->{'args'}{$fh}}; + push @record, @{$self->{'arg_matrix'}[$i]}[-1]; + } + } + if($empty) { + return undef; + } else { + return \@record; + } +} + +sub nest_get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + my $no_of_inputsources = $#{$self->{'fhs'}} + 1; + if(not $self->{'arg_matrix'}) { + # Initialize @arg_matrix with one arg from each file + # read one line from each file + my @first_arg_set; + my $all_empty = 1; + 
for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + if(defined $arg) { + $all_empty = 0; + } + $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); + push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; + } + if($all_empty) { + # All filehandles were at eof or eof-string + return undef; + } + return [@first_arg_set]; + } + + # Treat the case with one input source special. For multiple + # input sources we need to remember all previously read values to + # generate all combinations. But for one input source we can + # forget the value after first use. + if($no_of_inputsources == 1) { + my $arg = read_arg_from_fh($self->{'fhs'}[0]); + if(defined($arg)) { + return [$arg]; + } + return undef; + } + for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { + if(eof($self->{'fhs'}[$fhno])) { + next; + } else { + # read one + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + defined($arg) || next; # If we just read an EOF string: Treat this as EOF + my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; + $self->{'arg_matrix'}[$fhno][$len] = $arg; + # make all new combinations + my @combarg = (); + for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { + push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}], + # Is input source --link'ed to the next? + $opt::linkinputsource[$fhn+1]); + } + # Find only combinations with this new entry + $combarg[2*$fhno] = [$len,$len]; + # map combinations + # [ 1, 3, 7 ], [ 2, 4, 1 ] + # => + # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ] + my @mapped; + for my $c (expand_combinations(@combarg)) { + my @a; + for my $n (0 .. 
$no_of_inputsources - 1 ) { + push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; + } + push @mapped, \@a; + } + # append the mapped to the ungotten arguments + push @{$self->{'unget'}}, @mapped; + # get the first + if(@mapped) { + return shift @{$self->{'unget'}}; + } + } + } + # all are eof or at EOF string; return from the unget queue + return shift @{$self->{'unget'}}; +} + +{ + my $cr_count = 0; + my $nl_count = 0; + my $dos_crnl_determined; + sub read_arg_from_fh($) { + # Read one Arg from filehandle + # Returns: + # Arg-object with one read line + # undef if end of file + my $fh = shift; + my $prepend; + my $arg; + my $half_record = 0; + do {{ + # This makes 10% faster + if(not defined ($arg = <$fh>)) { + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(not $dos_crnl_determined and not defined $opt::d) { + # Warn if input has CR-NL and -d is not set + if($arg =~ /\r$/) { + $cr_count++; + } else { + $nl_count++; + } + if($cr_count == 3 or $nl_count == 3) { + $dos_crnl_determined = 1; + if($nl_count == 0 and $cr_count == 3) { + ::warning('The first three values end in CR-NL. '. + 'Consider using -d "\r\n"'); + } + } + } + if($opt::csv) { + # We need to read a full CSV line. 
+ if(($arg =~ y/"/"/) % 2 ) { + # The number of " on the line is uneven: + # If we were in a half_record => we have a full record now + # If we were outside a half_record => + # we are in a half record now + $half_record = not $half_record; + } + if($half_record) { + # CSV half-record with quoting: + # col1,"col2 2""x3"" board newline <-this one + # cont",col3 + $prepend .= $arg; + redo; + } else { + # Now we have a full CSV record + } + } + # Remove delimiter + chomp $arg; + if($Global::end_of_file_string and + $arg eq $Global::end_of_file_string) { + # Ignore the rest of input file + close $fh; + ::debug("run", "EOF-string ($arg) met\n"); + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(defined $prepend) { + $arg = $prepend.$arg; # For line continuation + undef $prepend; + } + if($Global::ignore_empty) { + if($arg =~ /^\s*$/) { + redo; # Try the next line + } + } + if($Global::max_lines) { + if($arg =~ /\s$/) { + # Trailing space => continued on next line + $prepend = $arg; + redo; + } + } + }} while (1 == 0); # Dummy loop {{}} for redo + if(defined $arg) { + return Arg->new($arg); + } else { + ::die_bug("multiread arg undefined"); + } + } +} + +# Prototype forwarding +sub expand_combinations(@); +sub expand_combinations(@) { + # Input: + # ([xmin,xmax], [ymin,ymax], ...) 
+ # Returns: ([x,y,...],[x,y,...]) + # where xmin <= x <= xmax and ymin <= y <= ymax + my $minmax_ref = shift; + my $link = shift; # This is linked to the next input source + my $xmin = $$minmax_ref[0]; + my $xmax = $$minmax_ref[1]; + my @p; + if(@_) { + my @rest = expand_combinations(@_); + if($link) { + # Linked to next col with --link/:::+/::::+ + # TODO BUG does not wrap values if not same number of vals + push(@p, map { [$$_[0], @$_] } + grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest); + } else { + # If there are more columns: Compute those recursively + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, map { [$x, @$_] } @rest; + } + } + } else { + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, [$x]; + } + } + return @p; +} + + +package Arg; + +sub new($) { + my $class = shift; + my $orig = shift; + my @hostgroups; + if($opt::hostgroups) { + if($orig =~ s:@(.+)::) { + # We found hostgroups on the arg + @hostgroups = split(/\+|,/, $1); + if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { + # This hostgroup is not defined using -S + # Add it + ::warning("Adding hostgroups: @hostgroups"); + # Add sshlogin + for(grep { not defined $Global::hostgroups{$_} } @hostgroups) { + my $sshlogin = SSHLogin->new($_); + my $sshlogin_string = $sshlogin->string(); + $Global::host{$sshlogin_string} = $sshlogin; + $Global::hostgroups{$sshlogin_string} = 1; + } + } + } else { + # No hostgroup on the arg => any hostgroup + @hostgroups = (keys %Global::hostgroups); + } + } + return bless { + 'orig' => $orig, + 'hostgroups' => \@hostgroups, + }, ref($class) || $class; +} + +sub Q($) { + # Q alias for ::shell_quote_scalar + my $ret = ::Q($_[0]); + no warnings 'redefine'; + *Q = \&::Q; + return $ret; +} + +sub pQ($) { + # pQ alias for ::perl_quote_scalar + my $ret = ::pQ($_[0]); + no warnings 'redefine'; + *pQ = \&::pQ; + return $ret; +} + +sub hash($) { + $Global::use{"DBI"} ||= eval "use B; 1;"; + B::hash(@_); +} + +sub total_jobs() { + return 
$Global::JobQueue->total_jobs(); +} + +{ + my %perleval; + my $job; + sub skip() { + # shorthand for $job->skip(); + $job->skip(); + } + sub slot() { + # shorthand for $job->slot(); + $job->slot(); + } + sub seq() { + # shorthand for $job->seq(); + $job->seq(); + } + sub uq() { + # Do not quote this arg + $Global::unquote_arg = 1; + } + sub yyyy_mm_dd_hh_mm_ss(@) { + # ISO8601 2038-01-19T03:14:08 + ::strftime("%Y-%m-%dT%H:%M:%S", localtime(shift || time())); + } + sub yyyy_mm_dd_hh_mm(@) { + # ISO8601 2038-01-19T03:14 + ::strftime("%Y-%m-%dT%H:%M", localtime(shift || time())); + } + sub yyyy_mm_dd(@) { + # ISO8601 2038-01-19 + ::strftime("%Y-%m-%d", localtime(shift || time())); + } + sub hh_mm_ss(@) { + # ISO8601 03:14:08 + ::strftime("%H:%M:%S", localtime(shift || time())); + } + sub hh_mm(@) { + # ISO8601 03:14 + ::strftime("%H:%M", localtime(shift || time())); + } + sub yyyymmddhhmmss(@) { + # ISO8601 20380119 + ISO8601 031408 + ::strftime("%Y%m%d%H%M%S", localtime(shift || time())); + } + sub yyyymmddhhmm(@) { + # ISO8601 20380119 + ISO8601 0314 + ::strftime("%Y%m%d%H%M", localtime(shift || time())); + } + sub yyyymmdd(@) { + # ISO8601 20380119 + ::strftime("%Y%m%d", localtime(shift || time())); + } + sub hhmmss(@) { + # ISO8601 031408 + ::strftime("%H%M%S", localtime(shift || time())); + } + sub hhmm(@) { + # ISO8601 0314 + ::strftime("%H%M", localtime(shift || time())); + } + + sub replace($$$$) { + # Calculates the corresponding value for a given perl expression + # Returns: + # The calculated string (quoted if asked for) + my $self = shift; + my $perlexpr = shift; # E.g. $_=$_ or s/.gz// + my $quote = shift; # should the string be quoted? + # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + $job = shift; + # Positional replace treated as normal replace + $perlexpr =~ s/^(-?\d+)? 
*//; + if(not $Global::cache_replacement_eval + or + not $self->{'cache'}{$perlexpr}) { + # Only compute the value once + # Use $_ as the variable to change + local $_; + if($Global::trim eq "n") { + $_ = $self->{'orig'}; + } else { + # Trim the input + $_ = trim_of($self->{'orig'}); + } + ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); + if(not $perleval{$perlexpr}) { + # Make an anonymous function of the $perlexpr + # And more importantly: Compile it only once + if($perleval{$perlexpr} = + eval('sub { no strict; no warnings; my $job = shift; '. + $perlexpr.' }')) { + # All is good + } else { + # The eval failed. Maybe $perlexpr is invalid perl? + ::error("Cannot use $perlexpr: $@"); + ::wait_and_exit(255); + } + } + # Execute the function + $perleval{$perlexpr}->($job); + $self->{'cache'}{$perlexpr} = $_; + if($Global::unquote_arg) { + # uq() was called in perlexpr + $self->{'cache'}{'unquote'}{$perlexpr} = 1; + # Reset for next perlexpr + $Global::unquote_arg = 0; + } + } + # Return the value quoted if needed + if($self->{'cache'}{'unquote'}{$perlexpr}) { + return($self->{'cache'}{$perlexpr}); + } else { + return($quote ? Q($self->{'cache'}{$perlexpr}) + : $self->{'cache'}{$perlexpr}); + } + } +} + +sub flush_cache($) { + # Flush cache of computed values + my $self = shift; + $self->{'cache'} = undef; +} + +sub orig($) { + my $self = shift; + return $self->{'orig'}; +} + +sub set_orig($$) { + my $self = shift; + $self->{'orig'} = shift; +} + +sub trim_of($) { + # Removes white space as specifed by --trim: + # n = nothing + # l = start + # r = end + # lr|rl = both + # Returns: + # string with white space removed as needed + my @strings = map { defined $_ ? 
$_ : "" } (@_); + my $arg; + if($Global::trim eq "n") { + # skip + } elsif($Global::trim eq "l") { + for my $arg (@strings) { $arg =~ s/^\s+//; } + } elsif($Global::trim eq "r") { + for my $arg (@strings) { $arg =~ s/\s+$//; } + } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { + for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } + } else { + ::error("--trim must be one of: r l rl lr."); + ::wait_and_exit(255); + } + return wantarray ? @strings : "@strings"; +} + + +package TimeoutQueue; + +sub new($) { + my $class = shift; + my $delta_time = shift; + my ($pct); + if($delta_time =~ /(\d+(\.\d+)?)%/) { + # Timeout in percent + $pct = $1/100; + $delta_time = 1_000_000; + } + $delta_time = ::multiply_time_units($delta_time); + + return bless { + 'queue' => [], + 'delta_time' => $delta_time, + 'pct' => $pct, + 'remedian_idx' => 0, + 'remedian_arr' => [], + 'remedian' => undef, + }, ref($class) || $class; +} + +sub delta_time($) { + my $self = shift; + return $self->{'delta_time'}; +} + +sub set_delta_time($$) { + my $self = shift; + $self->{'delta_time'} = shift; +} + +sub remedian($) { + my $self = shift; + return $self->{'remedian'}; +} + +sub set_remedian($$) { + # Set median of the last 999^3 (=997002999) values using Remedian + # + # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A + # robust averaging method for large data sets." Journal of the + # American Statistical Association 85.409 (1990): 97-104. 
+ my $self = shift; + my $val = shift; + my $i = $self->{'remedian_idx'}++; + my $rref = $self->{'remedian_arr'}; + $rref->[0][$i%999] = $val; + $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; + $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; + $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; +} + +sub update_median_runtime($) { + # Update delta_time based on runtime of finished job if timeout is + # a percentage + my $self = shift; + my $runtime = shift; + if($self->{'pct'}) { + $self->set_remedian($runtime); + $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); + ::debug("run", "Timeout: $self->{'delta_time'}s "); + } +} + +sub process_timeouts($) { + # Check if there was a timeout + my $self = shift; + # $self->{'queue'} is sorted by start time + while (@{$self->{'queue'}}) { + my $job = $self->{'queue'}[0]; + if($job->endtime()) { + # Job already finished. No need to timeout the job + # This could be because of --keep-order + shift @{$self->{'queue'}}; + } elsif($job->is_timedout($self->{'delta_time'})) { + # Need to shift off queue before kill + # because kill calls usleep that calls process_timeouts + shift @{$self->{'queue'}}; + ::warning("This job was killed because it timed out:", + $job->replaced()); + $job->kill(); + } else { + # Because they are sorted by start time the rest are later + last; + } + } +} + +sub insert($) { + my $self = shift; + my $in = shift; + push @{$self->{'queue'}}, $in; +} + + +package SQL; + +sub new($) { + my $class = shift; + my $dburl = shift; + $Global::use{"DBI"} ||= eval "use DBI; 1;"; + # +DBURL = append to this DBURL + my $append = $dburl=~s/^\+//; + my %options = parse_dburl(get_alias($dburl)); + my %driveralias = ("sqlite" => "SQLite", + "sqlite3" => "SQLite", + "pg" => "Pg", + "postgres" => "Pg", + "postgresql" => "Pg", + "csv" => "CSV", + "oracle" => "Oracle", + "ora" => "Oracle"); + my $driver = $driveralias{$options{'databasedriver'}} || + 
$options{'databasedriver'}; + my $database = $options{'database'}; + my $host = $options{'host'} ? ";host=".$options{'host'} : ""; + my $port = $options{'port'} ? ";port=".$options{'port'} : ""; + my $dsn = "DBI:$driver:dbname=$database$host$port"; + my $userid = $options{'user'}; + my $password = $options{'password'};; + if(not grep /$driver/, DBI->available_drivers) { + ::error("$driver not supported. Are you missing a perl DBD::$driver module?"); + ::wait_and_exit(255); + } + my $dbh; + if($driver eq "CSV") { + # CSV does not use normal dsn + if(-d $database) { + $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", }) + or die $DBI::errstr; + } else { + ::error("$database is not a directory."); + ::wait_and_exit(255); + } + } else { + $dbh = DBI->connect($dsn, $userid, $password, + { RaiseError => 1, AutoInactiveDestroy => 1 }) + or die $DBI::errstr; + } + $dbh->{'PrintWarn'} = $Global::debug || 0; + $dbh->{'PrintError'} = $Global::debug || 0; + $dbh->{'RaiseError'} = 1; + $dbh->{'ShowErrorStatement'} = 1; + $dbh->{'HandleError'} = sub {}; + if(not defined $options{'table'}) { + ::error("The DBURL ($dburl) must contain a table."); + ::wait_and_exit(255); + } + if($options{'table'} =~ m:/:) { + ::error("The table name ($options{'table'}) cannot contain /"); + ::wait_and_exit(255); + } + + return bless { + 'dbh' => $dbh, + 'driver' => $driver, + 'max_number_of_args' => undef, + 'table' => $options{'table'}, + 'append' => $append, + }, ref($class) || $class; +} + +# Prototype forwarding +sub get_alias($); +sub get_alias($) { + my $alias = shift; + $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql: + if ($alias !~ /^:/) { + return $alias; + } + + # Find the alias + my $path; + if (-l $0) { + ($path) = readlink($0) =~ m|^(.*)/|; + } else { + ($path) = $0 =~ m|^(.*)/|; + } + + my @deprecated = ("$ENV{HOME}/.dburl.aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for (@deprecated) { + if(-r $_) { + ::warning("$_ is deprecated. ". 
+ "Use .sql/aliases instead (read man sql)."); + } + } + my @urlalias=(); + check_permissions("$ENV{HOME}/.sql/aliases"); + check_permissions("$ENV{HOME}/.dburl.aliases"); + my @search = ("$ENV{HOME}/.sql/aliases", + "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for my $alias_file (@search) { + # local $/ needed if -0 set + local $/ = "\n"; + if(-r $alias_file) { + my $in = ::open_or_exit("<",$alias_file); + push @urlalias, <$in>; + close $in; + } + } + my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/; + # If we saw this before: we have an alias loop + if(grep {$_ eq $alias_part } @Private::seen_aliases) { + ::error("$alias_part is a cyclic alias."); + exit -1; + } else { + push @Private::seen_aliases, $alias_part; + } + + my $dburl; + for (@urlalias) { + /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; } + } + + if($dburl) { + return get_alias($dburl.$rest); + } else { + ::error("$alias is not defined in @search"); + exit(-1); + } +} + +sub check_permissions($) { + my $file = shift; + + if(-e $file) { + if(not -o $file) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be owned by $username: ". + "chown $username $file"); + } + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, + $atime,$mtime,$ctime,$blksize,$blocks) = stat($file); + if($mode & 077) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be only be readable by $username: ". + "chmod 600 $file"); + } + } +} + +sub parse_dburl($) { + sub undef_if_empty { + if(defined($_[0]) and $_[0] eq "") { + return undef; + } + return $_[0]; + } + my $url = shift; + my %options = (); + # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]] + + if($url=~m!^(?:sql:)? # You can prefix with 'sql:' + ((?:oracle|ora|mysql|pg|postgres|postgresql|influx|influxdb)(?:s|ssl|)| + (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1) + (?: + ([^:@/][^:@]*|) # Username ($2) + (?: + :([^@]*) # Password ($3) + )? + @)? 
+ ([^:/]*)? # Hostname ($4) + (?: + : + ([^/]*)? # Port ($5) + )? + (?: + / + ([^?]*)? # Database ($6) + )? + (?: + / + ([^?/]*)? # Table ($7) + ) + (?: + \? + (.*)? # Query ($8) + )? + $!ix) { + $options{databasedriver} = undef_if_empty(lc(uri_unescape($1))); + $options{user} = undef_if_empty(uri_unescape($2)); + $options{password} = undef_if_empty(uri_unescape($3)); + $options{host} = undef_if_empty(uri_unescape($4)); + $options{port} = undef_if_empty(uri_unescape($5)); + $options{database} = undef_if_empty(uri_unescape($6)) + || $options{user} || $ENV{'USER'}; + $options{table} = undef_if_empty(uri_unescape($7)) + || $options{user} || $ENV{'USER'}; + $options{query} = undef_if_empty(uri_unescape($8)); + ::debug("sql", "dburl $url\n"); + ::debug("sql", "databasedriver ", $options{databasedriver}, + " user ", $options{user}, + " password ", $options{password}, " host ", $options{host}, + " port ", $options{port}, " database ", $options{database}, + " table ", $options{table}, " query ", $options{query}, "\n"); + } else { + ::error("$url is not a valid DBURL"); + exit 255; + } + return %options; +} + +sub uri_unescape($) { + # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm + # to avoid depending on URI::Escape + # This section is (C) Gisle Aas. 
+ # Note from RFC1630: "Sequences which start with a percent sign + # but are not followed by two hexadecimal characters are reserved + # for future extension" + my $str = shift; + if (@_ && wantarray) { + # not executed for the common case of a single argument + my @str = ($str, @_); # need to copy + foreach (@str) { + s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + } + return @str; + } + $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; + $str; +} + +sub run($) { + my $self = shift; + my $stmt = shift; + if($self->{'driver'} eq "CSV") { + $stmt=~ s/;$//; + if($stmt eq "BEGIN" or + $stmt eq "COMMIT") { + return undef; + } + } + my @retval; + my $dbh = $self->{'dbh'}; + ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n"); + # Execute with the rest of the args - if any + my $rv; + my $sth; + my $lockretry = 0; + while($lockretry < 10) { + $sth = $dbh->prepare($stmt); + if($sth + and + eval { $rv = $sth->execute(@_) }) { + last; + } else { + if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/ + or + $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) { + # This is fine: + # It is just a worker that reported back too late - + # another worker had finished the job first + # and the table was then dropped + $rv = $sth = 0; + last; + } + if($DBI::errstr =~ /locked/) { + ::debug("sql", "Lock retry: $lockretry"); + $lockretry++; + ::usleep(rand()*300); + } elsif(not $sth) { + # Try again + $lockretry++; + } else { + ::error($DBI::errstr); + ::wait_and_exit(255); + } + } + } + if($lockretry >= 10) { + ::die_bug("retry > 10: $DBI::errstr"); + } + if($rv < 0 and $DBI::errstr){ + ::error($DBI::errstr); + ::wait_and_exit(255); + } + return $sth; +} + +sub get($) { + my $self = shift; + my $sth = $self->run(@_); + my @retval; + # If $sth = 0 it means the table was dropped by another process + while($sth) { + my @row = $sth->fetchrow_array(); + @row or last; + push @retval, \@row; + } + return \@retval; +} + +sub 
table($) { + my $self = shift; + return $self->{'table'}; +} + +sub append($) { + my $self = shift; + return $self->{'append'}; +} + +sub update($) { + my $self = shift; + my $stmt = shift; + my $table = $self->table(); + $self->run("UPDATE $table $stmt",@_); +} + +sub output($) { + my $self = shift; + my $commandline = shift; + + $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ". + $commandline->seq(), + join("",@{$commandline->{'output'}{1}}), + join("",@{$commandline->{'output'}{2}})); +} + +sub max_number_of_args($) { + # Maximal number of args for this table + my $self = shift; + if(not $self->{'max_number_of_args'}) { + # Read the number of args from the SQL table + my $table = $self->table(); + my $v = $self->get("SELECT * FROM $table LIMIT 1;"); + my @reserved_columns = qw(Seq Host Starttime JobRuntime Send + Receive Exitval _Signal Command Stdout Stderr); + if(not $v) { + ::error("$table contains no records"); + } + # Count the number of Vx columns + $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns; + } + return $self->{'max_number_of_args'}; +} + +sub set_max_number_of_args($$) { + my $self = shift; + $self->{'max_number_of_args'} = shift; +} + +sub create_table($) { + my $self = shift; + if($self->append()) { return; } + my $max_number_of_args = shift; + $self->set_max_number_of_args($max_number_of_args); + my $table = $self->table(); + $self->run(qq(DROP TABLE IF EXISTS $table;)); + # BIGINT and TEXT are not supported in these databases or are too small + my %vartype = ( + "Oracle" => { "BIGINT" => "NUMBER(19,0)", + "TEXT" => "CLOB", }, + "mysql" => { "TEXT" => "BLOB", }, + "CSV" => { "BIGINT" => "INT", + "FLOAT" => "REAL", }, + ); + my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT"; + my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT"; + my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)"; + my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args()); + $self->run(qq{CREATE TABLE 
$table + (Seq $BIGINT, + Host $TEXT, + Starttime $FLOAT, + JobRuntime $FLOAT, + Send $BIGINT, + Receive $BIGINT, + Exitval $BIGINT, + _Signal $BIGINT, + Command $TEXT,}. + $v_def. + qq{Stdout $TEXT, + Stderr $TEXT);}); +} + +sub insert_records($) { + my $self = shift; + my $seq = shift; + my $command_ref = shift; + my $record_ref = shift; + my $table = $self->table(); + # For SQL encode the command with \177 space as split points + my $command = join("\177 ",@$command_ref); + my @v_cols = map { ", V$_" } (1..$self->max_number_of_args()); + # Two extra value due to $seq, Exitval, Send + my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4); + $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ". + "VALUES ($v_vals);", $seq, $command, -1000, + 0, @$record_ref[1..$#$record_ref]); +} + + +sub get_record($) { + my $self = shift; + my @retval; + my $table = $self->table(); + my @v_cols = map { ", V$_" } (1..$self->max_number_of_args()); + my $rand = "Reserved-".$$.rand(); + my $v; + my $more_pending; + + do { + if($self->{'driver'} eq "CSV") { + # Sub SELECT is not supported in CSV + # So to minimize the race condition below select a job at random + my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ". + "WHERE Exitval = -1000 LIMIT 100;"); + $v = [ sort { rand() > 0.5 } @$r ]; + } else { + # Avoid race condition where multiple workers get the same job + # by setting Stdout to a unique string + # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL + $self->update("SET Stdout = ?,Exitval = ? ". + "WHERE Seq = (". + " SELECT * FROM (". + " SELECT min(Seq) FROM $table WHERE Exitval = -1000". + " ) AS dummy". + ") AND Exitval = -1000;", $rand, -1210); + # If a parallel worker overwrote the unique string this will get nothing + $v = $self->get("SELECT Seq, Command @v_cols FROM $table ". 
+ "WHERE Stdout = ?;", $rand); + } + if($v->[0]) { + my $val_ref = $v->[0]; + # Mark record as taken + my $seq = shift @$val_ref; + # Save the sequence number to use when running the job + $SQL::next_seq = $seq; + $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220); + # Command is encoded with '\177 space' as delimiter + my @command; + my $encoded_cmd = shift @$val_ref; + if($encoded_cmd =~ /\177 /) { + @command = split /\177 /, $encoded_cmd; + } else { + # (\257 space for backward compatability) + @command = split /\257 /, $encoded_cmd; + # Recode \257 => \177 + for (@command,@$val_ref) { + s/\257/\177>/g; + s/\257\256/\257/g; + } + } + $SQL::command_ref = \@command; + for (@$val_ref) { + push @retval, Arg->new($_); + } + } else { + # If the record was updated by another job in parallel, + # then we may not be done, so see if there are more jobs pending + $more_pending = + $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210); + } + } while (not $v->[0] and $more_pending->[0]); + + if(@retval) { + return \@retval; + } else { + return undef; + } +} + +sub total_jobs($) { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT count(*) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::total_jobs"); + } +} + +sub max_seq($) { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT max(Seq) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::max_seq"); + } +} + +sub finished($) { + # Check if there are any jobs left in the SQL table that do not + # have a "real" exitval + my $self = shift; + if($opt::wait or $Global::start_sqlworker) { + my $table = $self->table(); + my $rv = $self->get("select Seq,Exitval from $table ". 
+ "where Exitval <= -1000 limit 1"); + return not $rv->[0]; + } else { + return 1; + } +} + +package Semaphore; + +# This package provides a counting semaphore +# +# If a process dies without releasing the semaphore the next process +# that needs that entry will clean up dead semaphores +# +# The semaphores are stored in $PARALLEL_HOME/semaphores/id- Each +# file in $PARALLEL_HOME/semaphores/id-/ is the process ID of the +# process holding the entry. If the process dies, the entry can be +# taken by another process. + +sub new($) { + my $class = shift; + my $id = shift; + my $count = shift; + $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex + $id = "id-".$id; # To distinguish it from a process id + my $parallel_locks = $Global::cache_dir . "/semaphores"; + -d $parallel_locks or ::mkdir_or_die($parallel_locks); + my $lockdir = "$parallel_locks/$id"; + my $lockfile = $lockdir.".lock"; + if(-d $parallel_locks and -w $parallel_locks + and -r $parallel_locks and -x $parallel_locks) { + # skip + } else { + ::error("Semaphoredir must be writable: '$parallel_locks'"); + ::wait_and_exit(255); + } + + if($count < 1) { ::die_bug("semaphore-count: $count"); } + return bless { + 'lockfile' => $lockfile, + 'lockfh' => Symbol::gensym(), + 'lockdir' => $lockdir, + 'id' => $id, + 'idfile' => $lockdir."/".$id, + 'pid' => $$, + 'pidfile' => $lockdir."/".$$.'@'.::hostname(), + 'count' => $count + 1 # nlinks returns a link for the 'id-' as well + }, ref($class) || $class; +} + +sub remove_dead_locks($) { + my $self = shift; + my $lockdir = $self->{'lockdir'}; + + for my $d (glob "$lockdir/*") { + $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; + my ($pid, $host) = ($1, $2); + if($host eq ::hostname()) { + if(kill 0, $pid) { + ::debug("sem", "Alive: $pid $d\n"); + } else { + ::debug("sem", "Dead: $d\n"); + ::rm($d); + } + } + } +} + +sub acquire($) { + my $self = shift; + my $sleep = 1; # 1 ms + my $start_time = time; + while(1) { + # Can we get a lock? 
+ $self->atomic_link_if_count_less_than() and last; + $self->remove_dead_locks(); + # Retry slower and slower up to 1 second + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + time - $start_time > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Stealing the semaphore."); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("timeout_write_idfile: $self->{'idfile'}"); + close $fh; + } + link $self->{'idfile'}, $self->{'pidfile'}; + last; + } + if($opt::semaphoretimeout < 0 + and + time - $start_time > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + exit(1); + last; + } + } + } + ::debug("sem", "acquired $self->{'pid'}\n"); +} + +sub release($) { + my $self = shift; + ::rm($self->{'pidfile'}); + if($self->nlinks() == 1) { + # This is the last link, so atomic cleanup + $self->lock(); + if($self->nlinks() == 1) { + ::rm($self->{'idfile'}); + rmdir $self->{'lockdir'}; + } + $self->unlock(); + } + ::debug("run", "released $self->{'pid'}\n"); +} + +sub pid_change($) { + # This should do what release()+acquire() would do without having + # to re-acquire the semaphore + my $self = shift; + + my $old_pidfile = $self->{'pidfile'}; + $self->{'pid'} = $$; + $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname(); + my $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + ::rm($old_pidfile); +} + +sub atomic_link_if_count_less_than($) { + # Link $file1 to $file2 if nlinks to $file1 < $count + my $self = shift; + my $retval = 0; + $self->lock(); + my $nlinks = $self->nlinks(); + ::debug("sem","$nlinks<$self->{'count'} "); + if($nlinks < $self->{'count'}) { + -d $self->{'lockdir'} or 
::mkdir_or_die($self->{'lockdir'}); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + close $fh; + } + $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + } + $self->unlock(); + ::debug("sem", "atomic $retval"); + return $retval; +} + +sub nlinks($) { + my $self = shift; + if(-e $self->{'idfile'}) { + return (stat(_))[3]; + } else { + return 0; + } +} + +sub lock($) { + my $self = shift; + my $sleep = 100; # 100 ms + my $total_sleep = 0; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $locked = 0; + while(not $locked) { + if(tell($self->{'lockfh'}) == -1) { + # File not open + open($self->{'lockfh'}, ">", $self->{'lockfile'}) + or ::debug("run", "Cannot open $self->{'lockfile'}"); + } + if($self->{'lockfh'}) { + # File is open + chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw + if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { + # The file is locked: No need to retry + $locked = 1; + last; + } else { + if ($! =~ m/Function not implemented/) { + ::warning("flock: $!", + "Will wait for a random while."); + ::usleep(rand(5000)); + # File cannot be locked: No need to retry + $locked = 2; + last; + } + } + } + # Locking failed in first round + # Sleep and try again + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + $total_sleep += $sleep; + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + $total_sleep/1000 > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Taking the semaphore."); + $locked = 3; + last; + } + if($opt::semaphoretimeout < 0 + and + $total_sleep/1000 > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. 
Exiting."); + $locked = 4; + last; + } + } else { + if($total_sleep/1000 > 30) { + ::warning("Semaphore stuck for 30 seconds. ". + "Consider using --semaphoretimeout."); + } + } + } + ::debug("run", "locked $self->{'lockfile'}"); +} + +sub unlock($) { + my $self = shift; + ::rm($self->{'lockfile'}); + close $self->{'lockfh'}; + ::debug("run", "unlocked\n"); +} + +# Keep perl -w happy + +$opt::x = $Semaphore::timeout = $Semaphore::wait = +$Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg = +$Global::max_slot_number = $opt::session; + +package main; + + +sub main() { + unpack_combined_executable(); + save_stdin_stdout_stderr(); + save_original_signal_handler(); + parse_options(); + ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fh), "\n"); + my $number_of_args; + if($Global::max_number_of_args) { + $number_of_args = $Global::max_number_of_args; + } elsif ($opt::X or $opt::m or $opt::xargs) { + $number_of_args = undef; + } else { + $number_of_args = 1; + } + + my @command = @ARGV; + my @input_source_fh; + if($opt::pipepart) { + if($opt::tee) { + @input_source_fh = map { open_or_exit("<",$_) } @opt::a; + # Remove the first: It will be the file piped. + shift @input_source_fh; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } + } else { + # -a is used for data - not for command line args + @input_source_fh = map { open_or_exit("<",$_) } "/dev/null"; + } + } else { + @input_source_fh = map { open_or_exit("<",$_) } @opt::a; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } + } + + if($opt::skip_first_line) { + # Skip the first line for the first file handle + my $fh = $input_source_fh[0]; + <$fh>; + } + + set_input_source_header(\@command,\@input_source_fh); + if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { + # Parallel check all hosts are up. 
Remove hosts that are down + filter_hosts(); + } + if($opt::sqlmaster and $opt::sqlworker) { + # Start a real --sqlworker in the background later + $Global::start_sqlworker = 1; + $opt::sqlworker = undef; + } + + $Global::start_time = ::now(); + if($opt::nonall or $opt::onall) { + onall(\@input_source_fh,@command); + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); + } + + $Global::JobQueue = JobQueue->new( + \@command, \@input_source_fh, $Global::ContextReplace, + $number_of_args, \@Global::transfer_files, \@Global::ret_files, + \@Global::template_names, \@Global::template_contents + ); + + if($opt::sqlmaster) { + # Create SQL table to hold joblog + output + # Figure out how many arguments are in a job + # (It is affected by --colsep, -N, $number_source_fh) + my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}; + my $record = $record_queue->get(); + my $no_of_values = $number_of_args * (1+$#{$record}); + $record_queue->unget($record); + $Global::sql->create_table($no_of_values); + if($opt::sqlworker) { + # Start a real --sqlworker in the background later + $Global::start_sqlworker = 1; + $opt::sqlworker = undef; + } + } + + if($opt::pipepart) { + pipepart_setup(); + } elsif($opt::pipe) { + if($opt::tee) { + pipe_tee_setup(); + } elsif($opt::shard or $opt::bin) { + pipe_shard_setup(); + } elsif($opt::groupby) { + pipe_group_by_setup(); + } + } + + if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) { + # Count the number of jobs or shuffle all jobs + # before starting any. + # Must be done after ungetting any --pipepart jobs. + $Global::JobQueue->total_jobs(); + } + # Compute $Global::max_jobs_running + # Must be done after ungetting any --pipepart jobs. 
+ max_jobs_running(); + init_run_jobs(); + my $sem; + if($Global::semaphore) { + $sem = acquire_semaphore(); + } + $SIG{TERM} = $Global::original_sig{TERM}; + $SIG{HUP} = \&start_no_new_jobs; + + if($opt::progress) { + ::status_no_nl(init_progress()); + } + if($opt::tee or $opt::shard or $opt::bin) { + # All jobs must be running in parallel for --tee/--shard/--bin + while(start_more_jobs()) {} + $Global::start_no_new_jobs = 1; + if(not $Global::JobQueue->empty()) { + if($opt::tee) { + ::error("--tee requires --jobs to be higher. Try --jobs 0."); + } elsif($opt::bin) { + ::error("--bin requires --jobs to be higher than the number of", + "arguments. Increase --jobs."); + } elsif($opt::shard) { + ::error("--shard requires --jobs to be higher than the number of", + "arguments. Increase --jobs."); + } else { + ::die_bug("--bin/--shard/--tee should not get here"); + } + ::wait_and_exit(255); + } + } elsif($opt::pipe and not $opt::pipepart and not $opt::semaphore) { + # Fill all jobslots + while(start_more_jobs()) {} + spreadstdin(); + } elsif($opt::fast) { + fast(); + } else { + # Reap the finished jobs and start more + while(reapers() + start_more_jobs()) {} + } + ::debug("init", "Start draining\n"); + drain_job_queue(@command); + ::debug("init", "Done draining\n"); + reapers(); + ::debug("init", "Done reaping\n"); + if($Global::semaphore) { $sem->release(); } + cleanup(); + ::debug("init", "Halt\n"); + halt(); +} + +main(); diff --git a/tools/ci/inria/main b/tools/ci/inria/main index c781cd5b0da4..8b3dd23911fc 100755 --- a/tools/ci/inria/main +++ b/tools/ci/inria/main @@ -186,11 +186,13 @@ case "${OCAML_ARCH}" in cleanup=true check_make_alldepend=true export OCAMLTEST_SKIP_TESTS="$memory_model_tests" + init_submodule_flexdll=true ;; cygwin64) cleanup=true check_make_alldepend=true export OCAMLTEST_SKIP_TESTS="$memory_model_tests" + init_submodule_flexdll=true ;; mingw) build='--build=i686-pc-cygwin' diff --git a/tools/objinfo.ml b/tools/objinfo.ml index 
54a6325ecba7..bfaf94ce7cbe 100644 --- a/tools/objinfo.ml +++ b/tools/objinfo.ml @@ -375,6 +375,10 @@ let dump_byte ic = | SYMB -> let symb = Bytesections.read_section_struct toc ic section in print_global_table symb + | ORUN -> + let ocamlrunparam = + Bytesections.read_section_string toc ic section in + printf "Overridden OCAMLRUNPARAM defaults: %s\n" ocamlrunparam | OSLD -> let caml_standard_library_default = Bytesections.read_section_string toc ic section in diff --git a/tools/opam/gen_ocaml_config.ml b/tools/opam/gen_ocaml_config.ml new file mode 100644 index 000000000000..bd3358697899 --- /dev/null +++ b/tools/opam/gen_ocaml_config.ml @@ -0,0 +1,161 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Louis Gesbert, OCamlPro *) +(* *) +(* Copyright 2017 OCamlPro SAS *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* For as long it remains not totally impractical to do so, this script is + written in OCaml 3.07. Its purpose is to generate an opam .config file + containing the following variables: + - native: True if ocamlopt is located with ocaml + - native-tools: True if ocamlc.opt is located with ocaml + - native-dynlink: True if dynlink.cmxa exists in -I + or -I +dynlink + - stubsdir: Content of +ld.conf in CAML_LD_LIBRARY_PATH format + - preinstalled: True if this installation is provided by the system, rather + than compiled from sources by opam + - compiler: ["system"], if [preinstalled], otherwise the version of the opam + compiler package which provided the compiler (e.g. "5.4.0"). For + largely historical reasons, custom compiler append additional + configuration information (e.g. "5.4.0+options+flambda"). 
This + variable should be considered deprecated and the content + unstable. *) + +(* The script must be invoked using the interpreter, for example: + ocaml gen_ocaml_config.ml 5.4.0 ocaml 5.4.0+options false +flambda + where "5.4.0" is the expected value of Sys.ocaml_version (with any additional + information removed), the resulting configuration should be written to + "ocaml.config" and the "compiler" variable should be set to + "5.4.0+options+flambda". *) +let expected_ocaml_version, + package_config_file, + compiler_package_version, + preinstalled, + option_names = + match Array.to_list Sys.argv with + | _ :: + expected_ocaml_version :: + package_config_file :: + compiler_package_version :: + preinstalled :: + options -> + expected_ocaml_version, + package_config_file ^ ".config", + compiler_package_version, + preinstalled, + String.concat "" (List.filter ((<>) "") options) + | _ -> + prerr_endline "Invalid arguments"; + exit 1 + +(* Check that Sys.ocaml_version is as expected *) +let () = + let ocaml_version = + Scanf.sscanf Sys.ocaml_version "%u.%u" (fun major minor -> + if (major, minor) > (3, 7) then + (* Strip off any additional information *) + Scanf.sscanf Sys.ocaml_version "%[^~+]" (fun x -> x) + else + Sys.ocaml_version) + in + if ocaml_version <> expected_ocaml_version then + (Printf.eprintf + "OCaml version mismatch: %s, expected %s" + ocaml_version expected_ocaml_version; + exit 1) + +(* Write the .config file *) +let () = + let binary = + let dir = Filename.dirname Sys.executable_name in + if Filename.check_suffix Sys.executable_name ".exe" then + fun name -> Filename.concat dir (name ^ ".exe") + else + fun name -> Filename.concat dir name + in + let libdir = + let exit_code = + let ocamlc = binary "ocamlc" in + let ocamlc = + if Sys.os_type = "Win32" then + if String.contains ocamlc ' ' then + "\"" ^ ocamlc ^ "\"" + else + ocamlc + else + Filename.quote ocamlc + in + Sys.command (ocamlc ^ " -where > where") in + if exit_code = 0 then + (* Must be 
opened in text mode for Windows *) + let ic = open_in "where" in + let r = input_line ic in + close_in ic; Sys.remove "where"; r + else begin + Printf.eprintf "Unexpected exit code %d from `ocamlc -where'\n" exit_code; + exit 1 + end + in + let native = Sys.file_exists (binary "ocamlopt") in + let native_tools = Sys.file_exists (binary "ocamlc.opt") in + let native_dynlink = + let check_dir libdir = + Sys.file_exists (Filename.concat libdir "dynlink.cmxa") + in + List.exists check_dir [Filename.concat libdir "dynlink"; libdir] + in + let stubsdir = + let ld_conf = Filename.concat libdir "ld.conf" in + if Sys.file_exists ld_conf then + let separator = if Sys.os_type = "Win32" then ";" else ":" in + let ic = open_in ld_conf in + let rec input_lines acc = + try + let line = input_line ic in + let line = + if line = Filename.current_dir_name then + libdir + else if line = Filename.parent_dir_name then + Filename.concat libdir line + else + Scanf.sscanf line "%[.]%1[/\\]" (fun prefix separator -> + if separator <> "" then + if prefix = Filename.current_dir_name then + let line = String.sub line 2 (String.length line - 2) in + Filename.concat libdir line + else if prefix = Filename.parent_dir_name then + Filename.concat libdir line + else + line + else + line) + in + input_lines (line::acc) + with End_of_file -> + close_in ic; List.rev acc + in + String.concat separator (input_lines []) + else + "" + in + let oc = open_out package_config_file in + (* Quoted strings need OCaml 4.02; "\ " needs OCaml 3.09! 
*) + Printf.fprintf oc "\ + opam-version: \"2.0\"\n\ + variables {\n \ + native: %b\n \ + native-tools: %b\n \ + native-dynlink: %b\n \ + stubsdir: %S\n \ + preinstalled: %s\n \ + compiler: \"%s%s\"\n\ + }\n" native native_tools native_dynlink stubsdir preinstalled + compiler_package_version option_names; + close_out oc diff --git a/tools/opam/gen_ocaml_config.ml.in b/tools/opam/gen_ocaml_config.ml.in deleted file mode 100644 index b6771e60ba9a..000000000000 --- a/tools/opam/gen_ocaml_config.ml.in +++ /dev/null @@ -1,65 +0,0 @@ -let () = - let ocaml_version = - let v = Sys.ocaml_version in - let l = String.length v in - let plus = try String.index v '+' with Not_found -> l in - (* Introduced in 4.11.0; used from 4.12.0 *) - let tilde = try String.index v '~' with Not_found -> l in - String.sub v 0 (min (min plus tilde) l) - in - if ocaml_version <> Sys.argv.(1) then - (Printf.eprintf - "OCaml version mismatch: %%s, expected %s" - ocaml_version Sys.argv.(1); - exit 1) - else - let oc = open_out (Sys.argv.(2) ^ ".config") in - let exe = ".exe" in - let (ocaml, suffix) = - let s = Sys.executable_name in - if Filename.check_suffix s exe then - (Filename.chop_suffix s exe, exe) - else - (s, "") - in - let ocamlc = ocaml^"c"^suffix in - let libdir = - if Sys.command (ocamlc^" -where > where") = 0 then - (* Must be opened in text mode for Windows *) - let ic = open_in "where" in - let r = input_line ic in - close_in ic; r - else - failwith "Bad return from 'ocamlc -where'" - in - let stubsdir = - let ic = open_in (Filename.concat libdir "ld.conf") in - let rec r acc = try r (input_line ic::acc) with End_of_file -> acc in - let lines = List.rev (r []) in - close_in ic; - let sep = if Sys.os_type = "Win32" then ";" else ":" in - String.concat sep lines - in - let has_native_dynlink = - let check_dir libdir = - Sys.file_exists (Filename.concat libdir "dynlink.cmxa") - in - List.exists check_dir [Filename.concat libdir "dynlink"; libdir] - in - let p fmt = Printf.fprintf oc 
(fmt ^^ "\n") in - p "opam-version: \"2.0\""; - p "variables {"; - p " native: %%b" - (Sys.file_exists (ocaml^"opt"^suffix)); - p " native-tools: %%b" - (* The variable [ocamlc] already has a suffix on Windows - (ex. '...\bin\ocamlc.exe') so we use [ocaml] to check *) - (Sys.file_exists (ocaml^"c.opt"^suffix)); - p " native-dynlink: %%b" - has_native_dynlink; - p " stubsdir: %%S" - stubsdir; - p " preinstalled: %{ocaml-system:installed}%"; - p " compiler: \"%{ocaml-system:installed?system:}%%{ocaml-base-compiler:version}%%{dkml-base-compiler:version}%%{ocaml-variants:version}%%{ocaml-option-32bit:installed?+32bit:}%%{ocaml-option-afl:installed?+afl:}%%{ocaml-option-bytecode-only:installed?+bytecode-only:}%%{ocaml-option-default-unsafe-string:installed?+default-unsafe-string:}%%{ocaml-option-fp:installed?+fp:}%%{ocaml-option-flambda:installed?+flambda:}%%{ocaml-option-musl:installed?+musl:}%%{ocaml-option-nnp:installed?+nnp:}%%{ocaml-option-no-flat-float-array:installed?+no-flat-float-array:}%%{ocaml-option-spacetime:installed?+spacetime:}%%{ocaml-option-static:installed?+static:}%\""; - p "}"; - close_out oc diff --git a/tools/opam/generate.ml b/tools/opam/generate.ml new file mode 100644 index 000000000000..d0084d9b2ee8 --- /dev/null +++ b/tools/opam/generate.ml @@ -0,0 +1,235 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* David Allsopp, University of Cambridge & Tarides *) +(* *) +(* Copyright 2025 David Allsopp Ltd. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. 
*) +(* *) +(**************************************************************************) + +(* This script is called from the root of the repository at the end of + `make INSTALL_MODE= install` and is responsible for converting + the various files generated by the installation backend into final output. + Parameters are the following Makefile variables: + $1 = $(INSTALL_MODE) (opam or clone) + $2 = $(OPAM_PACKAGE_NAME) + $3 = $(LN) *) + +let exit_because fmt = Printf.ksprintf (fun s -> prerr_endline s; exit 1) fmt + +let () = + if Array.length Sys.argv <> 4 + || Sys.argv.(1) <> "clone" && Sys.argv.(1) <> "opam" then begin + exit_because "Invalid command line arguments" + end + +let mode = Sys.argv.(1) +let package = Sys.argv.(2) +let ln_command = Sys.argv.(3) + +let output_endline oc = Printf.kfprintf (fun oc -> output_char oc '\n') oc + +let write_install_lines oc file = + In_channel.with_open_text file @@ + In_channel.fold_lines (fun _ -> output_endline oc " %s") () + +let remove_file = Sys.remove + +let output_section oc section = + let file = "opam-" ^ section in + if Sys.file_exists file then begin + let section = + if section = "lib" || section = "libexec" then + section ^ "_root" + else + section + in + output_endline oc {|%s: [ +%a]|} section write_install_lines file; + remove_file file + end + +(* See note in Makefile.common *) +let valid_in_path = function '\'' | '"' | '\\' -> false | _ -> true +let valid_in_section c = c <> '@' && valid_in_path c +let valid_path path = + if String.for_all valid_in_path path then + path + else + exit_because "%S contains characters invalid in a path" path +let valid_section dir = + if String.for_all valid_in_section dir then + dir + else + exit_because "%S contains characters invalid in a section" dir + +(* [generate_install file] processes then erases opam-bin, opam-lib opam-libexec + and opam-man to produce [file] *) +let generate_install file = + Out_channel.with_open_text file @@ fun oc -> + List.iter (output_section 
oc) ["bin"; "lib"; "libexec"; "man"]; + output_endline oc {|share_root: [ + "config.cache" {"ocaml/config.cache"} + "config.status" {"ocaml/config.status"} +]|} + +(* [process_clone oc process] processes clone-* in the current directory, + emitting mkdir commands to [oc] and passing the directory name and a channel + set to the start of each clone file to [process]. The clone files are erased + after processing. *) +let process_clone oc process = + let process_file file = + if String.starts_with ~prefix:"clone-" file then begin + let dir = + String.map (function '@' -> '/' | c -> c) + (String.sub file 6 (String.length file - 6)) + |> valid_section + in + output_endline oc {|mkdir -p "$1"'/%s'|} dir; + In_channel.with_open_text file @@ process oc dir; + remove_file file + end + in + let files = Sys.readdir Filename.current_dir_name in + Array.sort String.compare files; + Array.iter process_file files + +(* [process_symlinks oc ~mkdir] processes create-symlinks, if it exists, writing + any required mkdir commands to [oc] if [~mkdir = true] and also the + appropriate ln / mklink commands. create-symlinks is erased after + processing. 
*) +let process_symlinks oc ~mkdir = + let module StringSet = Set.Make(String) in + let file = "create-symlinks" in + if Sys.file_exists file then + let lines = + let parse acc line = + match String.split_on_char ' ' line with + | [dir; target; source] -> + (valid_section dir, valid_path target, valid_path source)::acc + | _ -> + exit_because "Invalid line encountered in create-symlinks" + in + In_channel.with_open_text file @@ fun ic -> + List.rev (In_channel.fold_lines parse [] ic) + in + output_endline oc {|cd "$1"|}; + let _ = + let create_dir seen (dir, _, _) = + if not (StringSet.mem dir seen) && String.contains dir '/' then + output_endline oc {|mkdir -p '%s'|} dir; + StringSet.add dir seen + in + List.fold_left create_dir StringSet.empty (if mkdir then lines else []) + in + if not Sys.win32 then + let ln (dir, target, source) = + output_endline oc {|%s '%s' '%s/%s'|} ln_command target dir source + in + List.iter ln lines + else begin + let mklink (dir, target, source) = + (* Convert all slashes to _two_ backslashes *) + let to_backslashes oc s = + output_string oc (String.concat {|\\|} (String.split_on_char '/' s)) + in + output_endline oc + {| cmd /c "mklink %a\\%s %s"|} to_backslashes dir source target + and cp (dir, target, source) = + output_endline oc {| $CP '%s/%s' '%s/%s'|} dir target dir source + in + output_endline oc {|cmd /c "mklink __ln_test mklink-test"|}; + output_endline oc {|if test -L "$1/__ln_test"; then|}; + List.iter mklink lines; + output_endline oc {|else|}; + List.iter cp lines; + output_endline oc {|fi|}; + output_endline oc {|rm -f __ln_test|} + end; + remove_file file + +let copy_files oc dir = + In_channel.fold_lines (fun _ line -> + match String.split_on_char ' ' line with + | [source; dest] -> + let source = valid_path source in + let dest = valid_path dest in + output_endline oc {|cp '%s' "$1"'/%s/%s'|} source dir dest + | _ -> + exit_because "Invalid line encountered in clone files") () + +let clone_files oc dir ic = + 
output_endline oc + {|dest="$1"'/%s' xargs sh "$1/clone-files" <<'EOF'|} dir; + In_channel.fold_lines (fun _ -> output_endline oc "%s") () ic; + output_endline oc {|EOF|} + +let () = + if mode = "opam" then begin + generate_install (package ^ ".install"); + (* The script must be written with Unix line-endings on Windows *) + Out_channel.with_open_bin (package ^ "-fixup.sh") @@ fun oc -> + output_endline oc {|#!/bin/sh +set -eu|}; + process_clone oc copy_files; + process_symlinks oc ~mkdir:true + end else begin + (* Don't pass -p to cp on Windows - it's never going to be relevant (no + execute bit which needs preserving) and there are scenarios in which it's + more likely to fail than add anything useful (especially if copying from + a Cygwin-managed build directory to /cygdrive) *) + let preserve = if Sys.win32 then "" else "p" in + (* The script must be written with Unix line-endings on Windows *) + Out_channel.with_open_bin (package ^ "-clone.sh") @@ fun oc -> + output_endline oc {|#!/bin/sh +set -eu +mkdir -p "$1" +rm -f "$1/__cp_test" "$1/__ln_test" +if cp --reflink=always doc/ocaml/LICENSE "$1/__cp_test" 2>/dev/null; then + rm -f "$1/__cp_test" + CP='cp --reflink=always -%sf' + if ! test -e "$1/clone-files"; then + echo "$CP"' "$@" "$dest/"' > "$1/clone-files" + fi +else + CP='cp -%sf' + if ! test -e "$1/clone-files"; then + if ln -f doc/ocaml/LICENSE "$1/__ln_test" 2>/dev/null; then + rm -f "$1/__ln_test" + echo 'ln -f "$@" "$dest/"' > "$1/clone-files" + else + echo "$CP"' "$@" "$dest/"' > "$1/clone-files" + fi + fi +fi|} preserve preserve; + Out_channel.with_open_text "clone-share@ocaml" (fun oc -> + output_endline oc "share/ocaml/clone"; + if Sys.file_exists "config.cache" then + output_endline oc "share/ocaml/config.cache"); + process_clone oc clone_files; + (* ld.conf is a configuration file, so is always copied. + Makefile.config and config.status will both contain the original + prefix, which must be updated. 
*) + output_endline oc {|cp lib/ocaml/ld.conf "$1/lib/ocaml/ld.conf" +cat > "$1/prefix.awk" <<'ENDAWK' +{ + rest = $0 + while ((p = index(rest, ENVIRON["O"]))) { + printf "%%s%%s", substr(rest, 1, p-1), ENVIRON["N"] + rest = substr(rest, p + length(ENVIRON["O"])) + } + print rest +} +ENDAWK +prefix="$(sed -ne 's/^prefix *= *//p' lib/ocaml/Makefile.config)" +for file in lib/ocaml/Makefile.config share/ocaml/config.status; do + O="$prefix" N="$1" awk -f "$1/prefix.awk" "$file" > "$1/$file" +done +rm -f "$1/clone-files" "$1/prefix.awk"|}; + process_symlinks oc ~mkdir:false + end diff --git a/tools/opam/ocaml-config.install b/tools/opam/ocaml-config.install deleted file mode 100644 index 03a7de264c06..000000000000 --- a/tools/opam/ocaml-config.install +++ /dev/null @@ -1 +0,0 @@ -share: ["gen_ocaml_config.ml"] diff --git a/tools/opam/process.sh b/tools/opam/process.sh new file mode 100644 index 000000000000..bab1444e2d91 --- /dev/null +++ b/tools/opam/process.sh @@ -0,0 +1,190 @@ +#!/bin/sh +#************************************************************************** +#* * +#* OCaml * +#* * +#* David Allsopp, University of Cambridge & Tarides * +#* * +#* Copyright 2025 David Allsopp Ltd. * +#* * +#* All rights reserved. This file is distributed under the terms of * +#* the GNU Lesser General Public License version 2.1, with the * +#* special exception on linking described in the file LICENSE. * +#* * +#************************************************************************** + +set -eu + +# POSIX.1-2024 (Issue 8) lifts this from being a bashism. The sub-shell dance is +# necessary because set is a builtin and is permitted to abort the script +# unconditionally on error. +if (set -o pipefail 2> /dev/null); then + set -o pipefail +fi + +# This script is responsible for building and cloning OCaml installations. It is +# invoked by both the build and install sections of an opam package. +# $1 = make command (the `make` variable in opam). 
This should be the path to +# a binary only and is invoked without word-splitting (i.e. any +# additional arguments should be passed in $2 and the command is invoked +# "$1"). +# $2 = additional arguments passed to "$1". This variable will be used +# unquoted - arguments with spaces cannot be passed. From the build +# section, this allows the -j argument to be specified. For the install +# section, this argument must be "install". +# $3 = opam build-id variable of this package. +# $4 = name of the opam package to be used when generating .install and +# .config files. +# The remaining arguments depend on the value of $2. When it is "install": +# $5 = installation prefix, which may be a native Windows path, rather than a +# Cygwin path. +# When $2 is not "install" (the build opam section): +# $5 = "enabled" if cloning the compiler from an existing switch is permitted +# and "disabled" to force the compiler to be built from sources. +# $6, and any further arguments are additional options to pass to `configure` +# if the compiler is built from sources. 
+ +make="$1" +make_args="$2" +build_id="$3" +package_name="$4" + +if [ x"$make_args" = 'xinstall' ]; then + prefix="$5" + + echo "📦 Installing the compiler to $prefix" + if [ -e 'config.status' ]; then + echo "📜 Using make install" + "$make" install + else + origin="$(tail -n 1 build-id)" + origin_prefix="$(opam var --safe --switch="$origin" prefix | tr -d '\r')" + echo "🪄 Duplicating $origin_prefix" + ( cd "$origin_prefix" && sh ./share/ocaml/clone "$prefix" ) + fi + + exit 0 +fi + +# Build the package + +cloning="$5" +shift 5 +# "$@" now expands to the correctly-quoted arguments to pass to configure + +origin='' +clone_mechanism='' +if [ x"$cloning" = 'xenabled' ]; then + echo "🕵️ Searching for a switch containing build-id $build_id" + + if [ -e "$OPAM_SWITCH_PREFIX/share/ocaml/build-id" ]; then + switch="$(tail -n 1 "$OPAM_SWITCH_PREFIX/share/ocaml/build-id")" + if [ -n "$switch" ]; then + switch_share_dir="$(opam var --safe --switch="$switch" share \ + | tr -d '\r')" + switch_build_id="$switch_share_dir/ocaml/build-id" + if [ -e "$switch_build_id" ]; then + if [ x"$build_id" = x"$(head -n 1 "$switch_build_id")" ]; then + echo "🔁 Prefer to re-clone from $switch" + echo "$switch" > opam-switches + origin="$switch" + if ln "$switch_build_id" __cp_test 2>/dev/null; then + rm __cp_test + clone_mechanism='hard-linking' + fi + fi + fi + fi + fi + + echo "🐫 Requesting list of switches from opam" + opam switch list --safe --short | tr -d '\r' | grep -Fxv "$OPAMSWITCH" \ + >> opam-switches 2> /dev/null || true + + while IFS= read -r switch; do + switch_share_dir="$(opam var --safe --switch="$switch" share | tr -d '\r')" + switch_build_id="$switch_share_dir/ocaml/build-id" + if [ -e "$switch_build_id" ]; then + if [ x"$build_id" = x"$(head -n 1 "$switch_build_id")" ]; then + # There are three ways of cloning a switch: + # - Copy-on-Write (cp --reflink=always) + # - Hard linking + # - Copy + # Copy-on-Write is the ideal - virtually no space overhead, but with + # defence 
against accidental subsequent alterations. Hard linking is + preferred over copying for the space-saving, and because the + compiler should not be subsequently altered. + if cp --reflink=always "$switch_build_id" __cp_test 2>/dev/null; then + rm __cp_test + echo "📝 - can reflink from: $switch" + origin="$switch" + clone_mechanism='copy-on-write' + break + elif ln "$switch_build_id" __cp_test 2>/dev/null; then + rm __cp_test + if [ -z "$clone_mechanism" ]; then + echo "🔗 - can hard link from: $switch" + origin="$switch" + clone_mechanism='hard-linking' + fi + elif [ -z "$origin" ]; then + echo "📄 - can copy from: $switch" + origin="$switch" + fi + elif [ -z "$origin" ]; then + echo "⛔ - different compiler: $switch" + fi + fi + done < opam-switches +fi + +{ echo "$build_id"; echo "$origin" ; } > build-id + +if [ -n "$origin" ]; then + + echo "🧬 Will clone the compiler from $origin" + test -n "$clone_mechanism" || clone_mechanism='copying' + + cloned='true' + clone_source="$(sed -e '1d;s/\\/\\\\/g;s/%/%%/g;s/"/\\"/g' build-id)" + case "$origin" in + */*|*\\*) clone_source="local switch $clone_source";; + *) clone_source="global switch $clone_source";; + esac + + cat > "$package_name.install" <<'EOF' +share_root: [ + "build-id" {"ocaml/build-id"} +] +EOF + +else + + echo "🏗️ Will build the compiler from sources" + + cloned='false' + clone_source='' + + ./configure --cache-file=config.cache "$@" + "$make" $make_args + "$make" OPAM_PACKAGE_NAME=ocaml-compiler INSTALL_MODE=clone install + + cat > "$package_name.install" <<'EOF' +share_root: [ + "build-id" {"ocaml/build-id"} + "ocaml-compiler-clone.sh" {"ocaml/clone"} + "config.cache" {"ocaml/config.cache"} + "config.status" {"ocaml/config.status"} +] +EOF +fi + +# Create the .config file +cat > "$package_name.config" < "ocamlprof.dump" + match Sys.getenv_opt "OCAMLPROF_DUMP" with + | None | Some "" -> "ocamlprof.dump" + | Some file -> file in begin try let ic = open_in_bin dumpfile in diff --git 
a/toplevel/byte/topeval.ml b/toplevel/byte/topeval.ml index 9fbba9e9ced8..6e562773912b 100644 --- a/toplevel/byte/topeval.ml +++ b/toplevel/byte/topeval.ml @@ -69,6 +69,16 @@ include Topcommon.MakeEvalPrinter(EvalBase) (* Load in-core and execute a lambda term *) +module Meta = struct + type closure = unit -> Obj.t + type bytecode + external reify_bytecode : + (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t -> + Instruct.debug_event list array -> string option -> bytecode * closure + = "caml_reify_bytecode" + external release_bytecode : bytecode -> unit = "caml_static_release_bytecode" +end + let may_trace = ref false (* Global lock on tracing *) let load_lambda ppf lam = @@ -240,6 +250,8 @@ let load_compunit ic filename ppf compunit = raise Load_failed end +external supports_shared_libraries : unit -> bool = "%shared_libraries" + let rec load_file recursive ppf name = let filename = try Some (Load_path.find name) with Not_found -> None @@ -283,6 +295,17 @@ and really_load_file recursive ppf name filename ic = let toc_pos = input_binary_int ic in (* Go to table of contents *) seek_in ic toc_pos; let lib = (input_value ic : library) in + if lib.lib_dllibs <> [] && not (supports_shared_libraries ()) then begin + let detail = + match lib.lib_dllibs with + | [_] -> "a shared library" + | _ -> "shared libraries" + in + fprintf ppf + "File %s requires %s to be loaded, which the runtime executing \ + this toplevel does not support.@." name detail; + raise Load_failed + end; List.iter (fun dllib -> let name = Dll.extract_dll_name dllib in diff --git a/toplevel/byte/trace.ml b/toplevel/byte/trace.ml index bddb4b3f7a89..bf50dcd8bfab 100644 --- a/toplevel/byte/trace.ml +++ b/toplevel/byte/trace.ml @@ -53,12 +53,15 @@ let set_code_pointer cls ptr = (* Call a traced function (use old code pointer, but new closure as environment so that recursive calls are also traced). 
- It is necessary to wrap Meta.invoke_traced_function in an ML function + It is necessary to wrap caml_invoke_traced_function in an ML function so that the RETURN at the end of the ML wrapper takes us to the code of the function. *) +external caml_invoke_traced_function: + Obj.raw_data -> Obj.t -> Obj.t -> Obj.t = "caml_invoke_traced_function" + let invoke_traced_function codeptr env arg = - Meta.invoke_traced_function codeptr env arg + caml_invoke_traced_function codeptr env arg let print_label ppf l = if l <> Asttypes.Nolabel then fprintf ppf "%s:" diff --git a/toplevel/topcommon.ml b/toplevel/topcommon.ml index 19d845767153..65cb5e0970b3 100644 --- a/toplevel/topcommon.ml +++ b/toplevel/topcommon.ml @@ -294,6 +294,7 @@ let set_paths ?(auto_include=Compmisc.auto_include) ?(dir="") () = let update_search_path_from_env () = let extra_paths = let env = Sys.getenv_opt "OCAMLTOP_INCLUDE_PATH" in + (* NB: Misc.split_path_contents "" = [] *) Option.fold ~none:[] ~some:Misc.split_path_contents env in Clflags.include_dirs := List.rev_append extra_paths !Clflags.include_dirs diff --git a/utils/ccomp.ml b/utils/ccomp.ml index d6818ef093a5..8effc9b289e7 100644 --- a/utils/ccomp.ml +++ b/utils/ccomp.ml @@ -153,16 +153,15 @@ let create_archive archive file_list = (quote_files ~response_files:Config.ar_supports_response_files file_list)) -let expand_libname cclibs = - cclibs |> List.map (fun cclib -> - if String.starts_with ~prefix:"-l" cclib then - let libname = - "lib" ^ String.sub cclib 2 (String.length cclib - 2) ^ Config.ext_lib in - try - Load_path.find libname - with Not_found -> - libname - else cclib) +let expand_libname cclib = + if String.starts_with ~prefix:"-l" cclib then + let libname = + "lib" ^ String.sub cclib 2 (String.length cclib - 2) ^ Config.ext_lib in + try + Some (Load_path.find libname) + with Not_found -> + None + else Some cclib type link_mode = | Exe @@ -182,11 +181,16 @@ let call_linker mode output_name files extra = Profile.record_call "c-linker" 
(fun () -> let cmd = if mode = Partial then - let (l_prefix, files) = + let l_prefix = match Config.ccomp_type with - | "msvc" -> ("/libpath:", expand_libname files) - | _ -> ("-L", files) + | "msvc" -> "/libpath:" + | _ -> "-L" in + (* For partial linking, only include -llib if -llib can be found in the + current search path. For ld -r, this PATH is (usually) limited to the + -L directories. This should cause OCaml libraries to be linked, but + not any system libraries mentioned in .cma/.cmxa files. *) + let files = List.filter_map expand_libname files in Printf.sprintf "%s%s %s %s %s" Config.native_pack_linker (Filename.quote output_name) diff --git a/utils/clflags.ml b/utils/clflags.ml index 4c7e6fd020bb..0b45ee819436 100644 --- a/utils/clflags.ml +++ b/utils/clflags.ml @@ -50,6 +50,8 @@ let compile_only = ref false (* -c *) and output_name = ref (None : string option) (* -o *) and include_dirs = ref ([] : string list) (* -I *) and hidden_include_dirs = ref ([] : string list) (* -H *) +and runtime_parameters = (* -set-runtime-default *) + (Hashtbl.create 16 : (string, string) Hashtbl.t) and standard_library_default = ref None (* -set-runtime-default *) and no_std_include = ref false (* -nostdlib *) and no_cwd = ref false (* -nocwd *) diff --git a/utils/clflags.mli b/utils/clflags.mli index 8d12d167f73c..ee46f6b8dadd 100644 --- a/utils/clflags.mli +++ b/utils/clflags.mli @@ -76,6 +76,7 @@ val compile_only : bool ref val output_name : string option ref val include_dirs : string list ref val hidden_include_dirs : string list ref +val runtime_parameters : (string, string) Hashtbl.t val standard_library_default : string option ref val no_std_include : bool ref val no_cwd : bool ref diff --git a/utils/config.common.ml.in b/utils/config.common.ml.in index b1545d86c946..7cabf2091e32 100644 --- a/utils/config.common.ml.in +++ b/utils/config.common.ml.in @@ -43,13 +43,16 @@ let standard_library_relative = Some standard_library_default_raw let standard_library = - try - 
Sys.getenv "OCAMLLIB" - with Not_found -> - try - Sys.getenv "CAMLLIB" - with Not_found -> - standard_library_default + match Sys.getenv_opt "OCAMLLIB" with + | None | Some "" -> + begin match Sys.getenv_opt "CAMLLIB" with + | None | Some "" -> + standard_library_default + | Some value -> + value + end + | Some value -> + value let bindir = Option.value ~default:bindir relative_root_dir let target_bindir = diff --git a/utils/misc.ml b/utils/misc.ml index 9c338984728b..7d7721a65ce1 100644 --- a/utils/misc.ml +++ b/utils/misc.ml @@ -579,7 +579,7 @@ let path_separator = let split_path_contents ?(sep = path_separator) = function | "" -> [] - | s -> String.split_on_char sep s + | s -> List.filter ((<>) "") (String.split_on_char sep s) (* Hashtable functions *) diff --git a/utils/misc.mli b/utils/misc.mli index 6b4be202c0cd..717fb7704294 100644 --- a/utils/misc.mli +++ b/utils/misc.mli @@ -246,7 +246,9 @@ val split_path_contents: ?sep:char -> string -> string list directories. [s] is split using the platform-specific delimiter, or [~sep] if it is passed. - Returns the empty list if [s] is empty. *) + Returns the empty list if [s] is empty. Empty strings are filtered + out - include [Filename.current_dir_name] explicitly if the current + working directory is supposed to be searched. *) val copy_file: in_channel -> out_channel -> unit (** [copy_file ic oc] reads the contents of file [ic] and copies diff --git a/yacc/main.c b/yacc/main.c index 59cf02d5c223..6dead9327764 100644 --- a/yacc/main.c +++ b/yacc/main.c @@ -290,14 +290,18 @@ void create_file_names(void) #ifdef _WIN32 tmpdir = _wgetenv(L"TEMP"); - if (tmpdir == 0) tmpdir = L"."; + /* Ensure tmpdir is neither NULL nor zero-length */ + if (tmpdir == 0 || *tmpdir == 0) tmpdir = L"."; #else tmpdir = getenv("TMPDIR"); - if (tmpdir == 0) tmpdir = "/tmp"; + /* Write to /tmp instead of . 
if TMPDIR is "Set But Null" (also ensures + tmpdir is neither NULL nor zero-length) */ + if (tmpdir == 0 || *tmpdir == 0) tmpdir = "/tmp"; #endif len = strlen_os(tmpdir); i = len + sizeof(temp_form); - if (len && tmpdir[len-1] != dirsep) + /* Technically, tmpdir != NULL && *tmpdir != 0 - i.e. len > 0 */ + if (tmpdir[len-1] != dirsep) ++i; action_file_name = MALLOC(i * sizeof(char_os)); @@ -311,7 +315,7 @@ void create_file_names(void) strcpy_os(entry_file_name, tmpdir); strcpy_os(text_file_name, tmpdir); - if (len && tmpdir[len - 1] != dirsep) + if (tmpdir[len - 1] != dirsep) { action_file_name[len] = dirsep; entry_file_name[len] = dirsep;