From 9cb14796418302c551cec2ceac13287152bb953b Mon Sep 17 00:00:00 2001 From: Maor Leger Date: Thu, 4 Jun 2026 20:54:11 +0000 Subject: [PATCH] chore: drop command-line-args/usage and @swc/helpers from runtime deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I noticed that everything we list under root `dependencies` gets propagated verbatim into every published per-target package.json via `gulp/package-task.js`, so each entry is something every downstream `npm install apache-arrow` has to pull in. This PR trims that list from 9 down to 4 (`@types/node`, `flatbuffers`, `json-with-bigint`, `tslib` stay). Two pieces here: Moving `@swc/helpers` to devDependencies. The published library and CLI are both compiled by TSC (`gulp-typescript` + `tsconfig.bin.cjs.json`), and `importHelpers: true` in our tsconfig means the emitted code imports from `tslib`, never `@swc/helpers` — `grep -r '@swc/helpers' targets/` on a freshly built tree returns zero matches in any `.js`/`.mjs`/`.d.ts`. The only path that touches SWC at all is the dev-time bin shebangs that go through `@swc-node/register/esm-register`, and that path keeps working with `@swc/helpers` in devDeps (verified by removing it from `dependencies`, blowing away node_modules + the lockfile, reinstalling, and running both the test suite and `bin/file-to-stream.ts` end-to-end). I considered deleting it entirely since `@swc-node/register` indirectly pulls it in, but keeping it as an explicit devDep felt safer in case upstream changes that. Per the [SWC docs](https://swc.rs/docs/configuration/compilation#jscexternalhelpers), `externalHelpers` is a bundle-size knob — it doesn't enable any features — and most of our source needs zero helper emissions anyway because the targets are modern enough that `async`/`await`/`class extends` compile to themselves. Replacing `command-line-args`/`command-line-usage` with `node:util.parseArgs`. The only piece of the published package that actually needs an argv parser is the `arrow2csv` CLI. 
`node:util.parseArgs` was added in Node 18.3 (`tokens` in 18.7, `default` in 18.11) and has been stable (non-experimental) since Node 20.0, which is exactly our `engines.node: '>=20.0'` floor, so we don't strictly need the two libraries (plus their transitive trees of `chalk`, `lodash.camelcase`, `table-layout`, etc.) to be installed alongside `apache-arrow` for every consumer. This PR adds a small internal helper at `src/bin/cli.ts` that wraps `parseArgs` and provides a plain-text `formatUsage`. The one tricky bit was that `command-line-args` treats `multiple: true` greedily — `-s a b c` puts all three into `schema` — while `parseArgs` only honours the repeated-flag form. I wanted to keep `arrow2csv` behaviour identical, so the helper opts in to `tokens: true` and reattributes positionals that follow a `multiple: true` flag back to that flag's array. The unit tests in `test/unit/bin/cli-tests.ts` lock in the greedy and non-greedy cases side-by-side. The dev-only `bin/integration.ts`, `bin/json-to-arrow.ts`, and `gulp/argv.js` get migrated over in the same pass. The new helper is intentionally not re-exported from `src/Arrow.ts`, so it isn't public API. A few small things that `command-line-args` and `command-line-usage` support that this helper doesn't (none currently used by anything in `arrow2csv`): - Custom `type: (val) => ...` coercion functions - `lazyMultiple`, `defaultOption`, `group`, `camelCase`, `stopAtFirstUnknown` - ANSI styling (`{bold ...}` / `{underline ...}` markers are stripped to plain text rather than coloured) - Text wrapping of descriptions and `typeLabel` rendering in the help screen — the `typeLabel` field is accepted on the spec but not rendered. None of our help text is wide enough to need wrapping. Happy to add any of these back if reviewers prefer; my read is that the unused-feature surface isn't worth re-implementing. 
Validation: - `npm run lint:ci` clean - `npm test -- -t src` passes (46 suites, ~11.7k tests, plus the new cli-tests.ts) - `npm run build` succeeds for all 10 target/format combos - `npm run test:bundle` (esbuild + rollup + webpack) all run clean - End-to-end smoke tests of `node targets/apache-arrow/bin/arrow2csv.js` pass for `--help`, `-f FILE`, greedy `-s id name -f FILE`, and `--sep ' , '` --- bin/integration.ts | 13 ++-- bin/json-to-arrow.ts | 11 +-- gulp/argv.js | 36 ++++++--- package-lock.json | 141 ++-------------------------------- package.json | 6 +- src/bin/arrow2csv.ts | 11 +-- src/bin/cli.ts | 114 ++++++++++++++++++++++++++++ test/unit/bin/cli-tests.ts | 151 +++++++++++++++++++++++++++++++++++++ 8 files changed, 318 insertions(+), 165 deletions(-) create mode 100644 src/bin/cli.ts create mode 100644 test/unit/bin/cli-tests.ts diff --git a/bin/integration.ts b/bin/integration.ts index d1ba87c9..de7101a4 100755 --- a/bin/integration.ts +++ b/bin/integration.ts @@ -21,7 +21,7 @@ import * as fs from 'node:fs'; import * as Path from 'node:path'; import { glob } from 'glob'; import { zip } from 'ix/iterable/zip'; -import commandLineArgs from 'command-line-args'; +import { parseCliArgs, formatUsage } from '../src/bin/cli.ts'; import { parseArrowJSON } from '../src/util/json.ts'; import { @@ -35,7 +35,7 @@ import { } from '../index.ts'; const { createElementComparator } = util; -const argv = commandLineArgs(cliOpts(), { partial: true }); +const { values: argv, positionals } = parseCliArgs(cliOpts(), process.argv.slice(2)); const exists = async (p: string) => { try { @@ -47,9 +47,9 @@ const exists = async (p: string) => { if (!argv.mode) { return print_usage(); } - const mode = argv.mode.toUpperCase(); - let jsonPaths = [...(argv.json || [])]; - let arrowPaths = [...(argv.arrow || [])]; + const mode = (argv.mode as string).toUpperCase(); + let jsonPaths = [...(argv.json as string[] | undefined ?? [])]; + let arrowPaths = [...(argv.arrow as string[] | undefined ?? 
[])]; if (mode === 'VALIDATE' && jsonPaths.length === 0) { [jsonPaths, arrowPaths] = await loadLocalJSONAndArrowPathsForDebugging(jsonPaths, arrowPaths); @@ -104,7 +104,7 @@ function cliOpts() { } function print_usage() { - console.log(require('command-line-usage')([ + console.log(formatUsage([ { header: 'integration', content: 'Script for running Arrow integration tests' @@ -121,6 +121,7 @@ function print_usage() { ...cliOpts(), { name: 'help', + type: Boolean, description: 'Print this usage guide.' } ] diff --git a/bin/json-to-arrow.ts b/bin/json-to-arrow.ts index 2600964d..edfd4cfa 100755 --- a/bin/json-to-arrow.ts +++ b/bin/json-to-arrow.ts @@ -19,14 +19,14 @@ import * as fs from 'node:fs'; import * as Path from 'node:path'; -import commandLineArgs from 'command-line-args'; +import { parseCliArgs, formatUsage } from '../src/bin/cli.ts'; import { finished as eos } from 'node:stream/promises'; import { parseArrowJSON } from '../src/util/json.ts'; import { RecordBatchReader, RecordBatchFileWriter, RecordBatchStreamWriter } from '../index.ts'; -const argv = commandLineArgs(cliOpts(), { partial: true }); -const jsonPaths = [...(argv.json || [])]; -const arrowPaths = [...(argv.arrow || [])]; +const { values: argv } = parseCliArgs(cliOpts(), process.argv.slice(2)); +const jsonPaths = [...(argv.json as string[] | undefined ?? [])]; +const arrowPaths = [...(argv.arrow as string[] | undefined ?? [])]; (async () => { @@ -81,7 +81,7 @@ function cliOpts() { } function print_usage() { - console.log(require('command-line-usage')([ + console.log(formatUsage([ { header: 'json-to-arrow', content: 'Script for converting a JSON Arrow file to a binary Arrow file' @@ -98,6 +98,7 @@ function print_usage() { ...cliOpts(), { name: 'help', + type: Boolean, description: 'Print this usage guide.' 
} ] diff --git a/gulp/argv.js b/gulp/argv.js index 967644ea..34746939 100644 --- a/gulp/argv.js +++ b/gulp/argv.js @@ -15,18 +15,32 @@ // specific language governing permissions and limitations // under the License. -import args from 'command-line-args'; -export const argv = args([ - { name: `all`, type: Boolean }, - { name: 'verbose', alias: `v`, type: Boolean }, - { name: `target`, type: String, defaultValue: `` }, - { name: `module`, type: String, defaultValue: `` }, - { name: `coverage`, type: Boolean, defaultValue: false }, - { name: `tests`, type: String, multiple: true, defaultValue: [`test/unit/`] }, - { name: `targets`, alias: `t`, type: String, multiple: true, defaultValue: [] }, - { name: `modules`, alias: `m`, type: String, multiple: true, defaultValue: [] }, -], { partial: true }); +import { parseArgs } from 'node:util'; +const { values, tokens } = parseArgs({ + options: { + all: { type: 'boolean' }, + verbose: { type: 'boolean', short: 'v' }, + target: { type: 'string', default: '' }, + module: { type: 'string', default: '' }, + coverage: { type: 'boolean', default: false }, + tests: { type: 'string', multiple: true, default: ['test/unit/'] }, + targets: { type: 'string', short: 't', multiple: true, default: [] }, + modules: { type: 'string', short: 'm', multiple: true, default: [] }, + }, + args: process.argv.slice(2), + strict: false, + allowPositionals: true, + tokens: true, +}); + +// Expose unknown/positional tokens under `_unknown` for compatibility with +// the previous command-line-args partial mode used by gulp/test-task.js. 
+values._unknown = tokens + .filter((t) => t.kind === 'positional') + .map((t) => t.value); + +export const argv = values; export const { targets, modules } = argv; if (argv.target === `src`) { diff --git a/package-lock.json b/package-lock.json index 136737b2..6f5f64d9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,12 +9,7 @@ "version": "21.1.0", "license": "Apache-2.0", "dependencies": { - "@swc/helpers": "^0.5.11", - "@types/command-line-args": "^5.2.3", - "@types/command-line-usage": "^5.0.4", "@types/node": "^25.2.0", - "command-line-args": "^6.0.1", - "command-line-usage": "^7.0.1", "flatbuffers": "^25.1.24", "json-with-bigint": "^3.5.3", "tslib": "^2.6.2" @@ -29,6 +24,7 @@ "@rollup/stream": "3.0.1", "@swc-node/register": "1.11.1", "@swc/core": "1.15.32", + "@swc/helpers": "^0.5.11", "@types/benchmark": "2.1.5", "@types/glob": "8.1.0", "@types/jest": "29.5.14", @@ -3800,6 +3796,7 @@ "version": "0.5.19", "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.19.tgz", "integrity": "sha512-QamiFeIK3txNjgUTNppE6MiG3p7TdninpZu0E0PbqVh1a9FNLT2FRhisaa4NcaX52XVhA5l7Pk58Ft7Sqi/2sA==", + "dev": true, "license": "Apache-2.0", "dependencies": { "tslib": "^2.8.0" @@ -3878,18 +3875,6 @@ "dev": true, "license": "MIT" }, - "node_modules/@types/command-line-args": { - "version": "5.2.3", - "resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.3.tgz", - "integrity": "sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw==", - "license": "MIT" - }, - "node_modules/@types/command-line-usage": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/@types/command-line-usage/-/command-line-usage-5.0.4.tgz", - "integrity": "sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg==", - "license": "MIT" - }, "node_modules/@types/eslint": { "version": "8.56.7", "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-8.56.7.tgz", @@ 
-4692,6 +4677,7 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, "license": "MIT", "dependencies": { "color-convert": "^2.0.1" @@ -4767,15 +4753,6 @@ "node": ">=0.10.0" } }, - "node_modules/array-back": { - "version": "6.2.3", - "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.3.tgz", - "integrity": "sha512-SGDvmg6QTYiTxCBkYVmThcoa67uLl35pyzRHdpCGBOcqFy6BtwnphoFPk7LhJshD+Yk1Kt35WGWeZPTgwR4Fhw==", - "license": "MIT", - "engines": { - "node": ">=12.17" - } - }, "node_modules/array-each": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/array-each/-/array-each-1.0.1.tgz", @@ -5360,6 +5337,7 @@ "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, "license": "MIT", "dependencies": { "ansi-styles": "^4.1.0", @@ -5372,21 +5350,6 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, - "node_modules/chalk-template": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-0.4.0.tgz", - "integrity": "sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg==", - "license": "MIT", - "dependencies": { - "chalk": "^4.1.2" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/chalk-template?sponsor=1" - } - }, "node_modules/char-regex": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/char-regex/-/char-regex-1.0.2.tgz", @@ -5651,6 +5614,7 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, "license": "MIT", 
"dependencies": { "color-name": "~1.1.4" @@ -5663,6 +5627,7 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, "license": "MIT" }, "node_modules/color-support": { @@ -5682,44 +5647,6 @@ "dev": true, "license": "MIT" }, - "node_modules/command-line-args": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/command-line-args/-/command-line-args-6.0.2.tgz", - "integrity": "sha512-AIjYVxrV9X752LmPDLbVYv8aMCuHPSLZJXEo2qo/xJfv+NYhaZ4sMSF01rM+gHPaMgvPM0l5D/F+Qx+i2WfSmQ==", - "license": "MIT", - "dependencies": { - "array-back": "^6.2.3", - "find-replace": "^5.0.2", - "lodash.camelcase": "^4.3.0", - "typical": "^7.3.0" - }, - "engines": { - "node": ">=12.20" - }, - "peerDependencies": { - "@75lb/nature": "latest" - }, - "peerDependenciesMeta": { - "@75lb/nature": { - "optional": true - } - } - }, - "node_modules/command-line-usage": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-7.0.3.tgz", - "integrity": "sha512-PqMLy5+YGwhMh1wS04mVG44oqDsgyLRSKJBdOo1bnYhMKBW65gZF1dRp2OZRhiTjgUHljy99qkO7bsctLaw35Q==", - "license": "MIT", - "dependencies": { - "array-back": "^6.2.2", - "chalk-template": "^0.4.0", - "table-layout": "^4.1.0", - "typical": "^7.1.1" - }, - "engines": { - "node": ">=12.20.0" - } - }, "node_modules/commander": { "version": "6.2.1", "resolved": "https://registry.npmjs.org/commander/-/commander-6.2.1.tgz", @@ -6956,23 +6883,6 @@ "node": ">=8" } }, - "node_modules/find-replace": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/find-replace/-/find-replace-5.0.2.tgz", - "integrity": "sha512-Y45BAiE3mz2QsrN2fb5QEtO4qb44NcS7en/0y9PEVsg351HsLeVclP8QPMH79Le9sH3rs5RSwJu99W0WPZO43Q==", - "license": "MIT", - "engines": { - "node": ">=14" - }, - "peerDependencies": { - "@75lb/nature": "latest" - }, - 
"peerDependenciesMeta": { - "@75lb/nature": { - "optional": true - } - } - }, "node_modules/find-up": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", @@ -8262,6 +8172,7 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -10084,12 +9995,6 @@ "dev": true, "license": "MIT" }, - "node_modules/lodash.camelcase": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", - "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==", - "license": "MIT" - }, "node_modules/lodash.clone": { "version": "4.5.0", "resolved": "https://registry.npmjs.org/lodash.clone/-/lodash.clone-4.5.0.tgz", @@ -12300,6 +12205,7 @@ "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, "license": "MIT", "dependencies": { "has-flag": "^4.0.0" @@ -12342,19 +12248,6 @@ "semver": "bin/semver.js" } }, - "node_modules/table-layout": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/table-layout/-/table-layout-4.1.1.tgz", - "integrity": "sha512-iK5/YhZxq5GO5z8wb0bY1317uDF3Zjpha0QFFLA8/trAoiLbQD0HUbMesEaxyzUgDxi2QlcbM8IvqOlEjgoXBA==", - "license": "MIT", - "dependencies": { - "array-back": "^6.2.2", - "wordwrapjs": "^5.1.0" - }, - "engines": { - "node": ">=12.17" - } - }, "node_modules/tapable": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.0.tgz", @@ -12950,15 +12843,6 @@ "typescript": ">=4.8.4 <6.0.0" } }, - "node_modules/typical": { - "version": "7.3.0", - "resolved": 
"https://registry.npmjs.org/typical/-/typical-7.3.0.tgz", - "integrity": "sha512-ya4mg/30vm+DOWfBg4YK3j2WD6TWtRkCbasOJr40CseYENzCUby/7rIvXA99JGsQHeNxLbnXdyLLxKSv3tauFw==", - "license": "MIT", - "engines": { - "node": ">=12.17" - } - }, "node_modules/uc.micro": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz", @@ -13557,15 +13441,6 @@ "node": ">= 8" } }, - "node_modules/wordwrapjs": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-5.1.0.tgz", - "integrity": "sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg==", - "license": "MIT", - "engines": { - "node": ">=12.17" - } - }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", diff --git a/package.json b/package.json index d78d1dff..0a666c9d 100644 --- a/package.json +++ b/package.json @@ -46,12 +46,7 @@ "jest.config.js" ], "dependencies": { - "@swc/helpers": "^0.5.11", - "@types/command-line-args": "^5.2.3", - "@types/command-line-usage": "^5.0.4", "@types/node": "^25.2.0", - "command-line-args": "^6.0.1", - "command-line-usage": "^7.0.1", "flatbuffers": "^25.1.24", "json-with-bigint": "^3.5.3", "tslib": "^2.6.2" @@ -63,6 +58,7 @@ "@rollup/stream": "3.0.1", "@swc-node/register": "1.11.1", "@swc/core": "1.15.32", + "@swc/helpers": "^0.5.11", "@types/benchmark": "2.1.5", "@types/glob": "8.1.0", "@types/jest": "29.5.14", diff --git a/src/bin/arrow2csv.ts b/src/bin/arrow2csv.ts index 2835b257..314e9472 100755 --- a/src/bin/arrow2csv.ts +++ b/src/bin/arrow2csv.ts @@ -23,12 +23,13 @@ import * as fs from 'node:fs'; import * as stream from 'node:stream'; import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue, util } from '../Arrow.js'; -import * as commandLineUsage from 'command-line-usage'; -import * as commandLineArgs from 'command-line-args'; +import { parseCliArgs, formatUsage } from './cli.js'; import { parseArrowJSON } from 
'../util/json.js'; -const argv = commandLineArgs(cliOpts(), { partial: true }); -const files = argv.help ? [] : [...(argv.file || []), ...(argv._unknown || [])].filter(Boolean); +const { values: argv, positionals } = parseCliArgs(cliOpts(), process.argv.slice(2)); +const files = argv.help + ? [] + : [...(argv.file as string[] | undefined ?? []), ...positionals].filter(Boolean); const state = { ...argv, closed: false, maxColWidths: [10] }; @@ -301,7 +302,7 @@ function cliOpts() { } function print_usage() { - console.log(commandLineUsage([ + console.log(formatUsage([ { header: 'arrow2csv', content: 'Print a CSV from an Arrow file' diff --git a/src/bin/cli.ts b/src/bin/cli.ts new file mode 100644 index 00000000..4b6e0f4f --- /dev/null +++ b/src/bin/cli.ts @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Thin wrapper over `node:util.parseArgs` for the apache-arrow bin scripts. +// Replaces `command-line-args` + `command-line-usage` to drop two runtime deps +// (and their transitive trees) from the published package. 
+ +import { parseArgs, type ParseArgsConfig } from 'node:util'; + +export interface OptionSpec { + name: string; + alias?: string; + type: StringConstructor | BooleanConstructor | NumberConstructor; + multiple?: boolean; + defaultValue?: unknown; + description?: string; + typeLabel?: string; + // Accepted for source-compat with the previous command-line-args specs; ignored. + optional?: boolean; + default?: unknown; +} + +export function parseCliArgs(spec: OptionSpec[], args: string[]) { + const options: NonNullable = {}; + for (const o of spec) { + options[o.name] = { + type: o.type === Boolean ? 'boolean' : 'string', + ...(o.alias && { short: o.alias }), + ...(o.multiple && { multiple: true }), + ...(o.defaultValue !== undefined && { default: o.defaultValue as never }), + }; + } + + const { values, tokens } = parseArgs({ + options, args, strict: false, allowPositionals: true, tokens: true, + }); + + // `parseArgs` only honours repeated multi-flags (`-s a -s b`). To match + // command-line-args' greedy behaviour (`-s a b c`), walk the token stream + // and route positionals to the most recently seen `multiple: true` flag + // until another option appears. + const multi = new Set(spec.filter((o) => o.multiple).map((o) => o.name)); + const out = values as Record; + const positionals: string[] = []; + let owner: string | null = null; + for (const tok of tokens) { + if (tok.kind === 'option') { + owner = multi.has(tok.name) ? tok.name : null; + } else if (tok.kind === 'option-terminator') { + owner = null; + } else if (tok.kind === 'positional') { + if (owner) { + const list = (out[owner] as string[] | undefined) ?? []; + out[owner] = [...list, tok.value]; + } else { + positionals.push(tok.value); + } + } + } + + // Coerce Number-typed values; parseArgs only parses as string or boolean. 
+ for (const o of spec) { + if (o.type !== Number) continue; + const v = out[o.name]; + if (Array.isArray(v)) out[o.name] = v.map(Number); + else if (typeof v === 'string') out[o.name] = Number(v); + } + + return { values: out, positionals }; +} + +export interface UsageSection { + header: string; + content?: string | string[]; + optionList?: OptionSpec[]; +} + +// Drops the {bold ...} / {underline ...} chalk markup that command-line-usage +// recognised, so existing call sites can keep their content strings unchanged. +const stripStyles = (s: string) => s.replaceAll(/\{(?:bold|underline)\s+(.*?)\}/g, '$1'); + +const formatOptionHead = (o: OptionSpec) => + `${o.alias ? `-${o.alias}, ` : ' '}--${o.name}`; + +export function formatUsage(sections: UsageSection[]): string { + const out: string[] = []; + for (const s of sections) { + out.push('', stripStyles(s.header), ''); + const content = typeof s.content === 'string' ? [s.content] : s.content ?? []; + for (const line of content) out.push(' ' + stripStyles(line)); + if (s.optionList?.length) { + const heads = s.optionList.map((o) => formatOptionHead(o)); + const width = Math.max(...heads.map((h) => h.length)); + for (const [i, o] of s.optionList.entries()) { + out.push(' ' + heads[i].padEnd(width + 2) + (o.description ?? '')); + } + } + } + return out.join('\n'); +} diff --git a/test/unit/bin/cli-tests.ts b/test/unit/bin/cli-tests.ts new file mode 100644 index 00000000..41bc2f74 --- /dev/null +++ b/test/unit/bin/cli-tests.ts @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { parseCliArgs, formatUsage, OptionSpec } from '../../../src/bin/cli.js'; + +describe(`parseCliArgs`, () => { + const spec: OptionSpec[] = [ + { name: 'schema', alias: 's', type: String, multiple: true }, + { name: 'file', alias: 'f', type: String, multiple: true }, + { name: 'sep', type: String, defaultValue: ' | ' }, + { name: 'count', type: Number }, + { name: 'metadata', alias: 'm', type: Boolean }, + { name: 'help', type: Boolean }, + ]; + + test('parses a boolean long flag', () => { + const r = parseCliArgs(spec, ['--help']); + expect(r.values.help).toBe(true); + expect(r.positionals).toEqual([]); + }); + + test('parses a short alias', () => { + const r = parseCliArgs(spec, ['-m']); + expect(r.values.metadata).toBe(true); + }); + + test('parses repeated multi-flag', () => { + const r = parseCliArgs(spec, ['-s', 'a', '-s', 'b', '-s', 'c']); + expect(r.values.schema).toEqual(['a', 'b', 'c']); + expect(r.positionals).toEqual([]); + }); + + test('parses space-delimited multi-flag (greedy)', () => { + const r = parseCliArgs(spec, ['-s', 'a', 'b', 'c']); + expect(r.values.schema).toEqual(['a', 'b', 'c']); + expect(r.positionals).toEqual([]); + }); + + test('greedy attribution stops at the next flag', () => { + const r = parseCliArgs(spec, ['-s', 'a', 'b', 'c', '-f', 'x.arrow']); + expect(r.values.schema).toEqual(['a', 'b', 'c']); + expect(r.values.file).toEqual(['x.arrow']); + expect(r.positionals).toEqual([]); + }); + + test('greedy attribution spans across multi-flags', () => { + const r = parseCliArgs(spec, ['-s', 'a', 'b', 'c', '-f', 
'x.arrow', 'pos.arrow']); + expect(r.values.schema).toEqual(['a', 'b', 'c']); + expect(r.values.file).toEqual(['x.arrow', 'pos.arrow']); + expect(r.positionals).toEqual([]); + }); + + test('positionals with no preceding multi-flag stay in positionals', () => { + const r = parseCliArgs(spec, ['file1.arrow', 'file2.arrow']); + expect(r.values.schema).toBeUndefined(); + expect(r.positionals).toEqual(['file1.arrow', 'file2.arrow']); + }); + + test('default values are populated when flag is absent', () => { + const r = parseCliArgs(spec, []); + expect(r.values.sep).toBe(' | '); + }); + + test('user-supplied value overrides default', () => { + const r = parseCliArgs(spec, ['--sep', ' , ']); + expect(r.values.sep).toBe(' , '); + }); + + test('unknown flags do not throw, and break greedy attribution', () => { + const r = parseCliArgs(spec, ['-s', 'a', '--unknown', 'b', '-f', 'x.arrow']); + expect(r.values.schema).toEqual(['a']); + expect(r.values.unknown).toBe(true); + expect(r.positionals).toEqual(['b']); + expect(r.values.file).toEqual(['x.arrow']); + }); + + test('Number type is coerced from string', () => { + const r = parseCliArgs(spec, ['--count', '42']); + expect(r.values.count).toBe(42); + expect(typeof r.values.count).toBe('number'); + }); + + test('combined: typical arrow2csv invocation', () => { + const r = parseCliArgs(spec, ['-s', 'foo', 'bar', '-f', 'simple.arrow', '-m']); + expect(r.values.schema).toEqual(['foo', 'bar']); + expect(r.values.file).toEqual(['simple.arrow']); + expect(r.values.metadata).toBe(true); + }); +}); + +describe(`formatUsage`, () => { + test('renders header and content sections', () => { + const out = formatUsage([ + { header: 'arrow2csv', content: 'Print a CSV from an Arrow file' }, + ]); + expect(out).toContain('arrow2csv'); + expect(out).toContain('Print a CSV from an Arrow file'); + }); + + test('strips {bold ...} and {underline ...} markup', () => { + const out = formatUsage([ + { header: 'Synopsis', content: ['$ arrow2csv {bold 
-s} col1 {underline file.arrow}'] }, + ]); + expect(out).toContain('$ arrow2csv -s col1 file.arrow'); + expect(out).not.toMatch(/\{bold/); + expect(out).not.toMatch(/\{underline/); + }); + + test('renders an option list with aliases and descriptions', () => { + const out = formatUsage([ + { + header: 'Options', + optionList: [ + { name: 'schema', alias: 's', type: String, multiple: true, description: 'Column names' }, + { name: 'help', type: Boolean, description: 'Print this usage guide.' }, + ], + }, + ]); + expect(out).toContain('-s, --schema'); + expect(out).toContain('Column names'); + expect(out).toContain('--help'); + expect(out).toContain('Print this usage guide.'); + }); + + test('option list renders correctly when no options have aliases', () => { + const out = formatUsage([ + { + header: 'Options', + optionList: [ + { name: 'mode', type: String, description: 'The mode' }, + ], + }, + ]); + expect(out).toContain('--mode'); + expect(out).toContain('The mode'); + }); +});