diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c18ad659..32ce9127a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. - Publish and document missing JSON schemas. Add `head_schema_url`/`baseline_schema_url` to diff v2. ([#1106](https://github.com/open-telemetry/weaver/pull/1106) by @lmolkova) - Add `--allow-git-credentials` global flag to enable system credential helpers (e.g. `osxkeychain`, `git-credential-manager`) when cloning private registries. By default, git operations remain isolated for security. ([#1306](https://github.com/open-telemetry/weaver/pull/1306) by @jerbly) - MCP: Add `browse_namespace` tool and `findings_only` output mode for `live_check`. Add configurable namespace separator via `--namespace-separator`. ([#1324](https://github.com/open-telemetry/weaver/pull/1324) by @jerbly) +- New feature ([#1344](https://github.com/open-telemetry/weaver/issues/1344)) - Support authenticated HTTP downloads of remote registries via `WEAVER_HTTP_AUTH_TOKEN` or `GITHUB_TOKEN` env vars, including GitHub private release assets. ([#1356](https://github.com/open-telemetry/weaver/pull/1356) by @jerbly) # [0.22.1] - 2026-03-13 diff --git a/crates/weaver_common/src/lib.rs b/crates/weaver_common/src/lib.rs index fecbe92fe..494718a78 100644 --- a/crates/weaver_common/src/lib.rs +++ b/crates/weaver_common/src/lib.rs @@ -81,6 +81,15 @@ pub enum Error { error: String, }, + /// A remote file download failed. + #[error("Failed to download remote file `{url}`: {error}")] + RemoteFileDownloadFailed { + /// The URL that was being downloaded + url: String, + /// The error message + error: String, + }, + /// An invalid registry archive. #[error("This archive `{archive}` is not supported. 
Supported formats are: .tar.gz, .zip")]
     UnsupportedRegistryArchive {
diff --git a/crates/weaver_common/src/test.rs b/crates/weaver_common/src/test.rs
index 55a92c1d2..7e9ad2462 100644
--- a/crates/weaver_common/src/test.rs
+++ b/crates/weaver_common/src/test.rs
@@ -3,6 +3,8 @@
 //! HTTP server for testing purposes.
 
 use std::path::PathBuf;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
 
 use rouille::{match_assets, Server};
 use std::sync::mpsc::Sender;
@@ -14,20 +16,39 @@ pub struct HttpServerError {
     error: String,
 }
 
-/// A struct that serves static files from a directory.
-pub struct ServeStaticFiles {
+/// Internal test HTTP server holding the kill switch and port.
+///
+/// Shared by every public test server in this module; dropping it asks the
+/// background server to stop.
+struct TestHttpServer {
     kill_switch: Sender<()>,
     port: u16,
 }
 
-impl Drop for ServeStaticFiles {
-    /// Stops the HTTP server.
+impl Drop for TestHttpServer {
+    /// Stops the HTTP server.
     fn drop(&mut self) {
-        // If we fail to kill the server, ignore it.
+        // Best effort: if the stop signal cannot be delivered, ignore the error.
         let _ = self.kill_switch.send(());
     }
 }
 
+impl TestHttpServer {
+    /// Takes ownership of a bound `rouille` server, records its port and keeps
+    /// the kill switch returned by `stoppable()`.
+    fn new(
+        server: Server<impl Fn(&rouille::Request) -> rouille::Response + Send + Sync + 'static>,
+    ) -> Self {
+        let port = server.server_addr().port();
+        let (_, kill_switch) = server.stoppable();
+        Self { kill_switch, port }
+    }
+
+    /// Returns the local port the server is bound to.
+    fn port(&self) -> u16 {
+        self.port
+    }
+
+    /// Builds `http://127.0.0.1:{port}/{file}` for a path relative to the server root.
+    fn relative_path_to_url(&self, file: &str) -> String {
+        format!("http://127.0.0.1:{}/{}", self.port, file)
+    }
+}
+
+/// A struct that serves static files from a directory.
+pub struct ServeStaticFiles(TestHttpServer);
+
 impl ServeStaticFiles {
     /// Creates a new HTTP server that serves static files from a directory.
     /// Note: This server is only available for testing purposes.
@@ -39,22 +60,150 @@ impl ServeStaticFiles {
             .map_err(|e| HttpServerError {
                 error: e.to_string(),
             })?;
-        let port = server.server_addr().port();
-        let (_, kill_switch) = server.stoppable();
-        Ok(Self { kill_switch, port })
+        Ok(Self(TestHttpServer::new(server)))
     }
 
     /// Returns the port of the server.
     #[must_use]
     pub fn port(&self) -> u16 {
-        self.port
+        self.0.port()
     }
 
     /// Returns the URL of a file.
     /// The file path should be relative to the static path.
     #[must_use]
     pub fn relative_path_to_url(&self, file: &str) -> String {
-        format!("http://127.0.0.1:{}/{}", self.port, file)
+        self.0.relative_path_to_url(file)
+    }
+}
+
+/// An HTTP server that requires Bearer token authentication to serve static files.
+/// Returns 401 Unauthorized if the `Authorization: Bearer <token>` header is missing or wrong.
+pub struct ServeStaticFilesWithAuth(TestHttpServer);
+
+impl ServeStaticFilesWithAuth {
+    /// Creates a new auth-checking HTTP server.
+    /// Only requests with `Authorization: Bearer <expected_token>` will receive files.
+    ///
+    /// Returns an [`HttpServerError`] if the server cannot be created.
+    pub fn from(
+        static_path: impl Into<PathBuf>,
+        expected_token: impl Into<String>,
+    ) -> Result<Self, HttpServerError> {
+        let static_path = static_path.into();
+        // The expected header value never changes, so build it once here
+        // instead of re-formatting it on every request.
+        let expected = format!("Bearer {}", expected_token.into());
+        let server = Server::new("127.0.0.1:0", move |request| {
+            let auth = request.header("Authorization").unwrap_or_default();
+            if auth != expected {
+                return rouille::Response::text("Unauthorized").with_status_code(401);
+            }
+            match_assets(request, &static_path)
+        })
+        .map_err(|e| HttpServerError {
+            error: e.to_string(),
+        })?;
+        Ok(Self(TestHttpServer::new(server)))
+    }
+
+    /// Returns the port of the server.
+    #[must_use]
+    pub fn port(&self) -> u16 {
+        self.0.port()
+    }
+
+    /// Returns the URL of a file.
+    /// The file path should be relative to the static path.
+    #[must_use]
+    pub fn relative_path_to_url(&self, file: &str) -> String {
+        self.0.relative_path_to_url(file)
+    }
+}
+
+/// A mock GitHub REST API server.
+///
+/// Serves `GET /repos/{owner}/{repo}/releases/tags/{tag}` with a release JSON
+/// body generated from the configured assets, and `GET /assets/{filename}` with
+/// caller-provided binary content. Any other path returns 404.
+///
+/// Counts the number of requests it received so tests can assert caching behavior.
+pub struct MockGitHubApi {
+    /// Underlying test server; stops when this value is dropped.
+    server: TestHttpServer,
+    /// Total number of requests handled, shared with the request handler.
+    request_count: Arc<AtomicUsize>,
+}
+
+/// Description of a single release served by [`MockGitHubApi`].
+pub struct MockRelease {
+    /// The repository owner (first component of the `{owner}/{repo}/{tag}` path).
+    pub owner: String,
+    /// The repository name.
+    pub repo: String,
+    /// The release tag.
+    pub tag: String,
+    /// The assets in the release: `(filename, content)` pairs. Each asset is
+    /// served at `/assets/{filename}` and the release JSON's `url` points to
+    /// that same path on this server.
+    pub assets: Vec<(String, Vec<u8>)>,
+}
+
+impl MockGitHubApi {
+    /// Start a server serving the given releases. Returns an error if the
+    /// server fails to bind to a local port.
+    pub fn start(releases: Vec<MockRelease>) -> Result<Self, HttpServerError> {
+        let request_count = Arc::new(AtomicUsize::new(0));
+        let counter = Arc::clone(&request_count);
+        let server = Server::new("127.0.0.1:0", move |request| {
+            // Every request is counted, including asset downloads and 404s.
+            _ = counter.fetch_add(1, Ordering::SeqCst);
+            let url = request.url();
+            for release in &releases {
+                let tags_path = format!(
+                    "/repos/{}/{}/releases/tags/{}",
+                    release.owner, release.repo, release.tag
+                );
+                if url == tags_path {
+                    // Build a release JSON where each asset's `url` points at
+                    // `/assets/{filename}` on this same server.
+                    // NOTE(review): the fallback host carries no port, so the
+                    // generated URLs are only usable when the client sends `Host`.
+                    let host = request.header("Host").unwrap_or("127.0.0.1");
+                    let assets_json: Vec<serde_json::Value> = release
+                        .assets
+                        .iter()
+                        .map(|(name, _)| {
+                            serde_json::json!({
+                                "name": name,
+                                "url": format!("http://{host}/assets/{name}"),
+                            })
+                        })
+                        .collect();
+                    let body = serde_json::json!({ "assets": assets_json });
+                    return rouille::Response::from_data("application/json", body.to_string());
+                }
+                // Asset paths are not namespaced per release: the first release
+                // that owns a matching filename wins.
+                for (name, content) in &release.assets {
+                    if url == format!("/assets/{name}") {
+                        return rouille::Response::from_data(
+                            "application/octet-stream",
+                            content.clone(),
+                        );
+                    }
+                }
+            }
+            rouille::Response::empty_404()
+        })
+        .map_err(|e| HttpServerError {
+            error: e.to_string(),
+        })?;
+        Ok(Self {
+            server: TestHttpServer::new(server),
+            request_count,
+        })
+    }
+
+    /// Base URL of the mock API (e.g. `http://127.0.0.1:12345`). Pass this to
+    /// `normalize_github_url_with_api_base` in tests.
+    #[must_use]
+    pub fn base_url(&self) -> String {
+        format!("http://127.0.0.1:{}", self.server.port())
+    }
+
+    /// Number of HTTP requests the server has handled.
+    #[must_use]
+    pub fn request_count(&self) -> usize {
+        self.request_count.load(Ordering::SeqCst)
     }
 }
diff --git a/crates/weaver_common/src/vdir.rs b/crates/weaver_common/src/vdir.rs
index 16f26d0a2..8634a958d 100644
--- a/crates/weaver_common/src/vdir.rs
+++ b/crates/weaver_common/src/vdir.rs
@@ -7,12 +7,24 @@
 //! - A local filesystem directory.
 //! - A local archive file (`.tar.gz` or `.zip`).
 //! - A remote archive file (`.tar.gz` or `.zip`) accessible via HTTP(S).
+//! - A remote individual file accessible via HTTP(S) (e.g. a published registry manifest).
 //! - A Git repository accessible via HTTP(S).
 //!
 //! It handles the fetching, extraction, and temporary storage management transparently.
 //!
-//! It uses a specific string format to represent these sources, potentially including
-//! Git refspecs (tags/branches/commits) or sub-folders within archives/repositories.
+//! # HTTP Authentication
+//!
+//! 
Remote downloads (both archives and individual files) support Bearer token authentication. +//! Set the token at startup via [`set_http_auth_token`], typically from the +//! `WEAVER_HTTP_AUTH_TOKEN` or `GITHUB_TOKEN` environment variable. Every HTTP request +//! sends a `User-Agent: weaver` header; when a token is configured, requests also +//! include an `Authorization: Bearer <token>` header. +//! +//! For GitHub private release assets, browser-style download URLs +//! (`https://github.com/{owner}/{repo}/releases/download/{tag}/{file}`) are automatically +//! normalized to GitHub API asset URLs, since the browser URLs do not support token-based +//! authentication. This applies to both individual files (e.g. `manifest.yaml`) and +//! archive assets (`.zip`, `.tar.gz`). The API release metadata is cached per release to +//! avoid redundant calls when downloading multiple assets from the same release. //! //! # String Format //! @@ -32,12 +44,29 @@ //! - Local archive with sub-folder: `data.zip[specific_dir]` //! - Git repo (default branch): `https://github.com/user/repo.git` //! - Git repo (tag `v1.0`, sub-folder `schemas`): `https://github.com/user/repo.git@v1.0[schemas]` +//! - Git repo without `.git` suffix (inferred from `@refspec` or `[sub_folder]`): +//! `https://github.com/user/repo@v1.0[schemas]` //! - Remote archive: `https://example.com/archive.tar.gz` //! - Remote archive with sub-folder: `https://example.com/archive.zip[data/files]` - -use crate::vdir::VirtualDirectoryPath::{GitRepo, LocalArchive, LocalFolder, RemoteArchive}; +//! - Remote file: `https://example.com/registry/manifest.yaml` +//! - GitHub release asset: `https://github.com/org/repo/releases/download/v1.0.0/manifest.yaml` +//! +//! # Disambiguating HTTP(S) URLs +//! +//! An HTTP(S) `source` is classified as follows (in order): +//! 1. `.zip` or `.tar.gz` suffix → remote archive (may carry a `[sub_folder]`). +//! 2. `.git` suffix, or presence of `@refspec` or `[sub_folder]` → Git repo. Once +//! 
archives are ruled out, a `@refspec` or `[sub_folder]` is a reliable signal
+//!    of a Git repo, so the `.git` suffix is not required.
+//! 3. Otherwise → remote file.
+
+use crate::vdir::VirtualDirectoryPath::{
+    GitRepo, LocalArchive, LocalFolder, RemoteArchive, RemoteFile,
+};
 use crate::Error;
-use crate::Error::{GitError, InvalidRegistryArchive, UnsupportedRegistryArchive};
+use crate::Error::{
+    GitError, InvalidRegistryArchive, RemoteFileDownloadFailed, UnsupportedRegistryArchive,
+};
 use gix::clone::PrepareFetch;
 use gix::create::Kind;
 use gix::remote::fetch::Shallow;
@@ -47,6 +76,7 @@ use regex::Regex;
 use rouille::url::Url;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
 use std::fmt::Display;
 use std::fs::{create_dir_all, File};
 use std::io;
@@ -54,8 +84,10 @@ use std::num::NonZeroU32;
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
 use std::sync::atomic::AtomicBool;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 use tempfile::TempDir;
+use ureq::config::{Config, RedirectAuthHeaders};
+use ureq::Agent;
 
 /// When true, git clone operations use `open::Options::default()` which reads
 /// global/system git config and enables credential helpers for private repos.
@@ -76,6 +108,174 @@ pub fn is_git_credentials_enabled() -> bool {
     ALLOW_GIT_CREDENTIALS.load(std::sync::atomic::Ordering::Relaxed)
 }
 
+/// Optional Bearer token for authenticating HTTP requests when downloading
+/// remote archives and files. Set via `set_http_auth_token()`, typically from
+/// the `WEAVER_HTTP_AUTH_TOKEN` or `GITHUB_TOKEN` environment variable at startup.
+static HTTP_AUTH_TOKEN: Mutex<Option<String>> = Mutex::new(None);
+
+/// Set the Bearer token used for HTTP authentication when downloading remote
+/// archives and files.
+///
+/// # Panics
+///
+/// Panics if the token mutex is poisoned.
+pub fn set_http_auth_token(token: String) {
+    *HTTP_AUTH_TOKEN
+        .lock()
+        .expect("HTTP_AUTH_TOKEN mutex poisoned") = Some(token);
+}
+
+/// Returns the configured HTTP auth token, if any.
+///
+/// # Panics
+///
+/// Panics if the token mutex is poisoned.
+#[must_use]
+pub fn http_auth_token() -> Option<String> {
+    HTTP_AUTH_TOKEN
+        .lock()
+        .expect("HTTP_AUTH_TOKEN mutex poisoned")
+        .clone()
+}
+
+/// Shared ureq [`Agent`] configured for authenticated HTTP downloads.
+///
+/// Uses `RedirectAuthHeaders::SameHost` so that the `Authorization` header
+/// is preserved across same-host redirects and stripped on redirects to a
+/// different host. The agent is shared so that connection pooling benefits
+/// multiple downloads in the same run.
+///
+/// NOTE(review): `SameHost` is expected to compare against the exact original
+/// host rather than a `*.github.com` wildcard — confirm against the redirect
+/// chain used by GitHub API asset downloads.
+static HTTP_AGENT: Lazy<Agent> = Lazy::new(|| {
+    Config::builder()
+        .max_redirects(10)
+        .redirect_auth_headers(RedirectAuthHeaders::SameHost)
+        .build()
+        .into()
+});
+
+/// Attaches a `User-Agent: weaver` header to every request and, if a non-empty
+/// token is configured, an `Authorization: Bearer <token>` header.
+///
+/// NOTE(review): the token is attached regardless of the request's host or
+/// scheme, so a configured `GITHUB_TOKEN` is also sent to non-GitHub URLs.
+fn attach_auth<B>(request: ureq::RequestBuilder<B>) -> ureq::RequestBuilder<B> {
+    let request = request.header("User-Agent", "weaver");
+    match http_auth_token() {
+        // An empty or whitespace-only token (e.g. `GITHUB_TOKEN=""`) would yield a
+        // malformed `Authorization: Bearer ` header, so treat it as "no token".
+        Some(token) if !token.trim().is_empty() => {
+            request.header("Authorization", &format!("Bearer {}", token.trim()))
+        }
+        _ => request,
+    }
+}
+
+/// Download `url` into `save_path` with Bearer auth. GitHub browser-style
+/// release URLs (for both individual files and archive assets) are transparently
+/// normalized to API asset URLs so that token auth works for private repos.
+///
+/// `map_err` converts a failure message into the caller's preferred [`Error`]
+/// variant. URL normalization failures are returned as produced by
+/// `normalize_github_url` and do not go through `map_err`.
+fn download_to_file(
+    url: &str,
+    save_path: &Path,
+    map_err: impl Fn(String) -> Error,
+) -> Result<(), Error> {
+    let resolved_url = normalize_github_url(url)?;
+
+    let mut request = attach_auth(HTTP_AGENT.get(&resolved_url));
+    // For GitHub API asset downloads, `Accept: application/octet-stream`
+    // triggers the redirect to the actual file content.
+    if resolved_url.starts_with("https://api.github.com/") {
+        request = request.header("Accept", "application/octet-stream");
+    }
+    let response = request.call().map_err(|e| map_err(e.to_string()))?;
+
+    // The destination is only created once the request has succeeded.
+    let mut file = File::create(save_path).map_err(|e| map_err(e.to_string()))?;
+    _ = io::copy(&mut response.into_body().into_reader(), &mut file)
+        .map_err(|e| map_err(e.to_string()))?;
+    Ok(())
+}
+
+/// Cache for GitHub release API responses, keyed by release.
+/// Avoids duplicate API calls when multiple files are downloaded from the same release
+/// (e.g. manifest.yaml then resolved.yaml).
+static GITHUB_RELEASE_CACHE: Lazy<Mutex<HashMap<String, serde_json::Value>>> =
+    Lazy::new(|| Mutex::new(HashMap::new()));
+
+/// If `url` is a GitHub browser-style release asset URL, resolve it to the
+/// API asset URL (which accepts Bearer token auth). Any other URL is returned
+/// unchanged. Release metadata is cached so that downloading multiple assets
+/// from the same release only makes one API call.
+///
+/// Browser form: `https://github.com/{owner}/{repo}/releases/download/{tag}/{filename}`
+/// API form:     `https://api.github.com/repos/{owner}/{repo}/releases/assets/{id}`
+fn normalize_github_url(url: &str) -> Result<String, Error> {
+    normalize_github_url_with_api_base(url, "https://api.github.com")
+}
+
+/// Variant of [`normalize_github_url`] with a configurable API base URL for testing.
+/// The `api_base` must not end with a trailing slash.
+fn normalize_github_url_with_api_base(url: &str, api_base: &str) -> Result<String, Error> {
+    // Anything that is not a browser-style release asset URL passes through
+    // untouched and never triggers a network call.
+    let Some((owner, repo, tag, filename)) = parse_github_release_url(url) else {
+        return Ok(url.to_owned());
+    };
+    let err = |msg: String| RemoteFileDownloadFailed {
+        url: url.to_owned(),
+        error: msg,
+    };
+
+    // The API base is part of the key: cached metadata embeds absolute asset
+    // URLs of the server that produced it, so it must not be reused for
+    // another base.
+    let cache_key = format!("{api_base}/{owner}/{repo}/{tag}");
+    // Look up in its own scope so the lock is released before any network I/O.
+    let release = {
+        let cache = GITHUB_RELEASE_CACHE
+            .lock()
+            .expect("GitHub release cache lock poisoned");
+        cache.get(&cache_key).cloned()
+    };
+    let release = if let Some(cached) = release {
+        cached
+    } else {
+        let api_url = format!("{api_base}/repos/{owner}/{repo}/releases/tags/{tag}");
+        let req = attach_auth(
+            HTTP_AGENT
+                .get(&api_url)
+                .header("Accept", "application/vnd.github+json"),
+        );
+        let body: String = req
+            .call()
+            .map_err(|e| err(format!("GitHub API request failed: {e}")))?
+            .into_body()
+            .read_to_string()
+            .map_err(|e| err(format!("Failed to read GitHub API response: {e}")))?;
+        let parsed: serde_json::Value = serde_json::from_str(&body)
+            .map_err(|e| err(format!("Failed to parse GitHub API response: {e}")))?;
+        // Two concurrent misses may both fetch; the later insert simply overwrites.
+        _ = GITHUB_RELEASE_CACHE
+            .lock()
+            .expect("GitHub release cache lock poisoned")
+            .insert(cache_key, parsed.clone());
+        parsed
+    };
+
+    find_asset_url(&release, filename, tag, url)
+}
+
+/// Parse a GitHub browser-style release asset URL into its
+/// `(owner, repo, tag, filename)` components.
+///
+/// Returns `None` if the URL does not match
+/// `https://github.com/{owner}/{repo}/releases/download/{tag}/{filename}` or if
+/// any component is empty (e.g. a trailing `/` with no filename). Everything
+/// after the tag is kept as the filename, including further `/` characters.
+fn parse_github_release_url(url: &str) -> Option<(&str, &str, &str, &str)> {
+    let rest = url.strip_prefix("https://github.com/")?;
+    // At most six pieces: the sixth keeps the remainder of the path intact.
+    let mut parts = rest.splitn(6, '/');
+    let (owner, repo) = (parts.next()?, parts.next()?);
+    if parts.next()? != "releases" || parts.next()? != "download" {
+        return None;
+    }
+    let (tag, filename) = (parts.next()?, parts.next()?);
+    if [owner, repo, tag, filename].iter().any(|part| part.is_empty()) {
+        return None;
+    }
+    Some((owner, repo, tag, filename))
+}
+
+/// Find the API asset URL for `filename` within a GitHub release JSON response.
+///
+/// `release` is the parsed release JSON, `tag` is only used in error messages,
+/// and `url` is the original URL reported in the returned error.
+///
+/// Returns `RemoteFileDownloadFailed` when the JSON has no `assets` array, when
+/// no asset is named `filename`, or when the matching asset has no string `url`.
+fn find_asset_url(
+    release: &serde_json::Value,
+    filename: &str,
+    tag: &str,
+    url: &str,
+) -> Result<String, Error> {
+    let err = |msg: String| RemoteFileDownloadFailed {
+        url: url.to_owned(),
+        error: msg,
+    };
+    let assets = release["assets"]
+        .as_array()
+        .ok_or_else(|| err("GitHub release has no assets".to_owned()))?;
+
+    let asset = assets
+        .iter()
+        .find(|a| a["name"].as_str() == Some(filename))
+        .ok_or_else(|| err(format!("Asset '{filename}' not found in release '{tag}'")))?;
+
+    asset["url"]
+        .as_str()
+        .map(str::to_owned)
+        .ok_or_else(|| err("Asset missing 'url' field".to_owned()))
+}
+
 /// The extension for a tar gz archive.
 const TAR_GZ_EXT: &str = ".tar.gz";
 /// The extension for a zip archive.
@@ -145,6 +345,12 @@ pub enum VirtualDirectoryPath {
         /// If omitted, the repository root is used.
         sub_folder: Option<String>,
     },
+    /// A virtual directory representing a single remote file accessible via HTTP(S).
+    /// Used for downloading individual files such as published registry manifests.
+    RemoteFile {
+        /// URL of the remote file
+        url: String,
+    },
 }
 
 // Helper to allow mapping an Option via a function that works with empty strings.
@@ -187,6 +393,7 @@ impl VirtualDirectoryPath {
                 refspec,
                 sub_folder: map_option(sub_folder, f),
             },
+            RemoteFile { url } => RemoteFile { url: f(url) },
         }
     }
 }
@@ -265,12 +472,20 @@ impl FromStr for VirtualDirectoryPath {
                     url: source.to_owned(),
                     sub_folder,
                 })
-            } else {
+            } else if source.ends_with(".git") || refspec.is_some() || sub_folder.is_some() {
+                // Archives (`.zip` / `.tar.gz`) are already handled above. Of the
+                // remaining HTTP(S) sources, only a Git repo can meaningfully carry
+                // a `@refspec` or a `[sub_folder]`, so their presence classifies the
+                // URL as `GitRepo` even when the `.git` suffix is omitted.
+                //
+                // NOTE(review): before this change every non-archive HTTP(S) source
+                // fell through to `GitRepo`. A bare repository URL without `.git`,
+                // `@refspec` or `[sub_folder]` is now classified as `RemoteFile`.
                 Ok(Self::GitRepo {
                     url: source.to_owned(),
                     refspec,
                     sub_folder,
                 })
+            } else {
+                Ok(Self::RemoteFile {
+                    url: source.to_owned(),
+                })
             }
         } else if source.ends_with(".zip") || source.ends_with(".tar.gz") {
             Ok(Self::LocalArchive {
@@ -316,6 +531,7 @@ impl Display for VirtualDirectoryPath {
                 (None, Some(folder)) => write!(f, "{url}[{folder}]"),
                 (None, None) => write!(f, "{url}"),
             },
+            RemoteFile { url } => write!(f, "{url}"),
         }
     }
 }
@@ -378,6 +594,10 @@ impl VirtualDirectory {
                 let tmp_dir = Self::create_tmp_repo()?;
                 Self::try_from_remote_archive(url, sub_folder.as_ref(), tmp_dir, vdir_path_repr)
             }
+            RemoteFile { url } => {
+                let tmp_dir = Self::create_tmp_repo()?;
+                Self::try_from_remote_file(url, tmp_dir, vdir_path_repr)
+            }
         };
         vdir
     }
@@ -683,6 +903,9 @@ impl VirtualDirectory {
     /// The temporary directory is created in the `.weaver/vdir_cache`.
     /// The temporary directory is deleted when the [`VirtualDirectory`] goes out of scope.
     ///
+    /// GitHub browser-style release archive URLs are automatically normalized to API
+    /// asset URLs so that Bearer token auth works for private repositories.
+    ///
     /// Arguments:
     /// - `id`: The unique identifier for the registry.
     /// - `url`: The URL of the archive.
@@ -696,52 +919,22 @@ impl VirtualDirectory {
         vdir_path: String,
     ) -> Result<Self, Error> {
         let tmp_path = target_dir.path().to_path_buf();
-
-        // Download the archive from the URL
-        let response = ureq::get(url).call().map_err(|e| InvalidRegistryArchive {
+        let err = |msg: String| InvalidRegistryArchive {
             archive: url.to_owned(),
-            error: e.to_string(),
-        })?;
-        if response.status() != 200 {
-            return Err(InvalidRegistryArchive {
-                archive: url.to_owned(),
-                error: format!("HTTP status code: {}", response.status()),
-            });
-        }
+            error: msg,
+        };
 
-        // Parse the URL to get the file name
-        let parsed_url = Url::parse(url).map_err(|e| InvalidRegistryArchive {
-            archive: url.to_owned(),
-            error: e.to_string(),
-        })?;
+        // Use the original URL for the filename, not the (possibly GitHub-API-normalized)
+        // download URL, so the archive extension is preserved for `try_from_local_archive`.
+        let parsed_url = Url::parse(url).map_err(|e| err(e.to_string()))?;
         let file_name = parsed_url
             .path_segments()
             .and_then(|mut segments| segments.next_back())
             .and_then(|name| if name.is_empty() { None } else { Some(name) })
-            .ok_or("Failed to extract file name from URL")
-            .map_err(|e| InvalidRegistryArchive {
-                archive: url.to_owned(),
-                error: e.to_owned(),
-            })?;
+            .ok_or_else(|| err("Failed to extract file name from URL".to_owned()))?;
 
-        // Create the full path to the save file
         let save_path = tmp_path.join(file_name);
-
-        // Open a file in write mode
-        let mut file = File::create(save_path.clone()).map_err(|e| InvalidRegistryArchive {
-            archive: url.to_owned(),
-            error: e.to_string(),
-        })?;
-
-        // Write the response body to the file.
-        // The number of bytes written is ignored as the `try_from_local_archive` function
-        // will handle the archive extraction and return an error if the archive is invalid.
-        _ = io::copy(&mut response.into_body().into_reader(), &mut file).map_err(|e| {
-            InvalidRegistryArchive {
-                archive: url.to_owned(),
-                error: e.to_string(),
-            }
-        })?;
+        // NOTE(review): the removed code rejected every status other than 200; the
+        // download now relies on the error handling inside `download_to_file` —
+        // confirm that non-success statuses still surface as errors.
+        download_to_file(url, &save_path, err)?;
 
         Self::try_from_local_archive(
             save_path.to_str().unwrap_or_default(),
@@ -751,6 +944,43 @@ impl VirtualDirectory {
         )
     }
 
+    /// Downloads a single remote file via HTTP(S) into a temporary directory.
+    ///
+    /// GitHub browser-style release URLs are automatically normalized to API
+    /// URLs so that Bearer token auth works for private repositories.
+    ///
+    /// The resulting `VirtualDirectory` path points to the downloaded file itself,
+    /// enabling callers such as `RegistryRepo::try_new` to treat it as a manifest.
+    ///
+    /// Arguments:
+    /// - `url`: The URL of the file to download.
+    /// - `target_dir`: The temporary directory that receives the file; it is kept
+    ///   alive by the returned `VirtualDirectory`.
+    /// - `vdir_path`: The textual representation of the originating virtual directory path.
+    fn try_from_remote_file(
+        url: &str,
+        target_dir: TempDir,
+        vdir_path: String,
+    ) -> Result<Self, Error> {
+        let tmp_path = target_dir.path().to_path_buf();
+        let err = |msg: String| RemoteFileDownloadFailed {
+            url: url.to_owned(),
+            error: msg,
+        };
+
+        // Use the original URL for the filename (not the resolved API URL, which
+        // has an opaque numeric asset ID).
+        let parsed_url = Url::parse(url).map_err(|e| err(e.to_string()))?;
+        // A URL without a final path segment (e.g. ending in `/`) gets a fixed
+        // placeholder name instead of failing.
+        let file_name = parsed_url
+            .path_segments()
+            .and_then(|mut segments| segments.next_back())
+            .filter(|name| !name.is_empty())
+            .unwrap_or("downloaded_file");
+
+        let save_path = tmp_path.join(file_name);
+        download_to_file(url, &save_path, err)?;
+
+        Ok(Self {
+            vdir_path,
+            path: save_path,
+            tmp_dir: Arc::new(Some(target_dir)),
+        })
+    }
+
     /// Returns the local filesystem path to the resolved virtual directory content.
#[must_use]
     pub fn path(&self) -> &Path {
@@ -1055,4 +1285,336 @@ mod tests {
         // Reset for other tests
         ALLOW_GIT_CREDENTIALS.store(false, std::sync::atomic::Ordering::Relaxed);
     }
+
+    /// Serializes the tests that mutate the process-wide `HTTP_AUTH_TOKEN`.
+    /// The test harness runs tests in parallel, so without this guard one test
+    /// can reset or overwrite the token while another is mid-download.
+    static HTTP_AUTH_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+    #[test]
+    fn test_http_auth_token() {
+        use super::{http_auth_token, set_http_auth_token, HTTP_AUTH_TOKEN};
+
+        // A failed sibling test must not cascade, so recover from poisoning.
+        let _serial = HTTP_AUTH_TEST_LOCK
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+
+        // Reset to known state
+        if let Ok(mut guard) = HTTP_AUTH_TOKEN.lock() {
+            *guard = None;
+        }
+
+        assert!(http_auth_token().is_none());
+        set_http_auth_token("test-token-123".to_owned());
+        assert_eq!(http_auth_token().as_deref(), Some("test-token-123"));
+
+        // Reset for other tests
+        if let Ok(mut guard) = HTTP_AUTH_TOKEN.lock() {
+            *guard = None;
+        }
+    }
+
+    /// Tests that remote archive downloads work with and without Bearer auth.
+    /// Both phases live in one test and run under `HTTP_AUTH_TEST_LOCK` because
+    /// `HTTP_AUTH_TOKEN` is shared global state that parallel tests would race on.
+    #[test]
+    fn test_remote_archive_auth() {
+        use super::HTTP_AUTH_TOKEN;
+        use crate::test::ServeStaticFilesWithAuth;
+
+        let _serial = HTTP_AUTH_TEST_LOCK
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+
+        let token = "secret-test-token";
+
+        let server = ServeStaticFilesWithAuth::from("tests/test_data", token)
+            .expect("failed to start auth server");
+        let url = server.relative_path_to_url("semconv_registry_v1.26.0.tar.gz");
+
+        // Without a token, the auth server should reject the request.
+        if let Ok(mut guard) = HTTP_AUTH_TOKEN.lock() {
+            *guard = None;
+        }
+        let registry_path = format!("{url}[model]")
+            .parse::<VirtualDirectoryPath>()
+            .expect("failed to parse registry path");
+        let result = VirtualDirectory::try_new(&registry_path);
+        assert!(result.is_err(), "expected error when no auth token is set");
+
+        // With the correct token, the download should succeed.
+        if let Ok(mut guard) = HTTP_AUTH_TOKEN.lock() {
+            *guard = Some(token.to_owned());
+        }
+        let registry_path = format!("{url}[model]")
+            .parse::<VirtualDirectoryPath>()
+            .expect("failed to parse registry path");
+        check_archive(registry_path, Some("general.yaml"));
+
+        // Reset for other tests
+        if let Ok(mut guard) = HTTP_AUTH_TOKEN.lock() {
+            *guard = None;
+        }
+    }
+
+    #[test]
+    fn test_remote_file_parsing() {
+        // A URL without .git, .zip, or .tar.gz suffix should be parsed as RemoteFile
+        let path_str = "https://example.com/registry/manifest.yaml";
+        let path: VirtualDirectoryPath = path_str.parse().expect("failed to parse");
+        assert!(
+            matches!(&path, VirtualDirectoryPath::RemoteFile { url } if url == path_str),
+            "Expected RemoteFile, got {path:?}"
+        );
+        assert_eq!(path.to_string(), path_str);
+
+        // GitHub API release asset URL
+        let path_str = "https://api.github.com/repos/org/repo/releases/assets/12345678";
+        let path: VirtualDirectoryPath = path_str.parse().expect("failed to parse");
+        assert!(
+            matches!(&path, VirtualDirectoryPath::RemoteFile { url } if url == path_str),
+            "Expected RemoteFile, got {path:?}"
+        );
+
+        // .git suffix should still be GitRepo
+        let path_str = "https://github.com/org/repo.git";
+        let path: VirtualDirectoryPath = path_str.parse().expect("failed to parse");
+        assert!(
+            matches!(&path, VirtualDirectoryPath::GitRepo { .. }),
+            "Expected GitRepo, got {path:?}"
+        );
+
+        // A `@refspec` without `.git` is still a git repo.
+        let path: VirtualDirectoryPath = "https://github.com/org/repo@v1.0.0"
+            .parse()
+            .expect("failed to parse");
+        assert!(
+            matches!(
+                &path,
+                VirtualDirectoryPath::GitRepo { url, refspec: Some(r), sub_folder: None }
+                    if url == "https://github.com/org/repo" && r == "v1.0.0"
+            ),
+            "Expected GitRepo with refspec, got {path:?}"
+        );
+
+        // A `[sub_folder]` without `.git` is still a git repo.
+ let path: VirtualDirectoryPath = "https://github.com/org/repo[model]" + .parse() + .expect("failed to parse"); + assert!( + matches!( + &path, + VirtualDirectoryPath::GitRepo { url, refspec: None, sub_folder: Some(s) } + if url == "https://github.com/org/repo" && s == "model" + ), + "Expected GitRepo with sub_folder, got {path:?}" + ); + + // Both refspec and sub_folder, no `.git` — still a git repo. + let path: VirtualDirectoryPath = "https://github.com/org/repo@v1.0.0[model]" + .parse() + .expect("failed to parse"); + assert!( + matches!( + &path, + VirtualDirectoryPath::GitRepo { url, refspec: Some(r), sub_folder: Some(s) } + if url == "https://github.com/org/repo" && r == "v1.0.0" && s == "model" + ), + "Expected GitRepo with refspec and sub_folder, got {path:?}" + ); + } + + #[test] + fn test_remote_file_download() { + let server = ServeStaticFiles::from("tests/test_data").expect("failed to start server"); + let url = server.relative_path_to_url("file_a.yaml"); + let vdir_path = VirtualDirectoryPath::RemoteFile { url }; + let vdir = VirtualDirectory::try_new(&vdir_path).expect("failed to download remote file"); + let content = std::fs::read_to_string(vdir.path()).expect("failed to read downloaded file"); + assert_eq!(content, "file: A"); + } + + #[test] + fn test_parse_github_release_url() { + use super::parse_github_release_url; + + // Canonical browser-style release asset URL. + assert_eq!( + parse_github_release_url( + "https://github.com/owner/repo/releases/download/v1.0.0/manifest.yaml" + ), + Some(("owner", "repo", "v1.0.0", "manifest.yaml")) + ); + + // Filename containing a slash is preserved intact (splitn keeps the tail). + assert_eq!( + parse_github_release_url("https://github.com/o/r/releases/download/tag/sub/file.yaml"), + Some(("o", "r", "tag", "sub/file.yaml")) + ); + + // Non-GitHub host passes through. 
+ assert_eq!( + parse_github_release_url("https://example.com/owner/repo/releases/download/v1/f"), + None + ); + + // GitHub URL that isn't a release asset download. + assert_eq!( + parse_github_release_url("https://github.com/owner/repo/blob/main/README.md"), + None + ); + + // Too few path segments. + assert_eq!( + parse_github_release_url("https://github.com/owner/repo/releases/download/v1"), + None + ); + + // Already-resolved API URL passes through (not a browser URL). + assert_eq!( + parse_github_release_url( + "https://api.github.com/repos/owner/repo/releases/assets/12345" + ), + None + ); + } + + #[test] + fn test_normalize_github_url_passthrough() { + use super::normalize_github_url; + + // Non-matching URLs must not trigger network calls and must come back unchanged. + for url in [ + "https://example.com/file.yaml", + "https://github.com/owner/repo/blob/main/README.md", + "https://api.github.com/repos/owner/repo/releases/assets/12345", + "http://127.0.0.1:8080/manifest.yaml", + ] { + assert_eq!(normalize_github_url(url).expect("should pass through"), url); + } + } + + #[test] + fn test_normalize_github_url_resolves_asset() { + use super::normalize_github_url_with_api_base; + use crate::test::{MockGitHubApi, MockRelease}; + + let api = MockGitHubApi::start(vec![MockRelease { + owner: "owner_a".to_owned(), + repo: "repo_a".to_owned(), + tag: "v1.0.0".to_owned(), + assets: vec![ + ("manifest.yaml".to_owned(), b"manifest body".to_vec()), + ("resolved.yaml".to_owned(), b"resolved body".to_vec()), + ], + }]) + .expect("mock API failed to start"); + + let browser_url = + "https://github.com/owner_a/repo_a/releases/download/v1.0.0/manifest.yaml"; + let resolved = normalize_github_url_with_api_base(browser_url, &api.base_url()) + .expect("normalize should succeed"); + assert_eq!(resolved, format!("{}/assets/manifest.yaml", api.base_url())); + } + + #[test] + fn test_normalize_github_url_caches_release() { + use super::normalize_github_url_with_api_base; + use 
crate::test::{MockGitHubApi, MockRelease}; + + let api = MockGitHubApi::start(vec![MockRelease { + owner: "owner_b".to_owned(), + repo: "repo_b".to_owned(), + tag: "v2.0.0".to_owned(), + assets: vec![ + ("manifest.yaml".to_owned(), b"m".to_vec()), + ("resolved.yaml".to_owned(), b"r".to_vec()), + ], + }]) + .expect("mock API failed to start"); + + // Two different assets from the same release should hit the tags endpoint once. + for filename in ["manifest.yaml", "resolved.yaml"] { + let url = + format!("https://github.com/owner_b/repo_b/releases/download/v2.0.0/{filename}"); + _ = normalize_github_url_with_api_base(&url, &api.base_url()) + .expect("normalize should succeed"); + } + assert_eq!( + api.request_count(), + 1, + "release metadata should be cached across asset lookups" + ); + } + + #[test] + fn test_normalize_github_url_missing_asset() { + use super::normalize_github_url_with_api_base; + use crate::test::{MockGitHubApi, MockRelease}; + use crate::Error::RemoteFileDownloadFailed; + + let api = MockGitHubApi::start(vec![MockRelease { + owner: "owner_c".to_owned(), + repo: "repo_c".to_owned(), + tag: "v3.0.0".to_owned(), + assets: vec![("manifest.yaml".to_owned(), b"m".to_vec())], + }]) + .expect("mock API failed to start"); + + let browser_url = "https://github.com/owner_c/repo_c/releases/download/v3.0.0/missing.yaml"; + let err = normalize_github_url_with_api_base(browser_url, &api.base_url()) + .expect_err("missing asset should error"); + assert!( + matches!(&err, RemoteFileDownloadFailed { error, .. } if error.contains("missing.yaml")), + "unexpected error: {err:?}" + ); + } + + #[test] + fn test_normalize_github_url_api_404() { + use super::normalize_github_url_with_api_base; + use crate::test::{MockGitHubApi, MockRelease}; + use crate::Error::RemoteFileDownloadFailed; + + // Mock serves a release for a different tag, so the requested tag 404s. 
+ let api = MockGitHubApi::start(vec![MockRelease { + owner: "owner_d".to_owned(), + repo: "repo_d".to_owned(), + tag: "v4.0.0".to_owned(), + assets: vec![("manifest.yaml".to_owned(), b"m".to_vec())], + }]) + .expect("mock API failed to start"); + + let browser_url = + "https://github.com/owner_d/repo_d/releases/download/nonexistent/manifest.yaml"; + let err = normalize_github_url_with_api_base(browser_url, &api.base_url()) + .expect_err("unknown tag should error"); + assert!( + matches!(&err, RemoteFileDownloadFailed { error, .. } if error.contains("GitHub API request failed")), + "unexpected error: {err:?}" + ); + } + + #[test] + fn test_find_asset_url() { + use super::find_asset_url; + use crate::Error::RemoteFileDownloadFailed; + + let release = serde_json::json!({ + "assets": [ + { "name": "manifest.yaml", "url": "https://api.github.com/a/1" }, + { "name": "resolved.yaml", "url": "https://api.github.com/a/2" }, + ] + }); + + assert_eq!( + find_asset_url(&release, "manifest.yaml", "v1", "orig").expect("found"), + "https://api.github.com/a/1" + ); + + // Asset missing. + let err = find_asset_url(&release, "missing.yaml", "v1", "orig").expect_err("not found"); + assert!( + matches!(&err, RemoteFileDownloadFailed { error, .. } if error.contains("missing.yaml") && error.contains("v1")) + ); + + // Release has no `assets` array. + let empty = serde_json::json!({}); + let err = find_asset_url(&empty, "manifest.yaml", "v1", "orig").expect_err("no assets"); + assert!( + matches!(&err, RemoteFileDownloadFailed { error, .. } if error.contains("no assets")) + ); + + // Asset entry missing `url`. + let no_url = serde_json::json!({ "assets": [{ "name": "manifest.yaml" }] }); + let err = find_asset_url(&no_url, "manifest.yaml", "v1", "orig").expect_err("missing url"); + assert!(matches!(&err, RemoteFileDownloadFailed { error, .. 
} if error.contains("'url'"))); + } } diff --git a/docs/define-your-own-telemetry-schema.md b/docs/define-your-own-telemetry-schema.md index 641287c5f..c8d948886 100644 --- a/docs/define-your-own-telemetry-schema.md +++ b/docs/define-your-own-telemetry-schema.md @@ -106,6 +106,17 @@ custom registry. It is important to note that some templates are specific to the OTEL registry. We are working to remove this type of limitation. The `` parameter can be a local directory, a local or -remote archive or a Git URL. +remote archive, a remote file URL (such as a published registry manifest), or a +Git URL. GitHub release asset URLs are also supported and are automatically +resolved via the GitHub API. + It is also possible to use specific Git references, such as a tag, a branch or even a specific commit with the `@` syntax. + +To download from private repositories, set the `WEAVER_HTTP_AUTH_TOKEN` or +`GITHUB_TOKEN` environment variable: + +```bash +GITHUB_TOKEN=$(gh auth token) weaver registry check \ + -r "https://github.com/org/repo/releases/download/v1.0.0/manifest.yaml" +``` diff --git a/docs/usage.md b/docs/usage.md index b32f7e2be..ee877b59d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -89,7 +89,11 @@ The process exits with a code of 0 if the registry validation is successful. ###### **Options:** -* `-r`, `--registry ` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry ` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. 
GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -144,7 +148,11 @@ The process exits with a code of 0 if the generation is successful. * `-c`, `--config <CONFIG>` — List of `weaver.yaml` configuration files to use. When there is a conflict, the last one will override the previous ones for the keys that are defined in both * `-D`, `--param <PARAM>` — Parameters key=value, defined in the command line, to pass to the templates. The value must be a valid YAML value * `--params <PARAMS>` — Parameters, defined in a YAML file, to pass to the templates -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry.
Default is false @@ -181,7 +189,11 @@ Please use 'weaver registry generate' or 'weaver registry package' instead. ###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -225,7 +237,11 @@ DEPRECATED - Searches a registry. This command is deprecated and will be removed ###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly.
GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -254,7 +270,11 @@ Calculate a set of general statistics on a semantic convention registry ###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -291,7 +311,11 @@ Update markdown files that contain markers indicating the templates used to upda ###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry.
For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -379,7 +403,11 @@ This diff can then be rendered in multiple formats: ###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable.
Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -415,7 +443,11 @@ This uses the standard OpenTelemetry SDK, defaulting to OTLP gRPC on localhost:4 ###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -454,7 +486,11 @@ Includes: Flexible input ingestion, configurable assessment, and template-based ###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry.
For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -539,7 +575,11 @@ The server communicates over stdio using JSON-RPC. ###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -605,7 +645,11 @@ Packages a semantic convention registry into a self-contained artifact.
###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false @@ -696,7 +740,11 @@ Start the API server (Experimental) ###### **Options:** -* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL +* `-r`, `--registry <REGISTRY>` — Local folder, Git repo URL, Git archive URL, or remote file URL of the semantic convention registry. For Git URLs, a reference can be specified using the `@refspec` syntax and a sub-folder can be specified using the `[sub-folder]` syntax after the URL. + + For remote files (e.g. a published registry manifest), pass the URL directly. GitHub release asset URLs are supported and will be resolved automatically via the GitHub API. + + To authenticate with private repositories, set the WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable.
Default value: `https://github.com/open-telemetry/semantic-conventions.git[model]` * `-s`, `--follow-symlinks` — Boolean flag to specify whether to follow symlinks when loading the registry. Default is false diff --git a/schemas/semconv-schemas.md b/schemas/semconv-schemas.md index 4d4d172e3..9030f44cc 100644 --- a/schemas/semconv-schemas.md +++ b/schemas/semconv-schemas.md @@ -55,7 +55,12 @@ Properties: the dependency is resolved by its `schema_url` alone. Can be: - A local directory or archive (`.zip`, `.tar.gz`) - A remote archive URL + - A remote file URL (e.g. a published registry manifest) - A Git repository URL + - A GitHub release asset URL (automatically resolved via the GitHub API) + + For private repositories, set the `WEAVER_HTTP_AUTH_TOKEN` or `GITHUB_TOKEN` + environment variable to authenticate HTTP downloads. For example, a definition manifest for a registry that extends OTel semantic conventions: diff --git a/src/main.rs b/src/main.rs index 398a04624..736471f89 100644 --- a/src/main.rs +++ b/src/main.rs @@ -132,6 +132,15 @@ fn run_command(cli: &Cli) -> ExitDirectives { if cli.allow_git_credentials { weaver_common::vdir::enable_git_credentials(); } + // Read HTTP auth token from environment for authenticated remote archive downloads. + // WEAVER_HTTP_AUTH_TOKEN takes precedence; GITHUB_TOKEN is the fallback. + if let Ok(token) = + std::env::var("WEAVER_HTTP_AUTH_TOKEN").or_else(|_| std::env::var("GITHUB_TOKEN")) + { + if !token.is_empty() { + weaver_common::vdir::set_http_auth_token(token); + } + } let cmd_result = match &cli.command { Some(Commands::Registry(params)) => semconv_registry(params), Some(Commands::Diagnostic(params)) => diagnostic::diagnostic(params), diff --git a/src/registry/mod.rs b/src/registry/mod.rs index 01067c09d..b2c98bb1b 100644 --- a/src/registry/mod.rs +++ b/src/registry/mod.rs @@ -166,10 +166,17 @@ pub enum RegistrySubCommand { /// Set of parameters used to specify a semantic convention registry. 
#[derive(Args, Debug)] pub struct RegistryArgs { - /// Local folder, Git repo URL, or Git archive URL of the semantic - /// convention registry. For Git URLs, a reference can be specified - /// using the `@refspec` syntax and a sub-folder can be specified - /// using the `[sub-folder]` syntax after the URL. + /// Local folder, Git repo URL, Git archive URL, or remote file URL + /// of the semantic convention registry. For Git URLs, a reference + /// can be specified using the `@refspec` syntax and a sub-folder + /// can be specified using the `[sub-folder]` syntax after the URL. + /// + /// For remote files (e.g. a published registry manifest), pass the + /// URL directly. GitHub release asset URLs are supported and will be + /// resolved automatically via the GitHub API. + /// + /// To authenticate with private repositories, set the + /// WEAVER_HTTP_AUTH_TOKEN or GITHUB_TOKEN environment variable. #[arg( short = 'r', long,