Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions gigl/common/types/uri/http_uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ class HttpUri(Uri):
"""Represents an HTTP URI."""

def __init__(self, uri: Union[str, Path, HttpUri]) -> None:
self._has_valid_prefix(uri=uri)
self._has_no_backslash(uri=uri)
self.is_valid(uri=self._token_to_string(uri), raise_exception=True)
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Matching the behaviour of the GcsUri constructor which I suspect is the right thing to do.

This simplifies joining logic as now validly constructed GcsUri and HttpUri are always absolute.

super().__init__(uri=uri)

@classmethod
Expand Down
60 changes: 53 additions & 7 deletions gigl/common/types/uri/uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,46 @@ def _token_to_string(token: _URI_LIKE) -> str:
return str(token)
return ""

# TODO(kmonte): You should not be able to join a Uri with a Uri of a different type.
# *or* join HTTP on HTTP or GCS on GCS.
# This is not backwards compatible, so come around to this later.
@classmethod
def _is_absolute(cls, token: _URI_LIKE) -> bool:
token_str = cls._token_to_string(token)

# Note: "://" is used to detect GcsUri and HttpUri prefixes, but will incorrectly
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not thrilled about this but it seems like the simplest way to detect GcsUri or HttpUri without either (a) storing subclass prefixes in the base class or (b) checking against subclass type directly

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to not check the subclasses here? e.g. if isinstance(token, (GcsURI, HTTPURI): raise

Copy link
Copy Markdown
Collaborator Author

@jchmura-sc jchmura-sc May 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My thought was that it's not great to have to hardcode this, in case new subclasses appear.

Also, we probably want to raise when the rhs token is a string like "gs://foo"?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is_abs() could be an abstract method or property that subclasses could implement.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed, the check against :// is still there to prevent joins of the sort: Uri.join(LocalUri("foo"), "gs://bar").

# classify relative LocalUri path's containing "://" as absolute
return Path(token_str).is_absolute() or "://" in token_str

@classmethod
def _has_matching_uri_type(
cls, token: _URI_LIKE, allow_base_uri_join: bool = False
) -> bool:
token_is_path_like = not isinstance(token, Uri)
if token_is_path_like: # str and Path tokens do not have a Uri type to match.
return True

token_matches_join_type = token.__class__ is cls
if token_matches_join_type: # Same concrete Uri type, e.g. GcsUri.join(GcsUri(...)).
return True

base_join_with_concrete_token = allow_base_uri_join and cls is Uri
if base_join_with_concrete_token: # Uri.join(GcsUri(...), "suffix") stays supported for compatibility.
return True

# Remaining Uri tokens are cross-type mixes, e.g. GcsUri with HttpUri.
return False

@classmethod
def join(cls, token: _URI_LIKE, *tokens: _URI_LIKE) -> Self:
Comment thread
jchmura-sc marked this conversation as resolved.
"""
Join multiple tokens to create a new Uri instance.
The first token may be an absolute or relative path. Every additional token must be relative.

Note:
- Rejecting suffix strings containing "://" may break callers that
intentionally store URI-looking strings inside paths.
- Rejecting absolute LocalUri suffix tokens is stricter than os.path.join,
which implicitly discards earlier tokens on the join path.
- Concrete Uri joins cannot mix Uri token types; base Uri.join keeps
the first token generic for compatibility.

Args:
token: The first token to join.
Expand All @@ -45,6 +78,23 @@ def join(cls, token: _URI_LIKE, *tokens: _URI_LIKE) -> Self:
A new Uri instance representing the joined URI.

"""
# Keep base Uri.join generic for existing callers that pass concrete Uri instances.
if not cls._has_matching_uri_type(token, allow_base_uri_join=True):
raise TypeError(
f"Cannot join {cls.__name__} with {token.__class__.__name__}"
)

for suffix in tokens:
if not cls._has_matching_uri_type(suffix):
raise TypeError(
f"Cannot join {cls.__name__} with {suffix.__class__.__name__}"
)

if cls._is_absolute(suffix):
raise TypeError(
f"URI join suffixes must be relative; got absolute path: {suffix}"
)

token = cls._token_to_string(token)
token_strs: list[str] = [cls._token_to_string(token) for token in tokens]
joined_tmp_path = os.path.join(token, *token_strs)
Expand Down Expand Up @@ -88,10 +138,6 @@ def __eq__(self, other: Any) -> bool:
return False

def __truediv__(self, other: _URI_LIKE) -> Self:
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, __truediv__ would reject joining a relative LocalUri onto a GCSUri or HTTPUri. In this implementation, we allow it.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As per my comment above, Its probably safe to disallow this.
There is a small case where a GcSUri can be joined with string. (same with other URI) - which should be the only case this is allowed.

if isinstance(other, Uri) and not isinstance(other, type(self)):
raise TypeError(
f"Cannot use '/' operator to join {type(self).__name__} with {type(other).__name__}"
)
return self.join(self, other)


Expand Down
49 changes: 49 additions & 0 deletions tests/unit/common/types/uri_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,55 @@ def test_join(self):
with self.subTest("LocalUri with Path"):
joined = LocalUri.join("/foo/bar", Path("file.text"))
self.assertEqual(joined, LocalUri("/foo/bar/file.text"))
with self.subTest("Uri with concrete first token"):
joined = Uri.join(GcsUri("gs://bucket"), "file.txt")
self.assertEqual(joined, Uri("gs://bucket/file.txt"))
self.assertIsInstance(joined, Uri)
with self.subTest("LocalUri suffix"):
relative_local_uri = LocalUri("file.txt")
joined = LocalUri.join("/foo/bar", relative_local_uri)
self.assertEqual(joined, LocalUri("/foo/bar/file.txt"))
self.assertIsInstance(joined, LocalUri)

def test_join_invalid_suffix(self):
with self.subTest("relative LocalUri suffix with non-local join"):
relative_local_uri = LocalUri("file.txt")
with self.assertRaises(TypeError):
GcsUri.join("gs://bucket/path", relative_local_uri)

with self.subTest("mixed Uri first token"):
with self.assertRaises(TypeError):
LocalUri.join(GcsUri("gs://bucket/path"), "file.txt")

with self.subTest("absolute LocalUri suffix"):
absolute_local_uri = LocalUri("/other/file.txt")
with self.assertRaises(TypeError):
LocalUri.join("/foo/bar", absolute_local_uri)

with self.subTest("absolute HttpUri suffix"):
http_uri = HttpUri("http://abc.com/file.txt")
with self.assertRaises(TypeError):
HttpUri.join("http://abc.com/xyz", http_uri)

with self.subTest("absolute GcsUri suffix"):
gcs_uri = GcsUri("gs://bucket/file.txt")
with self.assertRaises(TypeError):
GcsUri.join("gs://bucket/path", gcs_uri)

def test_join_rejects_relative_path_with_uri_separator(self):
with self.assertRaises(TypeError):
LocalUri.join("/foo/bar", "folder://file.txt")

def test_base_uri_join_rejects_concrete_uri_suffix(self):
# Concrete Uri suffixes require a matching concrete join, not base Uri.join.
relative_local_uri = LocalUri("file.txt")

with self.assertRaises(TypeError):
Uri.join("/foo/bar", relative_local_uri)

def test_http_uri_constructor_rejects_invalid_remote_path(self):
with self.assertRaises(TypeError):
HttpUri("file.txt")

def test_div_join(self):
joined: Uri
Expand Down