Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/formatting.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Do not suppress errors with workarounds like `# type: ignore`:

1. `make type_check`
1. `make type_check` — runs **ty** static type checker (config in `pyproject.toml` under `[tool.ty]`)
2. `make unit_test_py PY_TEST_FILES="relevant_test.py"`
3. `make integration_test PY_TEST_FILES="relevant_test.py"` (if cross-component behavior changed)
4. `make check_format` (or `make format` to auto-fix)
Expand Down
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
**/*.pyo
**/*.pyd
**/*.pkl
.ty
.mypy_cache
_autosummary

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dist/
*.egg-info/
miniconda/
.mypy_cache/
.ty/
tools/
.metals/
.bloop/
Expand Down
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ make format_py # Auto-fix Python only
make format_scala # Auto-fix Scala only
make format_md # Auto-fix Markdown only
make check_format # Check without fixing
make type_check # mypy static type checking
make type_check # ty static type checking

# Build
make compile_protos # Regenerate protobuf code after .proto changes
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ format_md:
format: format_py format_scala format_md

type_check:
uv run mypy ${PYTHON_DIRS} --check-untyped-defs
uv run ty check ${PYTHON_DIRS}

lint_test: check_format assert_yaml_configs_parse
@echo "Lint checks pass!"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ make unit_test_py PY_TEST_FILES="eval_metrics_test.py"
# Runs just Scala unit tests
make unit_test_scala

# Run the python static type checker `mypy`
# Run the python static type checker `ty`
make type_check

# Run all formatting/linting tests
Expand Down
2 changes: 1 addition & 1 deletion gigl/distributed/dataset_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def build_dataset_from_task_config_uri(
)

# Need to do this "backwards" so the parent class can be defined first.
# Otherwise, mypy complains that:
# Otherwise, the type checker complains that:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we check that ty still complains?

(I am going to leave identical comments on all of the changes like this - that way the robots can pick them up and check automatically)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I threw the bot at it; it looks like we still might need these.

# "expression has type "type[DistPartitioner]", variable has type "type[DistRangePartitioner]"
if not should_use_range_partitioning:
partitioner_class = DistPartitioner
Expand Down
4 changes: 2 additions & 2 deletions gigl/distributed/dist_partitioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1268,7 +1268,7 @@ def _label_pfn(source_node_ids, _):
del label_edge_index

if is_positive:
# This assert is added to pass mypy type check, in practice we will not see this fail
# This assert is added to pass the type checker, in practice we will not see this fail
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we check that ty still complains?

(I am going to leave identical comments on all of the changes like this - that way the robots can pick them up and check automatically)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needed

assert (
self._positive_label_edge_index is not None
), "Must register positive labels prior to partitioning them"
Expand All @@ -1277,7 +1277,7 @@ def _label_pfn(source_node_ids, _):
if len(self._positive_label_edge_index) == 0:
self._positive_label_edge_index = None
else:
# This assert is added to pass mypy type check, in practice we will not see this fail
# This assert is added to pass the type checker, in practice we will not see this fail
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we check that ty still complains?

(I am going to leave identical comments on all of the changes like this - that way the robots can pick them up and check automatically)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needed

assert (
self._negative_label_edge_index is not None
), "Must register negative labels prior to partitioning them"
Expand Down
2 changes: 1 addition & 1 deletion gigl/src/common/vertex_ai_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def _build_job_config(
if vertex_ai_resource_config.timeout
else None,
# This should be `aiplatform.gapic.Scheduling.Strategy[inferencer_resource_config.scheduling_strategy]`
# But mypy complains otherwise...
# But the type checker complains otherwise...
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we check that ty still complains?

(I am going to leave identical comments on all of the changes like this - that way the robots can pick them up and check automatically)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needed

# gigl/src/inference/v2/glt_inferencer.py:124: error: The type "type[Strategy]" is not generic and not indexable [misc]
# TODO(kmonte): Fix this
scheduling_strategy=getattr(
Expand Down
2 changes: 1 addition & 1 deletion gigl/src/data_preprocessor/lib/ingest/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def _get_bigquery_ptransform(
)


# Below type ignores are due to mypy star expansion issues: https://github.com/python/mypy/issues/6799
# Below type ignores are due to star expansion issues with the type checker: https://github.com/python/mypy/issues/6799
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we check that ty still complains?

(I am going to leave identical comments on all of the changes like this - that way the robots can pick them up and check automatically)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed - nice

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait did we keep the link as is?

Copy link
Copy Markdown
Collaborator Author

@svij-sc svij-sc Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This along with others are fixed in the follow up PR where I address the types in one go.
Since it was a long list of changes to fix type issues I left it as a separate scoped PR than this one - can merge if you think easier to review.

Fix here (comment is removed as ignore not needed):

def yield_instance_dict_ptransform(self, *args, **kwargs) -> InstanceDictPTransform:
return _get_bigquery_ptransform(
self.reference_uri,
self.sharded_read_config,
*args,
**kwargs,
)

@dataclass(frozen=True)
class BigqueryNodeDataReference(NodeDataReference):
"""
Expand Down
2 changes: 1 addition & 1 deletion gigl/src/data_preprocessor/lib/ingest/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType
from gigl.src.data_preprocessor.lib.types import InstanceDictPTransform

# Type hints for abstract dataclasses are currently not supported. https://github.com/python/mypy/issues/5374
# Type hints for abstract dataclasses may have limited support in type checkers. https://github.com/python/mypy/issues/5374
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we check that ty still complains?

(I am going to leave identical comments on all of the changes like this - that way the robots can pick them up and check automatically)

Also update the TODO from python/mypy?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was fixed with ty - updated

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto on link.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was — removed in the follow-up:

@dataclass(frozen=True)
class DataReference(ABC):
"""
Contains a URI string to the data reference, and provides a means of yielding



@dataclass(frozen=True) # type: ignore
Expand Down
4 changes: 2 additions & 2 deletions gigl/types/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,8 @@ def select_label_edge_types(
str,
int,
# TODO(kmonte): Add GLT Partition book here
# We cannot at the moment as we mypy ignore GLT
# And adding it as a type here will break mypy.
# We cannot at the moment as we type-ignore GLT
# And adding it as a type here will break the type checker.
# PartitionBook
Comment on lines +390 to 392
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we check that ty still complains?

(I am going to leave identical comments on all of the changes like this - that way the robots can pick them up and check automatically)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed - I added:

GiGL/gigl/types/graph.py

Lines 383 to 394 in af3dc20

_GraphEntity = TypeVar(
"_GraphEntity",
torch.Tensor,
GraphPartitionData,
FeaturePartitionData,
SerializedTFRecordInfo,
Optional[SerializedTFRecordInfo],
list,
str,
int,
PartitionBook,
)

)

Expand Down
65 changes: 0 additions & 65 deletions mypy.ini

This file was deleted.

86 changes: 83 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,8 @@ lint = [
"isort~=5.12.0",
"mdformat==0.7.22",
"mdformat_tables==1.0.0",
"mypy==1.8.0",
"mypy-extensions",
"mypy-protobuf==3.3.0",
"ty~=0.0.29",
"mypy-protobuf==3.3.0", # Used for protobuf stub generation (protoc-gen-mypy), not type checking
]


Expand Down Expand Up @@ -283,3 +282,84 @@ exclude = [
remove-all-unused-imports = true
in-place = true
recursive = true

[tool.ty.environment]
python-version = "3.11"

[tool.ty.src]
exclude = ["*_pb2.py", "*_pb2.pyi", "**/*.ipynb"]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we add a TODO to include our notebooks here eventually?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tbh, I think it's fine if we don't.
Notebooks need not have verbose static type checking.

Most act as documentation


[tool.ty.analysis]
Copy link
Copy Markdown
Collaborator Author

@svij-sc svij-sc Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Migrated from mypy.ini - removed any that could be fixed - thank you robots!

# Migrated from mypy.ini per-module ignore_missing_imports sections,
# plus additional modules that are optional or not installed in all environments.
allowed-unresolved-imports = [
"absl.**",
"apache_beam.**",
"common.**",
"fastavro.**",
"google.cloud.**",
"google_cloud_pipeline_components.**",
"graphlearn_torch.**",
"hydra.**",
"kfp.**",
"kfp_server_api.**",
"matplotlib.**",
"msgpack",
"networkx.**",
"ogb.**",
"orjson",
"parameterized.**",
"pyarrow.**",
"setuptools",
"tensorflow.**",
"tensorflow_data_validation.**",
"tensorflow_metadata.**",
"tensorflow_transform.**",
"tfx_bsl.**",
# torch lacks inline types/stubs; ty cannot resolve torch types in all environments
"torch.**",
"torch_geometric.**",
"torch_sparse.**",
"torchrec.**",
]
# When mypy's ignore_missing_imports=True was set for a module, mypy would also
# implicitly treat all types from that module as Any, suppressing downstream
# type errors (e.g. unresolved-attribute, invalid-argument-type) on values from
# those modules. ty's allowed-unresolved-imports only suppresses the import
# error but leaves the type as Unknown, causing cascading errors. This setting
# restores the mypy behavior by replacing imports from untyped libraries with Any.
replace-imports-with-any = [
"apache_beam.**",
"fastavro.**",
"google.cloud.**",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

likewise, is there a reason we're blanket ignoring all gcp imports?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

"google_cloud_pipeline_components.**",
"graphlearn_torch.**",
"hydra.**",
"kfp.**",
"kfp_server_api.**",
"matplotlib.**",
"ogb.**",
"tensorflow.**",
"tensorflow_data_validation.**",
"tensorflow_metadata.**",
"tensorflow_transform.**",
"tfx_bsl.**",
"torch.**",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmmm, we weren't type-ignoring torch before.

IIRC I saw some issues with torch when I looked into this earlier, the ty people suggested to enable some flag - did you try this? astral-sh/ty#2244

Since we're so reliant on torch I'd rather not type-ignore it if possible.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, the flag is not the problem.

From the robot's analysis, there are 200+ issues that were not being caught before; I think we should reserve these for a future change?
It seems we might just be using the APIs in an incorrect way according to the types — even though everything seems to be working.

Group 1: Tensor.getitem with NodeType/EdgeType/CondensedNodeType keys (~64 errors)

These happen because dict[NodeType, Tensor] access returns Tensor, and then a second indexing
like result[tensor_index] hits Tensor's strict getitem signature. The custom NewType keys
(NodeType = str, CondensedNodeType = int, EdgeType = NamedTuple) don't satisfy Tensor's index
types.

Fix: These are dict-on-dict access patterns — the real issue is that ty is resolving the chained
subscript incorrectly or the intermediate type needs explicit annotation. Likely fixable via
local variable annotations or targeted # type: ignore comments.

Group 2: FloatTensor/LongTensor vs Tensor return type mismatches (~25 errors)

Functions annotated to return FloatTensor/LongTensor but torch.gather(), torch.stack(),
torch.cat(), torch.mean(), torch.tensor() all return generic Tensor.

Fix: Change return type annotations from FloatTensor/LongTensor to Tensor. These deprecated
aliases are not what torch operations actually return.

Group 3: init argument type mismatches (~20 errors)

Parameters using torch.dtype, torch.device, etc. where ty resolves the stub types more strictly.

Fix: Explicit casts or type annotations at call sites.

Group 4: HeteroData/dict access returning object/Unknown (~15 errors)

When iterating over graph data structures, values come back as object rather than typed tensors.

Fix: Add explicit type annotations to loop variables.

Group 5: List invariance issues (~8 errors)

list[Tensor | None] assigned to list[Tensor], or Optional[Tensor] appended to list[FloatTensor].

Fix: Widen the list type annotations.

Group 6: Misc (operators, spawn, call-non-callable, etc.) (~10 errors)

Fix: Case-by-case — spawn import fix, operator type annotations, etc.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'd prefer to keep the torch typing around (if my reading is correct that this in fact is equivalent to type-ignoring torch) and just add a bunch of # type-ignore comments and TODOs to fix.

IDK how feasible that'd be to setup tho.

Copy link
Copy Markdown
Collaborator Author

@svij-sc svij-sc Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ye for sure.

It's too wide of a scope change for one PR.
I'd prefer if we do it as a follow-up — I can throw cc at it once we get these two PRs merged in.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I gave the robots a go at it and they generated https://github.com/Snapchat/GiGL/pull/597/changes — WDYT about merging it into your follow-up PR?

I do feel pretty strongly about not type-ignoring torch, even if temporarily.

"torch_geometric.**",
"torch_sparse.**",
"torchrec.**",
]

[tool.ty.rules]
# Suppress noise from existing type: ignore comments that ty may not need.
unused-type-ignore-comment = "ignore"
unused-ignore-comment = "ignore"

# Generated protobuf files trigger type errors from the google-protobuf stubs
# (e.g. RegisterMessage argument types). These files are auto-generated by
# protoc and cannot be fixed — suppress all type errors in them.
[[tool.ty.overrides]]
include = ["**/*_pb2.py"]
[tool.ty.overrides.rules]
invalid-argument-type = "ignore"
unresolved-attribute = "ignore"
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo)
):
assert (
seralized_positive_label_info is not None
) # We use assert instead of self.assertIsNotNone since this allows type narrowing with mypy
) # We use assert instead of self.assertIsNotNone since this allows type narrowing with the type checker

edge_info = preprocessed_metadata_pb_wrapper.preprocessed_metadata_pb.condensed_edge_type_to_preprocessed_metadata[
condensed_edge_type
Expand Down Expand Up @@ -391,7 +391,7 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo)
):
assert (
serialized_negative_label_info is not None
) # We use assert instead of self.assertIsNotNone since this allows type narrowing with mypy
) # We use assert instead of self.assertIsNotNone since this allows type narrowing with the type checker

edge_info = preprocessed_metadata_pb_wrapper.preprocessed_metadata_pb.condensed_edge_type_to_preprocessed_metadata[
condensed_edge_type
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/src/common/vertex_ai_launcher_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def test_launch_training_graph_store_cuda(self, mock_vertex_ai_service_class):
process_command.split(),
)
self.assertIsNotNone(compute_job_config.args)
assert compute_job_config.args is not None # Type narrowing for mypy
assert compute_job_config.args is not None # Type narrowing for the type checker
self.assertIn(f"--job_name={job_name}", compute_job_config.args)
self.assertIn(
f"--learning_rate={process_runtime_args['learning_rate']}",
Expand Down Expand Up @@ -298,7 +298,7 @@ def test_launch_inference_single_pool_cpu(self, mock_vertex_ai_service_class):
# Verify command and args
self.assertEqual(job_config.command, process_command.split())
self.assertIsNotNone(job_config.args)
assert job_config.args is not None # Type narrowing for mypy
assert job_config.args is not None # Type narrowing for the type checker
self.assertIn(f"--job_name={job_name}", job_config.args)
self.assertIn(f"--task_config_uri={task_config_uri}", job_config.args)
self.assertIn(f"--resource_config_uri={resource_config_uri}", job_config.args)
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/transforms/graph_transformer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ def test_gradient_flow(self):

# Check that gradients exist and are not NaN
self.assertIsNotNone(sequences.grad)
assert sequences.grad is not None # Type narrowing for mypy
assert sequences.grad is not None # Type narrowing for the type checker
self.assertFalse(torch.isnan(sequences.grad).any())

def test_transformer_with_classification_head(self):
Expand Down