Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/con_duct/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ def _create_ls_parser() -> argparse.ArgumentParser:
"exit_code",
"wall_clock_time",
"peak_rss",
"files_size",
],
)
parser.add_argument(
Expand Down
20 changes: 20 additions & 0 deletions src/con_duct/ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import glob
import json
import logging
from pathlib import Path
import re
from types import ModuleType
from typing import Any, Dict, List, Optional
Expand Down Expand Up @@ -32,6 +33,7 @@
"average_vsz": "{value!S}",
"end_time": "{value:.2f!N}",
"exit_code": "{value!E}",
"files_size": "{value!S}",
"memory_total": "{value!S}",
"peak_pcpu": "{value:.2f!N}%",
"peak_pmem": "{value:.2f!N}%",
Expand Down Expand Up @@ -65,9 +67,26 @@
# Concatenation of the VALUE_TRANSFORMATION_MAP keys and NON_TRANSFORMED_FIELDS.
LS_FIELD_CHOICES: List[str] = (
list(VALUE_TRANSFORMATION_MAP.keys()) + NON_TRANSFORMED_FIELDS
)
# Fields that load_duct_runs computes and adds to each run record
# (files_size comes from compute_files_size); the schema test filters these
# out before comparing LS_FIELD_CHOICES with the keys of a real info file.
COMPUTED_FIELDS: List[str] = ["files_size"]
MINIMUM_SCHEMA_VERSION: str = "0.2.0"


def compute_files_size(prefix: str) -> int:
    """Compute total size in bytes of all files for a given session prefix.

    Every path matching ``<prefix>*`` is considered; the prefix is escaped
    first so glob metacharacters in it are matched literally.  Only regular
    files contribute to the total; matching directories are ignored.

    Files that cannot be examined (e.g. due to permissions, or because they
    vanished between globbing and stat) are skipped, and a warning is logged
    so the resulting undercount is not silent.

    Args:
        prefix: Session output prefix shared by all files of one run.

    Returns:
        Sum of ``st_size`` over all accessible regular files matching the
        prefix, or 0 when nothing matches.
    """
    total = 0
    for path_str in glob.glob(glob.escape(prefix) + "*"):
        path = Path(path_str)
        try:
            # is_file() stats the path too and can raise OSError itself
            # (e.g. PermissionError), so it must be inside the try block.
            if path.is_file():
                total += path.stat().st_size
        except OSError as e:
            lgr.warning("Could not get size of %s: %s", path_str, e)
    return total


def load_duct_runs(
info_files: List[str], eval_filter: Optional[str] = None
) -> List[Dict[str, Any]]:
Expand All @@ -87,6 +106,7 @@ def load_duct_runs(
)
continue
ensure_compliant_schema(this)
this["files_size"] = compute_files_size(this["prefix"])
if eval_filter is not None and not (
eval_results := eval(eval_filter, _flatten_dict(this), dict(re=re))
):
Expand Down
76 changes: 76 additions & 0 deletions test/test_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
MINIMUM_SCHEMA_VERSION,
_flatten_dict,
_restrict_row,
compute_files_size,
ensure_compliant_schema,
load_duct_runs,
ls,
Expand Down Expand Up @@ -164,6 +165,49 @@ def side_effect(filename: str) -> Any:
assert "Skipping empty file" in caplog.text


def test_compute_files_size_sums_all_files() -> None:
    """compute_files_size adds up the byte sizes of every file sharing the prefix."""
    # Suffixes are arbitrary: anything starting with the prefix is globbed.
    payloads = {"a": b"hello", "b": b"world!", "c": b"{}..."}
    with tempfile.TemporaryDirectory() as tmpdir:
        prefix = os.path.join(tmpdir, "run_")
        for suffix, content in payloads.items():
            with open(f"{prefix}{suffix}", "wb") as f:
                f.write(content)
        expected = sum(map(len, payloads.values()))
        assert compute_files_size(prefix) == expected


def test_compute_files_size_empty_prefix() -> None:
    """A prefix that matches no files yields a total of zero bytes."""
    with tempfile.TemporaryDirectory() as tmpdir:
        unmatched_prefix = os.path.join(tmpdir, "nonexistent_")
        total = compute_files_size(unmatched_prefix)
        assert total == 0


def test_load_duct_runs_includes_files_size() -> None:
    """load_duct_runs attaches a positive files_size to the run it loads."""
    with tempfile.TemporaryDirectory() as tmpdir:
        prefix = os.path.join(tmpdir, "run_")
        info_path = f"{prefix}info.json"
        info_record = {
            "schema_version": MINIMUM_SCHEMA_VERSION,
            "prefix": prefix,
            "execution_summary": {},
            "message": "",
        }
        with open(info_path, "w") as info_fh:
            json.dump(info_record, info_fh)
        # A second file under the same prefix that also counts toward files_size.
        with open(f"{prefix}stdout", "w") as stdout_fh:
            stdout_fh.write("some output")
        runs = load_duct_runs([info_path])
        assert len(runs) == 1
        run = runs[0]
        assert "files_size" in run
        assert run["files_size"] > 0


class TestLS(unittest.TestCase):
def setUp(self) -> None:
"""Create a temporary directory and test files."""
Expand Down Expand Up @@ -389,3 +433,35 @@ def test_ls_reverse(self) -> None:
prefixes_reversed = [row["prefix"] for row in parsed_reversed]

assert prefixes_reversed == list(reversed(prefixes_normal))

def test_ls_files_size_in_output(self) -> None:
    """Requesting the files_size field yields it in the single JSON row."""
    info_name = "file1_info.json"
    namespace = argparse.Namespace(
        paths=[os.path.join(self.temp_dir.name, info_name)],
        colors=False,
        fields=["files_size"],
        eval_filter=None,
        format="json",
        func=ls,
        reverse=False,
    )
    rows = json.loads(self._run_ls([info_name], "json", namespace))
    assert len(rows) == 1
    assert "files_size" in rows[0]

def test_ls_files_size_humanized_in_summaries(self) -> None:
    """The summaries format labels files_size and prints it with a size unit."""
    info_name = "file1_info.json"
    namespace = argparse.Namespace(
        paths=[os.path.join(self.temp_dir.name, info_name)],
        colors=False,
        fields=["files_size"],
        eval_filter=None,
        format="summaries",
        func=ls,
        reverse=False,
    )
    output = self._run_ls([info_name], "summaries", namespace)
    assert "Files Size:" in output
    # A unit suffix means the value was humanized, not printed as a raw integer.
    size_units = ["Byte", "kB", "MB", "GB"]
    assert any(unit in output for unit in size_units)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think some of those tests warrant just addition of testing within existing tests instead of breeding their own -- I bet we do not have a test per each column. @copilot please review and adjust to retain coverage and testing but reduce code added in the tests

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consolidated in ca997ba:

  • Merged the empty-prefix case into test_compute_files_size_sums_all_files as a second assertion
  • Removed test_load_duct_runs_includes_files_size — added assert "files_size" in result[0] to test_load_duct_runs_sanity instead
  • Removed test_ls_files_size_in_output and test_ls_files_size_humanized_in_summaries — added files_size to the _run_ls helper's default fields, then added the assertions to test_ls_sanity (humanized check) and test_ls_json_output (field presence check)

5 changes: 3 additions & 2 deletions test/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pathlib import Path
from utils import run_duct_command
from con_duct._constants import SUFFIXES
from con_duct.ls import LS_FIELD_CHOICES, _flatten_dict
from con_duct.ls import COMPUTED_FIELDS, LS_FIELD_CHOICES, _flatten_dict


def test_info_fields(temp_output_dir: str) -> None:
Expand All @@ -29,4 +29,5 @@ def test_info_fields(temp_output_dir: str) -> None:
actual_info_schema = _flatten_dict(json.loads(info_file.read_text())).keys()
os.remove(info_file)

assert set(actual_info_schema) == set(LS_FIELD_CHOICES)
schema_field_choices = [f for f in LS_FIELD_CHOICES if f not in COMPUTED_FIELDS]
assert set(actual_info_schema) == set(schema_field_choices)
Loading