# Source patch: 92d5956ba66f40eb37e30e882d923ac431a20e54
# Author:  Yuu
# Date:    Thu, 8 May 2025 11:36:57 +0900
# Subject: feat: add len_var scorer (B-0)
#
# The patch also creates two empty package markers:
#   metrics/__init__.py
#   metrics/len_var/__init__.py

# --- metrics/len_var/scorer.py ---------------------------------------------
"""Length-Variance (len_var) scorer.

Summary carried over from metrics/len_var/README.md:

* Purpose: detects internal length fluctuation in a single answer.
* Formula: sigma(window_lengths) / mu(window_lengths).
* Threshold (tentative): < 0.20 ✅ / 0.20-0.39 ⚠️ / >= 0.40 🔥.
* Dependencies: none (pure Python stdlib).
"""
import statistics as st
from typing import List


def _window_lengths(tokens: List[str], window: int = 5) -> List[int]:
    """Return the character length of every sliding window over *tokens*.

    A window is ``window`` consecutive tokens joined by single spaces, so
    its length is the sum of its token lengths plus ``window - 1``
    separators. Consecutive windows are offset by one token.

    Args:
        tokens: The tokens to slide over.
        window: Number of tokens per window; must be at least 1.

    Returns:
        One length per window, in order. Empty when there are fewer than
        ``window`` tokens.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")
    if len(tokens) < window:
        return []

    token_lens = [len(tok) for tok in tokens]
    separators = window - 1

    # Keep a running sum instead of joining a fresh string per window:
    # moving one step right drops the leftmost token and adds the next one.
    running = sum(token_lens[:window])
    lengths = [running + separators]
    for leaving, entering in zip(token_lens, token_lens[window:]):
        running += entering - leaving
        lengths.append(running + separators)
    return lengths


def score(prediction: str, reference: str = "", window: int = 5) -> float:
    """Return the length-variance score of *prediction*.

    The score is the population standard deviation of the sliding-window
    character lengths divided by their mean (a coefficient of variation).
    0.0 means every window has the same length; larger values mean larger
    swings. The value is non-negative and is not capped at 1.

    Args:
        prediction: Text to score; it is split on whitespace into tokens.
        reference: Not used by this scorer.
        window: Number of tokens per sliding window; must be at least 1.

    Returns:
        The score as a float, or 0.0 when *prediction* has fewer than
        ``window`` tokens (including the empty string).

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    tokens = prediction.split()
    if len(tokens) < window:
        return 0.0

    lengths = _window_lengths(tokens, window)
    # The max(..., 1) guard keeps the division safe even for a tiny mean.
    return st.pstdev(lengths) / max(st.mean(lengths), 1)


# --- metrics/len_var/tests/test_len_var.py ---------------------------------
# In the package layout this file begins with
# `from metrics.len_var.scorer import score`; in this single listing `score`
# is already defined above.

def test_constant_length():
    s = "word " * 10
    assert score(s) < 0.05


def test_varied_length():
    s = "a a a a a " + "this sentence is considerably longer "
    assert score(s) > 0.2