Skip to content

Commit c2ccb7e

Browse files
committed
Merge branch 'trunk' into MINOR-cleanup-testutils-scala
# Conflicts: # tools/src/test/java/org/apache/kafka/tools/LeaderElectionCommandTest.java
2 parents 6a0dfd2 + 3e32d9a commit c2ccb7e

152 files changed

Lines changed: 985 additions & 580 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/scripts/pr-format.py

Lines changed: 56 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,24 @@ def split_paragraphs(text: str):
107107
def resolve_reviewer(login: str) -> tuple:
108108
"""Map a GitHub login to (name, email).
109109
110-
Tries the repo commit history first, then falls back to the GitHub user profile.
111-
If the display name is unavailable, the login is used as the name.
112-
If the email is unavailable, '{login}@email-not-found' is used as a placeholder.
110+
Tries three tiers in order: repo commit history, GitHub user profile,
111+
and past `Reviewers:` trailers in git log (matched by name).
112+
Noreply emails (@users.noreply.github.com) are treated as missing since
113+
they are GitHub privacy placeholders that do not identify the reviewer.
114+
Returns (name, None) when no usable email is found; the caller falls
115+
back to the '(@login)' form in the Reviewers trailer.
113116
"""
117+
def _usable_email(e):
118+
if not e or e.endswith("@users.noreply.github.com"):
119+
return None
120+
return e
121+
114122
name = None
115123
email = None
116124

117-
# Tier 1: find from repo commit history
125+
# Tier 1: find from repo commit history. Misses when the reviewer has no
126+
# merged commit in apache/kafka, or had "Keep my email private" enabled
127+
# at commit time (GitHub rewrites the author to the noreply form).
118128
try:
119129
cmd = f"gh api repos/apache/kafka/commits?author={login}&per_page=1"
120130
p = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
@@ -123,11 +133,12 @@ def resolve_reviewer(login: str) -> tuple:
123133
if commits:
124134
author = commits[0].get("commit", {}).get("author", {})
125135
name = author.get("name")
126-
email = author.get("email")
136+
email = _usable_email(author.get("email"))
127137
except Exception as e:
128138
logger.debug(f"Failed to resolve {login} from commit history: {e}")
129139

130-
# Tier 2: GitHub user profile
140+
# Tier 2: GitHub user profile. Only exposes an email when the reviewer
141+
# has set a Public email in their profile settings.
131142
if not name or not email:
132143
try:
133144
cmd = f"gh api users/{login}"
@@ -137,23 +148,47 @@ def resolve_reviewer(login: str) -> tuple:
137148
if not name:
138149
name = user.get("name")
139150
if not email:
140-
email = user.get("email")
151+
email = _usable_email(user.get("email"))
141152
except Exception as e:
142153
logger.debug(f"Failed to resolve {login} from GitHub profile: {e}")
143154

155+
# Tier 3: past Reviewers: trailers in git log, matched by name. Catches
156+
# pure reviewers (no commits in apache/kafka, no public profile email)
157+
# who have been credited with a real email in an earlier merged PR.
158+
# git log is newest-first, so the first usable match is the most recent.
159+
if name and not email:
160+
try:
161+
p = subprocess.run(
162+
["git", "log",
163+
"--pretty=format:%(trailers:key=Reviewers,valueonly=true,unfold=true)"],
164+
capture_output=True, text=True,
165+
)
166+
if p.returncode == 0:
167+
pattern = re.compile(rf"{re.escape(name)}\s*<([^>]+)>")
168+
for line in p.stdout.splitlines():
169+
for m in pattern.finditer(line):
170+
candidate = _usable_email(m.group(1))
171+
if candidate:
172+
email = candidate
173+
break
174+
if email:
175+
break
176+
except Exception as e:
177+
logger.debug(f"Failed to resolve {login} from past Reviewers trailers: {e}")
178+
144179
if not name:
145180
name = login
146181

147-
if not email:
148-
email = f"{login}@email-not-found"
149-
150182
return (name, email)
151183

152184

153-
def already_exists(email: str, existing_reviewers: List[str]) -> bool:
154-
"""Check if a reviewer with the given email is already in the existing reviewers list."""
155-
existing_emails = re.findall(r'<(.+?)>', ", ".join(existing_reviewers))
156-
return email.lower() in [e.lower() for e in existing_emails]
185+
def already_exists(identity: str, existing_reviewers: List[str]) -> bool:
186+
"""Check if a reviewer identity is already in the existing reviewers list.
187+
188+
identity is the delimited token that uniquely identifies a reviewer, either
189+
'<email>' (for the email form) or '(@login)' (for the login fallback).
190+
"""
191+
return identity.lower() in ", ".join(existing_reviewers).lower()
157192

158193

159194
def update_reviewers_trailer(body: str, trailer: str) -> str:
@@ -207,11 +242,15 @@ def update_reviewers_trailer(body: str, trailer: str) -> str:
207242
reviewer_login = get_env("REVIEWER_LOGIN")
208243
pr_author = (gh_json.get("author") or {}).get("login")
209244
if reviewer_login and reviewer_login != pr_author:
210-
existing_reviewers = parse_trailers(title, body).get("Reviewers", [])
211245
name, email = resolve_reviewer(reviewer_login)
212-
if not already_exists(email, existing_reviewers):
246+
if email:
247+
identity = f"<{email}>"
248+
else:
249+
identity = f"(@{reviewer_login})"
250+
resolved = f"{name} {identity}"
251+
existing_reviewers = parse_trailers(title, body).get("Reviewers", [])
252+
if not already_exists(identity, existing_reviewers):
213253
existing_value = ", ".join(existing_reviewers)
214-
resolved = f"{name} <{email}>"
215254
new_value = f"{existing_value}, {resolved}" if existing_value else resolved
216255
body = update_reviewers_trailer(body, f"Reviewers: {new_value}")
217256

0 commit comments

Comments
 (0)