Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions examples/mlx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
name: MLX Problem Set
deadline: "2026-05-01 03:59"
description: "Test MLX"
problems:
- directory: mlx/example
name: example_mlx
deadline: "2026-05-01 03:59"
gpus:
- M4_Max
133 changes: 133 additions & 0 deletions examples/mlx/example/eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import math
import os
import re
import sys
import time
from pathlib import Path

import mlx.core as mx

from reference import check_implementation, generate_input
from submission import custom_kernel

# Benchmark tuning knobs:
# - WARMUP_ITERS: un-timed kernel invocations before measuring (warms caches/compilation).
# - BENCH_ITERS: upper bound on timed iterations per case; run_benchmarking may
#   stop early once its mean estimate is stable.
WARMUP_ITERS = 10
BENCH_ITERS = 100


class PopcornOutput:
    """Context manager that writes `key: value` result lines to an inherited file descriptor."""

    def __init__(self, fd: int):
        # Wrap the raw fd in a text stream; closing the stream closes the fd.
        self.file = os.fdopen(fd, "w")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.file.close()

    def log(self, key, value):
        """Emit one `key: value` line and flush immediately so consumers see it live."""
        self.file.write(f"{key}: {value}\n")
        self.file.flush()


def get_test_cases(file_name):
    """Parse a test-case file into a list of keyword-argument dicts.

    Each non-blank line describes one case as ';'-separated `key: value`
    pairs, e.g. "size: 128; seed: 42". Integer-looking values are converted
    to int; identifier-like values stay strings. Exits the process with
    status 113 on any malformed line.
    """
    pattern = re.compile(r"\s*([a-zA-Z_]+):\s*([a-zA-Z_]+|[+-]?[0-9]+)\s*")
    tests = []
    for line in Path(file_name).read_text().splitlines():
        if not line.strip():
            continue
        # Ignore empty segments so a trailing ';' doesn't abort the run.
        parts = [part for part in line.split(";") if part.strip()]
        if not parts:
            print(f"invalid test case: '{line}'", file=sys.stderr)
            sys.exit(113)
        case = {}
        for part in parts:
            m = pattern.fullmatch(part)
            if not m:
                print(f"invalid test case: '{line}'", file=sys.stderr)
                sys.exit(113)
            key, val = m[1], m[2]
            try:
                val = int(val)
            except ValueError:
                pass  # non-numeric values are kept as strings
            case[key] = val
        tests.append(case)
    return tests


def run_testing(logger, tests):
    """Run every test case once, logging per-case results.

    Returns 0 when all cases pass, 112 when any case fails.
    """
    failures = 0
    logger.log("test-count", len(tests))
    for idx, case in enumerate(tests):
        logger.log(f"test.{idx}.spec", case)
        inputs = generate_input(**case)
        result = custom_kernel(inputs)
        mx.eval(result)  # force the lazy MLX computation before checking
        message = check_implementation(inputs, result)
        if message:
            logger.log(f"test.{idx}.status", "fail")
            logger.log(f"test.{idx}.error", message)
            failures += 1
        else:
            logger.log(f"test.{idx}.status", "pass")
    logger.log("check", "fail" if failures else "pass")
    return 112 if failures else 0


def run_benchmarking(logger, tests):
    """Benchmark the submission on each test case and log mean runtimes.

    Correctness is re-checked per case before timing; a failing case is
    logged and skipped. Each case is timed for up to BENCH_ITERS iterations,
    stopping early once the relative standard error of the mean drops below
    1%. Returns 0 if every case passed, 112 otherwise.
    """
    if not tests:
        # Nothing to measure; report an empty, passing benchmark run
        # (previously tests[0] below would raise IndexError).
        logger.log("benchmark-count", 0)
        logger.log("check", "pass")
        return 0

    # Warm up kernel compilation/caches using the first case's input.
    data = generate_input(**tests[0])
    for _ in range(WARMUP_ITERS):
        mx.eval(custom_kernel(data))

    passed = True
    logger.log("benchmark-count", len(tests))
    for idx, test in enumerate(tests):
        logger.log(f"benchmark.{idx}.spec", test)
        data = generate_input(**test)
        mx.eval(data)  # materialize inputs so timing excludes their generation

        # Correctness gate: never report timings for a wrong result.
        output = custom_kernel(data)
        mx.eval(output)
        error = check_implementation(data, output)
        if error:
            logger.log(f"benchmark.{idx}.status", "fail")
            logger.log(f"benchmark.{idx}.error", error)
            passed = False
            continue

        durations = []  # per-iteration wall times in nanoseconds
        for i in range(BENCH_ITERS):
            start = time.perf_counter_ns()
            mx.eval(custom_kernel(data))  # mx.eval forces the lazy computation
            durations.append(time.perf_counter_ns() - start)
            if i > 1:  # need >= 3 samples for the sample std-dev below
                avg = sum(durations) / len(durations)
                std = math.sqrt(sum((d - avg) ** 2 for d in durations) / (len(durations) - 1))
                if std / math.sqrt(len(durations)) / avg < 0.01:
                    # Standard error of the mean is below 1% of the mean: stable.
                    break

        avg = sum(durations) / len(durations)
        logger.log(f"benchmark.{idx}.runs", len(durations))
        logger.log(f"benchmark.{idx}.mean", avg)

    logger.log("check", "pass" if passed else "fail")
    return 0 if passed else 112


def main():
    """Entry point: dispatch to test or benchmark mode.

    Usage: eval.py {test|benchmark|leaderboard} <test-case-file>, with the
    results file descriptor passed via the POPCORN_FD environment variable.
    Exit codes: 0 success, 2 bad usage/mode, 111 missing or invalid
    POPCORN_FD, 112 failed checks, 113 malformed test file.
    """
    fd = os.getenv("POPCORN_FD")
    if not fd or not fd.isdigit():
        # The harness must supply a numeric fd; previously a non-numeric
        # value crashed with ValueError instead of returning the error code.
        return 111
    if len(sys.argv) < 3:
        return 2

    mode = sys.argv[1]
    tests = get_test_cases(sys.argv[2])

    with PopcornOutput(int(fd)) as logger:
        if mode == "test":
            return run_testing(logger, tests)
        if mode in ("benchmark", "leaderboard"):
            return run_benchmarking(logger, tests)
        return 2


if __name__ == "__main__":
    # sys.exit raises SystemExit(main()) — identical to the raise form.
    sys.exit(main())
29 changes: 29 additions & 0 deletions examples/mlx/example/reference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import mlx.core as mx


# Absolute/relative tolerances for the mx.allclose comparison in
# check_implementation; loose because inputs/outputs are float16.
ATOL = 1e-3
RTOL = 1e-3


def generate_input(size, seed=42):
    """Return a pair of (size, size) float16 arrays drawn from a seeded normal distribution."""
    mx.random.seed(seed)
    # Two sequential draws from the seeded generator, cast to float16.
    operands = [mx.random.normal(shape=(size, size)).astype(mx.float16) for _ in range(2)]
    mx.eval(*operands)  # materialize before handing to the submission
    return tuple(operands)


def reference_kernel(data):
    """Reference implementation: elementwise addition of the two input arrays."""
    lhs, rhs = data
    return lhs + rhs


def check_implementation(data, output):
    """Compare a submission's output against the reference kernel.

    Returns an empty string on success, or a human-readable error message
    describing the first mismatch (shape, dtype, or values).
    """
    expected = reference_kernel(data)
    mx.eval(expected)
    if output.shape != expected.shape:
        return f"shape mismatch: expected {expected.shape}, got {output.shape}"
    # The task spec requires float16 output; value-only comparison would
    # silently accept e.g. a float32 result.
    if output.dtype != expected.dtype:
        return f"dtype mismatch: expected {expected.dtype}, got {output.dtype}"
    if not mx.allclose(output, expected, atol=ATOL, rtol=RTOL).item():
        max_diff = mx.max(mx.abs(output - expected)).item()
        return f"mismatch found! max diff: {max_diff}"
    return ""
6 changes: 6 additions & 0 deletions examples/mlx/example/submission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import mlx.core as mx


def custom_kernel(data):
    """Baseline submission: add the two operands in `data` elementwise."""
    first, second = data
    return first + second
32 changes: 32 additions & 0 deletions examples/mlx/example/task.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
files:
- {"name": "submission.py", "source": "@SUBMISSION@"}
- {"name": "reference.py", "source": "reference.py"}
- {"name": "eval.py", "source": "eval.py"}

lang: "py"

description: |
Implement a float16 vector addition kernel using MLX.

Input: tuple(mx.array, mx.array) with arrays of shape (N, N) and type mx.float16.
Output: mx.array of shape (N, N) and type mx.float16

config:
main: "eval.py"

tests:
- {"size": 128, "seed": 5236}
- {"size": 256, "seed": 5531}
- {"size": 512, "seed": 9173}

benchmarks:
- {"size": 1024, "seed": 31232}
- {"size": 4096, "seed": 2146}
- {"size": 16384, "seed": 54352}

test_timeout: 180
benchmark_timeout: 180
ranked_timeout: 180

gpus:
- M4_Max
73 changes: 73 additions & 0 deletions instructions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
## Changes Summary

### New files
- src/libkernelbot/launchers/local.py — LocalLauncher that runs submissions directly on the host machine via run_config(). Blocks CUDA submissions.

### Modified files — Adding Metal/MLX support

1. src/libkernelbot/consts.py
- Added MetalGPU enum (M4_Max)
- Registered it in _GPU_LOOKUP under "Local" runner
- Added M4_Max: None to GPU_TO_SM

2. src/libkernelbot/launchers/__init__.py — Exports LocalLauncher

3. src/kernelbot/main.py — Registers LocalLauncher() in create_backend()

4. src/kernelbot/cogs/admin_cog.py — Added MetalGPU to Discord GPU dropdowns

### Modified files — Bug fixes for macOS compatibility

5. src/libkernelbot/run_eval.py — Three fixes in make_system_info():
- Added MPS/Metal detection via torch.backends.mps
- Catch FileNotFoundError for nvidia-smi/rocm-smi (don't exist on macOS)
- Catch FileNotFoundError for /proc/cpuinfo (doesn't exist on macOS)

6. src/kernelbot/api/main.py — Replace / with _ in auto-derived dev leaderboard names so nested directories don't break API routing

---

## Manual Test Steps

# 1. Start Postgres (if not already running)
brew services start postgresql@14

# 2. Create DB and run migrations
export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot"
createdb kernelbot # skip if already exists
cd /path/to/kernelbot
uv run yoyo apply --database "$DATABASE_URL" src/migrations/

# 3. Create test user
psql "$DATABASE_URL" -c "INSERT INTO leaderboard.user_info (id, user_name, cli_id, cli_valid)
VALUES ('999999', 'testuser', 'test-cli-id-123', true)
ON CONFLICT (id) DO UPDATE SET cli_id = 'test-cli-id-123', cli_valid = true;"

# 4. Install mlx
uv pip install mlx

# 5. Start the API server
cd src/kernelbot
export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot"
export ADMIN_TOKEN="your-admin-token"
export PROBLEM_DEV_DIR="/path/to/kernelbot/examples"
export GITHUB_TOKEN="dummy"
export GITHUB_REPO="dummy/dummy"
export DISABLE_SSL=1
uv run python main.py --api-only

# 6. (In another terminal) Create the dev leaderboard
curl -X POST "http://localhost:8000/admin/leaderboards" \
-H "Authorization: Bearer your-admin-token" \
-H "Content-Type: application/json" \
-d '{"directory": "mlx/example"}'

# 7. Submit a test
curl -X POST "http://localhost:8000/mlx_example-dev/M4_Max/test" \
-H "X-Popcorn-Cli-Id: test-cli-id-123" \
-F "file=@examples/mlx/example/submission.py"

# 8. Submit a benchmark
curl -X POST "http://localhost:8000/mlx_example-dev/M4_Max/benchmark" \
-H "X-Popcorn-Cli-Id: test-cli-id-123" \
-F "file=@examples/mlx/example/submission.py"
3 changes: 3 additions & 0 deletions src/envs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot"
export ADMIN_TOKEN="your-admin-token"
export PROBLEM_DEV_DIR="/path/to/kernelbot/examples"
2 changes: 1 addition & 1 deletion src/kernelbot/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ async def create_dev_leaderboard(
definition = make_task_definition(directory_path)

# Auto-derive name and deadline like admin_cog.leaderboard_create_local
leaderboard_name = f"{directory}-dev"
leaderboard_name = f"{directory.replace('/', '_')}-dev"
deadline_value = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=365)
Comment on lines +647 to 648

# GPUs must be specified in task.yml
Expand Down
5 changes: 3 additions & 2 deletions src/kernelbot/cogs/admin_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)
from kernelbot.env import env
from kernelbot.ui.misc import ConfirmationView, DeleteConfirmationModal, GPUSelectionView
from libkernelbot.consts import GitHubGPU, ModalGPU
from libkernelbot.consts import GitHubGPU, MetalGPU, ModalGPU
from libkernelbot.leaderboard_db import LeaderboardDoesNotExist, LeaderboardItem, SubmissionItem
from libkernelbot.task import LeaderboardDefinition, make_task_definition
from libkernelbot.utils import (
Expand Down Expand Up @@ -208,6 +208,7 @@ async def unban_user(self, interaction: discord.Interaction, user_id: str):
@app_commands.choices(
gpu=[app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in GitHubGPU]
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in ModalGPU]
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in MetalGPU]
)
@with_error_handling
async def leaderboard_create_local(
Expand Down Expand Up @@ -386,7 +387,7 @@ async def create_leaderboard_in_db(
if gpu is None:
# Ask the user to select GPUs
view = GPUSelectionView(
[gpu.name for gpu in GitHubGPU] + [gpu.name for gpu in ModalGPU]
[gpu.name for gpu in GitHubGPU] + [gpu.name for gpu in ModalGPU] + [gpu.name for gpu in MetalGPU]
)

await send_discord_message(
Expand Down
3 changes: 2 additions & 1 deletion src/kernelbot/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from libkernelbot import consts
from libkernelbot.backend import KernelBackend
from libkernelbot.background_submission_manager import BackgroundSubmissionManager
from libkernelbot.launchers import GitHubLauncher, ModalLauncher
from libkernelbot.launchers import GitHubLauncher, LocalLauncher, ModalLauncher
from libkernelbot.utils import setup_logging

logger = setup_logging(__name__)
Expand All @@ -29,6 +29,7 @@ def create_backend(debug_mode: bool = False) -> KernelBackend:
backend.register_launcher(
GitHubLauncher(env.GITHUB_REPO, env.GITHUB_TOKEN, env.GITHUB_WORKFLOW_BRANCH)
)
backend.register_launcher(LocalLauncher())
return backend
Comment on lines 28 to 37


Expand Down
7 changes: 6 additions & 1 deletion src/libkernelbot/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ class ModalGPU(Enum):
L4x4 = "L4x4"


class MetalGPU(Enum):
    """Apple-silicon GPUs served by the "Local" runner (see _GPU_LOOKUP)."""

    M4_Max = "M4_Max"


@dataclasses.dataclass
class GPU:
name: str
Expand All @@ -52,7 +56,7 @@ def _make_gpu_lookup(runner_map: dict[str, Type[Enum]]):
return lookup


_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU})
_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU, "Local": MetalGPU})


def get_gpu_by_name(name: str) -> GPU:
Expand Down Expand Up @@ -132,6 +136,7 @@ class RankCriterion(Enum):
"MI300x8": None,
"MI250": None,
"MI355X": None,
"M4_Max": None,
}


Expand Down
3 changes: 2 additions & 1 deletion src/libkernelbot/launchers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .github import GitHubLauncher
from .launcher import Launcher
from .local import LocalLauncher
from .modal import ModalLauncher

__all__ = [Launcher, GitHubLauncher, ModalLauncher]
# __all__ must contain name strings, not the objects themselves:
# `from libkernelbot.launchers import *` raises on non-string entries.
__all__ = ["Launcher", "GitHubLauncher", "LocalLauncher", "ModalLauncher"]
Loading
Loading