Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions examples/mlx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
name: MLX Problem Set
deadline: "2026-05-01 03:59"
description: "Test MLX"
problems:
- directory: mlx/example
name: example_mlx
deadline: "2026-05-01 03:59"
gpus:
- M4_Max
133 changes: 133 additions & 0 deletions examples/mlx/example/eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import math
import os
import re
import sys
import time
from pathlib import Path

import mlx.core as mx

from reference import check_implementation, generate_input
from submission import custom_kernel

# Benchmark tuning knobs:
# - WARMUP_ITERS: un-timed kernel invocations before measuring (warms caches/compilation).
# - BENCH_ITERS: upper bound on timed iterations per case; run_benchmarking may
#   stop early once its mean estimate is stable.
WARMUP_ITERS = 10
BENCH_ITERS = 100


class PopcornOutput:
    """Context manager that writes `key: value` result lines to an inherited file descriptor."""

    def __init__(self, fd: int):
        # Wrap the raw fd in a text stream; closing the stream closes the fd.
        self.file = os.fdopen(fd, "w")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.file.close()

    def log(self, key, value):
        """Emit one `key: value` line and flush immediately so consumers see it live."""
        self.file.write(f"{key}: {value}\n")
        self.file.flush()


def get_test_cases(file_name):
    """Parse a test-case file into a list of keyword-argument dicts.

    Each non-blank line describes one case as ';'-separated `key: value`
    pairs, e.g. "size: 128; seed: 42". Integer-looking values are converted
    to int; identifier-like values stay strings. Exits the process with
    status 113 on any malformed line.
    """
    pattern = re.compile(r"\s*([a-zA-Z_]+):\s*([a-zA-Z_]+|[+-]?[0-9]+)\s*")
    tests = []
    for line in Path(file_name).read_text().splitlines():
        if not line.strip():
            continue
        # Ignore empty segments so a trailing ';' doesn't abort the run.
        parts = [part for part in line.split(";") if part.strip()]
        if not parts:
            print(f"invalid test case: '{line}'", file=sys.stderr)
            sys.exit(113)
        case = {}
        for part in parts:
            m = pattern.fullmatch(part)
            if not m:
                print(f"invalid test case: '{line}'", file=sys.stderr)
                sys.exit(113)
            key, val = m[1], m[2]
            try:
                val = int(val)
            except ValueError:
                pass  # non-numeric values are kept as strings
            case[key] = val
        tests.append(case)
    return tests


def run_testing(logger, tests):
    """Run every test case once, logging per-case results.

    Returns 0 when all cases pass, 112 when any case fails.
    """
    failures = 0
    logger.log("test-count", len(tests))
    for idx, case in enumerate(tests):
        logger.log(f"test.{idx}.spec", case)
        inputs = generate_input(**case)
        result = custom_kernel(inputs)
        mx.eval(result)  # force the lazy MLX computation before checking
        message = check_implementation(inputs, result)
        if message:
            logger.log(f"test.{idx}.status", "fail")
            logger.log(f"test.{idx}.error", message)
            failures += 1
        else:
            logger.log(f"test.{idx}.status", "pass")
    logger.log("check", "fail" if failures else "pass")
    return 112 if failures else 0


def run_benchmarking(logger, tests):
    """Benchmark the submission on each test case and log mean runtimes.

    Correctness is re-checked per case before timing; a failing case is
    logged and skipped. Each case is timed for up to BENCH_ITERS iterations,
    stopping early once the relative standard error of the mean drops below
    1%. Returns 0 if every case passed, 112 otherwise.
    """
    if not tests:
        # Nothing to measure; report an empty, passing benchmark run
        # (previously tests[0] below would raise IndexError).
        logger.log("benchmark-count", 0)
        logger.log("check", "pass")
        return 0

    # Warm up kernel compilation/caches using the first case's input.
    data = generate_input(**tests[0])
    for _ in range(WARMUP_ITERS):
        mx.eval(custom_kernel(data))

    passed = True
    logger.log("benchmark-count", len(tests))
    for idx, test in enumerate(tests):
        logger.log(f"benchmark.{idx}.spec", test)
        data = generate_input(**test)
        mx.eval(data)  # materialize inputs so timing excludes their generation

        # Correctness gate: never report timings for a wrong result.
        output = custom_kernel(data)
        mx.eval(output)
        error = check_implementation(data, output)
        if error:
            logger.log(f"benchmark.{idx}.status", "fail")
            logger.log(f"benchmark.{idx}.error", error)
            passed = False
            continue

        durations = []  # per-iteration wall times in nanoseconds
        for i in range(BENCH_ITERS):
            start = time.perf_counter_ns()
            mx.eval(custom_kernel(data))  # mx.eval forces the lazy computation
            durations.append(time.perf_counter_ns() - start)
            if i > 1:  # need >= 3 samples for the sample std-dev below
                avg = sum(durations) / len(durations)
                std = math.sqrt(sum((d - avg) ** 2 for d in durations) / (len(durations) - 1))
                if std / math.sqrt(len(durations)) / avg < 0.01:
                    # Standard error of the mean is below 1% of the mean: stable.
                    break

        avg = sum(durations) / len(durations)
        logger.log(f"benchmark.{idx}.runs", len(durations))
        logger.log(f"benchmark.{idx}.mean", avg)

    logger.log("check", "pass" if passed else "fail")
    return 0 if passed else 112


def main():
    """Entry point: dispatch to test or benchmark mode.

    Usage: eval.py {test|benchmark|leaderboard} <test-case-file>, with the
    results file descriptor passed via the POPCORN_FD environment variable.
    Exit codes: 0 success, 2 bad usage/mode, 111 missing or invalid
    POPCORN_FD, 112 failed checks, 113 malformed test file.
    """
    fd = os.getenv("POPCORN_FD")
    if not fd or not fd.isdigit():
        # The harness must supply a numeric fd; previously a non-numeric
        # value crashed with ValueError instead of returning the error code.
        return 111
    if len(sys.argv) < 3:
        return 2

    mode = sys.argv[1]
    tests = get_test_cases(sys.argv[2])

    with PopcornOutput(int(fd)) as logger:
        if mode == "test":
            return run_testing(logger, tests)
        if mode in ("benchmark", "leaderboard"):
            return run_benchmarking(logger, tests)
        return 2


if __name__ == "__main__":
    # sys.exit raises SystemExit(main()) — identical to the raise form.
    sys.exit(main())
29 changes: 29 additions & 0 deletions examples/mlx/example/reference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import mlx.core as mx


# Absolute/relative tolerances for the mx.allclose comparison in
# check_implementation; loose because inputs/outputs are float16.
ATOL = 1e-3
RTOL = 1e-3


def generate_input(size, seed=42):
    """Return a pair of (size, size) float16 arrays drawn from a seeded normal distribution."""
    mx.random.seed(seed)
    # Two sequential draws from the seeded generator, cast to float16.
    operands = [mx.random.normal(shape=(size, size)).astype(mx.float16) for _ in range(2)]
    mx.eval(*operands)  # materialize before handing to the submission
    return tuple(operands)


def reference_kernel(data):
    """Reference implementation: elementwise addition of the two input arrays."""
    lhs, rhs = data
    return lhs + rhs


def check_implementation(data, output):
    """Compare a submission's output against the reference kernel.

    Returns an empty string on success, or a human-readable error message
    describing the first mismatch (shape, dtype, or values).
    """
    expected = reference_kernel(data)
    mx.eval(expected)
    if output.shape != expected.shape:
        return f"shape mismatch: expected {expected.shape}, got {output.shape}"
    # The task spec requires float16 output; value-only comparison would
    # silently accept e.g. a float32 result.
    if output.dtype != expected.dtype:
        return f"dtype mismatch: expected {expected.dtype}, got {output.dtype}"
    if not mx.allclose(output, expected, atol=ATOL, rtol=RTOL).item():
        max_diff = mx.max(mx.abs(output - expected)).item()
        return f"mismatch found! max diff: {max_diff}"
    return ""
6 changes: 6 additions & 0 deletions examples/mlx/example/submission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import mlx.core as mx


def custom_kernel(data):
    """Baseline submission: add the two operands in `data` elementwise."""
    first, second = data
    return first + second
32 changes: 32 additions & 0 deletions examples/mlx/example/task.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
files:
- {"name": "submission.py", "source": "@SUBMISSION@"}
- {"name": "reference.py", "source": "reference.py"}
- {"name": "eval.py", "source": "eval.py"}

lang: "py"

description: |
Implement a float16 vector addition kernel using MLX.

Input: tuple(mx.array, mx.array) with arrays of shape (N, N) and type mx.float16.
Output: mx.array of shape (N, N) and type mx.float16

config:
main: "eval.py"

tests:
- {"size": 128, "seed": 5236}
- {"size": 256, "seed": 5531}
- {"size": 512, "seed": 9173}

benchmarks:
- {"size": 1024, "seed": 31232}
- {"size": 4096, "seed": 2146}
- {"size": 16384, "seed": 54352}

test_timeout: 180
benchmark_timeout: 180
ranked_timeout: 180

gpus:
- M4_Max
73 changes: 73 additions & 0 deletions instructions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
## Changes Summary

### New files
- src/libkernelbot/launchers/local.py — LocalLauncher that runs submissions directly on the host machine via run_config(). Blocks CUDA submissions.

### Modified files — Adding Metal/MLX support

1. src/libkernelbot/consts.py
- Added MetalGPU enum (M4_Max)
- Registered it in _GPU_LOOKUP under "Local" runner
- Added M4_Max: None to GPU_TO_SM

2. src/libkernelbot/launchers/__init__.py — Exports LocalLauncher

3. src/kernelbot/main.py — Registers LocalLauncher() in create_backend()

4. src/kernelbot/cogs/admin_cog.py — Added MetalGPU to Discord GPU dropdowns

### Modified files — Bug fixes for macOS compatibility

5. src/libkernelbot/run_eval.py — Three fixes in make_system_info():
- Added MPS/Metal detection via torch.backends.mps
- Catch FileNotFoundError for nvidia-smi/rocm-smi (don't exist on macOS)
- Catch FileNotFoundError for /proc/cpuinfo (doesn't exist on macOS)

6. src/kernelbot/api/main.py — Replace / with _ in auto-derived dev leaderboard names so nested directories don't break API routing

---

## Manual Test Steps

# 1. Start Postgres (if not already running)
brew services start postgresql@14

# 2. Create DB and run migrations
export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot"
createdb kernelbot # skip if already exists
cd /path/to/kernelbot
uv run yoyo apply --database "$DATABASE_URL" src/migrations/

# 3. Create test user
psql "$DATABASE_URL" -c "INSERT INTO leaderboard.user_info (id, user_name, cli_id, cli_valid)
VALUES ('999999', 'testuser', 'test-cli-id-123', true)
ON CONFLICT (id) DO UPDATE SET cli_id = 'test-cli-id-123', cli_valid = true;"

# 4. Install mlx
uv pip install mlx

# 5. Start the API server
cd src/kernelbot
export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot"
export ADMIN_TOKEN="your-admin-token"
export PROBLEM_DEV_DIR="/path/to/kernelbot/examples"
export GITHUB_TOKEN="dummy"
export GITHUB_REPO="dummy/dummy"
export DISABLE_SSL=1
uv run python main.py --api-only

# 6. (In another terminal) Create the dev leaderboard
curl -X POST "http://localhost:8000/admin/leaderboards" \
-H "Authorization: Bearer your-admin-token" \
-H "Content-Type: application/json" \
-d '{"directory": "mlx/example"}'

# 7. Submit a test
curl -X POST "http://localhost:8000/mlx_example-dev/M4_Max/test" \
-H "X-Popcorn-Cli-Id: test-cli-id-123" \
-F "file=@examples/mlx/example/submission.py"

# 8. Submit a benchmark
curl -X POST "http://localhost:8000/mlx_example-dev/M4_Max/benchmark" \
-H "X-Popcorn-Cli-Id: test-cli-id-123" \
-F "file=@examples/mlx/example/submission.py"
3 changes: 3 additions & 0 deletions src/envs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot"
export ADMIN_TOKEN="your-admin-token"
export PROBLEM_DEV_DIR="/path/to/kernelbot/examples"
2 changes: 1 addition & 1 deletion src/kernelbot/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ async def create_dev_leaderboard(
definition = make_task_definition(directory_path)

# Auto-derive name and deadline like admin_cog.leaderboard_create_local
leaderboard_name = f"{directory}-dev"
leaderboard_name = f"{directory.replace('/', '_')}-dev"
deadline_value = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=365)
Comment on lines +647 to 648

# GPUs must be specified in task.yml
Expand Down
5 changes: 3 additions & 2 deletions src/kernelbot/cogs/admin_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)
from kernelbot.env import env
from kernelbot.ui.misc import ConfirmationView, DeleteConfirmationModal, GPUSelectionView
from libkernelbot.consts import GitHubGPU, ModalGPU
from libkernelbot.consts import GitHubGPU, MetalGPU, ModalGPU
from libkernelbot.leaderboard_db import LeaderboardDoesNotExist, LeaderboardItem, SubmissionItem
from libkernelbot.task import LeaderboardDefinition, make_task_definition
from libkernelbot.utils import (
Expand Down Expand Up @@ -208,6 +208,7 @@ async def unban_user(self, interaction: discord.Interaction, user_id: str):
@app_commands.choices(
gpu=[app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in GitHubGPU]
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in ModalGPU]
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in MetalGPU]
)
@with_error_handling
async def leaderboard_create_local(
Expand Down Expand Up @@ -386,7 +387,7 @@ async def create_leaderboard_in_db(
if gpu is None:
# Ask the user to select GPUs
view = GPUSelectionView(
[gpu.name for gpu in GitHubGPU] + [gpu.name for gpu in ModalGPU]
[gpu.name for gpu in GitHubGPU] + [gpu.name for gpu in ModalGPU] + [gpu.name for gpu in MetalGPU]
)

await send_discord_message(
Expand Down
3 changes: 2 additions & 1 deletion src/kernelbot/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from libkernelbot import consts
from libkernelbot.backend import KernelBackend
from libkernelbot.background_submission_manager import BackgroundSubmissionManager
from libkernelbot.launchers import GitHubLauncher, ModalLauncher
from libkernelbot.launchers import GitHubLauncher, LocalLauncher, ModalLauncher
from libkernelbot.utils import setup_logging

logger = setup_logging(__name__)
Expand All @@ -29,6 +29,7 @@ def create_backend(debug_mode: bool = False) -> KernelBackend:
backend.register_launcher(
GitHubLauncher(env.GITHUB_REPO, env.GITHUB_TOKEN, env.GITHUB_WORKFLOW_BRANCH)
)
backend.register_launcher(LocalLauncher())
return backend
Comment on lines 28 to 37


Expand Down
7 changes: 6 additions & 1 deletion src/libkernelbot/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ class ModalGPU(Enum):
L4x4 = "L4x4"


class MetalGPU(Enum):
    """Apple-silicon GPUs served by the "Local" runner (see _GPU_LOOKUP)."""

    M4_Max = "M4_Max"


@dataclasses.dataclass
class GPU:
name: str
Expand All @@ -52,7 +56,7 @@ def _make_gpu_lookup(runner_map: dict[str, Type[Enum]]):
return lookup


_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU})
_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU, "Local": MetalGPU})


def get_gpu_by_name(name: str) -> GPU:
Expand Down Expand Up @@ -132,6 +136,7 @@ class RankCriterion(Enum):
"MI300x8": None,
"MI250": None,
"MI355X": None,
"M4_Max": None,
}


Expand Down
3 changes: 2 additions & 1 deletion src/libkernelbot/launchers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .github import GitHubLauncher
from .launcher import Launcher
from .local import LocalLauncher
from .modal import ModalLauncher

__all__ = [Launcher, GitHubLauncher, ModalLauncher]
# __all__ must contain name strings, not the objects themselves:
# `from libkernelbot.launchers import *` raises on non-string entries.
__all__ = ["Launcher", "GitHubLauncher", "LocalLauncher", "ModalLauncher"]
Loading
Loading