Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions scripts/cluster_configs/nightly_best.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Multi-agent "best launch" nightly training program config. Multi-agent
# gigaflow over the 8 local CARLA maps, full reward shaping (conditioning +
# randomization on), partner-blindness / phantom-braking perturbations
# enabled. Keys here override pufferlib/config/ocean/drive.ini.
#
# Launch via scripts/launch_nightly_best.sh (3 seeds, date-stamped).

# Environment — multi-agent gigaflow over all 8 local CARLA towns
env.simulation_mode: gigaflow
env.map_dir: pufferlib/resources/drive/binaries/carla
env.num_maps: 8
env.num_agents: 720000

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will break the memory i think?

env.min_agents_per_env: 1
env.max_agents_per_env: 150
env.use_map_cache: 1
env.scenario_length: 1200
# 0 disables periodic scenario resampling — every sub-env keeps the same map
# for the full run instead of swapping every 38400 steps.
env.resample_frequency: 0
env.termination_mode: 1
env.inactive_agent_threshold: 0.4
env.dynamics_model: jerk
env.target_type: static
env.spawn_initial_speed: 0.0
env.dt: 0.3
env.traffic_light_behavior: 1
env.collision_behavior: 1
env.offroad_behavior: 1

# Goal setup — three sequential waypoints, route-based placement with 20–60 m spacing
env.num_target_waypoints: 3
env.min_waypoint_spacing: 20.0
env.max_waypoint_spacing: 60.0
env.goal_radius: 2.0
env.goal_speed: 3.0

# Observation shaping
env.obs_slots_lane_n: 80
env.obs_slots_boundary_n: 80
Comment on lines +38 to +39

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think env.obs_slots_boundary_n: 80 can be reduced, since it does not match the gigaflow implementation

env.obs_slots_partners_n: 16
env.obs_slots_traffic_controls_n: 4
env.obs_range_partner_m: 200.0
env.obs_range_road_front_m: 200.0
env.obs_range_road_behind_m: 40.0
env.obs_range_road_side_m: 50.0
env.obs_range_traffic_control_m: 100.0
env.obs_norm_xy_offset_m: 200.0
env.obs_norm_goal_offset_m: 200.0
env.obs_norm_road_seg_length_m: 10.0
env.obs_norm_road_seg_width_m: 5.0
env.obs_norm_veh_length_m: 15.0
env.obs_norm_veh_width_m: 10.0
env.obs_dropout_lane: 0.5
env.obs_dropout_boundary: 0.4

# Perturbations (on during training; eval's clean macro zeros these)
env.partner_blindness_prob: 0.03
env.partner_blindness_trigger_prob: 0.05
env.phantom_braking_prob: 0.02
env.phantom_braking_trigger_prob: 0.02
env.phantom_braking_duration: 10

# Reward shaping (conditioning + randomization on)
env.reward_conditioning: true
env.reward_randomization: true
env.reward_goal: 1.0
env.reward_collision: 1.5
env.reward_offroad: 1.5
env.reward_stop_line: 1.0
env.reward_comfort: 0.05
env.reward_lane_align: 0.025
env.reward_vel_align: 1.0
env.reward_lane_center: 0.005
env.reward_velocity: 0.0025
env.reward_reverse: 0.005
env.reward_timestep: 2.5e-05
env.reward_overspeed: 0.05

# Policy — 3x1024 backbone, split actor/critic, gigaflow encoder
policy.input_size: 256
policy.backbone_hidden_size: 1024
policy.backbone_num_layers: 3
policy.actor_hidden_size: 1024
policy.actor_num_layers: 0
policy.critic_hidden_size: 1024
policy.critic_num_layers: 0
policy.split_network: true
policy.encoder_gigaflow: true
policy.dropout: 0.0
Comment on lines +80 to +89

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be deprecated, especially the policy.encoder_gigaflow


# Training — 10B steps, large minibatch, compiled bfloat16
train.total_timesteps: 10_000_000_000
train.learning_rate: 0.0005
train.minibatch_size: 153600
train.max_minibatch_size: 153600
Comment on lines +94 to +95

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

imo the batch size can be simplified to a power of 2; with advantage filtering and masking you never get a perfect batch size anyway

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is not the values you use? We're just trying to match the best run as closely as we can here

train.update_epochs: 3
train.bptt_horizon: 128
train.compile: true
train.precision: bfloat16
train.normalize_rewards: false
train.checkpoint_interval: 500
train.optimizer: adamw

# Eval — keep validation_gigaflow (CARLA sweep) inline, disable everything else
# (validation_replay needs nuPlan bins; behaviors_* need labelled scene
# categories not used in this nightly). Interval 250 keeps eval cost ~5% of
# wall-clock instead of ~85%.
eval.validation_defaults.interval: 250
eval.validation_replay.enabled: 0
eval.validation_gigaflow.render_backend: egl
eval.behaviors_full_dir.enabled: 0
eval.behaviors_hard_stop.enabled: 0
eval.behaviors_highway_straight.enabled: 0
eval.behaviors_lane_change.enabled: 0
eval.behaviors_merge.enabled: 0
eval.behaviors_parked_cars.enabled: 0
eval.behaviors_roundabout.enabled: 0
eval.behaviors_stopped_traffic.enabled: 0
eval.behaviors_traffic_light_green.enabled: 0
eval.behaviors_traffic_light_stop.enabled: 0
eval.behaviors_unprotected_left.enabled: 0
eval.behaviors_unprotected_right.enabled: 0

# W&B — group has no space (submit_cluster.py joins the inner command
# without quoting arg values). Launchers (launch_nightly_best.sh and
# Modal's nightly()) override wandb_group to today's date at launch so
# runs cluster by night in the UI; the static value here is just the
# fallback for ad-hoc invocations.
wandb: True
wandb_project: nightly-multi
wandb_group: nightly-multi
7 changes: 5 additions & 2 deletions scripts/cluster_configs/single_agent_speed_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ eval.behaviors_unprotected_right.enabled: 0

# W&B. Group has no space: submit_cluster.py joins the inner command into a
# bash -c string without quoting arg values, so a space would split the arg.
# Launchers (launch_single_agent.sh and Modal's nightly()) override
# wandb_group to today's date at launch so runs cluster by night in the UI;
# the static value here is just the fallback for ad-hoc invocations.
wandb: True
wandb_project: single_agent_nightly_test
wandb_group: Nightly_Test
wandb_project: nightly-single
wandb_group: nightly-single
48 changes: 48 additions & 0 deletions scripts/launch_nightly_best.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Launch multi-agent "nightly best" training on the cluster via submit_cluster.py.
# Mirrors launch_single_agent.sh but uses nightly_best.yaml (multi-agent
# gigaflow over 8 CARLA towns, 10B total steps). Code-isolated per run,
# container-wrapped, gpu-heartbeated, date-stamped wandb run names.
#
# Run on the login node (it sources the venv and submits from there). Paths to
# submit_cluster.py and the default configs are relative, so invoke it from the
# repository root:
#   ./scripts/launch_nightly_best.sh
#
# Overridable via the environment:
#   PROGRAM_CONFIG  program_config YAML (default: scripts/cluster_configs/nightly_best.yaml)
#   COMPUTE_CONFIG  compute_config YAML (default: scripts/cluster_configs/nyu_greene.yaml)
#   SEEDS           colon-separated seed list; one job is submitted per seed,
#                   each with its own train.seed (default 0:1:2 -> 3 jobs)
#   ACCOUNT/PARTITION/TIME  SLURM overrides
#   MEM             SLURM --mem (default 192gb; the multi-agent config plus
#                   inline validation_gigaflow eval can spike past 128gb at
#                   epoch 250)
#   PREFIX          run-name prefix (default <date>_multi_agent)
#
# Examples:
#   SEEDS=0 ./scripts/launch_nightly_best.sh                 # one-seed dry run
#   PARTITION=h100_tandon ./scripts/launch_nightly_best.sh   # if h200 QOS is full
set -euo pipefail

# Take the date exactly once. The default PREFIX, the per-seed run_name and the
# wandb_group are all derived from this single value, so a launch that happens
# to straddle midnight cannot end up with a prefix from one day and run names /
# group from the next.
DATE_STAMP="$(date +%Y-%m-%d)"

PROGRAM_CONFIG="${PROGRAM_CONFIG:-scripts/cluster_configs/nightly_best.yaml}"
COMPUTE_CONFIG="${COMPUTE_CONFIG:-scripts/cluster_configs/nyu_greene.yaml}"
ACCOUNT="${ACCOUNT:-torch_pr_924_tandon_advanced}"
PARTITION="${PARTITION:-h200_tandon}"
TIME="${TIME:-1800}"
MEM="${MEM:-192gb}"
SEEDS="${SEEDS:-0:1:2}"
PREFIX="${PREFIX:-${DATE_STAMP}_multi_agent}"

source "/scratch/$USER/venvs/pufferdrive/bin/activate"

# One submission per seed so we can pass a per-seed run_name (wandb display
# name like 2026-05-31_seed0). wandb_group is overridden to the date so the
# runs launched together are grouped by night.
IFS=':' read -ra SEED_LIST <<< "$SEEDS"
for SEED in "${SEED_LIST[@]}"; do
  python scripts/submit_cluster.py \
    --save_dir "/scratch/$USER/runs" \
    --prefix "$PREFIX" \
    --compute_config "$COMPUTE_CONFIG" \
    --program_config "$PROGRAM_CONFIG" \
    --container --heartbeat \
    --account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" --mem "$MEM" \
    --args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}" "wandb_group=${DATE_STAMP}"
done
2 changes: 1 addition & 1 deletion scripts/launch_single_agent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ for SEED in "${SEED_LIST[@]}"; do
--program_config "$PROGRAM_CONFIG" \
--container --heartbeat \
--account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" \
--args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}"
--args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}" "wandb_group=${DATE_STAMP}"
done
Loading