diff --git a/scripts/cluster_configs/nightly_best.yaml b/scripts/cluster_configs/nightly_best.yaml new file mode 100644 index 0000000000..40739bd7f1 --- /dev/null +++ b/scripts/cluster_configs/nightly_best.yaml @@ -0,0 +1,131 @@ +# Multi-agent "best launch" nightly training program config. Multi-agent +# gigaflow over the 8 local CARLA maps, full reward shaping (conditioning + +# randomization on), partner-blindness / phantom-braking perturbations +# enabled. Keys here override pufferlib/config/ocean/drive.ini. +# +# Launch via scripts/launch_nightly_best.sh (3 seeds, date-stamped). + +# Environment — multi-agent gigaflow over all 8 local CARLA towns +env.simulation_mode: gigaflow +env.map_dir: pufferlib/resources/drive/binaries/carla +env.num_maps: 8 +env.num_agents: 720000 +env.min_agents_per_env: 1 +env.max_agents_per_env: 150 +env.use_map_cache: 1 +env.scenario_length: 1200 +# 0 disables periodic scenario resampling — every sub-env keeps the same map +# for the full run instead of swapping every 38400 steps. 
+env.resample_frequency: 0 +env.termination_mode: 1 +env.inactive_agent_threshold: 0.4 +env.dynamics_model: jerk +env.target_type: static +env.spawn_initial_speed: 0.0 +env.dt: 0.3 +env.traffic_light_behavior: 1 +env.collision_behavior: 1 +env.offroad_behavior: 1 + +# Goal setup — three sequential waypoints, route-based placement [20, 60m] +env.num_target_waypoints: 3 +env.min_waypoint_spacing: 20.0 +env.max_waypoint_spacing: 60.0 +env.goal_radius: 2.0 +env.goal_speed: 3.0 + +# Observation shaping +env.obs_slots_lane_n: 80 +env.obs_slots_boundary_n: 80 +env.obs_slots_partners_n: 16 +env.obs_slots_traffic_controls_n: 4 +env.obs_range_partner_m: 200.0 +env.obs_range_road_front_m: 200.0 +env.obs_range_road_behind_m: 40.0 +env.obs_range_road_side_m: 50.0 +env.obs_range_traffic_control_m: 100.0 +env.obs_norm_xy_offset_m: 200.0 +env.obs_norm_goal_offset_m: 200.0 +env.obs_norm_road_seg_length_m: 10.0 +env.obs_norm_road_seg_width_m: 5.0 +env.obs_norm_veh_length_m: 15.0 +env.obs_norm_veh_width_m: 10.0 +env.obs_dropout_lane: 0.5 +env.obs_dropout_boundary: 0.4 + +# Perturbations (on during training; eval's clean macro zeros these) +env.partner_blindness_prob: 0.03 +env.partner_blindness_trigger_prob: 0.05 +env.phantom_braking_prob: 0.02 +env.phantom_braking_trigger_prob: 0.02 +env.phantom_braking_duration: 10 + +# Reward shaping (conditioning + randomization on) +env.reward_conditioning: true +env.reward_randomization: true +env.reward_goal: 1.0 +env.reward_collision: 1.5 +env.reward_offroad: 1.5 +env.reward_stop_line: 1.0 +env.reward_comfort: 0.05 +env.reward_lane_align: 0.025 +env.reward_vel_align: 1.0 +env.reward_lane_center: 0.005 +env.reward_velocity: 0.0025 +env.reward_reverse: 0.005 +env.reward_timestep: 2.5e-05 +env.reward_overspeed: 0.05 + +# Policy — 3x1024 backbone, split actor/critic, gigaflow encoder +policy.input_size: 256 +policy.backbone_hidden_size: 1024 +policy.backbone_num_layers: 3 +policy.actor_hidden_size: 1024 +policy.actor_num_layers: 0 
+policy.critic_hidden_size: 1024 +policy.critic_num_layers: 0 +policy.split_network: true +policy.encoder_gigaflow: true +policy.dropout: 0.0 + +# Training — 10B steps, large minibatch, compiled bfloat16 +train.total_timesteps: 10_000_000_000 +train.learning_rate: 0.0005 +train.minibatch_size: 153600 +train.max_minibatch_size: 153600 +train.update_epochs: 3 +train.bptt_horizon: 128 +train.compile: true +train.precision: bfloat16 +train.normalize_rewards: false +train.checkpoint_interval: 500 +train.optimizer: adamw + +# Eval — keep validation_gigaflow (CARLA sweep) inline, disable everything else +# (validation_replay needs nuPlan bins; behaviors_* need labelled scene +# categories not used in this nightly). Interval 250 keeps eval cost ~5% of +# wall-clock instead of ~85%. +eval.validation_defaults.interval: 250 +eval.validation_replay.enabled: 0 +eval.validation_gigaflow.render_backend: egl +eval.behaviors_full_dir.enabled: 0 +eval.behaviors_hard_stop.enabled: 0 +eval.behaviors_highway_straight.enabled: 0 +eval.behaviors_lane_change.enabled: 0 +eval.behaviors_merge.enabled: 0 +eval.behaviors_parked_cars.enabled: 0 +eval.behaviors_roundabout.enabled: 0 +eval.behaviors_stopped_traffic.enabled: 0 +eval.behaviors_traffic_light_green.enabled: 0 +eval.behaviors_traffic_light_stop.enabled: 0 +eval.behaviors_unprotected_left.enabled: 0 +eval.behaviors_unprotected_right.enabled: 0 + +# W&B — group has no space (submit_cluster.py joins the inner command +# without quoting arg values). Launchers (launch_nightly_best.sh and +# Modal's nightly()) override wandb_group to today's date at launch so +# runs cluster by night in the UI; the static value here is just the +# fallback for ad-hoc invocations. 
+wandb: True +wandb_project: nightly-multi +wandb_group: nightly-multi diff --git a/scripts/cluster_configs/single_agent_speed_run.yaml b/scripts/cluster_configs/single_agent_speed_run.yaml index c4b0a8372b..d0f62e0ec2 100644 --- a/scripts/cluster_configs/single_agent_speed_run.yaml +++ b/scripts/cluster_configs/single_agent_speed_run.yaml @@ -67,6 +67,9 @@ eval.behaviors_unprotected_right.enabled: 0 # W&B. Group has no space: submit_cluster.py joins the inner command into a # bash -c string without quoting arg values, so a space would split the arg. +# Launchers (launch_single_agent.sh and Modal's nightly()) override +# wandb_group to today's date at launch so runs cluster by night in the UI; +# the static value here is just the fallback for ad-hoc invocations. wandb: True -wandb_project: single_agent_nightly_test -wandb_group: Nightly_Test +wandb_project: nightly-single +wandb_group: nightly-single diff --git a/scripts/launch_nightly_best.sh b/scripts/launch_nightly_best.sh new file mode 100755 index 0000000000..438a28f102 --- /dev/null +++ b/scripts/launch_nightly_best.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Launch multi-agent "nightly best" training on the cluster via submit_cluster.py. +# Mirrors launch_single_agent.sh but uses nightly_best.yaml (multi-agent +# gigaflow over 8 CARLA towns, 10B total steps). Code-isolated per run, +# container-wrapped, gpu-heartbeated, date-stamped wandb run names. 
+#
+# Run on the login node (it sources the venv and submits from there):
+#   ./scripts/launch_nightly_best.sh
+#
+# Overridable via the environment:
+#   PROGRAM_CONFIG          program_config YAML (default: nightly_best.yaml)
+#   COMPUTE_CONFIG          compute_config YAML (default: nyu_greene.yaml)
+#   SEEDS                   colon-separated train.seed values, one job each (default 0:1:2 -> 3 jobs)
+#   ACCOUNT/PARTITION/TIME  SLURM overrides (forwarded as --account/--partition/--time)
+#   MEM                     SLURM --mem (default 192gb; the multi-agent config plus inline
+#                           validation_gigaflow eval can spike past 128gb at epoch 250)
+#   PREFIX                  value for submit_cluster.py --prefix (default <YYYY-MM-DD>_multi_agent, today's date)
+#
+# Examples:
+#   SEEDS=0 ./scripts/launch_nightly_best.sh                 # submit a single seed (one job)
+#   PARTITION=h100_tandon ./scripts/launch_nightly_best.sh   # if h200 QOS is full
+set -euo pipefail
+
+PROGRAM_CONFIG="${PROGRAM_CONFIG:-scripts/cluster_configs/nightly_best.yaml}"
+COMPUTE_CONFIG="${COMPUTE_CONFIG:-scripts/cluster_configs/nyu_greene.yaml}"
+ACCOUNT="${ACCOUNT:-torch_pr_924_tandon_advanced}"
+PARTITION="${PARTITION:-h200_tandon}"
+TIME="${TIME:-1800}"
+MEM="${MEM:-192gb}"
+SEEDS="${SEEDS:-0:1:2}"
+DATE_STAMP="$(date +%Y-%m-%d)"  # read the clock once so PREFIX, run_name and wandb_group share one date
+PREFIX="${PREFIX:-${DATE_STAMP}_multi_agent}"
+
+source "/scratch/$USER/venvs/pufferdrive/bin/activate"
+
+# One submission per seed so we can pass a per-seed run_name (wandb display
+# name like 2026-05-31_seed0).
+IFS=':' read -ra SEED_LIST <<< "$SEEDS" +for SEED in "${SEED_LIST[@]}"; do + python scripts/submit_cluster.py \ + --save_dir "/scratch/$USER/runs" \ + --prefix "$PREFIX" \ + --compute_config "$COMPUTE_CONFIG" \ + --program_config "$PROGRAM_CONFIG" \ + --container --heartbeat \ + --account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" --mem "$MEM" \ + --args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}" "wandb_group=${DATE_STAMP}" +done diff --git a/scripts/launch_single_agent.sh b/scripts/launch_single_agent.sh index e0e1135ab2..b867400c77 100755 --- a/scripts/launch_single_agent.sh +++ b/scripts/launch_single_agent.sh @@ -41,5 +41,5 @@ for SEED in "${SEED_LIST[@]}"; do --program_config "$PROGRAM_CONFIG" \ --container --heartbeat \ --account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" \ - --args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}" + --args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}" "wandb_group=${DATE_STAMP}" done