[Feature] Add remote policy module for inference server clients #3894
Draft
vmoens wants to merge 2 commits into
Draft
[Feature] Add remote policy module for inference server clients #3894 — vmoens wants to merge 2 commits into
vmoens wants to merge 2 commits into
Conversation
🔗 Helpful Links: 🧪 See artifacts and rendered test results at hud.pytorch.org/pr/pytorch/rl/3894
Note: Links to docs will display an error until the docs builds have been completed. ❗ 1 Active SEV: There is 1 currently active SEV. If your PR is affected, please view it below. ❌ 12 New Failures: As of commit 2e76fc2 with merge base d7ef78b, NEW FAILURES - the following jobs have failed:
This comment was automatically generated by Dr. CI and updates every 15 minutes. |
This was referenced Jun 21, 2026
Contributor
Benchmark Results: PR
|
| Benchmark | main ops | PR ops | Change |
|---|---|---|---|
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-ListStorage-RandomSampler-400] |
36.68 | 197.76 | +439.09% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] |
1,825 | 458.04 | -74.90% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-ListStorage-None-400] |
187.87 | 56.32 | -70.02% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] |
1,900 | 2,199 | +15.79% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-400] |
459.40 | 531.91 | +15.78% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] |
2,830 | 3,269 | +15.52% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[True-backward] |
53.07 | 60.25 | +13.53% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[True-backward] |
115.40 | 127.86 | +10.80% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-sampler7-10000] |
809.06 | 732.16 | -9.51% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] |
3,079 | 2,839 | -7.78% |
benchmarks/test_objectives_benchmarks.py::test_a2c_speed[True-backward] |
112.65 | 121.27 | +7.65% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-sampler6-10000] |
748.79 | 695.32 | -7.14% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[True-backward] |
135.74 | 145.43 | +7.14% |
benchmarks/test_objectives_benchmarks.py::test_redq_speed[True-None] |
216.79 | 232.21 | +7.11% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[100-img_shape1-atari] |
5,297 | 4,935 | -6.83% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[reduce-overhead-None] |
272.00 | 289.90 | +6.58% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-400] |
483.63 | 514.21 | +6.32% |
benchmarks/test_envs_benchmark.py::test_simple |
1.7003 | 1.8023 | +6.00% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[True-backward] |
109.32 | 115.46 | +5.62% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[pickle] |
11,819 | 12,424 | +5.12% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-scan-False-0-gru] |
2.9511 | 3.0918 | +4.77% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[100-img_shape1-atari] |
682.19 | 714.40 | +4.72% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[True-backward] |
962.39 | 1,008 | +4.70% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-False-True] |
43,348 | 41,337 | -4.64% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[vec_generalized_advantage_estimate-False-1-512] |
2,290 | 2,394 | +4.52% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-False-True] |
33,179 | 31,733 | -4.36% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[reduce-overhead-None] |
471.26 | 491.78 | +4.35% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[False-None] |
88.16 | 91.94 | +4.29% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-100000-10000-100-True] |
24.32 | 25.36 | +4.29% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] |
3,150 | 3,022 | -4.06% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-single-True] |
1.3724 | 1.3171 | -4.02% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-False-True] |
38,397 | 36,881 | -3.95% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-cudnn-False-0-gru] |
1.3018 | 1.3525 | +3.90% |
benchmarks/test_objectives_benchmarks.py::test_redq_speed[False-backward] |
54.46 | 56.54 | +3.83% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[True-None] |
276.77 | 287.18 | +3.76% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[True-None] |
1,740 | 1,675 | -3.75% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-scan-False-0-lstm] |
1.9671 | 2.0402 | +3.71% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-False-False] |
78,696 | 75,789 | -3.69% |
benchmarks/test_objectives_benchmarks.py::test_redq_speed[reduce-overhead-None] |
225.32 | 233.23 | +3.51% |
benchmarks/test_objectives_benchmarks.py::test_values[td1_return_estimate-False-False] |
38.14 | 39.45 | +3.43% |
benchmarks/test_envs_benchmark.py::test_transformed |
0.8790 | 0.9089 | +3.40% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[50-img_shape0-small] |
4,345 | 4,493 | +3.39% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[False-backward] |
62.37 | 64.48 | +3.38% |
benchmarks/test_objectives_benchmarks.py::test_values[td_lambda_return_estimate-True-False] |
25.57 | 26.43 | +3.38% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[safetensors] |
23,589 | 22,809 | -3.31% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] |
2,142 | 2,076 | -3.11% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-False-True] |
30,505 | 29,560 | -3.10% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[200-img_shape3-large_batch] |
761.69 | 739.40 | -2.93% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-True-True] |
20,005 | 19,422 | -2.92% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-1000000-10000-100-True] |
23.49 | 24.16 | +2.86% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-400] |
928.52 | 901.96 | -2.86% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[True-backward] |
249.51 | 256.49 | +2.79% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-False-False] |
64,236 | 62,518 | -2.67% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[False-backward] |
78.45 | 80.52 | +2.64% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[False-None] |
49.39 | 50.69 | +2.62% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-False-False-True] |
28,482 | 27,750 | -2.57% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-100000-10000-100-False] |
52.81 | 54.10 | +2.44% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-True-True] |
22,123 | 21,592 | -2.40% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[torch.save] |
7,103 | 7,273 | +2.38% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[False-None] |
162.23 | 166.07 | +2.37% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[False-backward] |
244.36 | 250.07 | +2.34% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[True-None] |
472.44 | 483.49 | +2.34% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-True-False] |
32,242 | 31,493 | -2.32% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[True-None] |
689.39 | 705.23 | +2.30% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[100-img_shape2-large_img] |
174.72 | 170.71 | -2.29% |
benchmarks/test_objectives_benchmarks.py::test_values[generalized_advantage_estimate-True-True] |
102.94 | 105.24 | +2.23% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-400] |
191.94 | 195.94 | +2.08% |
benchmarks/test_objectives_benchmarks.py::test_values[td0_return_estimate-False-False] |
8,179 | 8,010 | -2.07% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[50-img_shape0-small] |
3,548 | 3,476 | -2.05% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[200-img_shape3-large_batch] |
330.73 | 337.41 | +2.02% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] |
3,276 | 3,210 | -2.01% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[generalized_advantage_estimate-False-1-512] |
115.37 | 117.67 | +1.99% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[100-img_shape1-atari] |
273.26 | 278.54 | +1.93% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[100-img_shape2-large_img] |
397.14 | 404.53 | +1.86% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-True-False] |
38,842 | 38,120 | -1.86% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_without_rb[100-img_shape0-atari] |
30.26 | 30.82 | +1.83% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-True-True] |
20,686 | 20,310 | -1.82% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[untyped_storage] |
8.7959 | 8.6363 | -1.81% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-False-False] |
55,013 | 54,017 | -1.81% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sample_mixed_devices[1000000-memmap_cpu_storage_cpu... |
82.64 | 84.13 | +1.80% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-False-False] |
50,110 | 49,216 | -1.79% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[100-img_shape1-atari] |
640.48 | 651.81 | +1.77% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-cudnn-True-0-lstm] |
0.9554 | 0.9390 | -1.73% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-parallel-buffers-True] |
0.5397 | 0.5306 | -1.70% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[200-img_shape3-large_batch] |
308.56 | 313.77 | +1.69% |
benchmarks/test_objectives_benchmarks.py::test_cql_speed[True-None] |
83.92 | 85.28 | +1.63% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-True-False] |
34,735 | 34,172 | -1.62% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] |
2,741 | 2,696 | -1.62% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[True-None] |
116.42 | 118.29 | +1.61% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-10000-10000-100-True] |
25.75 | 26.16 | +1.60% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] |
3,236 | 3,185 | -1.57% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-1000000-10000-100-False] |
49.77 | 50.54 | +1.55% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-False-False] |
64,138 | 63,193 | -1.47% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-10000-10000-100-False] |
54.92 | 55.70 | +1.43% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-400] |
1,093 | 1,078 | -1.42% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-parallel-no-buffers-False] |
0.2218 | 0.2249 | +1.41% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[vec_generalized_advantage_estimate-True-32-512] |
28.60 | 29.00 | +1.39% |
benchmarks/test_collectors_benchmark.py::test_single |
9.0373 | 8.9127 | -1.38% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-False-True] |
30,357 | 29,942 | -1.37% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-True-True] |
20,478 | 20,753 | +1.34% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[100-img_shape2-large_img] |
418.72 | 424.16 | +1.30% |
benchmarks/test_envs_benchmark.py::test_parallel |
0.9795 | 0.9668 | -1.30% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-False-True] |
34,814 | 34,371 | -1.27% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-True-True] |
23,534 | 23,238 | -1.26% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-scan-True-0-gru] |
4.1996 | 4.2506 | +1.22% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-False-True] |
37,281 | 37,719 | +1.17% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-scan-True-0-lstm] |
3.0865 | 3.1227 | +1.17% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-cudnn-True-0-gru] |
1.4246 | 1.4079 | -1.17% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[50-img_shape0-small] |
870.32 | 880.47 | +1.17% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[False-None] |
721.45 | 713.07 | -1.16% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_with_rb[100-img_shape0-atari] |
26.36 | 26.67 | +1.15% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] |
2,536 | 2,565 | +1.14% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[reduce-overhead-None] |
706.19 | 714.15 | +1.13% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sampler_sample_scale[1000000-cpu] |
98.21 | 99.31 | +1.12% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[vec_generalized_advantage_estimate-False-32-512] |
552.97 | 559.04 | +1.10% |
benchmarks/test_objectives_benchmarks.py::test_values[vec_td1_return_estimate-False-False] |
54.39 | 54.98 | +1.09% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[True-backward] |
282.88 | 285.96 | +1.09% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-False-False] |
56,995 | 56,397 | -1.05% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-4000] |
169.18 | 170.93 | +1.04% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[100-img_shape2-large_img] |
571.36 | 565.50 | -1.03% |
| ... | ... | ... | Showing 120 of 187 comparisons, sorted by absolute change. |
GPU
Compared 197 benchmarks. Regressions over 5%: 9. Improvements over 5%: 14.
| Benchmark | main ops | PR ops | Change |
|---|---|---|---|
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] |
2,666 | 3,549 | +33.14% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] |
2,553 | 3,373 | +32.12% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] |
3,554 | 2,571 | -27.65% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] |
2,851 | 3,485 | +22.25% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] |
2,827 | 3,360 | +18.86% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] |
1,971 | 2,300 | +16.69% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] |
1,898 | 2,189 | +15.31% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-sampler6-10000] |
675.79 | 777.66 | +15.07% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] |
3,138 | 3,569 | +13.71% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-400] |
476.20 | 532.79 | +11.88% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] |
2,067 | 2,293 | +10.93% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[True-backward] |
980.18 | 881.93 | -10.02% |
benchmarks/test_collectors_benchmark.py::test_single_with_rb_pixels |
5.3369 | 4.8024 | -10.01% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-400] |
731.43 | 803.62 | +9.87% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[reduce-overhead-None] |
115.80 | 105.52 | -8.88% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[untyped_storage] |
8.9035 | 8.2128 | -7.76% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-400] |
464.99 | 498.56 | +7.22% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[True-backward] |
333.59 | 355.99 | +6.71% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-400] |
480.87 | 449.07 | -6.61% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-400] |
1,014 | 949.23 | -6.37% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[True-backward] |
313.98 | 332.15 | +5.78% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[100-img_shape2-large_img] |
423.22 | 399.91 | -5.51% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[numpy] |
377,078 | 356,924 | -5.34% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[100-img_shape2-large_img] |
407.16 | 386.85 | -4.99% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] |
2,918 | 2,775 | -4.91% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[True-backward] |
239.94 | 250.89 | +4.56% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[reduce-overhead-None] |
1,846 | 1,927 | +4.41% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[100-img_shape2-large_img] |
568.42 | 544.59 | -4.19% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-sampler7-10000] |
766.70 | 735.17 | -4.11% |
benchmarks/test_objectives_benchmarks.py::test_values[vec_generalized_advantage_estimate-True-True] |
309.32 | 297.01 | -3.98% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-False-False] |
63,767 | 66,222 | +3.85% |
benchmarks/test_envs_benchmark.py::test_simple |
1.2485 | 1.2009 | -3.81% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-scan-True-0-gru] |
47.43 | 49.23 | +3.81% |
benchmarks/test_objectives_benchmarks.py::test_cql_speed[True-backward] |
218.04 | 226.33 | +3.80% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-True-False] |
32,724 | 31,551 | -3.59% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-True-True] |
20,029 | 19,329 | -3.49% |
benchmarks/test_objectives_benchmarks.py::test_a2c_speed[True-None] |
741.71 | 715.92 | -3.48% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-False-False] |
49,796 | 48,087 | -3.43% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_without_rb[100-img_shape0-atari] |
30.53 | 29.48 | -3.42% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[100-img_shape2-large_img] |
172.99 | 167.13 | -3.38% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[True-backward] |
367.17 | 355.08 | -3.29% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-False-True] |
31,023 | 30,005 | -3.28% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[vec_generalized_advantage_estimate-False-1-512] |
1,353 | 1,308 | -3.27% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-ListStorage-RandomSampler-400] |
37.22 | 38.42 | +3.22% |
benchmarks/test_objectives_benchmarks.py::test_a2c_speed[False-backward] |
146.37 | 151.05 | +3.20% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[False-None] |
113.52 | 109.92 | -3.17% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sampler_sample_scale[1000000-cuda] |
2,181 | 2,249 | +3.12% |
benchmarks/test_objectives_benchmarks.py::test_values[generalized_advantage_estimate-True-True] |
47.87 | 46.44 | -2.99% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[False-backward] |
279.30 | 272.04 | -2.60% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-False-False-False] |
45,448 | 46,615 | +2.57% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[True-None] |
1,875 | 1,923 | +2.51% |
benchmarks/test_objectives_benchmarks.py::test_values[td1_return_estimate-False-False] |
20.23 | 19.74 | -2.42% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[100-img_shape1-atari] |
278.80 | 272.30 | -2.33% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-parallel-buffers-False] |
0.5910 | 0.6047 | +2.32% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sample_mixed_devices[1000000-cuda_storage_cpu_sampler] |
90.21 | 88.12 | -2.32% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-False-True] |
41,096 | 42,032 | +2.28% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_without_rb_cuda[200-img_shape1-large_batch] |
8.8590 | 8.6596 | -2.25% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[100-img_shape1-atari] |
4,002 | 4,091 | +2.22% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-False-False] |
57,776 | 56,527 | -2.16% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[False-backward] |
84.60 | 82.83 | -2.08% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_with_rb_cuda[100-img_shape0-atari] |
16.94 | 16.59 | -2.07% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-True-True] |
20,981 | 20,546 | -2.07% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_without_rb_cuda[100-img_shape0-atari] |
17.75 | 17.38 | -2.07% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_with_rb_cuda[200-img_shape1-large_batch] |
8.5101 | 8.3345 | -2.06% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[reduce-overhead-None] |
794.19 | 810.39 | +2.04% |
benchmarks/test_objectives_benchmarks.py::test_cql_speed[reduce-overhead-None] |
87.60 | 89.32 | +1.97% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-True-True] |
18,794 | 18,430 | -1.94% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-True-False] |
39,160 | 38,406 | -1.92% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[True-None] |
749.97 | 735.63 | -1.91% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-True-False] |
34,446 | 33,807 | -1.85% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-False-False] |
55,069 | 54,054 | -1.84% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-scan-False-0-gru] |
22.91 | 22.50 | -1.78% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_without_rb[200-img_shape1-large_batch] |
15.37 | 15.11 | -1.69% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-True-False] |
29,462 | 28,971 | -1.67% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-10000-10000-100-True] |
23.24 | 23.62 | +1.64% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[50-img_shape0-small] |
857.67 | 871.41 | +1.60% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-4000] |
167.49 | 170.16 | +1.59% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_with_rb[200-img_shape1-large_batch] |
13.52 | 13.31 | -1.59% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-True-False] |
34,612 | 34,064 | -1.58% |
benchmarks/test_objectives_benchmarks.py::test_a2c_speed[reduce-overhead-None] |
854.00 | 867.54 | +1.58% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[reduce-overhead-None] |
128.44 | 130.46 | +1.57% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-400] |
198.19 | 195.08 | -1.57% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[True-backward] |
471.81 | 479.12 | +1.55% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-True-False] |
32,172 | 31,678 | -1.54% |
benchmarks/test_envs_benchmark.py::test_transformed |
0.7019 | 0.7124 | +1.50% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[50-img_shape0-small] |
3,561 | 3,511 | -1.40% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-False-True] |
37,775 | 37,250 | -1.39% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-True-False] |
42,390 | 41,817 | -1.35% |
benchmarks/test_envs_benchmark.py::test_serial |
0.4216 | 0.4269 | +1.26% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_with_rb[100-img_shape0-atari] |
26.41 | 26.08 | -1.22% |
benchmarks/test_envs_benchmark.py::test_parallel |
0.5503 | 0.5436 | -1.21% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-100000-10000-100-True] |
22.76 | 23.03 | +1.18% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[reduce-overhead-None] |
42.68 | 43.17 | +1.16% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[200-img_shape3-large_batch] |
745.76 | 737.19 | -1.15% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-False-False-True] |
28,893 | 28,562 | -1.15% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-scan-False-0-lstm] |
21.59 | 21.35 | -1.14% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[200-img_shape3-large_batch] |
327.61 | 323.99 | -1.11% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-parallel-no-buffers-True] |
0.2109 | 0.2132 | +1.10% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[safetensors] |
23,571 | 23,829 | +1.09% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[reduce-overhead-None] |
107.18 | 106.02 | -1.09% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[generalized_advantage_estimate-False-1-512] |
46.23 | 46.71 | +1.04% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-True-True] |
23,376 | 23,133 | -1.04% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-False-False] |
63,312 | 62,684 | -0.99% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-False-False] |
75,662 | 76,407 | +0.98% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-400] |
981.64 | 991.03 | +0.96% |
benchmarks/test_objectives_benchmarks.py::test_a2c_speed[True-backward] |
361.90 | 365.23 | +0.92% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[False-backward] |
453.98 | 458.14 | +0.92% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[False-backward] |
70.95 | 71.60 | +0.92% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-False-True] |
32,598 | 32,302 | -0.91% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[False-None] |
97.40 | 98.26 | +0.88% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[50-img_shape0-small] |
6,024 | 5,971 | -0.88% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[200-img_shape3-large_batch] |
306.42 | 303.72 | -0.88% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[100-img_shape1-atari] |
697.50 | 691.38 | -0.88% |
benchmarks/test_objectives_benchmarks.py::test_values[td0_return_estimate-False-False] |
11,727 | 11,625 | -0.87% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] |
1,915 | 1,931 | +0.87% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-serial-buffers-True] |
0.5139 | 0.5181 | +0.83% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-False-True] |
30,840 | 30,589 | -0.82% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[True-None] |
821.40 | 827.75 | +0.77% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-1000000-10000-100-False] |
48.68 | 48.31 | -0.76% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-True-True] |
22,075 | 21,911 | -0.74% |
| ... | ... | ... | Showing 120 of 197 comparisons, sorted by absolute change. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Stack from ghstack (oldest at bottom):