Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 27 additions & 27 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE RelWithDebInfo)
endif()

# https://github.com/NVIDIA/TorchFort/issues/3
# https://github.com/NVIDIA/TorchFort/issues/3
cmake_policy(SET CMP0057 NEW)

# User-defined build options
# User-defined build options
# Semicolon-separated list of CUDA compute capabilities, written without the dot (e.g. 80 for 8.0).
set(TORCHFORT_CUDA_CC_LIST "70;80;90" CACHE STRING "List of CUDA compute capabilities to build torchfort for.")
# Optional search roots. No value is supplied here, so each entry is empty unless the user sets it.
set(TORCHFORT_NCCL_ROOT CACHE STRING "Path to search for NCCL installation. Default NVIDIA HPC SDK provided NCCL version if available.")
set(TORCHFORT_YAML_CPP_ROOT CACHE STRING "Path to search for yaml-cpp installation.")
Expand All @@ -16,7 +16,7 @@ option(TORCHFORT_BUILD_EXAMPLES "Build examples" OFF)
option(TORCHFORT_BUILD_TESTS "Build tests" OFF)
option(TORCHFORT_ENABLE_GPU "Enable GPU/CUDA support" ON)

# For backward-compatibility with existing variable
# For backward-compatibility with existing variable
if (YAML_CPP_ROOT)
set(TORCHFORT_YAML_CPP_ROOT ${YAML_CPP_ROOT})
endif()
Expand All @@ -34,13 +34,12 @@ endif()
project(torchfort LANGUAGES ${LANGS})

if (CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC")
# __rdtsc() in torch not supported by nvc++. Use g++ for CXX files.
# __rdtsc() in torch not supported by nvc++. Use g++ for CXX files.
message(FATAL_ERROR "TorchFort does not support compilation of C++ files with nvc++. "
"Set CMAKE_CXX_COMPILER to g++ to proceed.")
endif()


# unit testing with gtest
#unit testing with gtest
if (TORCHFORT_BUILD_TESTS)
enable_testing()
include(CTest)
Expand All @@ -49,29 +48,29 @@ if (TORCHFORT_BUILD_TESTS)
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
include(GoogleTest)
endif()

# MPI
#MPI
find_package(MPI REQUIRED)

# CUDA
#CUDA
if (TORCHFORT_ENABLE_GPU)
find_package(CUDAToolkit REQUIRED)

# HPC SDK
# Locate and append NVHPC CMake configuration if available
#HPC SDK
#Locate and append NVHPC CMake configuration if available
find_program(NVHPC_CXX_BIN "nvc++")
if (NVHPC_CXX_BIN)
string(REPLACE "compilers/bin/nvc++" "cmake" NVHPC_CMAKE_DIR ${NVHPC_CXX_BIN})
set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH};${NVHPC_CMAKE_DIR}")
find_package(NVHPC COMPONENTS "")
endif()
# Get NCCL library (with optional override)

# Get NCCL library (with optional override)
if (TORCHFORT_NCCL_ROOT)
find_path(NCCL_INCLUDE_DIR REQUIRED
NAMES nccl.h
Expand Down Expand Up @@ -103,8 +102,8 @@ if (TORCHFORT_ENABLE_GPU)

message(STATUS "Using NCCL library: ${NCCL_LIBRARY}")

# PyTorch
# Set TORCH_CUDA_ARCH_LIST string to match TORCHFORT_CUDA_CC_LIST
#PyTorch
#Set TORCH_CUDA_ARCH_LIST string to match TORCHFORT_CUDA_CC_LIST
foreach(CUDA_CC ${TORCHFORT_CUDA_CC_LIST})
string(REGEX REPLACE "([0-9])$" ".\\1" CUDA_CC_W_DOT ${CUDA_CC})
list(APPEND TORCH_CUDA_ARCH_LIST ${CUDA_CC_W_DOT})
Expand All @@ -114,15 +113,15 @@ endif()

find_package(Torch REQUIRED)

# Generate configuration header
#Generate configuration header
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/src/csrc/include/torchfort_config.h.in
${CMAKE_BINARY_DIR}/include/torchfort_config.h
@ONLY
)

# yaml-cpp
#find_package(yaml-cpp REQUIRED)
# yaml-cpp
#find_package(yaml-cpp REQUIRED)
find_path(YAML_CPP_INCLUDE_DIR REQUIRED
NAMES yaml-cpp/yaml.h
HINTS ${TORCHFORT_YAML_CPP_ROOT}/include
Expand All @@ -133,7 +132,7 @@ find_library(YAML_CPP_LIBRARY REQUIRED
)
message(STATUS "Using yaml-cpp library: ${YAML_CPP_LIBRARY}")

# C/C++ shared library
# C/C++ shared library
add_library(${PROJECT_NAME} SHARED)
set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)

Expand Down Expand Up @@ -165,6 +164,7 @@ target_sources(${PROJECT_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/src/csrc/models/rl/sac_model.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/csrc/rl/policy.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/csrc/rl/running_normalizer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/csrc/rl/setup.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/csrc/rl/utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/csrc/rl/off_policy/interface.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/csrc/rl/off_policy/ddpg.cpp
Expand Down Expand Up @@ -219,17 +219,17 @@ install(
INCLUDES DESTINATION ${CMAKE_INSTALL_PREFIX}/include
)

# Install generated configuration header
#Install generated configuration header
install(
FILES ${CMAKE_BINARY_DIR}/include/torchfort_config.h
DESTINATION ${CMAKE_INSTALL_PREFIX}/include
)

# Fortran library and module
#Fortran library and module
if (TORCHFORT_BUILD_FORTRAN)

if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
# Creating -gpu argument string for nvfortran GPU compilation
# Creating -gpu argument string for nvfortran GPU compilation
foreach(CUDA_CC ${TORCHFORT_CUDA_CC_LIST})
list(APPEND CUF_GPU_ARG "cc${CUDA_CC}")
endforeach()
Expand All @@ -256,17 +256,17 @@ if (TORCHFORT_BUILD_FORTRAN)
install(
TARGETS "${PROJECT_NAME}_fort"
)
# install Fortran module
#install Fortran module
install(FILES ${CMAKE_BINARY_DIR}/include/torchfort.mod DESTINATION ${CMAKE_INSTALL_PREFIX}/include)
endif()

# install Python files
#install Python files
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/python/wandb_helper.py DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/python)

# install docs
#install docs
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/docs DESTINATION ${CMAKE_INSTALL_PREFIX})

# build examples
#build examples
if (TORCHFORT_BUILD_EXAMPLES)
add_subdirectory(examples/cpp/cart_pole)
if (TORCHFORT_BUILD_FORTRAN)
Expand All @@ -275,7 +275,7 @@ if (TORCHFORT_BUILD_EXAMPLES)
endif()
endif()

# build tests
#build tests
if (TORCHFORT_BUILD_TESTS)
add_subdirectory(tests/general)
add_subdirectory(tests/supervised)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# TorchFort
# TorchFort

An Online Deep Learning Interface for HPC programs on NVIDIA GPUs

Expand Down
23 changes: 22 additions & 1 deletion docs/api/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ The block in the configuration file defining algorithm properties takes the foll
parameters:
<option> = <value>

Currently, only type ``uniform`` is supported. The following table lists the available options:
Currently, types ``uniform`` and ``prioritized`` are supported. The following table lists the available options:

+---------------------------+-----------------+-----------------+------------------------------------------------------------------+
| Replay Buffer Type | Option | Data Type | Description |
Expand All @@ -414,11 +414,32 @@ Currently, only type ``uniform`` is supported. The following table lists the ava
+ +-----------------+-----------------+------------------------------------------------------------------+
| | ``n_envs`` | integer | Number of environments |
+---------------------------+-----------------+-----------------+------------------------------------------------------------------+
| ``prioritized`` | ``min_size`` | integer | Minimum number of samples before buffer is ready for training |
+ +-----------------+-----------------+------------------------------------------------------------------+
| | ``max_size`` | integer | Maximum capacity |
+ +-----------------+-----------------+------------------------------------------------------------------+
| | ``n_envs`` | integer | Number of environments |
+ +-----------------+-----------------+------------------------------------------------------------------+
| | ``alpha`` | float | Prioritization exponent; 0=uniform, 1=full (default 0.6) |
+ +-----------------+-----------------+------------------------------------------------------------------+
| | ``beta0`` | float | Initial importance-sampling weight exponent (default 0.4) |
+ +-----------------+-----------------+------------------------------------------------------------------+
| | ``beta_max`` | float | Final importance-sampling weight exponent (default 1.0) |
+ +-----------------+-----------------+------------------------------------------------------------------+
| | ``beta_steps`` | integer | Steps to anneal beta from beta0 to beta_max (default 100000) |
+---------------------------+-----------------+-----------------+------------------------------------------------------------------+

Note that the effective sizes for each environment are :math:`\mathrm{min\_size} / \mathrm{n\_envs}` and :math:`\mathrm{max\_size} / \mathrm{n\_envs}`.
You need to ensure that you can store at least one sample for each environment. However, for better algorithm performance, it is highly advised to provide buffers
which can store longer trajectories.

The ``prioritized`` buffer implements Prioritized Experience Replay (`Schaul et al., 2016 <https://arxiv.org/abs/1511.05952>`_), sampling
transitions in proportion to their last observed temporal-difference (TD) error rather than uniformly. The degree of prioritization is controlled
by ``alpha`` (with ``alpha = 0`` recovering uniform sampling), and the resulting sampling bias is corrected by importance-sampling weights whose
exponent ``beta`` is annealed linearly from ``beta0`` to ``beta_max`` over ``beta_steps`` sampling steps. All off-policy algorithms (``DDPG``,
``TD3``, ``SAC``) transparently apply these importance-sampling weights to their losses and feed the per-sample TD errors back to update the
priorities; no changes to the algorithm configuration are required to switch between ``uniform`` and ``prioritized`` buffers.

For on-policy algorithms, the block looks as follows:

.. code-block:: yaml
Expand Down
22 changes: 11 additions & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# basic packages
ruamel-yaml
#basic packages
ruamel-yaml
Comment thread
azrael417 marked this conversation as resolved.
Outdated

# pytorch and some dependencies
torch==2.8.0
#pytorch and some dependencies
torch==2.8.0

# training monitoring
wandb
#training monitoring
wandb

# RL example visualization related
pygame
moviepy
#RL example visualization related
pygame
moviepy

# Supervised learning example visualization related
matplotlib
#Supervised learning example visualization related
matplotlib
12 changes: 5 additions & 7 deletions src/csrc/include/internal/rl/off_policy/ddpg.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ template <typename T>
void train_ddpg(const ModelPack& p_model, const ModelPack& p_model_target, const ModelPack& q_model,
const ModelPack& q_model_target, torch::Tensor state_old_tensor, torch::Tensor state_new_tensor,
torch::Tensor action_old_tensor, torch::Tensor action_new_tensor, torch::Tensor reward_tensor,
torch::Tensor d_tensor, const T& gamma, const T& rho, T& p_loss_val, T& q_loss_val) {
torch::Tensor d_tensor, torch::Tensor is_weights, const T& gamma, const T& rho,
torch::Tensor& td_errors, T& p_loss_val, T& q_loss_val) {

// nvtx marker
torchfort::nvtx::rangePush("torchfort_train_ddpg");
Expand All @@ -72,10 +73,6 @@ void train_ddpg(const ModelPack& p_model, const ModelPack& p_model_target, const
// value functions
q_model.model->train();

// opt
// loss is fixed by algorithm
auto q_loss_func = torch::nn::MSELoss(torch::nn::MSELossOptions().reduction(torch::kMean));

// policy function
// compute y: use the target models for q_new, no grads
torch::Tensor y_tensor;
Expand All @@ -87,10 +84,11 @@ void train_ddpg(const ModelPack& p_model, const ModelPack& p_model_target, const
}

// backward and update step
// compute loss
// IS-weighted MSE loss: mean(w * (q - y)^2)
torch::Tensor q_old_tensor =
torch::squeeze(q_model.model->forward(std::vector<torch::Tensor>{state_old_tensor, action_old_tensor})[0], 1);
torch::Tensor q_loss_tensor = q_loss_func->forward(q_old_tensor, y_tensor);
td_errors = torch::abs(q_old_tensor - y_tensor).detach();
torch::Tensor q_loss_tensor = torch::mean(is_weights * torch::square(q_old_tensor - y_tensor));

auto state = q_model.state;
if (state->step_train_current % q_model.grad_accumulation_steps == 0) {
Expand Down
15 changes: 7 additions & 8 deletions src/csrc/include/internal/rl/off_policy/sac.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ template <typename T>
void train_sac(const PolicyPack& p_model, const std::vector<ModelPack>& q_models,
const std::vector<ModelPack>& q_models_target, torch::Tensor state_old_tensor,
torch::Tensor state_new_tensor, torch::Tensor action_old_tensor, torch::Tensor reward_tensor,
torch::Tensor d_tensor, const std::shared_ptr<AlphaModel>& alpha_model,
torch::Tensor d_tensor, torch::Tensor is_weights, const std::shared_ptr<AlphaModel>& alpha_model,
const std::shared_ptr<torch::optim::Optimizer>& alpha_optimizer,
const std::shared_ptr<BaseLRScheduler>& alpha_lr_scheduler, const T& target_entropy, const T& gamma,
const T& rho, T& p_loss_val, T& q_loss_val) {
const T& rho, torch::Tensor& td_errors, T& p_loss_val, T& q_loss_val) {

// nvtx marker
torchfort::nvtx::rangePush("torchfort_train_sac");
Expand All @@ -84,10 +84,6 @@ void train_sac(const PolicyPack& p_model, const std::vector<ModelPack>& q_models
q_model_target.model->train();
}

// opt
// loss is fixed by algorithm
auto q_loss_func = torch::nn::MSELoss(torch::nn::MSELossOptions().reduction(torch::kMean));

// if we are updating the entropy coefficient, do that first
torch::Tensor alpha_loss;
auto state = p_model.state;
Expand Down Expand Up @@ -168,9 +164,12 @@ void train_sac(const PolicyPack& p_model, const std::vector<ModelPack>& q_models
}

// backward and update step
// IS-weighted MSE loss: mean(w * (q - y)^2), summed across critics
// td_errors taken from first critic only
torch::Tensor q_old_tensor =
torch::squeeze(q_models[0].model->forward(std::vector<torch::Tensor>{state_old_tensor, action_old_tensor})[0], 1);
torch::Tensor q_loss_tensor = q_loss_func->forward(q_old_tensor, y_tensor);
td_errors = torch::abs(q_old_tensor - y_tensor).detach();
torch::Tensor q_loss_tensor = torch::mean(is_weights * torch::square(q_old_tensor - y_tensor));
state = q_models[0].state;
if (state->step_train_current % q_models[0].grad_accumulation_steps == 0) {
q_models[0].optimizer->zero_grad();
Expand All @@ -179,7 +178,7 @@ void train_sac(const PolicyPack& p_model, const std::vector<ModelPack>& q_models
// compute loss
q_old_tensor = torch::squeeze(
q_models[i].model->forward(std::vector<torch::Tensor>{state_old_tensor, action_old_tensor})[0], 1);
q_loss_tensor = q_loss_tensor + q_loss_func->forward(q_old_tensor, y_tensor);
q_loss_tensor = q_loss_tensor + torch::mean(is_weights * torch::square(q_old_tensor - y_tensor));
state = q_models[i].state;
if (state->step_train_current % q_models[i].grad_accumulation_steps == 0) {
q_models[i].optimizer->zero_grad();
Expand Down
16 changes: 7 additions & 9 deletions src/csrc/include/internal/rl/off_policy/td3.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ template <typename T>
void train_td3(const ModelPack& p_model, const ModelPack& p_model_target, const std::vector<ModelPack>& q_models,
const std::vector<ModelPack>& q_models_target, torch::Tensor state_old_tensor,
torch::Tensor state_new_tensor, torch::Tensor action_old_tensor, torch::Tensor action_new_tensor,
torch::Tensor reward_tensor, torch::Tensor d_tensor, const T& gamma, const T& rho, T& p_loss_val,
T& q_loss_val, bool update_policy) {
torch::Tensor reward_tensor, torch::Tensor d_tensor, torch::Tensor is_weights, const T& gamma,
const T& rho, torch::Tensor& td_errors, T& p_loss_val, T& q_loss_val, bool update_policy) {

// nvtx marker
torchfort::nvtx::rangePush("torchfort_train_td3");
Expand All @@ -76,10 +76,6 @@ void train_td3(const ModelPack& p_model, const ModelPack& p_model_target, const
q_model.model->train();
}

// opt
// loss is fixed by algorithm
auto q_loss_func = torch::nn::MSELoss(torch::nn::MSELossOptions().reduction(torch::kMean));

// policy function
// compute y: use the target models for q_new, no grads
torch::Tensor y_tensor;
Expand All @@ -96,18 +92,20 @@ void train_td3(const ModelPack& p_model, const ModelPack& p_model_target, const
}

// backward and update step
// compute loss for critics and zero grads while we are at it
// IS-weighted MSE loss: mean(w * (q - y)^2), summed across critics
// td_errors taken from first critic only (consistent with policy update using q_models[0])
torch::Tensor q_old_tensor =
torch::squeeze(q_models[0].model->forward(std::vector<torch::Tensor>{state_old_tensor, action_old_tensor})[0], 1);
torch::Tensor q_loss_tensor = q_loss_func->forward(q_old_tensor, y_tensor);
td_errors = torch::abs(q_old_tensor - y_tensor).detach();
torch::Tensor q_loss_tensor = torch::mean(is_weights * torch::square(q_old_tensor - y_tensor));
auto state = q_models[0].state;
if (state->step_train_current % q_models[0].grad_accumulation_steps == 0) {
q_models[0].optimizer->zero_grad();
}
for (int i = 1; i < q_models.size(); ++i) {
q_old_tensor = torch::squeeze(
q_models[i].model->forward(std::vector<torch::Tensor>{state_old_tensor, action_old_tensor})[0], 1);
q_loss_tensor = q_loss_tensor + q_loss_func->forward(q_old_tensor, y_tensor);
q_loss_tensor = q_loss_tensor + torch::mean(is_weights * torch::square(q_old_tensor - y_tensor));
state = q_models[i].state;
if (state->step_train_current % q_models[i].grad_accumulation_steps == 0) {
q_models[i].optimizer->zero_grad();
Expand Down
Loading
Loading