diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2b8c6934f9..718028541d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -282,6 +282,8 @@ if(RAFT_COMPILE_LIBRARY) src/raft_runtime/solver/lanczos_solver_int64_float.cu src/raft_runtime/solver/lanczos_solver_int_double.cu src/raft_runtime/solver/lanczos_solver_int_float.cu + src/raft_runtime/solver/randomized_svds_float.cu + src/raft_runtime/solver/randomized_svds_double.cu ) set_target_properties( raft_objs diff --git a/cpp/include/raft/sparse/solver/detail/cholesky_qr.cuh b/cpp/include/raft/sparse/solver/detail/cholesky_qr.cuh new file mode 100644 index 0000000000..2176c38b2b --- /dev/null +++ b/cpp/include/raft/sparse/solver/detail/cholesky_qr.cuh @@ -0,0 +1,159 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace raft::sparse::solver::detail { + +/** + * @brief Single pass of CholeskyQR: orthogonalize Q in-place via Cholesky factorization + * of the Gram matrix W = Q^T @ Q. 
+ * + * @return true on success, false if Cholesky factorization failed (matrix not SPD / rank deficient) + */ +template +bool cholesky_qr_pass(raft::resources const& handle, + ValueTypeT* Q, + int m, + int k, + ValueTypeT* W, + ValueTypeT* workspace, + int workspace_size, + int* dev_info) +{ + auto stream = raft::resource::get_cuda_stream(handle); + auto cublas_h = raft::resource::get_cublas_handle(handle); + auto cusolver_h = raft::resource::get_cusolver_dn_handle(handle); + + const ValueTypeT one = 1; + const ValueTypeT zero = 0; + + // W = Q^T @ Q (k x k) + // Q is col-major (m x k), so: W = Q^T * Q via gemm(TRANS, NOTRANS, k, k, m) + raft::linalg::gemm(handle, + true, // trans_a + false, // trans_b + k, + k, + m, + &one, + Q, + m, + Q, + m, + &zero, + W, + k, + stream); + + // L = cholesky(W, LOWER) — W is overwritten with L in lower triangle + RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnpotrf( + cusolver_h, CUBLAS_FILL_MODE_LOWER, k, W, k, workspace, workspace_size, dev_info, stream)); + + // Check if Cholesky succeeded + int h_dev_info = 0; + raft::update_host(&h_dev_info, dev_info, 1, stream); + raft::resource::sync_stream(handle); + if (h_dev_info != 0) { return false; } + + // Q = Q @ L^{-T} + // This is equivalent to solving X * L^T = Q for X, i.e. trsm with RIGHT, LOWER, TRANS + RAFT_CUBLAS_TRY(raft::linalg::detail::cublastrsm(cublas_h, + CUBLAS_SIDE_RIGHT, + CUBLAS_FILL_MODE_LOWER, + CUBLAS_OP_T, + CUBLAS_DIAG_NON_UNIT, + m, + k, + &one, + W, + k, + Q, + m, + stream)); + + return true; +} + +/** + * @brief CholeskyQR2 orthogonalization: two passes of CholeskyQR for numerical stability. + * + * This is the GPU-optimized orthogonalization from Tomás, Quintana-Ortí, Anzt (2024), + * "Fast Truncated SVD of Sparse and Dense Matrices on Graphics Processors". + * It uses GEMM + Cholesky + TRSM operations which are highly efficient on GPU, + * providing ~3x speedup over standard Householder QR. 
+ * + * If Cholesky factorization fails (input is rank-deficient), falls back to standard QR. + * + * @param handle raft resources handle + * @param Q matrix to orthogonalize of shape (m, k), col-major, modified in-place + * @return true if CholeskyQR2 succeeded, false if fell back to QR + */ +template +bool cholesky_qr2(raft::resources const& handle, + raft::device_matrix_view Q) +{ + int m = Q.extent(0); + int k = Q.extent(1); + + auto stream = raft::resource::get_cuda_stream(handle); + auto cusolver_h = raft::resource::get_cusolver_dn_handle(handle); + + // Allocate workspace for Gram matrix and Cholesky + rmm::device_uvector W(k * k, stream); + rmm::device_scalar dev_info(stream); + + // Query workspace size for potrf + int potrf_workspace_size = 0; + RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnpotrf_bufferSize( + cusolver_h, CUBLAS_FILL_MODE_LOWER, k, W.data(), k, &potrf_workspace_size)); + rmm::device_uvector potrf_workspace(potrf_workspace_size, stream); + + // First pass + if (!cholesky_qr_pass(handle, + Q.data_handle(), + m, + k, + W.data(), + potrf_workspace.data(), + potrf_workspace_size, + dev_info.data())) { + // Fallback to standard QR (qrGetQ handles src==dst via internal copy) + raft::linalg::qrGetQ(handle, Q.data_handle(), Q.data_handle(), m, k, stream); + return false; + } + + // Second pass for improved numerical stability + if (!cholesky_qr_pass(handle, + Q.data_handle(), + m, + k, + W.data(), + potrf_workspace.data(), + potrf_workspace_size, + dev_info.data())) { + // Fallback to standard QR (qrGetQ handles src==dst via internal copy) + raft::linalg::qrGetQ(handle, Q.data_handle(), Q.data_handle(), m, k, stream); + return false; + } + + return true; +} + +} // namespace raft::sparse::solver::detail diff --git a/cpp/include/raft/sparse/solver/detail/csr_linear_operator.cuh b/cpp/include/raft/sparse/solver/detail/csr_linear_operator.cuh new file mode 100644 index 0000000000..d29449f5cf --- /dev/null +++ 
b/cpp/include/raft/sparse/solver/detail/csr_linear_operator.cuh @@ -0,0 +1,85 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include + +namespace raft::sparse::solver::detail { + +/** + * @brief CSR matrix wrapper providing apply() / apply_transpose() for the sparse SVD solver. + * + * This is NOT a general-purpose LinearOperator (in the sense of scipy's + * `scipy.sparse.linalg.LinearOperator`): it only exposes the two products the + * randomized SVD inner loop needs (Y = A @ X and Z = A^T @ X) via cuSPARSE SpMM. + * It intentionally lives in `detail/` and is not part of the public API. + * + * @note The cuSPARSE spmm wrapper requires `int` for indptr/indices types, so this wrapper + * is currently limited to int-indexed CSR matrices. + * + * @tparam ValueTypeT Data type of matrix values + * @tparam NNZTypeT Type for number of non-zeros + */ +template +struct csr_linear_operator { + /** + * @brief Construct from a const CSR matrix view + * + * @note `m_`/`n_` are cached at construction because + * `raft::device_csr_matrix_view::structure_view()` is not const-qualified + * and cannot be invoked from a const member function. 
+ */ + explicit csr_linear_operator(raft::device_csr_matrix_view A) + : A_(A), m_(A.structure_view().get_n_rows()), n_(A.structure_view().get_n_cols()) + { + } + + int rows() const { return m_; } + int cols() const { return n_; } + + /** @brief Access the underlying const CSR matrix view */ + raft::device_csr_matrix_view csr_view() const { return A_; } + + /** + * @brief Compute Y = A @ X + * @param[in] handle raft resources handle + * @param[in] X input dense matrix of shape (n, k) col-major + * @param[out] Y output dense matrix of shape (m, k) col-major + */ + void apply(raft::resources const& handle, + raft::device_matrix_view X, + raft::device_matrix_view Y) const + { + ValueTypeT alpha = 1; + ValueTypeT beta = 0; + raft::sparse::linalg::spmm(handle, false, false, &alpha, A_, X, &beta, Y); + } + + /** + * @brief Compute Z = A^T @ X + * @param[in] handle raft resources handle + * @param[in] X input dense matrix of shape (m, k) col-major + * @param[out] Z output dense matrix of shape (n, k) col-major + */ + void apply_transpose(raft::resources const& handle, + raft::device_matrix_view X, + raft::device_matrix_view Z) const + { + ValueTypeT alpha = 1; + ValueTypeT beta = 0; + raft::sparse::linalg::spmm(handle, true, false, &alpha, A_, X, &beta, Z); + } + + private: + raft::device_csr_matrix_view A_; + int m_; + int n_; +}; + +} // namespace raft::sparse::solver::detail diff --git a/cpp/include/raft/sparse/solver/detail/randomized_svds.cuh b/cpp/include/raft/sparse/solver/detail/randomized_svds.cuh new file mode 100644 index 0000000000..25b202f1eb --- /dev/null +++ b/cpp/include/raft/sparse/solver/detail/randomized_svds.cuh @@ -0,0 +1,256 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +namespace raft::sparse::solver::detail { + +/** + * @brief Randomized SVD for sparse matrices using block power iteration with CholeskyQR2. + * + * Implements randomized SVD (Halko et al. 2009) with GPU-optimized CholeskyQR2 + * orthogonalization (Tomás et al. 2024). + * + * The operator interface allows implicit operators (e.g. mean-centered sparse matrices) + * without materializing the dense matrix. + * + * @tparam ValueTypeT Data type (float or double) + * @tparam OperatorT Linear operator type providing apply() and apply_transpose() + * + * @param[in] handle raft resources handle + * @param[in] config SVD configuration (n_components, n_oversamples, n_power_iters, seed) + * @param[in] op linear operator representing the matrix to decompose + * @param[out] singular_values output singular values of shape (k,) in descending order + * @param[out] U optional output left singular vectors of shape (m, k), col-major + * @param[out] Vt optional output right singular vectors of shape (k, n), col-major + */ +template +void sparse_randomized_svd( + raft::resources const& handle, + sparse_svd_config const& config, + OperatorT const& op, + raft::device_vector_view singular_values, + std::optional> U, + std::optional> Vt) +{ + common::nvtx::range fun_scope( + "raft::sparse::solver::sparse_randomized_svd(%d, %d, %d)", + op.rows(), + op.cols(), + config.n_components); + + int m = op.rows(); + int n = op.cols(); + int k = config.n_components; + int p = config.n_oversamples; + + RAFT_EXPECTS(k > 0, "n_components must be positive"); + RAFT_EXPECTS(k < std::min(m, n), "n_components must be less than min(m, n)"); + RAFT_EXPECTS(p >= 0, "n_oversamples must be non-negative"); + RAFT_EXPECTS(config.n_power_iters >= 0, 
"n_power_iters must be non-negative"); + RAFT_EXPECTS(singular_values.extent(0) == static_cast(k), + "singular_values must have size n_components"); + if (U) { + RAFT_EXPECTS( + U->extent(0) == static_cast(m) && U->extent(1) == static_cast(k), + "U must have shape (m, n_components)"); + } + if (Vt) { + RAFT_EXPECTS( + Vt->extent(0) == static_cast(k) && Vt->extent(1) == static_cast(n), + "Vt must have shape (n_components, n)"); + } + + auto stream = raft::resource::get_cuda_stream(handle); + + int min_dim = std::min(m, n); + if (k + p > min_dim) { + RAFT_LOG_WARN( + "n_components (%d) + n_oversamples (%d) = %d exceeds min(n_rows, n_cols) = %d. " + "Clamping to %d. This may affect approximation quality.", + k, + p, + k + p, + min_dim, + min_dim); + } + int block_size = std::min(k + p, min_dim); + RAFT_EXPECTS(block_size >= k, + "block_size (n_components + n_oversamples) must be >= n_components"); + + // Initialize RNG + uint64_t seed = config.seed.value_or(std::random_device{}()); + raft::random::RngState rng_state(seed); + + // Step 1-3: Y = A @ Omega, orthogonalize + auto Y = raft::make_device_matrix( + handle, static_cast(m), static_cast(block_size)); + { + auto Omega = raft::make_device_matrix( + handle, static_cast(n), static_cast(block_size)); + raft::random::normal(handle, + rng_state, + Omega.data_handle(), + static_cast(n) * block_size, + ValueTypeT(0), + ValueTypeT(1)); + op.apply(handle, + raft::make_device_matrix_view( + Omega.data_handle(), n, block_size), + Y.view()); + } // Omega freed here + if (!cholesky_qr2(handle, Y.view())) { + RAFT_LOG_WARN("CholeskyQR2 fell back to standard QR during initial orthogonalization"); + } + + // Step 4: Power iterations + auto Z = raft::make_device_matrix( + handle, static_cast(n), static_cast(block_size)); + + for (int iter = 0; iter < config.n_power_iters; ++iter) { + // Z = A^T @ Q -> (n, block_size) + op.apply_transpose(handle, + raft::make_device_matrix_view( + Y.data_handle(), m, block_size), + Z.view()); + if 
(!cholesky_qr2(handle, Z.view())) { + RAFT_LOG_WARN( + "CholeskyQR2 fell back to standard QR during power iteration %d (transpose step)", iter); + } + + // Y = A @ Z -> (m, block_size) + op.apply(handle, + raft::make_device_matrix_view( + Z.data_handle(), n, block_size), + Y.view()); + if (!cholesky_qr2(handle, Y.view())) { + RAFT_LOG_WARN("CholeskyQR2 fell back to standard QR during power iteration %d (forward step)", + iter); + } + } + + // Q = Y after power iterations (already orthogonal) + // Q is (m, block_size) + + // Step 5: Bt = A^T @ Q -> (n, block_size) + op.apply_transpose(handle, + raft::make_device_matrix_view( + Y.data_handle(), m, block_size), + Z.view()); + + // Step 6-7: SVD of B = Bt^T where Bt = Z (n x block_size, tall matrix) + // We compute SVD(Bt) directly to avoid cuSOLVER gesvd issues with wide matrices. + // SVD(Bt) = U_bt * S * Vt_bt → SVD(B) has U_b = V_bt and Vt_b = U_bt^T + auto S_full = raft::make_device_vector(handle, block_size); + + // Bt = B^T is (n x block_size), we already have Bt = Z from step 5 + // Z is (n, block_size) = A^T @ Q = B^T. We can reuse Z directly! + // SVD(Bt) with Bt being (n x block_size): jobu='S' gives U_bt (n x block_size), + // jobvt='A' gives Vt_bt (block_size x block_size). Both are needed when either + // U or Vt is requested by the caller; we only allocate the ones we will read. + std::optional> U_bt; + std::optional> Vt_bt; + if (Vt) { + U_bt.emplace(raft::make_device_matrix( + handle, static_cast(n), static_cast(block_size))); + } + if (U) { + Vt_bt.emplace(raft::make_device_matrix( + handle, static_cast(block_size), static_cast(block_size))); + } + + // Z is consumed by svdQR (modifies input in-place) — this is fine since Z is not used after. + // svdQR requires non-null pointers for left/right vectors when gen_*_vec is true; pass + // a scratch matrix when the caller didn't request that side. + std::optional> U_bt_scratch; + std::optional> Vt_bt_scratch; + ValueTypeT* U_bt_ptr = U_bt ? 
U_bt->data_handle() : nullptr; + ValueTypeT* Vt_bt_ptr = Vt_bt ? Vt_bt->data_handle() : nullptr; + if (U_bt_ptr == nullptr) { + U_bt_scratch.emplace(raft::make_device_matrix( + handle, static_cast(n), static_cast(block_size))); + U_bt_ptr = U_bt_scratch->data_handle(); + } + if (Vt_bt_ptr == nullptr) { + Vt_bt_scratch.emplace(raft::make_device_matrix( + handle, static_cast(block_size), static_cast(block_size))); + Vt_bt_ptr = Vt_bt_scratch->data_handle(); + } + + raft::linalg::svdQR(handle, + Z.data_handle(), + n, + block_size, + S_full.data_handle(), + U_bt_ptr, + Vt_bt_ptr, + true, // transpose right vectors: Vt_bt -> V_bt (block_size x block_size) + true, // generate left vectors + true, // generate right vectors + stream); + // After svdQR with trans_right=true: + // U_bt is (n, block_size) — left singular vectors of Bt + // Vt_bt is now V_bt (block_size x block_size) — right singular vectors of Bt (transposed) + // S_full has block_size singular values + // + // For B = Bt^T: U_b = V_bt = Vt_bt (after transpose), Vt_b = U_bt^T + // So: U_b[:, :k] = Vt_bt[:, :k] and Vt_b[:k, :] = U_bt[:, :k]^T + + // Step 8: U = Q @ U_b[:, :k] = Q @ V_bt[:, :k] + // Q is Y (m, block_size), V_bt is (block_size, block_size) + // U = Y @ V_bt[:, :k] → (m, block_size) * (block_size, k) → (m, k) + if (U) { + const ValueTypeT one = 1; + const ValueTypeT zero = 0; + raft::linalg::gemm(handle, + Y.data_handle(), + m, + block_size, + Vt_bt->data_handle(), // This is V_bt after trans_right=true + U->data_handle(), + m, + k, + CUBLAS_OP_N, + CUBLAS_OP_N, + one, + zero, + stream); + } + + // Step 9: Truncate S, optionally compute Vt + raft::copy(singular_values.data_handle(), S_full.data_handle(), k, stream); + + // Vt[:k, :] = U_bt[:, :k]^T + // U_bt is col-major (n, block_size); transpose its first k columns to (k, n) col-major. 
+ if (Vt) { raft::linalg::transpose(handle, U_bt->data_handle(), Vt->data_handle(), n, k, stream); } + + // Step 10: Sign correction (no-op when neither U nor Vt is requested) + svd_sign_correction(handle, U, Vt); +} + +} // namespace raft::sparse::solver::detail diff --git a/cpp/include/raft/sparse/solver/detail/svds_sign_correction.cuh b/cpp/include/raft/sparse/solver/detail/svds_sign_correction.cuh new file mode 100644 index 0000000000..e35889d91d --- /dev/null +++ b/cpp/include/raft/sparse/solver/detail/svds_sign_correction.cuh @@ -0,0 +1,155 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace raft::sparse::solver::detail { + +/** + * @brief CUDA kernel for SVD sign correction. + * + * For each component i, finds the element with largest absolute value in either + * column i of U (if U is not null) or row i of Vt. If that element is negative, + * flips the sign of U[:, i] (when present) and Vt[i, :] (when present). + * This ensures deterministic output regardless of the SVD algorithm's internal sign choices. + * + * One thread block per component. Uses shared memory reduction to find the argmax. + * + * @param U left singular vectors, col-major (m x k), U[:, i] is at U + i*m. May be nullptr. + * @param Vt right singular vectors, col-major (k x n), Vt[i, :] is row i. May be nullptr. + * @param m number of rows of U (ignored if U is nullptr) + * @param n number of columns of Vt (ignored if Vt is nullptr) + * @param k number of components + * + * @note At least one of U / Vt must be non-null. When U is non-null it is used to + * derive the signs; otherwise Vt is used. 
+ */ +template +RAFT_KERNEL svd_sign_correction_kernel(ValueTypeT* U, ValueTypeT* Vt, int m, int n, int k) +{ + int comp = blockIdx.x; // one block per component + if (comp >= k) return; + + extern __shared__ char smem[]; + auto* s_max_val = reinterpret_cast(smem); + auto* s_max_idx = reinterpret_cast(smem + blockDim.x * sizeof(ValueTypeT)); + + ValueTypeT local_max_val = 0; + int local_max_idx = 0; + + // Source for sign decision: prefer U column; fall back to Vt row + if (U != nullptr) { + ValueTypeT* u_col = U + static_cast(comp) * m; + for (int row = threadIdx.x; row < m; row += blockDim.x) { + ValueTypeT abs_val = raft::abs(u_col[row]); + if (abs_val > local_max_val) { + local_max_val = abs_val; + local_max_idx = row; + } + } + } else { + // Vt is col-major (k x n); row `comp` is at Vt[comp + j*k] for j=0..n-1 + for (int col = threadIdx.x; col < n; col += blockDim.x) { + ValueTypeT abs_val = raft::abs(Vt[comp + static_cast(col) * k]); + if (abs_val > local_max_val) { + local_max_val = abs_val; + local_max_idx = col; + } + } + } + + s_max_val[threadIdx.x] = local_max_val; + s_max_idx[threadIdx.x] = local_max_idx; + __syncthreads(); + + // Tree reduction + for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) { + if (threadIdx.x < stride) { + if (s_max_val[threadIdx.x + stride] > s_max_val[threadIdx.x]) { + s_max_val[threadIdx.x] = s_max_val[threadIdx.x + stride]; + s_max_idx[threadIdx.x] = s_max_idx[threadIdx.x + stride]; + } + } + __syncthreads(); + } + + // Thread 0 has the result + ValueTypeT signed_val; + if (U != nullptr) { + signed_val = U[static_cast(comp) * m + s_max_idx[0]]; + } else { + signed_val = Vt[comp + static_cast(s_max_idx[0]) * k]; + } + bool flip = (s_max_val[0] > 0) && (signed_val < 0); + __syncthreads(); + + if (!flip) return; + + // Flip U column if present + if (U != nullptr) { + ValueTypeT* u_col = U + static_cast(comp) * m; + for (int row = threadIdx.x; row < m; row += blockDim.x) { + u_col[row] = -u_col[row]; + } + } + + // Flip 
Vt row if present: row `comp` is at Vt[comp + j*k] for j=0..n-1 + if (Vt != nullptr) { + for (int col = threadIdx.x; col < n; col += blockDim.x) { + Vt[comp + static_cast(col) * k] = -Vt[comp + static_cast(col) * k]; + } + } +} + +/** + * @brief Apply deterministic sign correction to SVD output. + * + * For each component, ensures the element with largest absolute value in U + * (or Vt, if U is not present) is positive. Whichever of U / Vt is present is + * modified in-place to maintain A ≈ U @ diag(S) @ Vt. + * + * @param handle raft resources handle + * @param U optional left singular vectors of shape (m, k), col-major, modified in-place + * @param Vt optional right singular vectors of shape (k, n), col-major, modified in-place + * + * @note If both U and Vt are absent, this is a no-op. When only Vt is present, the + * sign convention differs from the both-present case (signs are derived from + * `argmax(|Vt[j, :]|)` rather than `argmax(|U[:, j]|)`), but is still + * deterministic for fixed inputs. + */ +template +void svd_sign_correction( + raft::resources const& handle, + std::optional> U, + std::optional> Vt) +{ + if (!U && !Vt) return; + + int k = U ? static_cast(U->extent(1)) : static_cast(Vt->extent(0)); + int m = U ? static_cast(U->extent(0)) : 0; + int n = Vt ? static_cast(Vt->extent(1)) : 0; + + auto stream = raft::resource::get_cuda_stream(handle); + + // threads_per_block must be a power of 2 for the tree reduction in the kernel + constexpr int threads_per_block = 256; + int smem_size = threads_per_block * (sizeof(ValueTypeT) + sizeof(int)); + + ValueTypeT* U_ptr = U ? U->data_handle() : nullptr; + ValueTypeT* Vt_ptr = Vt ? 
Vt->data_handle() : nullptr; + + svd_sign_correction_kernel<<>>(U_ptr, Vt_ptr, m, n, k); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +} // namespace raft::sparse::solver::detail diff --git a/cpp/include/raft/sparse/solver/randomized_svds.cuh b/cpp/include/raft/sparse/solver/randomized_svds.cuh new file mode 100644 index 0000000000..10acf924c3 --- /dev/null +++ b/cpp/include/raft/sparse/solver/randomized_svds.cuh @@ -0,0 +1,117 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace raft::sparse::solver { + +namespace detail { +// Helper alias used to put the optional U/Vt parameters in a non-deduced context +// so template argument deduction picks up ValueTypeT from earlier parameters and +// implicit conversion from `device_matrix_view` to `std::optional<...>` works. +template +using nondeduced_optional_matrix_view_t = std::optional>; +} // namespace detail + +/** + * @defgroup sparse_randomized_svd Sparse Randomized SVD + * @{ + */ + +/** + * @brief Compute truncated SVD using randomized algorithm with a generic linear operator. + * + * Implements randomized SVD (Halko et al. 2009) with CholeskyQR2 orthogonalization + * (Tomás et al. 2024) for efficient GPU execution on sparse matrices. + * + * The operator interface allows implicit linear operators (e.g. mean-centered sparse + * matrices for PCA) without materializing the dense matrix. `OperatorT` must expose + * the minimal interface used by the randomized SVD inner loop: + * - `int rows() const` / `int cols() const` + * - `void apply(handle, X, Y) const` computes `Y = A @ X` + * - `void apply_transpose(handle, X, Z) const` computes `Z = A^T @ X` + * + * where `X`, `Y`, `Z` are `raft::device_matrix_view<..., uint32_t, raft::col_major>`. 
+ * This is intentionally a narrow, SVD-specific operator: only the two matrix products + * above are required, and no other operations (addition, scaling, inverse, eigensolves, + * etc.) are assumed or supported — it is not a general-purpose linear operator like + * `scipy.sparse.linalg.LinearOperator`. + * + * @tparam ValueTypeT Data type (float or double) + * @tparam OperatorT Linear operator type satisfying the interface above + * + * @param[in] handle raft resources handle + * @param[in] config SVD configuration parameters + * @param[in] op linear operator representing the matrix to decompose + * @param[out] singular_values output singular values of shape (n_components,) in descending order + * @param[out] U optional output left singular vectors of shape (m, n_components), col-major. + * Pass `std::nullopt` to skip computing U. + * @param[out] Vt optional output right singular vectors of shape (n_components, n), col-major. + * Pass `std::nullopt` to skip computing Vt. + * + * @note References: + * [1] Halko, Martinsson, Tropp (2009) "Finding structure with randomness" + * https://arxiv.org/abs/0909.4061 + * [2] Tomás, Quintana-Ortí, Anzt (2024) "Fast Truncated SVD of Sparse and Dense Matrices + * on Graphics Processors" https://arxiv.org/abs/2403.06218 + */ +template +void sparse_randomized_svd( + raft::resources const& handle, + sparse_svd_config const& config, + OperatorT const& op, + raft::device_vector_view singular_values, + detail::nondeduced_optional_matrix_view_t< + raft::device_matrix_view> U = std::nullopt, + detail::nondeduced_optional_matrix_view_t< + raft::device_matrix_view> Vt = std::nullopt) +{ + detail::sparse_randomized_svd(handle, config, op, singular_values, U, Vt); +} + +/** + * @brief Compute truncated SVD of a sparse CSR matrix using randomized algorithm. + * + * Convenience overload that accepts a CSR matrix view directly. 
+ * + * @tparam ValueTypeT Data type (float or double) + * @tparam NNZTypeT Type for number of non-zeros + * + * @param[in] handle raft resources handle + * @param[in] config SVD configuration parameters + * @param[in] A input sparse CSR matrix of shape (m, n) + * @param[out] singular_values output singular values of shape (n_components,) in descending order + * @param[out] U optional output left singular vectors of shape (m, n_components), col-major. + * Pass `std::nullopt` to skip computing U. + * @param[out] Vt optional output right singular vectors of shape (n_components, n), col-major. + * Pass `std::nullopt` to skip computing Vt. + */ +template +void sparse_randomized_svd( + raft::resources const& handle, + sparse_svd_config const& config, + raft::device_csr_matrix_view A, + raft::device_vector_view singular_values, + detail::nondeduced_optional_matrix_view_t< + raft::device_matrix_view> U = std::nullopt, + detail::nondeduced_optional_matrix_view_t< + raft::device_matrix_view> Vt = std::nullopt) +{ + detail::csr_linear_operator op(A); + detail::sparse_randomized_svd(handle, config, op, singular_values, U, Vt); +} + +/** @} */ + +} // namespace raft::sparse::solver diff --git a/cpp/include/raft/sparse/solver/svds_config.hpp b/cpp/include/raft/sparse/solver/svds_config.hpp new file mode 100644 index 0000000000..32077b97cd --- /dev/null +++ b/cpp/include/raft/sparse/solver/svds_config.hpp @@ -0,0 +1,35 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include + +namespace raft::sparse::solver { + +/** + * @brief Configuration parameters for the sparse randomized SVD solver + * + * @tparam ValueTypeT Data type for values (float or double) + */ +template +struct sparse_svd_config { + /** @brief Number of singular values/vectors to compute. Must be set by the user. */ + int n_components = 0; + + /** @brief Number of extra random vectors for better approximation. 
+ * Total subspace dimension is n_components + n_oversamples. */ + int n_oversamples = 10; + + /** @brief Number of power iteration passes. More iterations improve accuracy + * for matrices with slowly decaying singular values. */ + int n_power_iters = 2; + + /** @brief Random seed for reproducibility */ + std::optional seed = std::nullopt; +}; + +} // namespace raft::sparse::solver diff --git a/cpp/include/raft_runtime/solver/randomized_svds.hpp b/cpp/include/raft_runtime/solver/randomized_svds.hpp new file mode 100644 index 0000000000..afb23035e7 --- /dev/null +++ b/cpp/include/raft_runtime/solver/randomized_svds.hpp @@ -0,0 +1,43 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include + +#include +#include + +namespace raft::runtime::solver { + +/** + * @defgroup sparse_randomized_svd_runtime Sparse Randomized SVD Runtime API + * @{ + */ + +#define FUNC_DECL(ValueType) \ + void sparse_randomized_svd( \ + const raft::resources& handle, \ + const raft::sparse::solver::sparse_svd_config& config, \ + raft::device_vector_view indptr, \ + raft::device_vector_view indices, \ + raft::device_vector_view data, \ + int n_rows, \ + int n_cols, \ + int nnz, \ + raft::device_vector_view singular_values, \ + std::optional> U, \ + std::optional> Vt) + +FUNC_DECL(float); +FUNC_DECL(double); + +#undef FUNC_DECL + +/** @} */ + +} // namespace raft::runtime::solver diff --git a/cpp/src/raft_runtime/solver/randomized_svds.cuh b/cpp/src/raft_runtime/solver/randomized_svds.cuh new file mode 100644 index 0000000000..61e60fe5ec --- /dev/null +++ b/cpp/src/raft_runtime/solver/randomized_svds.cuh @@ -0,0 +1,30 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include + +#define FUNC_DEF(ValueType) \ + void sparse_randomized_svd( \ + const raft::resources& handle, \ + const raft::sparse::solver::sparse_svd_config& config, \ + raft::device_vector_view indptr, \ + raft::device_vector_view indices, \ + raft::device_vector_view data, \ + int n_rows, \ + int n_cols, \ + int nnz, \ + raft::device_vector_view singular_values, \ + std::optional> U, \ + std::optional> Vt) \ + { \ + auto csr_structure = raft::make_device_compressed_structure_view( \ + indptr.data_handle(), indices.data_handle(), n_rows, n_cols, nnz); \ + auto csr_matrix = raft::make_device_csr_matrix_view( \ + data.data_handle(), csr_structure); \ + raft::sparse::solver::sparse_randomized_svd( \ + handle, config, csr_matrix, singular_values, U, Vt); \ + } diff --git a/cpp/src/raft_runtime/solver/randomized_svds_double.cu b/cpp/src/raft_runtime/solver/randomized_svds_double.cu new file mode 100644 index 0000000000..a6b3984e96 --- /dev/null +++ b/cpp/src/raft_runtime/solver/randomized_svds_double.cu @@ -0,0 +1,12 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "randomized_svds.cuh" + +namespace raft::runtime::solver { + +FUNC_DEF(double); + +} // namespace raft::runtime::solver diff --git a/cpp/src/raft_runtime/solver/randomized_svds_float.cu b/cpp/src/raft_runtime/solver/randomized_svds_float.cu new file mode 100644 index 0000000000..8364c27f0e --- /dev/null +++ b/cpp/src/raft_runtime/solver/randomized_svds_float.cu @@ -0,0 +1,12 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#include "randomized_svds.cuh" + +namespace raft::runtime::solver { + +FUNC_DEF(float); + +} // namespace raft::runtime::solver diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 47ac6fc286..41b6e7ee3c 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -242,7 +242,7 @@ if(BUILD_TESTS) ConfigureTest( NAME SOLVERS_TEST PATH linalg/eigen_solvers.cu lap/lap.cu sparse/mst.cu - sparse/solver/lanczos.cu LIB EXPLICIT_INSTANTIATE_ONLY + sparse/solver/lanczos.cu sparse/solver/randomized_svds.cu LIB EXPLICIT_INSTANTIATE_ONLY ) ConfigureTest( diff --git a/cpp/tests/sparse/solver/randomized_svds.cu b/cpp/tests/sparse/solver/randomized_svds.cu new file mode 100644 index 0000000000..978f836eac --- /dev/null +++ b/cpp/tests/sparse/solver/randomized_svds.cu @@ -0,0 +1,679 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../../test_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +namespace raft::sparse::solver { + +// ============================================================================ +// Golden data: 20x15 sparse matrix (nnz=120), generated with cupy seed=42 +// Expected singular values computed via cupy.linalg.svd +// ============================================================================ + +template +std::vector golden_indptr() +{ + return {0, 4, 12, 17, 21, 27, 35, 41, 48, 55, 61, 67, 74, 79, 84, 90, 97, 102, 110, 114, 120}; +} + +template +std::vector golden_indices() +{ + return {0, 4, 11, 14, 0, 2, 4, 5, 6, 12, 13, 14, 1, 6, 7, 8, 10, 0, 1, 6, + 11, 0, 3, 4, 5, 13, 14, 0, 1, 2, 3, 8, 10, 12, 14, 1, 4, 8, 9, 11, + 12, 0, 3, 4, 7, 10, 11, 13, 0, 2, 4, 8, 9, 11, 13, 0, 4, 8, 9, 10, + 11, 3, 4, 5, 12, 13, 14, 0, 1, 2, 5, 7, 10, 13, 0, 2, 7, 13, 14, 3, + 8, 10, 12, 
14, 0, 3, 4, 5, 9, 11, 0, 3, 7, 9, 11, 12, 14, 0, 4, 9, + 10, 12, 0, 1, 5, 6, 8, 9, 10, 13, 4, 6, 9, 11, 2, 4, 6, 7, 8, 14}; +} + +template +std::vector golden_values() +{ + return {0.1887315, 0.1368716, 0.1613618, 0.4839568, 0.4151455, 0.9012728, 0.7806592, 0.6713938, + 0.4281978, 0.5782541, 0.4538292, 0.0404218, 0.1264595, 0.3271811, 0.0725956, 0.0031886, + 0.6529871, 0.7025284, 0.1509823, 0.2051058, 0.6743041, 0.7965932, 0.5618279, 0.3343601, + 0.8330396, 0.6796557, 0.5684967, 0.2353606, 0.8159586, 0.3035201, 0.7145222, 0.7080131, + 0.4448774, 0.8879446, 0.2896976, 0.5502138, 0.3758983, 0.6988429, 0.6113330, 0.3722922, + 0.4468638, 0.8808504, 0.0408660, 0.6595733, 0.5429990, 0.7350115, 0.3736090, 0.9308268, + 0.8861471, 0.3089533, 0.3642297, 0.4673888, 0.8658971, 0.8499948, 0.6036414, 0.0184879, + 0.6970348, 0.1920955, 0.0650081, 0.7614104, 0.1568318, 0.3703384, 0.5869733, 0.1282817, + 0.2451350, 0.9711478, 0.5114062, 0.5030190, 0.5759066, 0.8097631, 0.3580396, 0.4049093, + 0.1618905, 0.2125666, 0.1163929, 0.4818056, 0.7425613, 0.7851025, 0.7386931, 0.7113866, + 0.5566008, 0.5112362, 0.5538779, 0.0846058, 0.3478690, 0.2713018, 0.5058042, 0.4784714, + 0.6136972, 0.5514663, 0.6401569, 0.1399612, 0.3880760, 0.6163719, 0.5653080, 0.4741685, + 0.3447469, 0.8718122, 0.8952977, 0.7043414, 0.7425023, 0.4490941, 0.3434426, 0.4157445, + 0.2318376, 0.6219735, 0.0641180, 0.8132253, 0.7734252, 0.0820893, 0.1556361, 0.1998085, + 0.2101485, 0.0405817, 0.2176879, 0.9811354, 0.2048657, 0.4163201, 0.3513670, 0.2649395}; +} + +template +std::vector golden_singular_values() +{ + return {3.819689, 2.097294, 1.926829}; +} + +// ============================================================================ +// Test: randomized SVD on golden data +// ============================================================================ + +template +class RandomizedSvdsTest : public ::testing::Test { + public: + RandomizedSvdsTest() + : stream(resource::get_cuda_stream(handle)), + m(20), + n(15), + 
k(3), + nnz(120), + d_indptr(raft::make_device_vector(handle, m + 1)), + d_indices(raft::make_device_vector(handle, nnz)), + d_values(raft::make_device_vector(handle, nnz)), + expected_S(raft::make_device_vector(handle, k)) + { + } + + protected: + void SetUp() override + { + auto h_indptr = golden_indptr(); + auto h_indices = golden_indices(); + auto h_values = golden_values(); + auto h_S = golden_singular_values(); + + raft::update_device(d_indptr.data_handle(), h_indptr.data(), m + 1, stream); + raft::update_device(d_indices.data_handle(), h_indices.data(), nnz, stream); + raft::update_device(d_values.data_handle(), h_values.data(), nnz, stream); + raft::update_device(expected_S.data_handle(), h_S.data(), k, stream); + } + + void Run() + { + auto csr_structure = + raft::make_device_compressed_structure_view( + d_indptr.data_handle(), d_indices.data_handle(), m, n, nnz); + auto csr_matrix = + raft::make_device_csr_matrix_view( + d_values.data_handle(), csr_structure); + + sparse_svd_config config; + config.n_components = k; + config.n_oversamples = 10; + config.n_power_iters = 4; + config.seed = 42; + + auto S = raft::make_device_vector(handle, k); + auto U = raft::make_device_matrix(handle, m, k); + auto Vt = raft::make_device_matrix(handle, k, n); + + sparse_randomized_svd(handle, config, csr_matrix, S.view(), U.view(), Vt.view()); + + // Singular values must match golden ground truth + ASSERT_TRUE(raft::devArrMatch( + S.data_handle(), expected_S.data_handle(), k, raft::CompareApprox(0.05), stream)); + + // U must be orthogonal: U^T U ~ I_k + auto UtU = raft::make_device_matrix(handle, k, k); + ValueType one = 1, zero = 0; + raft::linalg::gemm(handle, + U.data_handle(), + m, + k, + U.data_handle(), + UtU.data_handle(), + k, + k, + CUBLAS_OP_T, + CUBLAS_OP_N, + one, + zero, + stream); + + std::vector I_k(k * k, 0); + for (int i = 0; i < k; i++) + I_k[i * k + i] = 1; + auto I_k_dev = raft::make_device_matrix(handle, k, k); + 
raft::update_device(I_k_dev.data_handle(), I_k.data(), k * k, stream); + + ASSERT_TRUE(raft::devArrMatch(UtU.data_handle(), + I_k_dev.data_handle(), + k * k, + raft::CompareApprox(1e-4), + stream)); + + // Vt must have orthonormal rows: Vt Vt^T ~ I_k + auto VVt = raft::make_device_matrix(handle, k, k); + raft::linalg::gemm(handle, + Vt.data_handle(), + k, + n, + Vt.data_handle(), + VVt.data_handle(), + k, + k, + CUBLAS_OP_N, + CUBLAS_OP_T, + one, + zero, + stream); + + ASSERT_TRUE(raft::devArrMatch(VVt.data_handle(), + I_k_dev.data_handle(), + k * k, + raft::CompareApprox(1e-4), + stream)); + } + + raft::resources handle; + cudaStream_t stream; + int m, n, k, nnz; + raft::device_vector d_indptr; + raft::device_vector d_indices; + raft::device_vector d_values; + raft::device_vector expected_S; +}; + +using RandomizedSvdsTestF = RandomizedSvdsTest; +TEST_F(RandomizedSvdsTestF, GoldenData) { Run(); } + +using RandomizedSvdsTestD = RandomizedSvdsTest; +TEST_F(RandomizedSvdsTestD, GoldenData) { Run(); } + +// ============================================================================ +// Test: optional U / Vt outputs (skip flag) +// +// Verifies that passing std::nullopt for U or Vt (or both) skips that output: +// - singular values match the all-outputs reference exactly +// - when U is requested, it equals the reference U (sign-fixing uses U) +// - when Vt is requested without U, signs are derived from Vt — values are +// deterministic but may differ from the U+Vt reference; we check the +// row-norms (each Vt row should remain unit-norm) +// ============================================================================ + +template +class OptionalUVtTest : public ::testing::Test { + public: + OptionalUVtTest() + : stream(resource::get_cuda_stream(handle)), + m(20), + n(15), + k(3), + nnz(120), + d_indptr(raft::make_device_vector(handle, m + 1)), + d_indices(raft::make_device_vector(handle, nnz)), + d_values(raft::make_device_vector(handle, nnz)) + { + auto h_indptr = 
golden_indptr(); + auto h_indices = golden_indices(); + auto h_values = golden_values(); + raft::update_device(d_indptr.data_handle(), h_indptr.data(), m + 1, stream); + raft::update_device(d_indices.data_handle(), h_indices.data(), nnz, stream); + raft::update_device(d_values.data_handle(), h_values.data(), nnz, stream); + } + + template + void run_one(UViewT U_opt, VtViewT Vt_opt, ValueType* S_out) + { + auto csr_structure = + raft::make_device_compressed_structure_view( + d_indptr.data_handle(), d_indices.data_handle(), m, n, nnz); + auto csr_matrix = + raft::make_device_csr_matrix_view( + d_values.data_handle(), csr_structure); + + sparse_svd_config config; + config.n_components = k; + config.n_oversamples = 10; + config.n_power_iters = 4; + config.seed = 42; + + auto S = raft::make_device_vector(handle, k); + sparse_randomized_svd(handle, config, csr_matrix, S.view(), U_opt, Vt_opt); + raft::update_host(S_out, S.data_handle(), k, stream); + resource::sync_stream(handle); + } + + void Run() + { + using mat_view = raft::device_matrix_view; + + // Reference: ask for everything + auto U_ref = raft::make_device_matrix(handle, m, k); + auto Vt_ref = raft::make_device_matrix(handle, k, n); + std::vector S_ref(k); + run_one, std::optional>( + U_ref.view(), Vt_ref.view(), S_ref.data()); + + // Mode: S only — both nullopt + { + std::vector S_only(k); + run_one, std::optional>( + std::nullopt, std::nullopt, S_only.data()); + for (int i = 0; i < k; i++) { + ASSERT_NEAR(S_only[i], S_ref[i], 1e-4) << "S mismatch at i=" << i << " (S-only)"; + } + } + + // Mode: U + S + { + auto U_only = raft::make_device_matrix(handle, m, k); + std::vector S_u(k); + run_one, std::optional>( + U_only.view(), std::nullopt, S_u.data()); + for (int i = 0; i < k; i++) { + ASSERT_NEAR(S_u[i], S_ref[i], 1e-4) << "S mismatch at i=" << i << " (U-only)"; + } + // U is sign-corrected from U → must match reference U exactly + ASSERT_TRUE(raft::devArrMatch(U_only.data_handle(), + U_ref.data_handle(), + m 
* k, + raft::CompareApprox(1e-5), + stream)) + << "U should match reference (same sign convention)"; + } + + // Mode: Vt + S + { + auto Vt_only = raft::make_device_matrix(handle, k, n); + std::vector S_v(k); + run_one, std::optional>( + std::nullopt, Vt_only.view(), S_v.data()); + for (int i = 0; i < k; i++) { + ASSERT_NEAR(S_v[i], S_ref[i], 1e-4) << "S mismatch at i=" << i << " (Vt-only)"; + } + // In Vt-only mode, signs come from Vt instead of U; rows may flip relative to + // reference Vt. Verify row-orthonormality is preserved. + auto VVt = raft::make_device_matrix(handle, k, k); + ValueType one = 1, zero = 0; + raft::linalg::gemm(handle, + Vt_only.data_handle(), + k, + n, + Vt_only.data_handle(), + VVt.data_handle(), + k, + k, + CUBLAS_OP_N, + CUBLAS_OP_T, + one, + zero, + stream); + std::vector I_k(k * k, 0); + for (int i = 0; i < k; i++) + I_k[i * k + i] = 1; + auto I_k_dev = raft::make_device_matrix(handle, k, k); + raft::update_device(I_k_dev.data_handle(), I_k.data(), k * k, stream); + ASSERT_TRUE(raft::devArrMatch(VVt.data_handle(), + I_k_dev.data_handle(), + k * k, + raft::CompareApprox(1e-4), + stream)) + << "Vt rows must remain orthonormal in Vt-only mode"; + } + + // Determinism: same mode + same seed → identical output across runs + { + auto Vt_a = raft::make_device_matrix(handle, k, n); + auto Vt_b = raft::make_device_matrix(handle, k, n); + std::vector S_a(k), S_b(k); + run_one, std::optional>( + std::nullopt, Vt_a.view(), S_a.data()); + run_one, std::optional>( + std::nullopt, Vt_b.view(), S_b.data()); + ASSERT_TRUE(raft::devArrMatch(Vt_a.data_handle(), + Vt_b.data_handle(), + k * n, + raft::CompareApprox(1e-5), + stream)) + << "Vt-only mode must be deterministic"; + } + } + + raft::resources handle; + cudaStream_t stream; + int m, n, k, nnz; + raft::device_vector d_indptr; + raft::device_vector d_indices; + raft::device_vector d_values; +}; + +using OptionalUVtTestF = OptionalUVtTest; +TEST_F(OptionalUVtTestF, AllModes) { Run(); } + +using 
OptionalUVtTestD = OptionalUVtTest; +TEST_F(OptionalUVtTestD, AllModes) { Run(); } + +// ============================================================================ +// Test: reconstruction error ||A - U diag(S) Vt||_F +// ============================================================================ + +struct ReconstructionErrorTest : public ::testing::Test { + raft::resources handle; + cudaStream_t stream; + ReconstructionErrorTest() : stream(resource::get_cuda_stream(handle)) {} + + void Run() + { + using ValueType = float; + int m = 20, n = 15, k = 3, nnz = 120; + + auto h_indptr = golden_indptr(); + auto h_indices = golden_indices(); + auto h_values = golden_values(); + + auto d_indptr = raft::make_device_vector(handle, m + 1); + auto d_indices = raft::make_device_vector(handle, nnz); + auto d_values = raft::make_device_vector(handle, nnz); + raft::update_device(d_indptr.data_handle(), h_indptr.data(), m + 1, stream); + raft::update_device(d_indices.data_handle(), h_indices.data(), nnz, stream); + raft::update_device(d_values.data_handle(), h_values.data(), nnz, stream); + + auto csr_structure = raft::make_device_compressed_structure_view( + d_indptr.data_handle(), d_indices.data_handle(), m, n, nnz); + auto csr_matrix = raft::make_device_csr_matrix_view( + d_values.data_handle(), csr_structure); + + sparse_svd_config config; + config.n_components = k; + config.n_oversamples = 10; + config.n_power_iters = 4; + config.seed = 42; + + auto S = raft::make_device_vector(handle, k); + auto U = raft::make_device_matrix(handle, m, k); + auto Vt = raft::make_device_matrix(handle, k, n); + sparse_randomized_svd(handle, config, csr_matrix, S.view(), U.view(), Vt.view()); + + // Reconstruct: recon = U @ diag(S) @ Vt + // First: US = U * S (scale columns of U by S) + auto US = raft::make_device_matrix(handle, m, k); + raft::copy(US.data_handle(), U.data_handle(), m * k, stream); + std::vector h_S(k); + raft::update_host(h_S.data(), S.data_handle(), k, stream); + 
resource::sync_stream(handle); + for (int j = 0; j < k; j++) { + auto cublas_h = raft::resource::get_cublas_handle(handle); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasscal( + cublas_h, m, &h_S[j], US.data_handle() + j * m, 1, stream)); + } + + // recon = US @ Vt + auto recon = raft::make_device_matrix(handle, m, n); + ValueType one = 1, zero = 0; + raft::linalg::gemm(handle, + US.data_handle(), + m, + k, + Vt.data_handle(), + recon.data_handle(), + m, + n, + CUBLAS_OP_N, + CUBLAS_OP_N, + one, + zero, + stream); + + // Build dense A on host + std::vector h_dense(m * n, 0); + int vi = 0; + for (int i = 0; i < m; i++) + for (int jj = h_indptr[i]; jj < h_indptr[i + 1]; jj++) + h_dense[h_indices[jj] * m + i] = h_values[vi++]; + + auto dense_A = raft::make_device_matrix(handle, m, n); + raft::update_device(dense_A.data_handle(), h_dense.data(), m * n, stream); + + // error = A - recon + // Compute ||A - recon||_F / ||A||_F on host + std::vector h_recon(m * n); + raft::update_host(h_recon.data(), recon.data_handle(), m * n, stream); + resource::sync_stream(handle); + + double err_sq = 0, norm_sq = 0; + for (int i = 0; i < m * n; i++) { + double diff = h_dense[i] - h_recon[i]; + err_sq += diff * diff; + norm_sq += (double)h_dense[i] * h_dense[i]; + } + double rel_err = std::sqrt(err_sq / norm_sq); + + // With k=3 out of min(20,15)=15 components, relative error should be < 1.0 + ASSERT_LT(rel_err, 1.0) << "Reconstruction relative error too large: " << rel_err; + } +}; + +TEST_F(ReconstructionErrorTest, RelativeError) { Run(); } + +// ============================================================================ +// Test: mean-centered linear operator +// Ground truth: dense SVD of explicitly centered matrix +// ============================================================================ + +template +struct mean_centered_operator { + detail::csr_linear_operator base_op_; + ValueType* col_means_; + int m_, n_; + + mean_centered_operator(raft::device_csr_matrix_view A, + 
ValueType* col_means, + int m, + int n) + : base_op_(A), col_means_(col_means), m_(m), n_(n) + { + } + + int rows() const { return m_; } + int cols() const { return n_; } + auto csr_view() const { return base_op_.csr_view(); } + + // Y = (A - 1*mean^T) @ X = A@X - ones * (mean^T @ X) + void apply(raft::resources const& handle, + raft::device_matrix_view X, + raft::device_matrix_view Y) const + { + auto stream = raft::resource::get_cuda_stream(handle); + auto cublas = raft::resource::get_cublas_handle(handle); + int bk = X.extent(1); + base_op_.apply(handle, X, Y); + rmm::device_uvector corr(bk, stream); + rmm::device_uvector ones(m_, stream); + std::vector h(m_, 1); + raft::update_device(ones.data(), h.data(), m_, stream); + ValueType a1 = 1, a0 = 0, am1 = -1; + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemv(cublas, + CUBLAS_OP_T, + n_, + bk, + &a1, + X.data_handle(), + n_, + col_means_, + 1, + &a0, + corr.data(), + 1, + stream)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas, + CUBLAS_OP_N, + CUBLAS_OP_N, + m_, + bk, + 1, + &am1, + ones.data(), + m_, + corr.data(), + 1, + &a1, + Y.data_handle(), + m_, + stream)); + } + + // Z = (A - 1*mean^T)^T @ X = A^T@X - mean * (1^T @ X) + void apply_transpose(raft::resources const& handle, + raft::device_matrix_view X, + raft::device_matrix_view Z) const + { + auto stream = raft::resource::get_cuda_stream(handle); + auto cublas = raft::resource::get_cublas_handle(handle); + int bk = X.extent(1); + base_op_.apply_transpose(handle, X, Z); + rmm::device_uvector sums(bk, stream); + rmm::device_uvector ones(m_, stream); + std::vector h(m_, 1); + raft::update_device(ones.data(), h.data(), m_, stream); + ValueType a1 = 1, a0 = 0, am1 = -1; + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemv(cublas, + CUBLAS_OP_T, + m_, + bk, + &a1, + X.data_handle(), + m_, + ones.data(), + 1, + &a0, + sums.data(), + 1, + stream)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas, + CUBLAS_OP_N, + CUBLAS_OP_N, + n_, + bk, + 1, + 
&am1, + col_means_, + n_, + sums.data(), + 1, + &a1, + Z.data_handle(), + n_, + stream)); + } +}; + +class MeanCenteredOperatorTest : public ::testing::Test { + public: + MeanCenteredOperatorTest() : stream(resource::get_cuda_stream(handle)) {} + + protected: + void Run() + { + using ValueType = float; + using IndexType = int; + + int m = 20, n = 15, k = 3, nnz = 120; + + auto h_indptr = golden_indptr(); + auto h_indices = golden_indices(); + auto h_values = golden_values(); + + auto d_indptr = raft::make_device_vector(handle, m + 1); + auto d_indices = raft::make_device_vector(handle, nnz); + auto d_values = raft::make_device_vector(handle, nnz); + raft::update_device(d_indptr.data_handle(), h_indptr.data(), m + 1, stream); + raft::update_device(d_indices.data_handle(), h_indices.data(), nnz, stream); + raft::update_device(d_values.data_handle(), h_values.data(), nnz, stream); + + auto csr_structure = + raft::make_device_compressed_structure_view( + d_indptr.data_handle(), d_indices.data_handle(), m, n, nnz); + auto csr_matrix = + raft::make_device_csr_matrix_view( + d_values.data_handle(), csr_structure); + + // Reconstruct dense matrix on host to compute column means + std::vector dense(m * n, 0); + int vi = 0; + for (int i = 0; i < m; i++) { + for (int jj = h_indptr[i]; jj < h_indptr[i + 1]; jj++) { + dense[h_indices[jj] * m + i] = h_values[vi++]; + } + } + + std::vector h_means(n, 0); + for (int j = 0; j < n; j++) { + for (int i = 0; i < m; i++) + h_means[j] += dense[j * m + i]; + h_means[j] /= m; + } + auto d_means = raft::make_device_vector(handle, n); + raft::update_device(d_means.data_handle(), h_means.data(), n, stream); + + // Ground truth: dense SVD of (A - 1*mean^T) + std::vector centered(m * n); + for (int j = 0; j < n; j++) + for (int i = 0; i < m; i++) + centered[j * m + i] = dense[j * m + i] - h_means[j]; + + auto centered_dev = + raft::make_device_matrix(handle, m, n); + raft::update_device(centered_dev.data_handle(), centered.data(), m * n, 
stream); + auto ref_S = raft::make_device_vector(handle, n); + auto ref_U = raft::make_device_matrix(handle, m, n); + auto ref_Vt = raft::make_device_matrix(handle, n, n); + raft::linalg::svdQR(handle, + centered_dev.data_handle(), + m, + n, + ref_S.data_handle(), + ref_U.data_handle(), + ref_Vt.data_handle(), + false, + true, + true, + stream); + + // Operator-based SVD + mean_centered_operator op(csr_matrix, d_means.data_handle(), m, n); + + sparse_svd_config config; + config.n_components = k; + config.n_oversamples = 10; + config.n_power_iters = 4; + config.seed = 42; + + auto S = raft::make_device_vector(handle, k); + auto U = raft::make_device_matrix(handle, m, k); + auto Vt = raft::make_device_matrix(handle, k, n); + sparse_randomized_svd(handle, config, op, S.view(), U.view(), Vt.view()); + + // Singular values must match dense centered ground truth + ASSERT_TRUE(raft::devArrMatch( + S.data_handle(), ref_S.data_handle(), k, raft::CompareApprox(0.1), stream)); + } + + raft::resources handle; + cudaStream_t stream; +}; + +TEST_F(MeanCenteredOperatorTest, OperatorVsDenseGroundTruth) { Run(); } + +} // namespace raft::sparse::solver diff --git a/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt b/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt index 516b8d61f1..969b9b68b7 100644 --- a/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt +++ b/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt @@ -1,12 +1,12 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= # Set the list of Cython files to build -set(cython_sources lanczos.pyx) +set(cython_sources lanczos.pyx svds.pyx) # TODO: should finally be replaced with 'compiled' library to be more generic, when that is # available diff --git a/python/pylibraft/pylibraft/sparse/linalg/__init__.py b/python/pylibraft/pylibraft/sparse/linalg/__init__.py index edf9b39e9c..e632a723d3 100644 --- a/python/pylibraft/pylibraft/sparse/linalg/__init__.py +++ b/python/pylibraft/pylibraft/sparse/linalg/__init__.py @@ -1,7 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # from .lanczos import eigsh +from .svds import svds -__all__ = ["eigsh"] +__all__ = ["eigsh", "svds"] diff --git a/python/pylibraft/pylibraft/sparse/linalg/svds.pyx b/python/pylibraft/pylibraft/sparse/linalg/svds.pyx new file mode 100644 index 0000000000..b9a2778190 --- /dev/null +++ b/python/pylibraft/pylibraft/sparse/linalg/svds.pyx @@ -0,0 +1,284 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+# SPDX-License-Identifier: Apache-2.0 +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import warnings + +import cupy as cp +import cupyx.scipy.sparse as cupy_sparse +import numpy as np + +from cython.operator cimport dereference as deref +from libc.stdint cimport int64_t, uint32_t, uint64_t, uintptr_t + +from pylibraft.common import Handle, cai_wrapper, device_ndarray +from pylibraft.common.handle import auto_sync_handle + +from pylibraft.common.cpp.mdspan cimport ( + col_major, + device_matrix_view, + device_vector_view, + make_device_matrix_view, + make_device_vector_view, + row_major, +) +from pylibraft.common.cpp.optional cimport optional +from pylibraft.common.handle cimport device_resources + + +cdef extern from "raft/sparse/solver/svds_config.hpp" \ + namespace "raft::sparse::solver" nogil: + + cdef cppclass sparse_svd_config[ValueTypeT]: + int n_components + int n_oversamples + int n_power_iters + optional[uint64_t] seed + + +cdef extern from "raft_runtime/solver/randomized_svds.hpp" \ + namespace "raft::runtime::solver" nogil: + + cdef void sparse_randomized_svd_f \ + "raft::runtime::solver::sparse_randomized_svd"( + const device_resources &handle, + const sparse_svd_config[float] &config, + device_vector_view[int, uint32_t] indptr, + device_vector_view[int, uint32_t] indices, + device_vector_view[float, uint32_t] data, + int n_rows, int n_cols, int nnz, + device_vector_view[float, uint32_t] singular_values, + optional[device_matrix_view[float, uint32_t, col_major]] U, + optional[device_matrix_view[float, uint32_t, col_major]] Vt) except + + + cdef void sparse_randomized_svd_d \ + "raft::runtime::solver::sparse_randomized_svd"( + const device_resources &handle, + const sparse_svd_config[double] &config, + device_vector_view[int, uint32_t] indptr, + device_vector_view[int, uint32_t] indices, + device_vector_view[double, uint32_t] data, + int n_rows, int n_cols, int nnz, + 
device_vector_view[double, uint32_t] singular_values, + optional[device_matrix_view[double, uint32_t, col_major]] U, + optional[device_matrix_view[double, uint32_t, col_major]] Vt) except + + + +@auto_sync_handle +def svds(A, k=6, n_oversamples=10, n_power_iters=2, + seed=None, return_singular_vectors=True, handle=None): + """ + Compute the largest ``k`` singular values and corresponding singular + vectors of a sparse matrix using randomized SVD. + + Computes the truncated SVD: ``A ~ U @ diag(S) @ Vt``. + + Args: + A (cupyx.scipy.sparse.csr_matrix): Sparse CSR matrix of shape + ``(m, n)``. Must be of type + :class:`cupyx.scipy.sparse.csr_matrix`. + k (int): Number of singular values and vectors to compute. Must be + ``1 <= k < min(m, n)``. Default 6. + n_oversamples (int): Number of extra random vectors for better + approximation. Total subspace dimension is ``k + n_oversamples``. + Default 10. + n_power_iters (int): Number of power iteration passes. More + iterations improve accuracy for matrices with slowly decaying + singular values. Default 2. + seed (int or None): Random seed for reproducibility. If ``None``, + a non-deterministic seed is used. + return_singular_vectors (bool or {{"u", "vh"}}): Controls which + singular vectors are returned (matches + :func:`scipy.sparse.linalg.svds`). + + - ``True`` (default): return ``(U, S, Vt)``. + - ``False``: skip both vector matrices, return ``(None, S, None)``. + - ``"u"``: skip ``Vt``, return ``(U, S, None)``. + - ``"vh"``: skip ``U``, return ``(None, S, Vt)``. + + Skipping a side avoids the corresponding output buffer and + final matrix multiplication. + handle: RAFT resource handle. If ``None``, a default is created. + + Returns: + tuple: + ``(U, S, Vt)`` where ``U`` is left singular vectors ``(m, k)``, + ``S`` is singular values ``(k,)`` in descending order, and + ``Vt`` is right singular vectors ``(k, n)``. ``U`` and/or ``Vt`` + may be ``None`` depending on ``return_singular_vectors``. + + .. 
seealso:: + :func:`scipy.sparse.linalg.svds` + + .. note:: + This function uses randomized SVD (Halko et al. 2009) with + CholeskyQR2 orthogonalization (Tomas et al. 2024) for efficient + GPU execution. + + """ + + if A is None: + raise ValueError("'A' cannot be None!") + + if not cupy_sparse.issparse(A): + raise TypeError( + "Expected a cupyx.scipy.sparse matrix, got %s" % type(A).__name__ + ) + + if not isinstance(A, cupy_sparse.csr_matrix): + raise TypeError( + "Expected a cupyx.scipy.sparse.csr_matrix, got %s. " + "Use A.tocsr() to convert." % type(A).__name__ + ) + + if not ( + return_singular_vectors is True + or return_singular_vectors is False + or return_singular_vectors == "u" + or return_singular_vectors == "vh" + ): + raise ValueError( + "return_singular_vectors must be True, False, 'u', or 'vh', " + "got %r" % (return_singular_vectors,) + ) + want_U = (return_singular_vectors is True) or (return_singular_vectors == "u") + want_Vt = (return_singular_vectors is True) or (return_singular_vectors == "vh") + + m, n = A.shape + + if k < 1 or k >= min(m, n): + raise ValueError( + f"k must satisfy 1 <= k < min(m, n), got k={k}, m={m}, n={n}" + ) + + # Extract CSR arrays and ensure int32 indices. raft's runtime layer takes + # `int` (int32) for nnz / indptr / indices, so any overflow on conversion + # cannot be recovered downstream — error out before the cast. + indptr = A.indptr + indices = A.indices + data = A.data + + INT32_MAX = (1 << 31) - 1 + + IndexType = indptr.dtype + if IndexType == np.int64: + if A.nnz > INT32_MAX: + raise OverflowError( + f"nnz={A.nnz} exceeds int32 max ({INT32_MAX}); " + "raft sparse SVD requires nnz to fit in int32." + ) + if n > INT32_MAX: + raise OverflowError( + f"n_cols={n} exceeds int32 max ({INT32_MAX}); " + "raft sparse SVD requires column indices to fit in int32." + ) + warnings.warn( + "Input matrix has int64 indices which will be converted to " + "int32. 
The conversion is safe for this matrix (nnz and " + "column indices fit in int32).", + UserWarning, + stacklevel=2, + ) + indptr = indptr.astype(np.int32) + indices = indices.astype(np.int32) + elif IndexType != np.int32: + raise TypeError("dtype IndexType=%s not supported, " + "expected int32 or int64" % IndexType) + + indptr = cai_wrapper(indptr) + indices = cai_wrapper(indices) + data = cai_wrapper(data) + + ValueType = data.dtype + nnz = A.nnz + + indptr_ptr = indptr.data + indices_ptr = indices.data + data_ptr = data.data + + cdef optional[uint64_t] seed_opt + if seed is not None: + seed_opt = seed + + # Allocate outputs + S_out = device_ndarray.empty((k,), dtype=ValueType, order='F') + S_cai = cai_wrapper(S_out) + S_ptr = S_cai.data + + U_out = None + Vt_out = None + cdef uintptr_t U_ptr = 0 + cdef uintptr_t Vt_ptr = 0 + if want_U: + U_out = device_ndarray.empty((m, k), dtype=ValueType, order='F') + U_ptr = cai_wrapper(U_out).data + if want_Vt: + Vt_out = device_ndarray.empty((k, n), dtype=ValueType, order='F') + Vt_ptr = cai_wrapper(Vt_out).data + + handle = handle if handle is not None else Handle() + cdef device_resources *h = handle.getHandle() + + cdef sparse_svd_config[float] cfg_float + cdef sparse_svd_config[double] cfg_double + cdef optional[device_matrix_view[float, uint32_t, col_major]] U_opt_f + cdef optional[device_matrix_view[float, uint32_t, col_major]] Vt_opt_f + cdef optional[device_matrix_view[double, uint32_t, col_major]] U_opt_d + cdef optional[device_matrix_view[double, uint32_t, col_major]] Vt_opt_d + + if ValueType == np.float32: + cfg_float.n_components = k + cfg_float.n_oversamples = n_oversamples + cfg_float.n_power_iters = n_power_iters + cfg_float.seed = seed_opt + if want_U: + U_opt_f = make_device_matrix_view[float, uint32_t, col_major]( + U_ptr, m, k) + if want_Vt: + Vt_opt_f = make_device_matrix_view[float, uint32_t, col_major]( + Vt_ptr, k, n) + sparse_randomized_svd_f( + deref(h), + cfg_float, + 
make_device_vector_view(indptr_ptr, (m + 1)), + make_device_vector_view(indices_ptr, nnz), + make_device_vector_view(data_ptr, nnz), + m, n, nnz, + make_device_vector_view(S_ptr, k), + U_opt_f, + Vt_opt_f, + ) + elif ValueType == np.float64: + cfg_double.n_components = k + cfg_double.n_oversamples = n_oversamples + cfg_double.n_power_iters = n_power_iters + cfg_double.seed = seed_opt + if want_U: + U_opt_d = make_device_matrix_view[double, uint32_t, col_major]( + U_ptr, m, k) + if want_Vt: + Vt_opt_d = make_device_matrix_view[double, uint32_t, col_major]( + Vt_ptr, k, n) + sparse_randomized_svd_d( + deref(h), + cfg_double, + make_device_vector_view(indptr_ptr, (m + 1)), + make_device_vector_view(indices_ptr, nnz), + make_device_vector_view(data_ptr, nnz), + m, n, nnz, + make_device_vector_view(S_ptr, k), + U_opt_d, + Vt_opt_d, + ) + else: + raise TypeError("dtype ValueType=%s not supported, " + "expected float32 or float64" % ValueType) + + U_ret = cp.asarray(U_out) if U_out is not None else None + Vt_ret = cp.asarray(Vt_out) if Vt_out is not None else None + return (U_ret, cp.asarray(S_out), Vt_ret) diff --git a/python/pylibraft/pylibraft/tests/test_sparse.py b/python/pylibraft/pylibraft/tests/test_sparse.py index 61cc4d6e62..b6b66721a4 100644 --- a/python/pylibraft/pylibraft/tests/test_sparse.py +++ b/python/pylibraft/pylibraft/tests/test_sparse.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # @@ -9,7 +9,7 @@ import scipy from cupyx.scipy import sparse -from pylibraft.sparse.linalg import eigsh +from pylibraft.sparse.linalg import eigsh, svds def shaped_random( @@ -160,3 +160,194 @@ def test_reproducibility(self, dtype, seed): # Results should be identical assert cupy.allclose(w1, w2), "Eigenvalues differ with same seed" assert cupy.allclose(v1, v2), "Eigenvectors differ with same seed" + + +class TestSvds: + """Tests for sparse randomized SVD.""" + + @pytest.mark.parametrize("dtype", [numpy.float32, numpy.float64]) + @pytest.mark.parametrize("k", [3, 10]) + def test_singular_values_positive_descending(self, dtype, k): + """Singular values must be positive and in descending order.""" + m, n = 100, 60 + cupy.random.seed(42) + A = sparse.random(m, n, density=0.3, format="csr", dtype=dtype) + U, S, Vt = svds(A, k=k, seed=42) + S_host = cupy.asnumpy(S) + assert all(S_host > 0), "Singular values must be positive" + assert all( + S_host[i] >= S_host[i + 1] for i in range(len(S_host) - 1) + ), "Singular values must be descending" + + @pytest.mark.parametrize("dtype", [numpy.float32, numpy.float64]) + @pytest.mark.parametrize("k", [3, 10]) + def test_orthogonality(self, dtype, k): + """U columns and Vt rows must be orthonormal.""" + m, n = 100, 60 + cupy.random.seed(42) + A = sparse.random(m, n, density=0.3, format="csr", dtype=dtype) + U, S, Vt = svds(A, k=k, seed=42) + + tol = 1e-4 if dtype == numpy.float32 else 1e-8 + I_k = cupy.eye(k, dtype=dtype) + + UtU = U.T @ U + assert cupy.allclose(UtU, I_k, atol=tol), "U is not orthogonal" + + VtVtT = Vt @ Vt.T + assert cupy.allclose(VtVtT, I_k, atol=tol), "Vt rows not orthonormal" + + @pytest.mark.parametrize("dtype", [numpy.float32, numpy.float64]) + def test_reconstruction_error(self, dtype): + """||A - U @ diag(S) @ Vt|| must be bounded by the (k+1)-th singular value.""" + m, n = 80, 50 + k = 5 + cupy.random.seed(42) + A = sparse.random(m, n, density=0.4, format="csr", 
dtype=dtype)
+        U, S, Vt = svds(A, k=k, n_power_iters=4, seed=42)
+
+        A_dense = A.toarray()
+        recon = (U * S[None, :]) @ Vt
+        err = cupy.linalg.norm(A_dense - recon)
+        norm_A = cupy.linalg.norm(A_dense)
+
+        # Loose bound by design: a rank-k factorization of a dense-spectrum
+        # random matrix cannot reconstruct A closely, so only require the
+        # relative Frobenius error to be strictly below 1.0.
+        assert err / norm_A < 1.0, (
+            f"Reconstruction error too large: {float(err / norm_A):.4f}"
+        )
+
+    def test_shapes(self):
+        """Output shapes must match (m,k), (k,), (k,n)."""
+        m, n, k = 100, 60, 5
+        A = sparse.random(m, n, density=0.3, format="csr", dtype=numpy.float32)
+        U, S, Vt = svds(A, k=k, seed=42)
+        assert U.shape == (m, k)
+        assert S.shape == (k,)
+        assert Vt.shape == (k, n)
+
+    def test_int64_indices(self):
+        """int64 indices should be auto-converted and work, with a warning."""
+        m, n, k = 50, 30, 3
+        cupy.random.seed(42)
+        A = sparse.random(m, n, density=0.3, format="csr", dtype=numpy.float32)
+
+        # cupyx CSR's constructor normalizes index dtype back to int32, so
+        # bypass it via field reassignment to actually hold int64 indices.
+        A.indptr = A.indptr.astype(cupy.int64)
+        A.indices = A.indices.astype(cupy.int64)
+        assert A.indptr.dtype == numpy.int64
+
+        with pytest.warns(UserWarning, match="int64"):
+            U, S, Vt = svds(A, k=k, seed=42)
+        assert S.shape == (k,)
+        assert all(cupy.asnumpy(S) > 0)
+
+    def test_invalid_k(self):
+        """k out of range (k <= 0 or k >= min(m, n)) must raise ValueError."""
+        A = sparse.random(20, 15, density=0.3, format="csr")
+        with pytest.raises(ValueError):
+            svds(A, k=0)
+        with pytest.raises(ValueError):
+            svds(A, k=15)
+
+    def test_invalid_input_matrix(self):
+        """Non-CSR / non-sparse / None inputs must be rejected with a clear error."""
+        # None
+        with pytest.raises(ValueError, match="'A' cannot be None"):
+            svds(None, k=3)
+
+        # Dense array (not sparse at all)
+        dense = cupy.random.rand(20, 15).astype(numpy.float32)
+        with pytest.raises(TypeError, match="cupyx.scipy.sparse"):
+            svds(dense, k=3)
+
+        # Sparse but wrong layout (COO / CSC)
+        A_coo = sparse.random(
+            20, 15, density=0.3, format="coo", dtype=numpy.float32
+        )
+        with pytest.raises(TypeError, match="csr_matrix"):
+            svds(A_coo, k=3)
+
+        A_csc = sparse.random(
+            20, 15, density=0.3, format="csc", dtype=numpy.float32
+        )
+        with pytest.raises(TypeError, match="csr_matrix"):
+            svds(A_csc, k=3)
+
+    @pytest.mark.parametrize("dtype", [numpy.float32, numpy.float64])
+    def test_reproducibility(self, dtype):
+        """Same seed must give same results."""
+        m, n, k = 80, 50, 5
+        cupy.random.seed(42)
+        A = sparse.random(m, n, density=0.3, format="csr", dtype=dtype)
+
+        _, S1, _ = svds(A, k=k, seed=123)
+        _, S2, _ = svds(A, k=k, seed=123)
+
+        tol = 1e-5 if dtype == numpy.float32 else 1e-10
+        assert cupy.allclose(S1, S2, atol=tol), (
+            "Not reproducible with same seed"
+        )
+
+    @pytest.mark.parametrize("dtype", [numpy.float32, numpy.float64])
+    def test_return_singular_vectors_modes(self, dtype):
+        """All four scipy-style modes: True (default, reference), False, "u", "vh"."""
+        m, n, k = 60, 40, 4
+        cupy.random.seed(42)
+        A = sparse.random(m, n, density=0.3, format="csr", 
dtype=dtype) + + # Reference: full output + U_ref, S_ref, Vt_ref = svds(A, k=k, seed=42) + assert U_ref.shape == (m, k) + assert Vt_ref.shape == (k, n) + + # False: singular values only + U_f, S_f, Vt_f = svds(A, k=k, seed=42, return_singular_vectors=False) + assert U_f is None + assert Vt_f is None + assert cupy.allclose(S_f, S_ref) + + # "u": U + S + U_u, S_u, Vt_u = svds(A, k=k, seed=42, return_singular_vectors="u") + assert U_u.shape == (m, k) + assert Vt_u is None + assert cupy.allclose(S_u, S_ref) + # U-based sign correction matches the full-output reference exactly + tol = 1e-4 if dtype == numpy.float32 else 1e-10 + assert cupy.allclose(U_u, U_ref, atol=tol) + + # "vh": Vt + S + U_v, S_v, Vt_v = svds(A, k=k, seed=42, return_singular_vectors="vh") + assert U_v is None + assert Vt_v.shape == (k, n) + assert cupy.allclose(S_v, S_ref) + # Vt rows should remain orthonormal even though signs may differ + I_k = cupy.eye(k, dtype=dtype) + assert cupy.allclose(Vt_v @ Vt_v.T, I_k, atol=tol) + + def test_return_singular_vectors_invalid(self): + """Invalid return_singular_vectors must raise ValueError.""" + A = sparse.random(20, 15, density=0.3, format="csr") + with pytest.raises(ValueError): + svds(A, k=3, return_singular_vectors="bogus") + with pytest.raises(ValueError): + svds(A, k=3, return_singular_vectors=1) + + @pytest.mark.parametrize("dtype", [numpy.float32, numpy.float64]) + def test_return_singular_vectors_deterministic(self, dtype): + """Each mode must be deterministic across runs with the same seed.""" + m, n, k = 50, 30, 3 + cupy.random.seed(42) + A = sparse.random(m, n, density=0.3, format="csr", dtype=dtype) + + # cuSOLVER gesvd inside the algorithm is not bit-deterministic across + # runs even with a fixed RNG seed, so allow a small numerical tolerance. 
+ atol = 1e-4 if dtype == numpy.float32 else 1e-10 + for mode in (False, "u", "vh"): + res_a = svds(A, k=k, seed=7, return_singular_vectors=mode) + res_b = svds(A, k=k, seed=7, return_singular_vectors=mode) + assert cupy.allclose(res_a[1], res_b[1], atol=atol) + if mode in (True, "u"): + assert cupy.allclose(res_a[0], res_b[0], atol=atol) + if mode in (True, "vh"): + assert cupy.allclose(res_a[2], res_b[2], atol=atol)