diff --git a/mlx/backend/cuda/allocator.cpp b/mlx/backend/cuda/allocator.cpp index 718ae33e9c..244218d4db 100644 --- a/mlx/backend/cuda/allocator.cpp +++ b/mlx/backend/cuda/allocator.cpp @@ -318,7 +318,7 @@ void CudaAllocator::move_to_unified_memory( buf.device = -1; } -// This must be called with mutex_ aquired +// This must be called with mutex_ acquired void CudaAllocator::free_cuda_buffer(CudaBuffer* buf) { if (scalar_pool_.in_pool(buf)) { scalar_pool_.free(buf); diff --git a/mlx/backend/cuda/eval.cpp b/mlx/backend/cuda/eval.cpp index ef9ee20cfa..c5bfe2fa85 100644 --- a/mlx/backend/cuda/eval.cpp +++ b/mlx/backend/cuda/eval.cpp @@ -13,7 +13,7 @@ namespace mlx::core::gpu { void init() { - // Force initalization of CUDA, so CUDA runtime get destroyed last. + // Force initialization of CUDA, so the CUDA runtime gets destroyed last. cudaFree(nullptr); // Make sure CUDA event pool get destroyed after device and stream. mlx::core::cu::CudaEvent::init_pool(); diff --git a/mlx/backend/metal/kernels/fft.h b/mlx/backend/metal/kernels/fft.h index e478a85b6c..3cce29c574 100644 --- a/mlx/backend/metal/kernels/fft.h +++ b/mlx/backend/metal/kernels/fft.h @@ -229,7 +229,7 @@ template uint3 grid [[threads_per_grid]]) { // Use Rader's algorithm to compute fast FFTs // when a prime factor `p` of `n` is greater than 13 but - // has `p - 1` Stockham decomposable into to prime factors <= 13. + // has `p - 1` Stockham decomposable into prime factors <= 13. // // E.g. n = 102 // = 2 * 3 * 17