From c483c158100252f9cf928b0bd3821e32c87f03a6 Mon Sep 17 00:00:00 2001
From: Patrick Mours <pmours@nvidia.com>
Date: Mon, 10 Feb 2025 14:55:28 +0100
Subject: [PATCH] Cycles: Add Blackwell to Cycles CUDA binaries architectures

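Enable building a cubin for GPUs based on the Blackwell architecture
when CUDA toolkit version 12.8 or newer is installed; with an older
toolkit these architectures are skipped with a status message.
The version check also covers sm_100 and sm_101, but only sm_120 is
added to the default set of architectures, since it is the one relevant
for consumer GPUs (RTX 5090 etc.) that are generally used with Blender.
The buildbot CUDA 12 toolkit is bumped to 12.8.0 accordingly.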

Pull Request: https://projects.blender.org/blender/blender/pulls/134170
---
 CMakeLists.txt                            | 2 +-
 build_files/config/pipeline_config.yaml   | 2 +-
 intern/cycles/kernel/CMakeLists.txt       | 8 ++++++++
 intern/cycles/kernel/device/cuda/config.h | 4 ++--
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 378f757d285..8da797ed155 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -669,7 +669,7 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64"))
 
   option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
   set(CYCLES_CUDA_BINARIES_ARCH
-    sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 compute_75
+    sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 sm_120 compute_75
     CACHE STRING "CUDA architectures to build binaries for"
   )
   option(WITH_CYCLES_CUDA_BUILD_SERIAL "\
diff --git a/build_files/config/pipeline_config.yaml b/build_files/config/pipeline_config.yaml
index e5f293e9da8..c817aa94b90 100644
--- a/build_files/config/pipeline_config.yaml
+++ b/build_files/config/pipeline_config.yaml
@@ -11,7 +11,7 @@ buildbot:
     cuda11:
         version: '11.4.1'
     cuda12:
-        version: '12.1.1'
+        version: '12.8.0'
     hip:
         version: '5.7.32000'
     hiprt:
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 56fb1fafd98..baff8ef004f 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -577,6 +577,14 @@ if(WITH_CYCLES_CUDA_BINARIES)
       else()
         message(STATUS "CUDA binaries for ${arch} require CUDA 11.1+, skipped.")
       endif()
+    elseif(${arch} MATCHES ".*_10." OR ${arch} MATCHES ".*_120")
+      if("${CUDA_VERSION}" GREATER_EQUAL 128) # Support for sm_100, sm_101, sm_120 was introduced in CUDA 12.8
+        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
+        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
+        set(cuda_version ${CUDA_VERSION})
+      else()
+        message(STATUS "CUDA binaries for ${arch} require CUDA 12.8+, skipped.")
+      endif()
     else()
       set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
       set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
diff --git a/intern/cycles/kernel/device/cuda/config.h b/intern/cycles/kernel/device/cuda/config.h
index 75edb808bcb..6506775cd9e 100644
--- a/intern/cycles/kernel/device/cuda/config.h
+++ b/intern/cycles/kernel/device/cuda/config.h
@@ -62,8 +62,8 @@
 #    define GPU_KERNEL_MAX_REGISTERS 48
 #  endif
 
-/* 7.x, 8.x */
-#elif __CUDA_ARCH__ <= 899
+/* 7.x, 8.x, 12.x */
+#elif __CUDA_ARCH__ <= 1299
 #  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
 #  define GPU_MULTIPROCESSOR_MAX_BLOCKS 32
 #  define GPU_BLOCK_MAX_THREADS 1024
-- 
GitLab