diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e78c2e17e1bec995cff77cbbe6790ee8c71073d..8bbe173f8aa4e5722871c99ebc602e08640954d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -762,6 +762,9 @@ oneAPI targets to build AOT binaries for" mark_as_advanced(CYCLES_ONEAPI_SYCL_TARGETS) endif() +# ANARI +option(WITH_CYCLES_DEVICE_ANARI "Enable Cycles ANARI compute support" ON) + # Draw Manager option(WITH_DRAW_DEBUG "Add extra debug capabilities to Draw Manager" OFF) mark_as_advanced(WITH_DRAW_DEBUG) diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 49b9b5bf88832d54ca55019d7bb6bd5f9c49ec9d..fbbbf00a2ea5063e985a10e3a1c0c667a458c23e 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -244,6 +244,10 @@ if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX) endif() endif() +if(WITH_CYCLES_DEVICE_ANARI) + add_definitions(-DWITH_ANARI) +endif() + if(WITH_CYCLES_DEVICE_HIP) add_definitions(-DWITH_HIP) diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 92d73a1de075774a0755beabedac36fd32aff41f..1163f01af564f558da34b863ae75b30d635b15b4 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -15,7 +15,7 @@ def _configure_argument_parser(): action='store_true') parser.add_argument("--cycles-device", help="Set the device to use for Cycles, overriding user preferences and the scene setting." - "Valid options are 'CPU', 'CUDA', 'OPTIX', 'HIP', 'ONEAPI', or 'METAL'." + "Valid options are 'CPU', 'CUDA', 'OPTIX', 'HIP', 'ONEAPI', 'METAL' or 'ANARI'." "Additionally, you can append '+CPU' to any GPU type for hybrid rendering.", default=None) return parser diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 33d0407818f7ea406f7809117373ae9b2e509432..ee136c699209d50e710ad6d7ee11ba0ead44048d 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -179,7 +179,8 @@ enum_device_type = ( ('OPTIX', "OptiX", "OptiX", 3), ('HIP', "HIP", "HIP", 4), ('METAL', "Metal", "Metal", 5), - ('ONEAPI', "oneAPI", "oneAPI", 6) + ('ONEAPI', "oneAPI", "oneAPI", 6), + ('ANARI', "ANARI", "ANARI", 7) ) enum_texture_limit = ( @@ -1539,7 +1540,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): def get_device_types(self, context): import _cycles - has_cuda, has_optix, has_hip, has_metal, has_oneapi, has_hiprt = _cycles.get_device_types() + has_cuda, has_optix, has_hip, has_metal, has_oneapi, has_hiprt, has_anari = _cycles.get_device_types() list = [('NONE', "None", "Don't use compute device", 0)] if has_cuda: @@ -1552,6 +1553,8 @@ class CyclesPreferences(bpy.types.AddonPreferences): list.append(('METAL', "Metal", "Use Metal for GPU acceleration", 5)) if has_oneapi: list.append(('ONEAPI', "oneAPI", "Use oneAPI for GPU acceleration", 6)) + if has_anari: + list.append(('ANARI', "ANARI", "Use ANARI for acceleration", 7)) return list @@ -1626,7 +1629,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): def update_device_entries(self, device_list): for device in device_list: - if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL', 'ONEAPI'}: + if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL', 'ONEAPI', 'ANARI'}: continue # Try to find existing Device entry entry = self.find_existing_device_entry(device) @@ -1665,7 +1668,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): def refresh_devices(self): # Ensure `self.devices` is not re-allocated when the second call to # get_devices_for_type is made, freeing items from the first list. - for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL', 'ONEAPI'): + for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL', 'ONEAPI', 'ANARI'): # Query the device list to trigger all required updates. # Note that even though the device list is unused, # the function has side-effects with internal state updates. @@ -1809,6 +1812,9 @@ class CyclesPreferences(bpy.types.AddonPreferences): mac_version = "12.2" col.label(text=rpt_("Requires Apple Silicon with macOS %s or newer") % mac_version, icon='BLANK1', translate=False) + elif device_type == 'ANARI': + pass + return for device in devices: @@ -1881,6 +1887,9 @@ class CyclesPreferences(bpy.types.AddonPreferences): row.active = has_hardware_rt row.prop(self, "use_oneapirt") + elif compute_device_type == 'ANARI': + row = layout.row() + def draw(self, context): self.draw_impl(self.layout, context) diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index b93ba9bba9b50eb32b3e1f20b5407c719e59d2b3..7659f2a163c994117f8adc320ec1a08aacb2a8bd 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -132,6 +132,8 @@ def use_optix(context): def use_oneapi(context): return (get_device_type(context) == 'ONEAPI' and use_gpu(context)) +def use_anari(context): + return (get_device_type(context) == 'ANARI' and use_gpu(context)) def use_multi_device(context): if use_gpu(context): diff --git a/intern/cycles/blender/device.cpp b/intern/cycles/blender/device.cpp index 9cb70805be22c622b734ade565969531c16a5105..e5a672a3aa062a7acff65f256da0edf15523c6aa 100644 --- a/intern/cycles/blender/device.cpp +++ b/intern/cycles/blender/device.cpp @@ -15,6 +15,7 @@ enum ComputeDevice { COMPUTE_DEVICE_HIP = 4, COMPUTE_DEVICE_METAL = 5, COMPUTE_DEVICE_ONEAPI = 6, + COMPUTE_DEVICE_ANARI = 7, COMPUTE_DEVICE_NUM }; @@ -123,6 +124,9 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, else if (compute_device == COMPUTE_DEVICE_ONEAPI) { mask |= DEVICE_MASK_ONEAPI; } + else if (compute_device == COMPUTE_DEVICE_ANARI) { + mask |= DEVICE_MASK_ANARI; + } const vector<DeviceInfo> devices = Device::available_devices(mask); /* Match device preferences and available devices. */ diff --git a/intern/cycles/blender/python.cpp b/intern/cycles/blender/python.cpp index ee4cba477e3d78c6a5ed8555334f55856dde497e..603681ed8862a1de20e8bfebee92d2ce1f4dfc62 100644 --- a/intern/cycles/blender/python.cpp +++ b/intern/cycles/blender/python.cpp @@ -676,6 +676,7 @@ static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/) bool has_metal = false; bool has_oneapi = false; bool has_hiprt = false; + bool has_anari = false; for (const DeviceType device_type : device_types) { has_cuda |= (device_type == DEVICE_CUDA); has_optix |= (device_type == DEVICE_OPTIX); @@ -683,14 +684,16 @@ static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/) has_metal |= (device_type == DEVICE_METAL); has_oneapi |= (device_type == DEVICE_ONEAPI); has_hiprt |= (device_type == DEVICE_HIPRT); + has_anari |= (device_type == DEVICE_ANARI); } - PyObject *list = PyTuple_New(6); + PyObject *list = PyTuple_New(7); PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda)); PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix)); PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip)); PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal)); PyTuple_SET_ITEM(list, 4, PyBool_FromLong(has_oneapi)); PyTuple_SET_ITEM(list, 5, PyBool_FromLong(has_hiprt)); + PyTuple_SET_ITEM(list, 6, PyBool_FromLong(has_anari)); return list; } @@ -725,6 +728,9 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg) else if (override == "ONEAPI") { BlenderSession::device_override = DEVICE_MASK_ONEAPI; } + else if (override == "ANARI") { + BlenderSession::device_override = DEVICE_MASK_ANARI; + } else { fprintf(stderr, "\nError: %s is not a valid Cycles device.\n", override.c_str()); Py_RETURN_FALSE; diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index aa9dbd151fde3bb94883cb7d067fda0a2cb231cb..1b4eb30dd3a00949f5d55211ec76d41ccf9cc675 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -128,3 +128,12 @@ if(WITH_CYCLES_DEVICE_ONEAPI AND WITH_CYCLES_ONEAPI_BINARIES) set_and_warn_library_found("ocloc" OCLOC_FOUND WITH_CYCLES_ONEAPI_BINARIES) endif() endif() + + +##################################################### +# ANARI +##################################################### + +if(WITH_CYCLES_DEVICE_ANARI) + find_package(anari 0.8.0) +endif() \ No newline at end of file diff --git a/intern/cycles/cmake/macros.cmake b/intern/cycles/cmake/macros.cmake index 348ae3b3709a3b83491340386a1f71991909302d..4cd53be7b11c4b82b4cfd3fcee38bd7e33a1dca6 100644 --- a/intern/cycles/cmake/macros.cmake +++ b/intern/cycles/cmake/macros.cmake @@ -169,6 +169,10 @@ macro(cycles_external_libraries_append libraries) endif() endif() + if(WITH_CYCLES_DEVICE_ANARI) + list(APPEND ${libraries} anari::anari) + endif() + if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD) list(APPEND ${libraries} extern_hipew) endif() diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 431ae7a80d414127f7611b17f0ed643e369ec9e7..73591c0462d776b33b945ba60d20f2acd9be1f63 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -116,6 +116,13 @@ set(SRC_OPTIX optix/util.h ) +set(SRC_ANARI + anari/device.cpp + anari/device.h + anari/device_impl.cpp + anari/device_impl.h +) + set(SRC_HEADERS device.h denoise.h @@ -134,6 +141,7 @@ set(SRC ${SRC_DUMMY} ${SRC_MULTI} ${SRC_OPTIX} + ${SRC_ANARI} ${SRC_HEADERS} ) @@ -228,4 +236,5 @@ source_group("multi" FILES ${SRC_MULTI}) source_group("metal" FILES ${SRC_METAL}) source_group("optix" FILES ${SRC_OPTIX}) source_group("oneapi" FILES ${SRC_ONEAPI}) +source_group("anari" FILES ${SRC_ANARI}) source_group("common" FILES ${SRC_BASE} ${SRC_HEADERS}) diff --git a/intern/cycles/device/anari/device.cpp b/intern/cycles/device/anari/device.cpp new file mode 100644 index 0000000000000000000000000000000000000000..929d2e3e8ac4ad5865803855bbd27e43bc5b9595 --- /dev/null +++ b/intern/cycles/device/anari/device.cpp @@ -0,0 +1,64 @@ +/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#include "device/anari/device.h" +#include "device/anari/device_impl.h" +#include "device/device.h" + +/* Used for `info.denoisers`. */ +/* TODO(sergey): The denoisers are probably to be moved completely out of the device into their + * own class. But until then keep API consistent with how it used to work before. */ +// #include "util/guiding.h" +// #include "util/openimagedenoise.h" + +CCL_NAMESPACE_BEGIN + +bool device_anari_init() +{ + return true; +} + +unique_ptr<Device> device_anari_create(const DeviceInfo &info, + Stats &stats, + Profiler &profiler, + bool headless) +{ + return make_unique<ANARIDevice>(info, stats, profiler, headless); +} + +void device_anari_info(vector<DeviceInfo> &devices) +{ + DeviceInfo info; + + info.type = DEVICE_ANARI; + info.description = "ANARI";//system_cpu_brand_string(); + info.id = "ANARI"; + info.num = 0; + info.has_osl = false; + info.has_nanovdb = false; + info.has_profiling = false; + // if (guiding_supported()) { + // info.has_guiding = true; + // } + // else { + info.has_guiding = false; + //} + // if (openimagedenoise_supported()) { + // info.denoisers |= DENOISER_OPENIMAGEDENOISE; + // } + + devices.insert(devices.begin(), info); +} + +string device_anari_capabilities() +{ + string capabilities = ""; + // capabilities += system_cpu_support_avx2() ? "AVX2" : ""; + // if (capabilities[capabilities.size() - 1] == ' ') { + // capabilities.resize(capabilities.size() - 1); + // } + return capabilities; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/anari/device.h b/intern/cycles/device/anari/device.h new file mode 100644 index 0000000000000000000000000000000000000000..d83dd784ddca3963ed00014da2da4471ca680fff --- /dev/null +++ b/intern/cycles/device/anari/device.h @@ -0,0 +1,29 @@ +/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +#include "util/string.h" +#include "util/unique_ptr.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class Device; +class DeviceInfo; +class Profiler; +class Stats; + +bool device_anari_init(); + +unique_ptr<Device> device_anari_create(const DeviceInfo &info, + Stats &stats, + Profiler &profiler, + bool headless); + +void device_anari_info(vector<DeviceInfo> &devices); + +string device_anari_capabilities(); + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/anari/device_impl.cpp b/intern/cycles/device/anari/device_impl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d5db77c6feac8ae695cf595786215230dd7a333b --- /dev/null +++ b/intern/cycles/device/anari/device_impl.cpp @@ -0,0 +1,323 @@ +/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#include "device/anari/device_impl.h" + +#include <cstdlib> +#include <cstring> + +// /* So ImathMath is included before our kernel_cpu_compat. */ +// #ifdef WITH_OSL +// /* So no context pollution happens from indirectly included windows.h */ +// # ifdef _WIN32 +// # include "util/windows.h" +// # endif +// # include <OSL/oslexec.h> +// #endif + +// #ifdef WITH_EMBREE +// # if EMBREE_MAJOR_VERSION >= 4 +// # include <embree4/rtcore.h> +// # else +// # include <embree3/rtcore.h> +// # endif +// #endif + +//#include "device/anari/kernel.h" + +#include "device/device.h" + +#include "kernel/device/anari/kernel.h" +#include "kernel/globals.h" +#include "kernel/types.h" + +//#include "bvh/embree.h" + +#include "session/buffers.h" + +#include "util/guiding.h" +#include "util/log.h" +#include "util/progress.h" +#include "util/task.h" + +CCL_NAMESPACE_BEGIN + +ANARIDevice::ANARIDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_) + : Device(info_, stats_, profiler_, headless_), texture_info(this, "texture_info", MEM_GLOBAL) +{ + /* Pick any kernel, all of them are supposed to have same level of microarchitecture + * optimization. */ + //VLOG_INFO << "Using " << get_cpu_kernels().integrator_init_from_camera.get_uarch_name() + // << " CPU kernels."; + + if (info.cpu_threads == 0) { + info.cpu_threads = TaskScheduler::max_concurrency(); + } + +// #ifdef WITH_EMBREE +// embree_device = rtcNewDevice("verbose=0"); +// #endif + need_texture_info = false; +} + +ANARIDevice::~ANARIDevice() +{ +// #ifdef WITH_EMBREE +// rtcReleaseDevice(embree_device); +// #endif + + texture_info.free(); +} + +BVHLayoutMask ANARIDevice::get_bvh_layout_mask(uint /*kernel_features*/) const +{ + BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2; +// #ifdef WITH_EMBREE +// bvh_layout_mask |= BVH_LAYOUT_EMBREE; +// #endif /* WITH_EMBREE */ + return bvh_layout_mask; +} + +bool ANARIDevice::load_texture_info() +{ + if (!need_texture_info) { + return false; + } + + texture_info.copy_to_device(); + need_texture_info = false; + + return true; +} + +void ANARIDevice::mem_alloc(device_memory &mem) +{ + if (mem.type == MEM_TEXTURE) { + assert(!"mem_alloc not supported for textures."); + } + else if (mem.type == MEM_GLOBAL) { + assert(!"mem_alloc not supported for global memory."); + } + else { + //if (mem.name) { + // VLOG_WORK << "Buffer allocate: " << mem.name << ", " + // << string_human_readable_number(mem.memory_size()) << " bytes. (" + // << string_human_readable_size(mem.memory_size()) << ")"; + //} + + if (mem.type == MEM_DEVICE_ONLY) { + size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES; + void *data = util_aligned_malloc(mem.memory_size(), alignment); + mem.device_pointer = (device_ptr)data; + } + else { + assert(!(mem.host_pointer == nullptr && mem.memory_size() > 0)); + mem.device_pointer = (device_ptr)mem.host_pointer; + } + + mem.device_size = mem.memory_size(); + stats.mem_alloc(mem.device_size); + } +} + +void ANARIDevice::mem_copy_to(device_memory &mem) +{ + if (mem.type == MEM_GLOBAL) { + global_free(mem); + global_alloc(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + tex_alloc((device_texture &)mem); + } + else { + if (!mem.device_pointer) { + mem_alloc(mem); + } + + /* copy is no-op */ + } +} + +void ANARIDevice::mem_move_to_host(device_memory & /*mem*/) +{ + /* no-op */ +} + +void ANARIDevice::mem_copy_from( + device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/) +{ + /* no-op */ +} + +void ANARIDevice::mem_zero(device_memory &mem) +{ + if (!mem.device_pointer) { + mem_alloc(mem); + } + + if (mem.device_pointer) { + memset((void *)mem.device_pointer, 0, mem.memory_size()); + } +} + +void ANARIDevice::mem_free(device_memory &mem) +{ + if (mem.type == MEM_GLOBAL) { + global_free(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + } + else if (mem.device_pointer) { + if (mem.type == MEM_DEVICE_ONLY) { + util_aligned_free((void *)mem.device_pointer, mem.memory_size()); + } + mem.device_pointer = 0; + stats.mem_free(mem.device_size); + mem.device_size = 0; + } +} + +device_ptr ANARIDevice::mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) +{ + return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset)); +} + +void ANARIDevice::const_copy_to(const char *name, void *host, const size_t size) +{ +// #ifdef WITH_EMBREE +// if (strcmp(name, "data") == 0) { +// assert(size <= sizeof(KernelData)); + +// // Update scene handle (since it is different for each device on multi devices) +// KernelData *const data = (KernelData *)host; +// data->device_bvh = embree_scene; +// } +// #endif + //kernel_const_copy(&kernel_globals, name, host, size); +} + +void ANARIDevice::global_alloc(device_memory &mem) +{ + //VLOG_WORK << "Global memory allocate: " << mem.name << ", " + // << string_human_readable_number(mem.memory_size()) << " bytes. (" + // << string_human_readable_size(mem.memory_size()) << ")"; + + //kernel_global_memory_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size); + + mem.device_pointer = (device_ptr)mem.host_pointer; + mem.device_size = mem.memory_size(); + stats.mem_alloc(mem.device_size); +} + +void ANARIDevice::global_free(device_memory &mem) +{ + if (mem.device_pointer) { + mem.device_pointer = 0; + stats.mem_free(mem.device_size); + mem.device_size = 0; + } +} + +void ANARIDevice::tex_alloc(device_texture &mem) +{ + //VLOG_WORK << "Texture allocate: " << mem.name << ", " + // << string_human_readable_number(mem.memory_size()) << " bytes. (" + // << string_human_readable_size(mem.memory_size()) << ")"; + + mem.device_pointer = (device_ptr)mem.host_pointer; + mem.device_size = mem.memory_size(); + stats.mem_alloc(mem.device_size); + + const uint slot = mem.slot; + if (slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount of re-allocations. */ + texture_info.resize(slot + 128); + } + + texture_info[slot] = mem.info; + texture_info[slot].data = (uint64_t)mem.host_pointer; + need_texture_info = true; +} + +void ANARIDevice::tex_free(device_texture &mem) +{ + if (mem.device_pointer) { + mem.device_pointer = 0; + stats.mem_free(mem.device_size); + mem.device_size = 0; + need_texture_info = true; + } +} + +void ANARIDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) +{ +// #ifdef WITH_EMBREE +// if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE || +// bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE || +// bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE || +// bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE || +// bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE) +// { +// BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh); +// if (refit) { +// bvh_embree->refit(progress); +// } +// else { +// bvh_embree->build(progress, &stats, embree_device); +// } + +// if (bvh->params.top_level) { +// embree_scene = bvh_embree->scene; +// } +// } +// else +// #endif + { + Device::build_bvh(bvh, progress, refit); + } +} + +void *ANARIDevice::get_guiding_device() const +{ +//#ifdef WITH_PATH_GUIDING +// if (!guiding_device) { +// if (guiding_device_type() == 8) { +// guiding_device = make_unique<openpgl::cpp::Device>(PGL_DEVICE_TYPE_CPU_8); +// } +// else if (guiding_device_type() == 4) { +// guiding_device = make_unique<openpgl::cpp::Device>(PGL_DEVICE_TYPE_CPU_4); +// } +// } +// return guiding_device.get(); +//#else + return nullptr; +//#endif +} + +void ANARIDevice::get_cpu_kernel_thread_globals( + vector<ThreadKernelGlobalsCPU> &kernel_thread_globals) +{ + /* Ensure latest texture info is loaded into kernel globals before returning. */ + load_texture_info(); + + //kernel_thread_globals.clear(); + //OSLGlobals *osl_globals = get_cpu_osl_memory(); + //for (int i = 0; i < info.cpu_threads; i++) { + // kernel_thread_globals.emplace_back(kernel_globals, osl_globals, profiler, i); + //} +} + +OSLGlobals *ANARIDevice::get_cpu_osl_memory() +{ + return nullptr; +} + +bool ANARIDevice::load_kernels(const uint /*kernel_features*/) +{ + return true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/anari/device_impl.h b/intern/cycles/device/anari/device_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..774d3fc5e8b2f56dd42ae97ff750f3cfca982197 --- /dev/null +++ b/intern/cycles/device/anari/device_impl.h @@ -0,0 +1,79 @@ +/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +//#include "device/anari/kernel.h" +#include "device/device.h" +#include "device/memory.h" + +// clang-format off +#include "kernel/device/anari/kernel.h" +#include "kernel/globals.h" + +//#include "kernel/osl/globals.h" +// clang-format on + +#include "util/guiding.h" // IWYU pragma: keep +#include "util/unique_ptr.h" + +CCL_NAMESPACE_BEGIN + +class ANARIDevice : public Device { + public: + KernelGlobalsCPU kernel_globals; + + device_vector<TextureInfo> texture_info; + bool need_texture_info; + +// #ifdef WITH_OSL +// OSLGlobals osl_globals; +// #endif +// #ifdef WITH_EMBREE +// RTCScene embree_scene = nullptr; +// RTCDevice embree_device; +// #endif +// #ifdef WITH_PATH_GUIDING +// mutable unique_ptr<openpgl::cpp::Device> guiding_device; +// #endif + + ANARIDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_); + ~ANARIDevice() override; + + BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override; + + /* Returns true if the texture info was copied to the device (meaning, some more + * re-initialization might be needed). */ + bool load_texture_info(); + + void mem_alloc(device_memory &mem) override; + void mem_copy_to(device_memory &mem) override; + void mem_move_to_host(device_memory &mem) override; + void mem_copy_from( + device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override; + void mem_zero(device_memory &mem) override; + void mem_free(device_memory &mem) override; + device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) override; + + void const_copy_to(const char *name, void *host, const size_t size) override; + + void global_alloc(device_memory &mem); + void global_free(device_memory &mem); + + void tex_alloc(device_texture &mem); + void tex_free(device_texture &mem); + + void build_bvh(BVH *bvh, Progress &progress, bool refit) override; + + void *get_guiding_device() const override; + + void get_cpu_kernel_thread_globals( + vector<ThreadKernelGlobalsCPU> &kernel_thread_globals) override; + OSLGlobals *get_cpu_osl_memory() override; + + protected: + bool load_kernels(uint /*kernel_features*/) override; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 44fa7988f2a2738cf948d5b518aefc2648a0a1a4..b043cfbb7a4369963bebfe65b9eded2aaf97254f 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -19,6 +19,7 @@ #include "device/multi/device.h" #include "device/oneapi/device.h" #include "device/optix/device.h" +#include "device/anari/device.h" #ifdef WITH_HIPRT # include <hiprtew.h> @@ -43,6 +44,7 @@ vector<DeviceInfo> Device::cpu_devices; vector<DeviceInfo> Device::hip_devices; vector<DeviceInfo> Device::metal_devices; vector<DeviceInfo> Device::oneapi_devices; +vector<DeviceInfo> Device::anari_devices; uint Device::devices_initialized_mask = 0; /* Device */ @@ -117,6 +119,12 @@ unique_ptr<Device> Device::create(const DeviceInfo &info, break; #endif +#ifdef WITH_ANARI + case DEVICE_ANARI: + device = device_anari_create(info, stats, profiler, headless); + break; +#endif + default: break; } @@ -154,6 +162,9 @@ DeviceType Device::type_from_string(const char *name) if (strcmp(name, "HIPRT") == 0) { return DEVICE_HIPRT; } + if (strcmp(name, "ANARI") == 0) { + return DEVICE_ANARI; + } return DEVICE_NONE; } @@ -211,6 +222,9 @@ vector<DeviceType> Device::available_types() if (hiprtewInit()) { types.push_back(DEVICE_HIPRT); } +#endif +#ifdef WITH_ANARI + types.push_back(DEVICE_ANARI); #endif return types; } @@ -281,6 +295,20 @@ vector<DeviceInfo> Device::available_devices(const uint mask) } #endif +#ifdef WITH_ANARI + if (mask & DEVICE_MASK_ANARI) { + if (!(devices_initialized_mask & DEVICE_MASK_ANARI)) { + if (device_anari_init()) { + device_anari_info(anari_devices); + } + devices_initialized_mask |= DEVICE_MASK_ANARI; + } + for (DeviceInfo &info : anari_devices) { + devices.push_back(info); + } + } +#endif + if (mask & DEVICE_MASK_CPU) { if (!(devices_initialized_mask & DEVICE_MASK_CPU)) { device_cpu_info(cpu_devices); @@ -374,6 +402,18 @@ string Device::device_capabilities(const uint mask) } #endif +#ifdef WITH_ANARI + if (mask & DEVICE_MASK_ANARI) { + if (device_anari_init()) { + const string device_capabilities = device_anari_capabilities(); + if (!device_capabilities.empty()) { + capabilities += "\nANARI device capabilities:\n"; + capabilities += device_capabilities; + } + } + } +#endif + return capabilities; } @@ -470,6 +510,7 @@ void Device::free_memory() oneapi_devices.free_memory(); cpu_devices.free_memory(); metal_devices.free_memory(); + anari_devices.free_memory(); } unique_ptr<DeviceQueue> Device::gpu_queue_create() diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index f1ea26cad9581c21512b851a189b4793736c87d2..71df167156e0e568c0c4e409971ad55b5ef558b9 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -44,7 +44,8 @@ enum DeviceType { DEVICE_HIP, DEVICE_HIPRT, DEVICE_METAL, - DEVICE_ONEAPI, + DEVICE_ONEAPI, + DEVICE_ANARI, DEVICE_DUMMY, }; @@ -55,6 +56,7 @@ enum DeviceTypeMask { DEVICE_MASK_HIP = (1 << DEVICE_HIP), DEVICE_MASK_METAL = (1 << DEVICE_METAL), DEVICE_MASK_ONEAPI = (1 << DEVICE_ONEAPI), + DEVICE_MASK_ANARI = (1 << DEVICE_ANARI), DEVICE_MASK_ALL = ~0 }; @@ -328,6 +330,7 @@ class Device { static vector<DeviceInfo> hip_devices; static vector<DeviceInfo> metal_devices; static vector<DeviceInfo> oneapi_devices; + static vector<DeviceInfo> anari_devices; static uint devices_initialized_mask; }; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 05dc3c5979ddf3cc4644508677528a7cb0351a4e..9db3f8237f28156d39997e0e6c3e40c327896e3c 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -39,6 +39,10 @@ set(SRC_KERNEL_DEVICE_OPTIX device/optix/kernel_shader_raytrace.cu ) +set(SRC_KERNEL_DEVICE_ANARI + device/anari/kernel.cpp +) + if(WITH_CYCLES_OSL) math(EXPR OSL_LIBRARY_VERSION_CODE "${OSL_LIBRARY_VERSION_MAJOR} * 10000 + ${OSL_LIBRARY_VERSION_MINOR} * 100 + ${OSL_LIBRARY_VERSION_PATCH}") @@ -139,6 +143,16 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS device/cpu/bvh.h ) +set(SRC_KERNEL_DEVICE_ANARI_HEADERS + #device/anari/bvh.h + #device/anari/compat.h + #device/anari/image.h + #device/anari/globals.h + device/anari/kernel.h + #device/anari/kernel_arch.h + #device/anari/kernel_arch_impl.h +) + set(SRC_KERNEL_CLOSURE_HEADERS closure/alloc.h closure/bsdf.h @@ -1257,6 +1271,7 @@ cycles_add_library(cycles_kernel "${LIB}" ${SRC_KERNEL_DEVICE_HIPRT} ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_METAL} + ${SRC_KERNEL_DEVICE_ANARI} ${SRC_KERNEL_HEADERS} ${SRC_KERNEL_DEVICE_CPU_HEADERS} ${SRC_KERNEL_DEVICE_GPU_HEADERS} @@ -1266,6 +1281,7 @@ cycles_add_library(cycles_kernel "${LIB}" ${SRC_KERNEL_DEVICE_OPTIX_HEADERS} ${SRC_KERNEL_DEVICE_METAL_HEADERS} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS} + ${SRC_KERNEL_DEVICE_ANARI_HEADERS} ) source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS}) @@ -1280,6 +1296,7 @@ source_group("device\\hiprt" FILES ${SRC_KERNEL_DEVICE_HIPRT} ${SRC_KERNEL_DEVIC source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}) source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS}) source_group("device\\oneapi" FILES ${SRC_KERNEL_DEVICE_ONEAPI} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}) +source_group("device\\anari" FILES ${SRC_KERNEL_DEVICE_ANARI} ${SRC_KERNEL_DEVICE_ANARI_HEADERS}) source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS}) source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS}) source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS}) @@ -1306,6 +1323,7 @@ cycles_add_gpu_kernel_dependencies(cycles_kernel_optix) cycles_add_gpu_kernel_dependencies(cycles_kernel_hip) cycles_add_gpu_kernel_dependencies(cycles_kernel_hiprt) cycles_add_gpu_kernel_dependencies(cycles_kernel_oneapi) +cycles_add_gpu_kernel_dependencies(cycles_kernel_anari) # Install kernel source for runtime compilation diff --git a/intern/cycles/kernel/device/anari/kernel.cpp b/intern/cycles/kernel/device/anari/kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d3734b8e89be3b6099e873058fd632af7ac0adb2 --- /dev/null +++ b/intern/cycles/kernel/device/anari/kernel.cpp @@ -0,0 +1,85 @@ +/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +/* CPU kernel entry points */ + +// /* On x86-64, our minimum is SSE4.2, so avoid the extra kernel and compile this +// * one with SSE4.2 intrinsics. +// */ +// #if defined(__x86_64__) || defined(_M_X64) +// # define __KERNEL_SSE__ +// # define __KERNEL_SSE2__ +// # define __KERNEL_SSE3__ +// # define __KERNEL_SSSE3__ +// # define __KERNEL_SSE42__ +// #endif + +// /* When building kernel for native machine detect kernel features from the flags +// * set by compiler. +// */ +// #ifdef WITH_KERNEL_NATIVE +// # ifdef __SSE4_2__ +// # ifndef __KERNEL_SSE42__ +// # define __KERNEL_SSE42__ +// # endif +// # endif +// # ifdef __AVX__ +// # ifndef __KERNEL_SSE__ +// # define __KERNEL_SSE__ +// # endif +// # define __KERNEL_AVX__ +// # endif +// # ifdef __AVX2__ +// # ifndef __KERNEL_SSE__ +// # define __KERNEL_SSE__ +// # endif +// # define __KERNEL_AVX2__ +// # endif +// #endif + +// /* quiet unused define warnings */ +// #if defined(__KERNEL_SSE2__) +// /* do nothing */ +// #endif + +// #include "kernel/device/cpu/globals.h" + +// #include "kernel/device/cpu/kernel.h" +// #define KERNEL_ARCH cpu +// #include "kernel/device/cpu/kernel_arch_impl.h" + +// CCL_NAMESPACE_BEGIN + +// /* Memory Copy */ + +// void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t /*unused*/) +// { +// if (strcmp(name, "data") == 0) { +// kg->data = *(KernelData *)host; +// } +// else { +// assert(0); +// } +// } + +// void kernel_global_memory_copy(KernelGlobalsCPU *kg, +// const char *name, +// void *mem, +// const size_t size) +// { +// if (false) { +// } + +// #define KERNEL_DATA_ARRAY(type, tname) \ +// else if (strcmp(name, #tname) == 0) { \ +// kg->tname.data = (type *)mem; \ +// kg->tname.width = size; \ +// } +// #include "kernel/data_arrays.h" +// else { +// assert(0); +// } +// } + +// CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/anari/kernel.h b/intern/cycles/kernel/device/anari/kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..4d66cf20977a7014e73f3cdc5638459ad984135b --- /dev/null +++ b/intern/cycles/kernel/device/anari/kernel.h @@ -0,0 +1,41 @@ +/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +// /* CPU Kernel Interface */ + +// #include "kernel/types.h" + +// #include "util/half.h" + +// CCL_NAMESPACE_BEGIN + +// #define KERNEL_NAME_JOIN(x, y, z) x##_##y##_##z +// #define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name) +// #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name) + +// struct IntegratorStateCPU; +// struct KernelGlobalsCPU; +// struct KernelData; + +// KernelGlobalsCPU *kernel_globals_create(); +// void kernel_globals_free(KernelGlobalsCPU *kg); + +// void *kernel_osl_memory(const KernelGlobalsCPU *kg); +// bool kernel_osl_use(const KernelGlobalsCPU *kg); + +// void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, const size_t size); +// void kernel_global_memory_copy(KernelGlobalsCPU *kg, +// const char *name, +// void *mem, +// const size_t size); + +// #define KERNEL_ARCH cpu +// #include "kernel/device/cpu/kernel_arch.h" + +// #define KERNEL_ARCH cpu_avx2 +// #include "kernel/device/cpu/kernel_arch.h" + +// CCL_NAMESPACE_END diff --git a/source/creator/creator_args.cc b/source/creator/creator_args.cc index a7e5f0e7702be6dc476bd524cda94a6ae25ddfa3..55c6d8f298f7197a55c217b702c482dfa9ec847c 100644 --- a/source/creator/creator_args.cc +++ b/source/creator/creator_args.cc @@ -664,7 +664,7 @@ static void print_help(bArgs *ba, bool all) PRINT("\n"); PRINT("--cycles-device <device>\n"); PRINT("\tSet the device used for rendering.\n"); - PRINT("\tValid options are: 'CPU' 'CUDA' 'OPTIX' 'HIP' 'ONEAPI' 'METAL'.\n"); + PRINT("\tValid options are: 'CPU' 'CUDA' 'OPTIX' 'HIP' 'ONEAPI' 'METAL' 'ANARI'.\n"); PRINT("\n"); PRINT("\tAppend +CPU to a GPU device to render on both CPU and GPU.\n"); PRINT("\n");