Commit 93bd90b6 authored by Milan Jaros
Browse files

add new version of blender, fix calculation of camera view matrix in blender...

Add a new version of Blender; fix the calculation of the camera view matrix in the Blender client; fix multi-GPU rendering in the Blender client.
parent eb986997
......@@ -24,8 +24,13 @@ option(WITH_CUDA_STAT "Enable statistics for CUDA BIN" OFF)
option(WITH_CUDA_DEBUG "Enable debug for CUDA" OFF)
option(WITH_OPTIX_DENOISER "Enable build for OPTIX" OFF)
option(WITH_CLIENT_SHOW_STAT "Enable build for SHOW STAT" OFF)
option(WITH_CLIENT_CUDA_CPU_STAT "Enable build for CUDA CPU STAT" OFF)
option(WITH_CLIENT_CUDA_CPU_STAT_LB "Enable build for CUDA CPU STAT" OFF)
option(WITH_CLIENT_CUDA_CPU_STAT2 "Enable build for CUDA CPU STAT" OFF)
option(WITH_CLIENT_CUDA_CPU_STAT2_LB "Enable build for CUDA CPU STAT" OFF)
option(WITH_CLIENT_CUDA_CPU_STAT3 "Enable build for CUDA CPU STAT" OFF)
option(WITH_SOCKET_UDP "Enable UDP" OFF)
option(WITH_CLIENT_MPI_SOCKET "Enable MPI_SOCKET" OFF)
......@@ -35,10 +40,14 @@ option(WITH_CLIENT_FILE_MMAP "Enable FILE" OFF)
option(WITH_CLIENT_MPI_FILE "Enable FILE+MPI" OFF)
option(WITH_CLIENT_RENDERENGINE "Enable RENDERENGINE" OFF)
option(WITH_CLIENT_RENDERENGINE_VR "Enable RENDERENGINE_VR" OFF)
option(WITH_CLIENT_RENDERENGINE_EMULATE "Enable RENDERENGINE" OFF)
option(WITH_CLIENT_UNIMEM "Enable build for UNIMEM" OFF)
option(ENABLE_LOAD_BALANCE "Enable ENABLE_LOAD_BALANCE" OFF)
option(ENABLE_LOAD_BALANCEv2 "Enable ENABLE_LOAD_BALANCE" OFF)
option(ENABLE_LOAD_BALANCE "Enable ENABLE_LOAD_BALANCE - line stealing" OFF)
option(ENABLE_LOAD_BALANCE_EXIT "Enable ENABLE_LOAD_BALANCE_EXIT" OFF)
option(ENABLE_LOAD_BALANCE_CUDA "Enable ENABLE_LOAD_BALANCE_CUDA - faster lines+time stealing" OFF)
option(ENABLE_LOAD_BALANCEv2 "Enable ENABLE_LOAD_BALANCEv2 - pixel stealing" OFF)
option(ENABLE_LOAD_BALANCEv3 "Enable ENABLE_LOAD_BALANCEv3 - faster lines+time stealing" OFF)
option(CLIENT_MPI_LOAD_BALANCING_SAMPLES "Enable LOAD_BALANCING_SAMPLES" OFF)
option(CLIENT_MPI_LOAD_BALANCING_LINES "Enable LINE_LOAD_BALANCING" OFF)
......@@ -70,9 +79,9 @@ option(CLIENT_PATH_TRACE_PIXEL_256 "Enable" OFF)
#==============================ULTRAGRID==============================
option(WITH_CLIENT_CESNET "Enable build for CESNET" OFF)
#option(WITH_RGBA_FORMAT "Enable rgba format, default is yuv420" OFF)
option(WITH_RGBA_FORMAT "Enable rgba format, default is yuv420" OFF)
option(WITH_YUV_OUTPUT "Enable yuv output - writing to yuv file" OFF)
#option(WITH_BMP_OUTPUT "Enable bmp output - writing to bmp file" OFF)
option(WITH_BMP_OUTPUT "Enable bmp output - writing to bmp file" OFF)
#==============================ULTRAGRID==============================
......@@ -88,16 +97,31 @@ if(WITH_CLIENT_MPI_SOCKET)
set(WITH_CLIENT_MPI OFF)
endif()
if(WITH_CLIENT_CUDA_CPU_STAT3)
set(WITH_CLIENT_CUDA_CPU_STAT ON)
endif()
if(WITH_CLIENT_CUDA_CPU_STAT2)
set(WITH_CLIENT_CUDA_CPU_STAT ON)
endif()
if(WITH_CLIENT_CUDA_CPU_STAT)
set(WITH_CLIENT_CUDA ON)
set(WITH_CLIENT_CPU ON)
set(WITH_CPU_STAT ON)
set(WITH_CUDA_STAT OFF)
set(WITH_CLIENT_UNIMEM ON)
#set(WITH_CLIENT_CUDA_CPU_STAT2 ON)
set(WITH_CPU_SSE OFF)
set(WITH_CPU_AVX OFF)
set(WITH_CPU_AVX2 OFF)
#set(WITH_CPU_IMAGE ON)
#set(WITH_CLIENT_CUDA_CPU_STAT2 ON)
#set(WITH_CLIENT_CUDA_CPU_STAT_LB ON)
#set(WITH_CLIENT_CUDA_CPU_STAT2_LB ON)
endif()
if(WITH_CUDA_STAT)
......@@ -109,6 +133,14 @@ if(WITH_CUDA_STAT)
set(WITH_CLIENT_UNIMEM ON)
endif()
if(WITH_CLIENT_SHOW_STAT)
set(ENABLE_INC_SAMPLES OFF)
endif()
if(ENABLE_LOAD_BALANCEv3)
set(ENABLE_LOAD_BALANCE ON)
endif()
#message(${CMAKE_CXX_COMPILER_ID})
if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
......
......@@ -40,6 +40,9 @@ if(WITH_CLIENT_RENDERENGINE_VR)
add_definitions(-DWITH_CLIENT_RENDERENGINE_VR)
endif()
if(WITH_CLIENT_RENDERENGINE_EMULATE)
add_definitions(-DWITH_CLIENT_RENDERENGINE_EMULATE)
endif()
if(WITH_CLIENT_MPI_FILE)
add_definitions(-DWITH_CLIENT_MPI_FILE)
endif()
......@@ -78,7 +81,7 @@ if(WITH_CLIENT_CUDA)
endif()
if(WITH_CLIENT_CESNET)
target_link_libraries(blender_client${CLIENT_SUFFIX} ultragrid${CLIENT_SUFFIX})
target_link_libraries(blender_client${CLIENT_SUFFIX} ultragrid${CLIENT_SUFFIX} blenlib${CLIENT_SUFFIX})
endif()
if(WITH_NVPIPE)
......
......@@ -19,28 +19,13 @@
#include "client_api.h"
#include "cycles_client.h"
#include "kernel_util.h"
#include <omp.h>
#if defined(WITH_CLIENT_MPI) || defined(WITH_CLIENT_MPI_SOCKET) || defined(WITH_CLIENT_MPI_FILE)
# include <mpi.h>
#endif
#include <stdlib.h>
// int setenv(const char *envname, const char *envval, int overwrite);
#ifdef _WIN32
int setenv(const char *name, const char *value, int overwrite)
{
int errcode = 0;
if (!overwrite) {
size_t envsize = 0;
errcode = getenv_s(&envsize, NULL, 0, name);
if (errcode || envsize)
return errcode;
}
return _putenv_s(name, value);
}
#endif
#if defined(_WIN32) //&& defined(_DEBUG)
# include <windows.h>
......@@ -49,7 +34,7 @@ int setenv(const char *name, const char *value, int overwrite)
int main(int argc, char **argv)
{
/////////////////////////
/*
/*
setenv(
"KERNEL_CUDA_CUBIN",
"c:\\work\\blender\\blender_client_build\\cycles_cuda\\kernel_sm_52.cubin",
......@@ -59,27 +44,17 @@ int main(int argc, char **argv)
"c:\\work\\blender\\blender_client_build\\cycles_cuda\\kernel_sm_52_stat.cubin",
1);
setenv("CLIENT_FILE_KERNEL_GLOBAL", "c:\\temp\\test.kg", 1);
setenv("CLIENT_FILE_CYCLES_BUFFER", "c:\\temp\\test.bf", 1);
setenv("CLIENT_FILE_CYCLES_BMP", "c:\\temp\\test.bmp", 1);
setenv("CLIENT_FILE_KERNEL_GLOBAL", "e:\\temp\\test.kg", 1);
setenv("CLIENT_FILE_CYCLES_BUFFER", "e:\\temp\\test.bf", 1);
setenv("CLIENT_FILE_CYCLES_BMP", "e:\\temp\\test.bmp", 1);
setenv("DEBUG_SAMPLES", "1", 1);
setenv("DEBUG_RES_W", "1024", 1);
setenv("DEBUG_RES_H", "512", 1);
setenv("SOCKET_SERVER_PORT_CAM", "7000", 1);
setenv("SOCKET_SERVER_PORT_DATA", "7001", 1);
setenv("CUDA_VISIBLE_DEVICES", "0,1", 1);
*/
/*
CLIENT_FILE_KERNEL_GLOBAL=c://temp//test.kg
CLIENT_FILE_CYCLES_BUFFER=c://temp//test.bf
CLIENT_FILE_CYCLES_BMP=c://temp//test
SOCKET_SERVER_PORT_CAM=7000
SOCKET_SERVER_PORT_DATA=7001
SOCKET_SERVER_NAME_CAM=localhost
SOCKET_SERVER_NAME_DATA=localhost
KERNEL_CUDA_CUBIN=c:\\work\\blender\\blender_client_build\\cycles_cuda\\kernel_sm_52.cubin
KERNEL_CUDA_STAT_CUBIN=c:\\work\\blender\\blender_client_build\\cycles_cuda\\kernel_sm_52_stat.cubin
*/
setenv("DEBUG_REPEAT_TIME", "1", 1);
*/
/////////////////////////
client_kernel_struct data;
......@@ -117,24 +92,24 @@ KERNEL_CUDA_STAT_CUBIN=c:\\work\\blender\\blender_client_build\\cycles_cuda\\ker
// mpi_print_memory(world_rank);
}
#if defined(_WIN32) //&& defined(_DEBUG)
if (world_rank == 0) {
int stop = 0;
printf("attach\n");
fflush(0);
while (stop) {
Sleep(1000);
}
}
MPI_Barrier(MPI_COMM_WORLD);
#endif
//#if defined(_WIN32) //&& defined(_DEBUG)
//
// if (world_rank == 0) {
// int stop = 0;
// printf("attach\n");
// fflush(0);
// while (stop) {
// Sleep(1000);
// }
// }
//
// MPI_Barrier(MPI_COMM_WORLD);
//#endif
#endif
#ifdef WITH_CLIENT_CUDA
cyclesphi::client_set_device(-1);
cyclesphi::client_set_device(-1, world_rank);
#endif
data.world_rank = world_rank;
......@@ -166,10 +141,10 @@ KERNEL_CUDA_STAT_CUBIN=c:\\work\\blender\\blender_client_build\\cycles_cuda\\ker
break;
}
// printf("TAG: %d, TIME: %f, %f\n",
// data.client_tag,
// omp_get_wtime() - t_start,
// omp_get_wtime() - t_start0);
printf("TAG: %d, TIME: %f, %f\n",
data.client_tag,
omp_get_wtime() - t_start,
omp_get_wtime() - t_start0);
t_start = omp_get_wtime();
}
......@@ -190,24 +165,23 @@ KERNEL_CUDA_STAT_CUBIN=c:\\work\\blender\\blender_client_build\\cycles_cuda\\ker
MPI_Barrier(MPI_COMM_WORLD);
if (world_rank < 2) {
printf(
"End from processor %s, rank %d"
" out of %d processors, total time: %f\n",
processor_name,
world_rank,
world_size,
// omp_get_wtime() - t_start,
omp_get_wtime() - t_start0);
fflush(0);
}
// MPI_Barrier(MPI_COMM_WORLD);
// Finalize the MPI environment.
MPI_Finalize();
#endif
if (world_rank < 2) {
printf(
"End from processor, rank %d"
" out of %d processors, total time: %f\n",
world_rank,
world_size,
// omp_get_wtime() - t_start,
omp_get_wtime() - t_start0);
fflush(0);
}
return 0;
}
......@@ -6,10 +6,10 @@ set(INC
)
set(SRC
math_matrix.cpp
math_rotation.cpp
math_vector.cpp
math_vector_inline.cpp
math_matrix.cpp
math_rotation.cpp
math_vector.cpp
math_vector_inline.cpp
math_base.cpp
math_base_inline.cpp
)
......@@ -22,9 +22,11 @@ if(WITH_OPENMP)
endif()
add_definitions(-D__SSE2__)
add_definitions(-DNDEBUG)
add_definitions(-DMATH_STANDALONE)
include_directories(${INC})
add_library(blenlib STATIC ${SRC} ${SRC_HEADERS})
add_library(blenlib${CLIENT_SUFFIX} STATIC ${SRC} ${SRC_HEADERS})
install (TARGETS blenlib DESTINATION lib)
......@@ -34,6 +34,9 @@ set(SRC
kernel_camera.h
kernel_camera.cpp
kernel_util.h
kernel_util.cpp
../renderengine/src/cyclesphi_data.h
../api/client_api.h
)
......@@ -151,6 +154,10 @@ if(WITH_CLIENT_RENDERENGINE)
endif()
if(WITH_CLIENT_RENDERENGINE_EMULATE)
add_definitions(-DWITH_CLIENT_RENDERENGINE_EMULATE)
endif()
if(WITH_CLIENT_RENDERENGINE_VR)
add_definitions(-DWITH_CLIENT_RENDERENGINE_VR)
endif()
......@@ -168,6 +175,18 @@ if(WITH_CLIENT_CUDA_CPU_STAT2)
add_definitions(-DWITH_CLIENT_CUDA_CPU_STAT2)
endif()
if(WITH_CLIENT_CUDA_CPU_STAT2_LB)
add_definitions(-DWITH_CLIENT_CUDA_CPU_STAT2_LB)
endif()
if(WITH_CLIENT_CUDA_CPU_STAT3)
add_definitions(-DWITH_CLIENT_CUDA_CPU_STAT3)
endif()
if(WITH_CLIENT_CUDA_CPU_STAT_LB)
add_definitions(-DWITH_CLIENT_CUDA_CPU_STAT_LB)
endif()
if(WITH_IMAGE_TURBOJPEG)
add_definitions(-DWITH_TURBOJPEG)
endif()
......@@ -196,6 +215,18 @@ if(ENABLE_LOAD_BALANCEv2)
add_definitions(-DENABLE_LOAD_BALANCEv2)
endif()
if(ENABLE_LOAD_BALANCEv3)
add_definitions(-DENABLE_LOAD_BALANCEv3)
endif()
if(ENABLE_LOAD_BALANCE_EXIT)
add_definitions(-DENABLE_LOAD_BALANCE_EXIT)
endif()
if(ENABLE_LOAD_BALANCE_CUDA)
add_definitions(-DENABLE_LOAD_BALANCE_CUDA)
endif()
if(ENABLE_INC_SAMPLES)
add_definitions(-DENABLE_INC_SAMPLES)
endif()
......@@ -227,6 +258,10 @@ if(WITH_LOAD_BALANCING_COLUMN)
add_definitions(-DWITH_LOAD_BALANCING_COLUMN)
endif()
if(WITH_CLIENT_SHOW_STAT)
add_definitions(-DWITH_CLIENT_SHOW_STAT)
endif()
if(WITH_OPTIX_DENOISER)
add_definitions(-DWITH_OPTIX_DENOISER)
......@@ -283,6 +318,10 @@ if(WITH_CLIENT_CESNET)
add_definitions(-DWITH_CLIENT_CESNET)
endif()
if(WITH_RGBA_FORMAT)
add_definitions(-DWITH_RGBA_FORMAT)
endif()
include_directories(${INC})
add_library(cycles${CLIENT_SUFFIX} STATIC ${SRC})
......
......@@ -402,17 +402,17 @@ void client_save_buffer(client_kernel_struct &data)
//#endif
//}
//
void client_set_device(int device) {
void client_set_device(int device, int rank) {
#ifdef WITH_CLIENT_MPI
mpi_set_device(device);
mpi_set_device(device, rank);
#endif
#ifdef WITH_CLIENT_FILE
file_set_device(device);
file_set_device(device, rank);
#endif
#ifdef WITH_CLIENT_MPI_SOCKET
mpi_set_device(device);
mpi_set_device(device, rank);
#endif
}
......
......@@ -58,7 +58,7 @@ bool client_break_loop(client_kernel_struct &data);
//
//void client_path_to_cache(client_kernel_struct &data);
//
void client_set_device(int device);
void client_set_device(int device, int rank);
/////////////////////////////////////////////////////////////
......
This diff is collapsed.
......@@ -78,7 +78,7 @@ namespace cyclesphi {
double file_get_path_time_acc();
//void file_anim_step(client_kernel_struct &data, int s);
void file_set_device(int device);
void file_set_device(int device, int rank);
void file_save_bmp(int offset,
int stride,
......
This diff is collapsed.
......@@ -38,7 +38,7 @@ bool mpi_break_loop(client_kernel_struct &data);
int mpi_get_num_threads();
double mpi_get_path_time_acc();
void mpi_set_device(int device);
void mpi_set_device(int device, int rank);
/////////////////////////////////////////////
//void mpi_denoising_non_local_means(client_kernel_struct &data);
......
......@@ -409,7 +409,7 @@ void socket_path_trace_buffer(client_kernel_struct &data) {
data.client_path_trace_data.stride, data.client_path_trace_data.tile_h, data.client_path_trace_data.tile_w,
data.client_path_trace_data.tile_h2, data.client_path_trace_data.tile_w2,
data.client_path_trace_data.pass_stride,
data.client_path_trace_data.use_load_balancing, data.client_path_trace_data.tile_step, data.client_path_trace_data.compress,
//data.client_path_trace_data.use_load_balancing, data.client_path_trace_data.tile_step, data.client_path_trace_data.compress,
data.client_path_trace_data.buffer, data.client_path_trace_data.pixels
);
......@@ -419,7 +419,7 @@ void socket_path_trace_buffer(client_kernel_struct &data) {
data.client_path_trace_data.tile_h, data.client_path_trace_data.tile_w,
data.client_path_trace_data.tile_h2, data.client_path_trace_data.tile_w2,
data.client_path_trace_data.num_samples, data.client_path_trace_data.tile_step,
SIZE_UCHAR4, data.client_path_trace_data.compress != 0,
SIZE_UCHAR4, //data.client_path_trace_data.compress != 0,
(char*) mpiSocketData->ptr_map[data.client_path_trace_data.pixels],
(char*) &data, NULL, NULL, update_progress, update_break
);
......@@ -427,7 +427,7 @@ void socket_path_trace_buffer(client_kernel_struct &data) {
kernel::mpi_receive_path_trace(data.client_path_trace_data.offset, data.client_path_trace_data.stride,
data.client_path_trace_data.tile_x, data.client_path_trace_data.tile_y, data.client_path_trace_data.tile_h,
data.client_path_trace_data.tile_w, data.client_path_trace_data.num_samples,
SIZE_UCHAR4, data.client_path_trace_data.compress != 0,
SIZE_UCHAR4, //data.client_path_trace_data.compress != 0,
(char*) mpiSocketData->ptr_map[data.client_path_trace_data.pixels],
(char*) &data, NULL, NULL, tex_update, update_progress
);
......@@ -439,7 +439,7 @@ void socket_path_trace_buffer(client_kernel_struct &data) {
data.client_path_trace_data.stride, data.client_path_trace_data.tile_h, data.client_path_trace_data.tile_w,
data.client_path_trace_data.tile_h2, data.client_path_trace_data.tile_w2,
data.client_path_trace_data.pass_stride,
data.client_path_trace_data.use_load_balancing, data.client_path_trace_data.tile_step, data.client_path_trace_data.compress,
//data.client_path_trace_data.use_load_balancing, data.client_path_trace_data.tile_step, data.client_path_trace_data.compress,
data.client_path_trace_data.buffer, data.client_path_trace_data.pixels
);
......@@ -449,14 +449,14 @@ void socket_path_trace_buffer(client_kernel_struct &data) {
data.client_path_trace_data.tile_h, data.client_path_trace_data.tile_w,
data.client_path_trace_data.tile_h2, data.client_path_trace_data.tile_w2,
data.client_path_trace_data.num_samples, data.client_path_trace_data.tile_step,
data.client_path_trace_data.pass_stride * sizeof (float), false,
data.client_path_trace_data.pass_stride * sizeof (float), //false,
(char*) mpiSocketData->ptr_map[data.client_path_trace_data.buffer],
(char*) &data, NULL, NULL, update_progress, update_break
);
else
kernel::mpi_receive_path_trace(data.client_path_trace_data.offset, data.client_path_trace_data.stride,
data.client_path_trace_data.tile_x, data.client_path_trace_data.tile_y, data.client_path_trace_data.tile_h,
data.client_path_trace_data.tile_w, data.client_path_trace_data.num_samples, data.client_path_trace_data.pass_stride * sizeof (float), false,
data.client_path_trace_data.tile_w, data.client_path_trace_data.num_samples, data.client_path_trace_data.pass_stride * sizeof (float), //false,
(char*) mpiSocketData->ptr_map[data.client_path_trace_data.buffer],
(char*) &data, NULL, NULL, tex_update, update_progress
);
......
......@@ -58,10 +58,38 @@ void cuda_path_trace(int numDevice,
int stride,
int tile_h,
int tile_w,
int h,
int w,
char *sample_finished_omp,
char *reqFinished_omp,
int nprocs_cpu,
char *signal_value);
// Declaration only; the definition is not visible in this header.
// Takes two DEVICE_PTR handles (kg_bin, buffer_bin), a start/end sample pair,
// tile position and size, offset/stride, nprocs_cpu and an output-style
// pointer row_times.
// NOTE(review): row_times is double* here, whereas the float* variant of this
// declaration further down in the header is commented out. Presumably the
// per-row timings are written through this pointer -- confirm against the
// definition and make sure callers allocate doubles.
void cuda_path_trace_time(DEVICE_PTR kg_bin,
DEVICE_PTR buffer_bin,
int start_sample,
int end_sample,
int tile_x,
int tile_y,
int offset,
int stride,
int tile_h,
int tile_w,
int nprocs_cpu,
double *row_times);
// Declaration only; the definition is not visible in this header.
// Unlike cuda_path_trace_time, this variant takes a numDevice argument and a
// single DEVICE_PTR (map_buffer_bin) instead of separate kg/buffer handles,
// and has no timing or signalling output parameters.
// NOTE(review): the "_lb" suffix presumably stands for load balancing --
// confirm against the definition.
void cuda_path_trace_lb(int numDevice,
DEVICE_PTR map_buffer_bin,
int start_sample,
int end_sample,
int tile_x,
int tile_y,
int offset,
int stride,
int tile_h,
int tile_w,
int nprocs_cpu);
#if defined(WITH_CLIENT_CUDA_CPU_STAT)
void cuda_path_trace_stat(int numDevice,
DEVICE_PTR kg_bin,
......@@ -112,18 +140,18 @@ void cuda_path_trace_stat(int numDevice,
// char *signal_value);
//#endif
void cuda_path_trace_time(DEVICE_PTR kg_bin,
DEVICE_PTR buffer_bin,
int start_sample,
int end_sample,
int tile_x,
int tile_y,
int offset,
int stride,
int tile_h,
int tile_w,
int nprocs_cpu,
float *row_times);
//void cuda_path_trace_time(DEVICE_PTR kg_bin,
// DEVICE_PTR buffer_bin,
// int start_sample,
// int end_sample,
// int tile_x,
// int tile_y,
// int offset,
// int stride,
// int tile_h,
// int tile_w,
// int nprocs_cpu,
// float *row_times);
/* Device memory */
DEVICE_PTR cuda_alloc_kg(int numDevice);
......@@ -156,7 +184,7 @@ DEVICE_PTR cuda_tex_info_alloc(int numDevice,
size_t data_depth,
bool unimem_flag);
void cuda_tex_info_copy(char *mem, DEVICE_PTR map_id, size_t memSize);
void cuda_tex_info_copy(const char *name, char *mem, DEVICE_PTR map_id, size_t memSize, bool check_uniname = true);
void cuda_const_copy(
int numDevice, DEVICE_PTR kg, const char *name, char *host, size_t size, bool save = true);
......@@ -207,8 +235,20 @@ void cuda_anim_step(int numDevice, DEVICE_PTR kg_bin, char *data_bin, int s);
// void cuda_socket_step(int numDevice, DEVICE_PTR kg_bin, char *data_bin, float *cameratoworld,
// float w, float h);
void cuda_socket_step(int numDevice, DEVICE_PTR kg_bin, char *data_bin, char *cd);
// Declaration only; the definition is not visible in this header.
// Takes numDevice, a DEVICE_PTR (kg_bin), a raw char buffer (data_bin) and
// eight integer bounce limits: overall min/max, per-type maxima for diffuse,
// glossy, transmission and volume, and transparent min/max.
// NOTE(review): presumably these values override the bounce settings stored
// in data_bin and/or on the device -- confirm against the definition.
void cuda_set_bounces(int numDevice,
DEVICE_PTR kg_bin,
char *data_bin,
int min_bounce,
int max_bounce,
int max_diffuse_bounce,
int max_glossy_bounce,
int max_transmission_bounce,
int max_volume_bounce,
int transparent_min_bounce,
int transparent_max_bounce);
void cuda_set_device(int device);
void cuda_set_device(int device, int rank);
int cuda_get_cpu_threads();
......
This diff is collapsed.
This diff is collapsed.
......@@ -22,6 +22,7 @@
#include "client_api.h"
#include <map>
#include <cstdio>
#include <string>
// CCL_NAMESPACE_BEGIN
......@@ -59,6 +60,8 @@ extern OmpMpiData *omp_mpiData;
// int nprocs_cpu,
// char *signal_value);
void omp_enable_stat(bool enable);
void omp_path_trace(int numDevice,
DEVICE_PTR kg_bin,
DEVICE_PTR buffer_bin,
......@@ -71,6 +74,8 @@ void omp_path_trace(int numDevice,
int stride,
int tile_h,
int tile_w,
int h,
int w,
char *sample_finished_omp,
char *reqFinished_omp,
int nprocs_cpu,
......@@ -87,7 +92,7 @@ void omp_path_trace_time(DEVICE_PTR kg_bin,
int tile_h,
int tile_w,
int nprocs_cpu,
float *row_times);
double *line_times);