Something went wrong on our end
Makefile 4.95 KiB
# Mandelbrot benchmark makefile
# Usage: make [icc] target [target]
# ---- Host toolchain: GCC and the MPI compiler wrapper ----
# Compiler drivers.
CC    := gcc
MPICC := mpicc
# Per-ISA code-generation switches. The build rules select one of them
# according to keywords found in the source file name.
CFSSE  := -msse
CFAVX  := -mavx
CFAVX2 := -mavx2
CF512  := -mavx512f
CFNAT  := -march=native
# OpenMP switch and the libraries appended to the native link line.
CFOMP := -fopenmp
LIBS  := -lm
# CUDA config
NVCC = nvcc
# Optional launch-geometry overrides passed to the CUDA sources as
# preprocessor defines, e.g. `make NBLOCKS=64 NTHREADS=256 cudafma`.
# DFLAGS is only set when NBLOCKS is given. -DNTHREADS is added only when
# NTHREADS has a value, so `make NBLOCKS=64 ...` alone no longer produces a
# value-less `-DNTHREADS=` definition. DFLAGS stays a recursive variable, so
# both values are looked up when a recipe is expanded.
ifdef NBLOCKS
DFLAGS = -DNBLOCKS=$(NBLOCKS) $(if $(NTHREADS),-DNTHREADS=$(NTHREADS))
endif
# ---- Remaining external tools ----
# HIP compiler driver; -w is passed on every invocation.
HIPCC := hipcc -w
# MinGW-w64 cross compilers used for the Windows (.exe) builds:
# 32-bit and 64-bit respectively.
MINGW32 := i686-w64-mingw32-gcc
MINGW64 := x86_64-w64-mingw32-gcc
# Octave interpreter used to generate the mbdata / mbsdata files.
OCTAVE := octave
# Targets
# One list per main target (omp, ompwin, mpi, ...), naming the files it builds.
# The OpenMP programs share their stems: .x is the native build, .exe the
# MinGW build.
omp_stems := cpuid \
             mandelbrot-real-sse-omp32 \
             mandelbrot-real-sse-omp \
             mandelbrot-real-avx-omp \
             mandelbrot-real-fma-omp \
             mandelbrot-real-fma512-omp
OMP    := $(addsuffix .x,$(omp_stems))
OMPWIN := $(addsuffix .exe,$(omp_stems))
MPI := mandelbrot-real-sse-mpi-dump.x \
       mandelbrot-real-avx-mpi-dump.x \
       mandelbrot-real-fma-mpi-mem4-dump.x \
       mandelbrot-real-fma-mpi-dump-mic.x \
       mandelbrot-ridge-real-fma-zmm-mpi.x
CUDA := mandelbrot-real-fma-ptx-f16-dump.x \
        mandelbrot-real-fma-ptx-f16x2-dump.x \
        mandelbrot-real-fma-ptx-f32-dump.x \
        mandelbrot-real-fma-ptx-dump.x \
        stream_gpu_flops.x
RDNA := mandelbrot-real-fma-rdna-f32-dump.x \
        mandelbrot-real-fma-rdna-f64-dump.x \
        accumulator-mfma-cdna-f32.x
WMMA := mandelbrot-real-wmma-ptx-f16-dump.x \
        mandelbrot-real-wmma-ptx-f64-dump.x
ARM := mandelbrot-real-fma-sve-f64-omp.x \
       jansik-real-fmla-neon-f64-omp.x \
       mandelbrot-real-neon-f64-omp.x
POWER := mandelbrot-real-fma-power-f64-omp.x \
         accumulator-ger-power-f64-omp.x
# Everything this makefile can produce: printed by `usage`, removed by `clean`.
ALL := $(OMP) $(OMPWIN) $(MPI) $(CUDA) $(WMMA) $(RDNA) $(ARM) $(POWER) \
       mbdata.o mbsdata.o mbdata mbsdata
# General rules
# Drop the legacy suffix rules; this file only uses explicit and pattern rules.
.SUFFIXES:
# These names are commands, not files. Declaring them phony keeps a stray
# file called e.g. `all` or `clean` from masking the target.
.PHONY: usage icc clean all
# Default goal (first target in the file): print a short help text.
# The fixed text is single-quoted so the shell cannot treat [icc] or [target]
# as filename patterns and replace them with matching file names.
usage:
	@echo 'Usage: make [icc] target [target [target] ... ]'
	@echo
	@echo 'Select targets to build.'
	@echo 'Main targets: omp ompwin mpi cudafma cudawmma rdna arm power'
	@echo 'Build targets:'
	@echo $(ALL)
# `make all` only prints the help text; it does not build anything.
all: usage
# Toolchain switch: name `icc` before the build targets on the command line
# (e.g. `make icc omp`) to compile with icc and the mpiicc wrapper.
# The assignments are applied through $(eval) when this recipe is expanded,
# so they only affect targets that make processes after `icc`.
# The GCC-style ISA switches and LIBS are emptied; OpenMP uses -qopenmp.
define icc_settings
CC = icc
MPICC = mpiicc
CFAVX =
CFAVX2 =
CF512 =
CFSSE =
CFOMP = -qopenmp
LIBS =
endef
icc:
	$(eval $(icc_settings))
	@echo Using icc...
# Main targets: each one builds the corresponding list of executables.
# They are command names, not files, so they are declared phony. Without
# that, a file or directory that happens to be called e.g. `arm`, `mpi` or
# `rdna` would make the target look up to date and nothing would be built.
.PHONY: omp ompwin mpi cudafma cudawmma rdna arm power
omp: $(OMP)
ompwin: $(OMPWIN)
mpi: $(MPI)
cudafma: $(CUDA)
cudawmma: $(WMMA)
rdna: $(RDNA)
arm: $(ARM)
power: $(POWER)
# Build rules
# cpuid has explicit rules, so it is compiled without any of the
# file-name-derived architecture or OpenMP flags used by the pattern rules.
# Native build with $(CC):
cpuid.x: cpuid.c
	$(CC) $^ -o $@
# Static 32-bit Windows build with the 32-bit MinGW compiler:
cpuid.exe: cpuid.c
	$(MINGW32) -static -m32 $^ -o $@
# Generic native build: <name>.x is compiled from <name>.c.
# Compiler and flags are chosen from keywords in the source file name:
#   omp              -> add $(CFOMP)
#   mpi              -> compile with $(MPICC) instead of $(CC)
#   sse / avx / fma  -> $(CFSSE) / $(CFAVX) / $(CFAVX2)
#   512, mic, zmm    -> $(CF512)
#   sve              -> $(CFNAT)
#   neon, power      -> no architecture flag
# The tests run top to bottom and a later match replaces an earlier one
# (e.g. "fma512" ends up with $(CF512), "fma-sve" with $(CFNAT)).
# CFARCH and CFOMP are cleared first so values inherited from the caller's
# environment cannot leak into the command line. Every make variable is
# single-quoted on assignment so multi-word values (such as CC="ccache gcc")
# stay intact. The final command is echoed, then run with eval.
%.x: %.c
	@CC='$(CC)'; CFARCH=; CFOMP=; \
	if [ "$(findstring omp,$<)" = omp ]; then CFOMP='$(CFOMP)'; fi; \
	if [ "$(findstring mpi,$<)" = mpi ]; then CC='$(MPICC)'; fi; \
	if [ "$(findstring sse,$<)" = sse ]; then CFARCH='$(CFSSE)'; fi; \
	if [ "$(findstring avx,$<)" = avx ]; then CFARCH='$(CFAVX)'; fi; \
	if [ "$(findstring fma,$<)" = fma ]; then CFARCH='$(CFAVX2)'; fi; \
	if [ "$(findstring 512,$<)" = 512 ]; then CFARCH='$(CF512)'; fi; \
	if [ "$(findstring mic,$<)" = mic ]; then CFARCH='$(CF512)'; fi; \
	if [ "$(findstring zmm,$<)" = zmm ]; then CFARCH='$(CF512)'; fi; \
	if [ "$(findstring sve,$<)" = sve ]; then CFARCH='$(CFNAT)'; fi; \
	if [ "$(findstring neon,$<)" = neon ]; then CFARCH=; fi; \
	if [ "$(findstring power,$<)" = power ]; then CFARCH=; fi; \
	EXEC="$$CC $$CFARCH $$CFOMP $< -o $@ $(LIBS)" ; echo $$EXEC ; \
	eval $$EXEC
# Windows builds of the mandelbrot programs, cross-compiled with MinGW-w64.
# "32" in the source file name selects $(MINGW32); otherwise $(MINGW64) is used.
# The architecture flag follows the sse / avx / fma / 512 keywords, with a
# later match replacing an earlier one. -static and $(CFOMP) are always passed.
# CFARCH is cleared first so a value inherited from the environment cannot
# leak into the command line. Make variables are single-quoted on assignment
# so multi-word values stay intact. The command is echoed, then run with eval.
mandelbrot%.exe : mandelbrot%.c
	@CC='$(MINGW64)'; CFARCH=; \
	if [ "$(findstring 32,$<)" = 32 ]; then CC='$(MINGW32)'; fi; \
	if [ "$(findstring sse,$<)" = sse ]; then CFARCH='$(CFSSE)'; fi; \
	if [ "$(findstring avx,$<)" = avx ]; then CFARCH='$(CFAVX)'; fi; \
	if [ "$(findstring fma,$<)" = fma ]; then CFARCH='$(CFAVX2)'; fi; \
	if [ "$(findstring 512,$<)" = 512 ]; then CFARCH='$(CF512)'; fi; \
	EXEC="$$CC -static $$CFARCH $(CFOMP) $< -o $@" ; echo $$EXEC ; \
	eval $$EXEC
# CUDA build rules. NVARCH is set per target (or per pattern) and selects the
# GPU architecture passed to nvcc. nvcc_cmd is expanded inside each recipe,
# so it picks up the NVARCH value of the target being built.
nvcc_cmd = $(NVCC) --gpu-architecture $(NVARCH)
# FMA programs: one .cu source each, compiled with the optional $(DFLAGS).
mandelbrot-real-fma-ptx%.x: NVARCH = sm_53
mandelbrot-real-fma-ptx%.x: mandelbrot-real-fma-ptx%.cu
	$(nvcc_cmd) $(DFLAGS) $< -o $@
# WMMA programs: the .cu source is compiled and linked together with the
# object file holding the generated input data (all prerequisites via $^).
wmma_f16 := mandelbrot-real-wmma-ptx-f16-dump
wmma_f64 := mandelbrot-real-wmma-ptx-f64-dump
$(wmma_f16).x: NVARCH = sm_70
$(wmma_f16).x: $(wmma_f16).cu mbdata.o
	$(nvcc_cmd) $(DFLAGS) $^ -o $@
$(wmma_f64).x: NVARCH = sm_80
$(wmma_f64).x: $(wmma_f64).cu mbsdata.o
	$(nvcc_cmd) $(DFLAGS) $^ -o $@
# stream_gpu_flops is built without $(DFLAGS).
stream_gpu_flops.x: NVARCH = sm_53
stream_gpu_flops.x: stream_gpu_flops.cu
	$(nvcc_cmd) $< -o $@
# rdna, cdna ROCm/HIP targets: <name>.x is compiled from <name>.cpp with $(HIPCC).
# The target is written as a plain pattern. A $(wildcard *dna) prefix globs
# the working directory rather than target names: it is empty while no file
# ends in "dna", and it mis-anchors or invalidates the rule line as soon as
# such a file or directory exists.
%.x: %.cpp
	$(HIPCC) $< -o $@
# Input data for the WMMA programs: generated with Octave, then wrapped into
# object files that are linked into the executables.
# NBLOCKS / NTHREADS below are target-specific defaults that size the data;
# values given on the make command line take precedence over them.
# mbmatrix and mbsmatrix are called inside Octave and are not defined in this
# makefile; presumably they are Octave functions shipped with the sources
# (verify they are on Octave's load path).
# Octave writes to $@.tmp, which is renamed to $@ only after a successful run.
# A failed run therefore cannot leave a truncated data file that make would
# consider up to date; the temporary file is removed on failure.
mbdata: NBLOCKS=1296
mbdata:
	$(OCTAVE) -q -W --eval 'fd=fopen("$@.tmp","wb"); for i=1:$(NBLOCKS); fwrite(fd,mbmatrix(16),"float"); end; fclose(fd);' || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
mbsdata: NBLOCKS=216
mbsdata: NTHREADS=512
mbsdata:
	$(OCTAVE) -q -W --eval 'fd=fopen("$@.tmp","wb"); for i=1:($(NBLOCKS)*$(NTHREADS)/32); fwrite(fd,mbsmatrix(8),"double"); end; fclose(fd);' || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
# Wrap a raw data file (mbdata, mbsdata) into a relocatable object file.
# ld is run on the data file under its own name ($<).
mb%.o: mb%
	ld -r -b binary -o $@ $<
# Clean rule: delete every file listed in $(ALL), including the generated
# mbdata / mbsdata files. $(RM) is make's predefined `rm -f`, so files that
# were never built are skipped silently.
clean:
	$(RM) $(ALL)
# EOF