Commit 6e35b6ab authored by Ondrej Vysocky's avatar Ondrej Vysocky

ENH new version of hdeem, no hdeem_ipmi error #1 #18

parent d9239690
......@@ -63,7 +63,7 @@
# in the interactive job load necessary modules
module load intel/2016.1.150
module load mpirt/5.1.2.150
module load hdeem/2.2.2
module load hdeem/2.2.20ms
module load papi/5.4.1
# compile MERIC
......@@ -232,7 +232,7 @@ If you want to insert Meric regions into code you don't know, instead of Meric p
export LD_LIBRARY_PATH+=:$PATH_TO_MERIC/lib:/usr/local/lib
module load intel/2016.1.150
module load mpirt/5.1.2.150
module load hdeem/2.2.2
module load hdeem/2.2.20ms
module load papi/5.4.1
make
......
### XX.XX.XXXX #################################################################
- update from hdeem/2.2.2 to hdeem/2.2.20ms
- now requires the C++14 standard
- it is now possible to use hdeem, rapl and counters at the same time
- MERIC works in modes - export MERIC_MODE
......@@ -21,3 +22,67 @@
- hdeem measurement counters are moved to a separate folder that has the same name as MERIC_OUTPUT_DIR with the suffix "Counters"
DURATION: 6.22941
DURATION hdeem: 6.22982
Real blade freq: 1808.4
Real vr freq: 169.186
hdeemd_ipmi: Unknown Command 0x00
hdeemd_ipmi: Unknown Command 0x00
HDEEM ERROR: function get_global return status 110
When the power consumption values are requested, HDEEM sends a GPIO
signal to the FPGA component which starts reading the sensors. A sensor that
records the overall blade power consumption is read by the FPGA at a
frequency of 1000 values per second.
Six Voltage Regulator sensors corresponding to the VR-CPU0 and VR-CPU1
processor sockets and to the VR-DIMMAB, VR-DIMMCD, VR-DIMMEF and VR-
DIMMGH memory channels for each socket, are read at a frequency of 100
values per second.
/* Status record of an HDEEM measurement. The printf snippet further down in
 * these notes reads the same field names from a variable called readStatus
 * (presumably of this type; confirm against hdeem.h). */
typedef struct hdeem_status {
/* Status byte. NOTE(review): the bit layout is not shown in these notes;
 * confirm against the status macros in hdeem.h. */
unsigned char status;
/* NOTE(review): presumably the timestamps at which the measurement was
 * started and stopped; confirm against the HDEEM documentation. */
struct timespec start_time;
struct timespec stop_time;
/* Total number of blade / VR values. Dividing these by the measurement
 * duration in seconds gives the "Real blade freq" / "Real vr freq" figures
 * printed in these notes. */
unsigned long long int total_blade_values;
unsigned long long int total_vr_values;
/* Theoretical BMC sensor frequencies; they may differ from the ratio
 * total_*_values / collect_duration. */
int blade_frequency;
int vr_frequency;
/* NOTE(review): presumably values recorded but not yet retrieved; confirm
 * against the HDEEM documentation. */
unsigned long long int pending_blade_values;
unsigned long long int pending_vr_values;
} hdeem_status_t;
* blade_frequency
Theoretical BMC blade sensor frequency (1000 Hz) may differ from the ratio total_blade_values/collect_duration
* vr_frequency
Theoretical BMC vr sensor frequency (100 Hz) may differ from the ratio total_vr_values/collect_duration
printf("Real blade freq: %f\n", readStatus.total_blade_values /(duration.tv_sec + duration.tv_nsec/1000000000.));
printf("Real vr freq: %f\n", readStatus.total_vr_values /(duration.tv_sec + duration.tv_nsec/1000000000.));
printf("Freq blade values: %d\n", readStatus.blade_frequency);
printf("Freq vr values: %d\n", readStatus.vr_frequency);
H INIT
H START
REGION B
H STOP
H CHECK STATUS
DURATION: 2.01931
DURATION hdeem: 2.01113
Real blade freq: 1000.43
Real vr freq: 99.9438
H GET GLOBAL
hdeemd_ipmi: Unknown Command 0x00
hdeemd_ipmi: Unknown Command 0x00
H DATA FREE
H CLOSE
......@@ -21,7 +21,7 @@ extern "C"
#include <string.h>
#endif
#define HDEEM_VERSION "2.2.2"
#define HDEEM_VERSION "2.2.20ms"
#define StartedBy(status) ((status >> 1 & 1) ? "IPMI" : "GPIO")
#define IsStartedByIpmi(status) (status >> 1 & 1)
......@@ -139,7 +139,6 @@ typedef struct hdeem_status{
#include <freeipmi/freeipmi.h>
#include <freeipmi/api/ipmi-api.h>
/* Describe how to connect to the BMC */
typedef struct hdeem_bmc_data {
char * host;
......@@ -157,6 +156,8 @@ typedef struct hdeem_bmc_data {
int vr_frequency;
struct timespec skew_blade;
struct timespec skew_vr;
int lib2d[2], d2lib[2], de2lib[2];
int hdeemd_pid;
} hdeem_bmc_data_t;
/* One snapshot of the blade power sensor(s) */
......
......@@ -4,6 +4,7 @@ CC=icpc
MPICC=mpiicpc
HDEEM=-L../hdeem -lhdeem
FREEIPMI=-lfreeipmi
X86ADAPT=-L/usr/local/lib -lx86_adapt
CPUFREQ=-lcpufreq
PAPI=-L/sw/taurus/libraries/papi/5.4.1/lib/ -lpapi
......@@ -14,17 +15,17 @@ all: mpiTest basicTest blasTest
######
mpiTest: test_mpi.cpp
$(MPICC) -O0 -g test_mpi.cpp -L../lib -lmericmpi -lrt -qopenmp $(HDEEM) $(X86ADAPT) $(CPUFREQ) $(PAPI) -o test_mpi
$(MPICC) -O0 -g test_mpi.cpp -L../lib -lmericmpi -lrt -qopenmp $(HDEEM) $(FREEIPMI) $(X86ADAPT) $(CPUFREQ) $(PAPI) -o test_mpi
basicTest: test.cpp
$(CC) -O0 -g test.cpp -L../lib -lmeric -qopenmp $(HDEEM) $(X86ADAPT) $(CPUFREQ) $(PAPI) -lrt -o test
$(CC) -O0 -g test.cpp -L../lib -lmeric -qopenmp $(HDEEM) $(FREEIPMI) $(X86ADAPT) $(CPUFREQ) $(PAPI) -lrt -o test
blasTest: blas_test.c
$(info )
$(info WARNING: Makefile loads mkl module that is necessary for BLAS test )
$(info )
module load mkl;
$(CC) -O0 -g blas_test.c -L../lib -I../include -lmeric -mkl -qopenmp $(HDEEM) $(X86ADAPT) $(CPUFREQ) $(PAPI) -lrt -o blasTest
$(CC) -O0 -g blas_test.c -L../lib -I../include -lmeric -mkl -qopenmp $(HDEEM) $(FREEIPMI) $(X86ADAPT) $(CPUFREQ) $(PAPI) -lrt -o blasTest
######
......
......@@ -14,7 +14,7 @@ modules()
module purge
module load intel/2016.1.150
module load mpirt/5.1.2.150
module load hdeem/2.2.2
module load hdeem/2.2.20ms
module load papi/5.4.1
}
......@@ -57,11 +57,11 @@ modules
### ENVIRONMENT VARIABLES ###
export MERIC_FREQUENCY=25
export MERIC_UNCORE_FREQUENCY=25
export MERIC_NUM_THREADS=12
#export MERIC_NUM_THREADS=12
export MERIC_COUNTERS=perfevent #perfevent papi
export MERIC_MODE=2 #enum {hdeem=0, rapl=1, both=2, run=3}
export MERIC_AGGREGATE=0
export MERIC_CONTINUAL=0
export MERIC_CONTINUAL=1
export MERIC_DETAILED=1
# export MERIC_DEBUG=1
......
#!/bin/bash
#SBATCH -N 2
#SBATCH -N 1
#SBATCH --exclusive
#SBATCH -p haswell
#SBATCH -A p_readex #to account your compute time on the readex project
#SBATCH --mem 62000 #to the memory available on the node. Some nodes have more memory. Please see the documentation.
##SBATCH --mem=126000
#SBATCH -t 0-1:00
#SBATCH -t 0-0:10
#SBATCH --comment="cpufreqchown"
#SBATCH --cpu-freq=Low
##SBATCH --mail-type=ALL
......@@ -27,7 +27,7 @@
module purge
module load intel/2016.1.150
module load mpirt/5.1.2.150
module load hdeem/2.2.2
module load hdeem/2.2.20ms
module load papi/5.4.1
# COMPILE
......@@ -35,28 +35,30 @@ make
# SET MERIC OUTPUT
#export MERIC_COUNTERS=perfevent
#export MERIC_MODE=2
export MERIC_MODE=2
export MERIC_DEBUG=1
# export MERIC_AGGREGATE=0
# export MERIC_DETAILED=0
# export MERIC_CONTINUAL=0
# FOR EACH SETTINGS
for proc in 48
for proc in 24
do
for thread in 1 #{1..24..1}
do
for cpu_freq in 12 # {12..25..1}
for cpu_freq in 12 14 # {12..25..1}
do
for uncore_freq in 28 #{12..30..1}
for uncore_freq in 28 30 #{12..30..1}
do
# OUTPUT NAMES
export MERIC_OUTPUT_FILENAME=$thread"_"$cpu_freq"_"$uncore_freq
export MERIC_OUTPUT_DIR="TESTMPI"
# TEST SETTINGS
export MERIC_NUM_THREADS=1
export MERIC_NUM_THREADS=$thread
export MERIC_FREQUENCY=$cpu_freq
export MERIC_UNCORE_FREQUENCY=$uncore_freq
#export MERIC_COUNTERS=perfevent
export MERIC_AGGREGATE=0
# RUN THE TEST
echo
echo TEST $MERIC_OUTPUT_FILENAME
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment