Skip to content
Snippets Groups Projects
Commit ae66d7a4 authored by root's avatar root
Browse files

Added goal gap threshold for slaves

parent 65198ca1
Branches
No related tags found
No related merge requests found
...@@ -46,7 +46,7 @@ int m_rank = -1; ...@@ -46,7 +46,7 @@ int m_rank = -1;
int m_numProc = -1; int m_numProc = -1;
int m_master = -1; int m_master = -1;
int m_threadsNumberTotal = 1; int m_threadsNumberTotal = 1;
int m_number_of_active_nodes = 1; //1, 2, 4, 8, 16.
int m_workDone = 0; int m_workDone = 0;
public: public:
......
...@@ -234,7 +234,6 @@ RTLIB_ExitCode_t MpiUncertainty::onRun() { ...@@ -234,7 +234,6 @@ RTLIB_ExitCode_t MpiUncertainty::onRun() {
{ {
// Set number of OMP threads to uncertainty, generate chunk of MC samples and execute simulations // Set number of OMP threads to uncertainty, generate chunk of MC samples and execute simulations
m_uncertainty->RunMC(m_threadsNumber); m_uncertainty->RunMC(m_threadsNumber);
} }
if(m_rank == m_master) if(m_rank == m_master)
...@@ -315,6 +314,8 @@ RTLIB_ExitCode_t MpiUncertainty::onMonitor() { ...@@ -315,6 +314,8 @@ RTLIB_ExitCode_t MpiUncertainty::onMonitor() {
100.0 * (float)m_jobsDone / (float)m_jobsNumber[m_qosMode], 100.0 * (float)m_jobsDone / (float)m_jobsNumber[m_qosMode],
Cycles(), m_threadsNumber, GetCPS() * m_threadsNumber); Cycles(), m_threadsNumber, GetCPS() * m_threadsNumber);
int goal_gap = 0;
std::vector<int> accelerate_flags(m_numProc, 0);
if (GetCPS() == 0.0) if (GetCPS() == 0.0)
return RTLIB_OK; return RTLIB_OK;
...@@ -330,56 +331,64 @@ RTLIB_ExitCode_t MpiUncertainty::onMonitor() { ...@@ -330,56 +331,64 @@ RTLIB_ExitCode_t MpiUncertainty::onMonitor() {
// defined by the QoS mode, the higher AWM is requested // defined by the QoS mode, the higher AWM is requested
// int timeFrameMs = m_timeFrame[m_qosMode] * 1000; // int timeFrameMs = m_timeFrame[m_qosMode] * 1000;
// int goalGap = (1 - (timeFrameMs / estimatedTotalTime)) * 100; // int goalGap = (1 - (timeFrameMs / estimatedTotalTime)) * 100;
int remaining_jobs = m_jobsNumber[m_qosMode] - m_jobsDone;
float remaining_time = int remaining_jobs = m_jobsNumber[m_qosMode] - m_jobsDone;
std::max(1.0, m_timeFrame[m_qosMode] - (m_timer.getElapsedTimeMs() / 1000.0)); float jobs_per_second = (float) m_threadsNumber * GetCPS();
float jobs_per_second = (float)m_threadsNumber * GetCPS(); float remaining_time = m_timeFrame[m_qosMode] - (m_timer.getElapsedTimeMs() / 1000.0);
float ideal_jobs_per_second = (float)remaining_jobs / remaining_time; float ideal_jobs_per_second = (float) remaining_jobs / remaining_time;
float goal_gap = 100.0 * (jobs_per_second - ideal_jobs_per_second) / ideal_jobs_per_second;
logger->Warn("Time spend: %f", m_timer.getElapsedTimeMs() / 1000.0); if(remaining_time <= 0.0)
{
goal_gap = -1000; // Really low value to motivate BBQUE to give the highest amount of resources possible
}
else
{
goal_gap = 100 * (jobs_per_second - ideal_jobs_per_second) / ideal_jobs_per_second;
}
logger->Warn("Time spend: %f s", m_timer.getElapsedTimeMs() / 1000.0);
logger->Warn("Remaining jobs: %d", remaining_jobs); logger->Warn("Remaining jobs: %d", remaining_jobs);
logger->Warn("Remaining time: %f", remaining_time); logger->Warn("Remaining time: %f", remaining_time);
logger->Warn("Current cps: %f", jobs_per_second); logger->Warn("Current cps: %f", jobs_per_second);
logger->Warn("Ideal cps: %f", ideal_jobs_per_second); logger->Warn("Ideal cps: %f", ideal_jobs_per_second);
logger->Warn("Goal Gap for Cycle %d is %.2f", Cycles(), goal_gap); logger->Warn("Goal Gap for Cycle %d is %d", Cycles(), goal_gap);
SetGoalGap(goal_gap);
MPI_Bcast(&goal_gap, 1, MPI_INT, m_master, MPI_COMM_WORLD);//tonipat@201610101 send Goal gap to the slaves goal_gap/(#of slaves) //Option A ==> Goal Gap is sent to all the slaves
// MPI_Bcast(&goal_gap, 1, MPI_INT, m_master, MPI_COMM_WORLD);//tonipat@201610101 send Goal gap to the slaves goal_gap/(#of slaves)
//std::cout << "FRAME " << m_timeFrame[m_qosMode] << std::endl; //Option B
// if(estimatedTotalTime > timeFrameMs)
//{ int goal_gap_threshold = 20; //if it is higher or lower of a threshold, then we activate more nodes1->2->8->16.
//int goalGap = (1 - (estimatedTotalTime / timeRequired - 1)) * 100;
// logger->Warn(RANK("MpiUncertainty::onMonitor() : Requesting higher AWM, gap: %d, estimation: %f, limit: %d"),
// goalGap, estimatedTotalTime/1000.0, m_timeFrame[m_qosMode]); if(goal_gap >= goal_gap_threshold)
// SetGoalGap(goalGap); --m_number_of_active_nodes;
// } if(goal_gap <= -goal_gap_threshold)
// else if (estimatedTotalTime < 0.85 * timeFrameMs) ++m_number_of_active_nodes;
// {
// logger->Warn(RANK("MpiUncertainty::onMonitor() : Requesting lower AWM, gap: %d, estimation: %f, limit: %f"),
// goalGap, estimatedTotalTime/1000.0, m_timeFrame[m_qosMode]); if(m_number_of_active_nodes <= 0) m_number_of_active_nodes = 0;
// //SetGoalGap(0); if(m_number_of_active_nodes > m_numProc) m_number_of_active_nodes = m_numProc;
// } logger->Info(RANK("MpiUncertainty::onMonitor() : Active slaves: %d"), m_number_of_active_nodes);
//else
//{ if(m_number_of_active_nodes)
// logger->Info(RANK("MpiUncertainty::onMonitor() : Current AWM is OK, gap: %d, estimation: %f, limit: %d"), goal_gap = goal_gap / m_number_of_active_nodes;
// goalGap, estimatedTotalTime/1000.0, m_timeFrame[m_qosMode]);
//SetGoalGap(0); SetGoalGap(goal_gap);
// } MPI_Bcast(&goal_gap, 1, MPI_INT, m_master, MPI_COMM_WORLD);
// Exploit less threads if less jobs remain std::fill_n(accelerate_flags.begin(), m_number_of_active_nodes, 1); // Master rank should have accelerate always eq 1
// if ( m_jobsNumber[m_qosMode] - m_jobsDone < m_threadsNumber ) MPI_Scatter(accelerate_flags.data(), 1, MPI_INT, &m_accelerate, 1, MPI_INT, m_master, MPI_COMM_WORLD);
// m_threadsNumber = m_jobsNumber[m_qosMode] - m_jobsDone;
} }
else else
{ {
// TODO always ask for higher AWM (accelerator mode) ? // Receive broadcasted goal gap
MPI_Bcast(&goal_gap, 1, MPI_INT, m_master, MPI_COMM_WORLD);
// Receive goal gap from master rank SetGoalGap(goal_gap);
//int goalGap = 0; // Receive accelerate flag
// MPI_Bcast(&goal_gap, 1, MPI_INT, m_master, MPI_COMM_WORLD); MPI_Scatter(accelerate_flags.data(), 1, MPI_INT, &m_accelerate, 1, MPI_INT, m_master, MPI_COMM_WORLD);
//logger->Info(RANK("MpiUncertainty::onMonitor() : Received goal gap %d"),goal_gap); logger->Info(RANK("MpiUncertainty::onMonitor() : Received accelerate flag %d"), m_accelerate);
//SetGoalGap(goal_gap);
} }
return RTLIB_OK; return RTLIB_OK;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment