// LazyLearning.cpp

/**
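 * Implementation of lib4neuro::LazyLearning, a learning method that wraps another
 * LearningMethod and repeatedly runs it on a working subset of the data that is
 * selected through the error function.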
 *
 * @author Michal Kravčenko
 * @date 30.7.18 -
 */

#include <random.hpp>
#include <limits>
#include <mpi.h>

#include "message.h"

#include "LazyLearning.h"

namespace lib4neuro {
	/**
	 * Builds a lazy-learning wrapper around an existing learning method.
	 *
	 * Only the address of `inner_trainer` is kept, so the referenced object has to
	 * stay alive for as long as optimize( ) may be called on this instance.
	 *
	 * @param inner_trainer learning method invoked by optimize( )
	 * @param tol           error tolerance used by optimize( ) as its stopping threshold
	 */
	LazyLearning::LazyLearning( LearningMethod &inner_trainer, double tol ) {
		tolerance = tol;
		inner_method = &inner_trainer;
	}

	/** Nothing is released here: the wrapped inner method is only referenced, never deleted. */
	LazyLearning::~LazyLearning( ) = default;

	/**
	 * Runs the wrapped learning method repeatedly on a working subset of the data.
	 *
	 * Every pass of the outer loop
	 *  1. calls ef.divide_data_worst_subset( ) to (re)build the working subset
	 *     (presumably it is grown by up to `expansion_len` of the worst entries -
	 *     confirm against ErrorFunction),
	 *  2. accumulates total / min / max of the per-entry errors separately for the
	 *     three entry states and reduces them over lib4neuro::mpi_active_comm,
	 *  3. logs the statistics,
	 *  4. stops as soon as the largest error of the shelved entries and of the
	 *     previously active entries are both below the tolerance,
	 *  5. otherwise calls the inner method until ef.eval( ) is no longer above the
	 *     tolerance and then calls ef.return_full_data_set_for_training( ).
	 *
	 * @param ef  error function that provides the data partitioning and evaluation
	 * @param ofs passed through unchanged to the inner method's optimize( )
	 */
	void LazyLearning::optimize(
		lib4neuro::ErrorFunction& ef,
		std::ofstream* ofs
	) {
		/* Entry states found in active_subset (presumably assigned by
		 * divide_data_worst_subset( )); they double as indices into the
		 * statistics arrays used below. */
		const int STATE_SHELVED = 0;  /* not learned entries */
		const int STATE_NEW = 1;      /* new entries */
		const int STATE_PREVIOUS = 2; /* previous entries */
		const int N_STATES = 3;

		const size_t n_entries = ef.get_n_data_set();

		std::vector<size_t> subset_indices;
		std::vector<int> active_subset( n_entries, STATE_SHELVED );
		std::vector<float> entry_errors( n_entries );

		size_t expansion_len = 10;

		while( true ){

			ef.divide_data_worst_subset( subset_indices, active_subset, entry_errors, expansion_len, this->tolerance * 1e-6 );

			/* Per-state statistics, reset on every pass. Declaring them here (instead
			 * of before the loop) guarantees that no value is read before it is set. */
			float error_total[ N_STATES ];
			float error_min[ N_STATES ];
			float error_max[ N_STATES ];
			for( int state = 0; state < N_STATES; ++state ){
				error_total[ state ] = 0.0f;
				error_min[ state ] = std::numeric_limits<float>::max();
				error_max[ state ] = 0.0f;
			}

			/* processing of the errors */
			for( size_t i = 0; i < entry_errors.size(); ++i ){
				const int state = active_subset[ i ];
				if( state < 0 || state >= N_STATES ){
					/* entries in any other state do not contribute to the statistics */
					continue;
				}

				const float entry_error = entry_errors[ i ];
				error_total[ state ] += entry_error;
				error_max[ state ] = std::max( error_max[ state ], entry_error );
				error_min[ state ] = std::min( error_min[ state ], entry_error );
			}

			/* [0] size of the active subset, [1] number of the remaining entries */
			int set_sizes[ 2 ] = {
				static_cast<int>( subset_indices.size() ),
				static_cast<int>( active_subset.size() - subset_indices.size() )
			};

			/* One collective call per reduction operation; the element-wise results are
			 * the same as reducing every value separately. */
			MPI_Allreduce( MPI_IN_PLACE, set_sizes, 2, MPI_INT, MPI_SUM, lib4neuro::mpi_active_comm );
			MPI_Allreduce( MPI_IN_PLACE, error_total, N_STATES, MPI_FLOAT, MPI_SUM, lib4neuro::mpi_active_comm );
			MPI_Allreduce( MPI_IN_PLACE, error_min, N_STATES, MPI_FLOAT, MPI_MIN, lib4neuro::mpi_active_comm );
			MPI_Allreduce( MPI_IN_PLACE, error_max, N_STATES, MPI_FLOAT, MPI_MAX, lib4neuro::mpi_active_comm );

			const int nactive_set = set_sizes[ 0 ];
			const int nshelved_set = set_sizes[ 1 ];

			/* The branch is chosen by the local subset size, the printed count is the global one. */
			if( subset_indices.size() > 1 ){
				COUT_INFO( "[" << nactive_set << "] subset error: " <<
				          error_total[ STATE_PREVIOUS ] << ", in range: " <<
				          error_min[ STATE_PREVIOUS ] << " - " << error_max[ STATE_PREVIOUS ] <<
				          ", new subset error: " << error_total[ STATE_NEW ] <<
				          ", in range: " << error_min[ STATE_NEW ] <<
				          " - " << error_max[ STATE_NEW ] );
			}
			else{
				COUT_INFO( "[" << nactive_set << "]  new subset error: " <<
				          error_total[ STATE_NEW ] <<
				          ", in range: " << error_min[ STATE_NEW ] <<
				          " - " << error_max[ STATE_NEW ] );
			}
			COUT_INFO( "[" << nshelved_set << "] remaining error: " << error_total[ STATE_SHELVED ] << ", in range: " << error_min[ STATE_SHELVED ] << " - " << error_max[ STATE_SHELVED ] << std::endl );

			/* NOTE(review): the errors of the newly added entries are not part of the
			 * stop criterion, and leaving the loop here skips
			 * return_full_data_set_for_training( ) - confirm both are intended. */
			if( error_max[ STATE_SHELVED ] < this->tolerance && error_max[ STATE_PREVIOUS ] < this->tolerance ){
				break;
			}

			/* Train at least once and keep training while the evaluated error is above
			 * the tolerance. NOTE(review): there is no iteration limit, so this loop
			 * only ends if the inner method reaches the tolerance. */
			double sub_error_after;
			do {
				this->inner_method->optimize( ef, ofs );
				sub_error_after = ef.eval( );
			} while( sub_error_after > this->tolerance );

			ef.return_full_data_set_for_training( );
			COUT_INFO( "------------------------" );

		}
	}

}//end of namespace lib4neuro