// LazyLearning.cpp

/**
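 * Implementation of the LazyLearning class: a wrapper that repeatedly runs an inner
 * LearningMethod on the data subset selected by the error function, until every
 * reported per-entry error is below the configured tolerance.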
 *
 * @author Michal Kravčenko
 * @date 30.7.18 -
 */

#include <random.hpp>
#include <limits>

#include "message.h"

#include "LazyLearning.h"

namespace lib4neuro {
	/**
	 * Creates a lazy-learning wrapper around an existing learning method.
	 *
	 * @param inner_trainer Learning method whose address is stored in inner_method. The
	 *                      object is not copied, so it has to outlive any later use of the
	 *                      stored pointer.
	 * @param tol Value stored as the tolerance of this instance
	 */
	LazyLearning::LazyLearning( LearningMethod &inner_trainer, double tol ) {
		tolerance = tol;
		inner_method = &inner_trainer;
	}

	/**
	 * Destructor; performs no work of its own (nothing is released here).
	 */
	LazyLearning::~LazyLearning( ) = default;

	/**
	 * Runs the inner learning method repeatedly on a subset of the data chosen by the error
	 * function, until all reported per-entry errors are below the tolerance.
	 *
	 * One pass of the outer loop:
	 *  1. asks `ef` for the current subset via divide_data_worst_subset(); the last index in
	 *     `subset_indices` is treated as the entry that was newly added to the subset,
	 *  2. gathers min/max/total errors of the active subset (without the new entry), of the
	 *     new entry, and of the remaining ("shelved") entries, and reduces them over
	 *     lib4neuro::mpi_active_comm,
	 *  3. logs the statistics and stops when every reduced maximum is below the tolerance,
	 *  4. otherwise calls the inner method until ef.eval() drops to the tolerance or below,
	 *     and then calls ef.return_full_data_set_for_training().
	 *
	 * @param ef Error function that selects the subset, is optimized by the inner method and
	 *           is evaluated after each inner run
	 * @param ofs Output stream pointer, passed unchanged to the inner method
	 */
	void LazyLearning::optimize(
		lib4neuro::ErrorFunction& ef,
		std::ofstream* ofs
	) {

        std::vector<size_t> subset_indices;
        std::vector<bool> active_subset;
        std::vector<float> entry_errors;
        while( true ){

            ef.divide_data_worst_subset( subset_indices, active_subset, entry_errors );

            /* A process may end up with an empty local subset; it must not index
             * subset_indices[size - 1] (underflow), yet it still has to take part in every
             * collective call below. Such a process contributes neutral values instead. */
            const bool has_new_entry = !subset_indices.empty( );

            /* entry_errors.size( ) is used as a "no new entry" sentinel, it never equals a loop index */
            /* NOTE(review): assumes the indices in subset_indices are valid positions in entry_errors - confirm */
            const size_t new_subset_index = has_new_entry ? subset_indices.back( ) : entry_errors.size( );

            /* errors of the active subset */
            float subset_error_min = std::numeric_limits<float>::max();
            float subset_error_max = 0.0;
            float subset_error_total = 0.0;

            /* error of the newly added entry; neutral elements of MIN / MAX when there is none */
            float new_subset_entry_error_min = has_new_entry ? entry_errors[ new_subset_index ] : std::numeric_limits<float>::max();
            float new_subset_entry_error_max = has_new_entry ? entry_errors[ new_subset_index ] : 0.0f;

            /* errors of the elements not considered */
            float shelved_error_min = std::numeric_limits<float>::max();
            float shelved_error_max = 0.0;
            float shelved_error_total = 0.0;

            /* processing of the errors */
            /* NOTE(review): assumes active_subset has at least entry_errors.size( ) elements - confirm */
            for( size_t i = 0; i < entry_errors.size(); ++i){
                if( !active_subset[ i ] ){
                    shelved_error_total += entry_errors[ i ];
                    shelved_error_max = std::max(shelved_error_max, entry_errors[ i ] );
                    shelved_error_min = std::min(shelved_error_min, entry_errors[ i ] );
                }
                else if( i != new_subset_index ){
                    /* the new entry is reported separately, so it is left out of the subset statistics */
                    subset_error_total += entry_errors[ i ];
                    subset_error_max = std::max(subset_error_max, entry_errors[ i ] );
                    subset_error_min = std::min(subset_error_min, entry_errors[ i ] );
                }
            }
            int nactive_set = static_cast<int>( subset_indices.size() );
            int nshelved_set = static_cast<int>( active_subset.size() - subset_indices.size() );

            MPI_Allreduce( MPI_IN_PLACE, &nactive_set, 1, MPI_INT, MPI_SUM, lib4neuro::mpi_active_comm );
            MPI_Allreduce( MPI_IN_PLACE, &subset_error_total, 1, MPI_FLOAT, MPI_SUM, lib4neuro::mpi_active_comm );
            MPI_Allreduce( MPI_IN_PLACE, &subset_error_min, 1, MPI_FLOAT, MPI_MIN, lib4neuro::mpi_active_comm );
            MPI_Allreduce( MPI_IN_PLACE, &subset_error_max, 1, MPI_FLOAT, MPI_MAX, lib4neuro::mpi_active_comm );
            MPI_Allreduce( MPI_IN_PLACE, &new_subset_entry_error_min, 1, MPI_FLOAT, MPI_MIN, lib4neuro::mpi_active_comm );
            MPI_Allreduce( MPI_IN_PLACE, &new_subset_entry_error_max, 1, MPI_FLOAT, MPI_MAX, lib4neuro::mpi_active_comm );

            MPI_Allreduce( MPI_IN_PLACE, &nshelved_set, 1, MPI_INT, MPI_SUM, lib4neuro::mpi_active_comm );
            MPI_Allreduce( MPI_IN_PLACE, &shelved_error_total, 1, MPI_FLOAT, MPI_SUM, lib4neuro::mpi_active_comm );
            MPI_Allreduce( MPI_IN_PLACE, &shelved_error_min, 1, MPI_FLOAT, MPI_MIN, lib4neuro::mpi_active_comm );
            MPI_Allreduce( MPI_IN_PLACE, &shelved_error_max, 1, MPI_FLOAT, MPI_MAX, lib4neuro::mpi_active_comm );

            if( subset_indices.size() > 1 ){
                COUT_INFO( "[" << nactive_set << "] subset error: " << subset_error_total << ", in range: " << subset_error_min << " - " << subset_error_max << ", new entry errors: " << new_subset_entry_error_min << " - " << new_subset_entry_error_max );
            }
            else{
                COUT_INFO( "[" << nactive_set << "] new entry error: " << new_subset_entry_error_min << " - " << new_subset_entry_error_max );
            }
            COUT_INFO( "[" << nshelved_set << "] remaining error: " << shelved_error_total << ", in range: " << shelved_error_min << " - " << shelved_error_max << std::endl );

            /* The stop test uses only globally reduced values, so every process takes the
             * same decision. Testing the process-local new entry error here could let some
             * processes leave the loop while others wait in the next collective call. */
            if( shelved_error_max < this->tolerance && subset_error_max < this->tolerance && new_subset_entry_error_max < this->tolerance ){
                break;
            }

            this->inner_method->optimize( ef, ofs );

            /* NOTE(review): there is no cap on the number of repetitions; this loop does not
             * end if the inner method cannot bring ef.eval( ) down to the tolerance */
            double sub_error_after = ef.eval( );
            while( sub_error_after > this->tolerance ){
                this->inner_method->optimize( ef, ofs );
                sub_error_after = ef.eval( );
            }
            ef.return_full_data_set_for_training( );
            COUT_INFO( "------------------------" );
        }
	}

}//end of namespace lib4neuro