diff --git a/easyblocks/n/numpy.py b/easyblocks/n/numpy.py new file mode 100644 index 0000000000000000000000000000000000000000..df2a9df162a4fb74ba9eb0aa8074b7ff76a265d6 --- /dev/null +++ b/easyblocks/n/numpy.py @@ -0,0 +1,372 @@ +## +# Copyright 2009-2021 Ghent University +# +# This file is part of EasyBuild, +# originally created by the HPC team of Ghent University (http://ugent.be/hpc/en), +# with support of Ghent University (http://ugent.be/hpc), +# the Flemish Supercomputer Centre (VSC) (https://www.vscentrum.be), +# Flemish Research Foundation (FWO) (http://www.fwo.be/en) +# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en). +# +# https://github.com/easybuilders/easybuild +# +# EasyBuild is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation v2. +# +# EasyBuild is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with EasyBuild. If not, see <http://www.gnu.org/licenses/>. +## +""" +EasyBuild support for building and installing numpy, implemented as an easyblock + +@author: Stijn De Weirdt (Ghent University) +@author: Dries Verdegem (Ghent University) +@author: Kenneth Hoste (Ghent University) +@author: Pieter De Baets (Ghent University) +@author: Jens Timmerman (Ghent University) +""" +import glob +import os +import re +import tempfile + +import easybuild.tools.environment as env +import easybuild.tools.toolchain as toolchain +from easybuild.easyblocks.generic.fortranpythonpackage import FortranPythonPackage +from easybuild.easyblocks.generic.pythonpackage import det_pylibdir +from easybuild.framework.easyconfig import CUSTOM +from easybuild.tools.build_log import EasyBuildError +from easybuild.tools.filetools import change_dir, mkdir, read_file, remove_dir +from easybuild.tools.modules import get_software_root +from easybuild.tools.run import run_cmd +from distutils.version import LooseVersion + + +class EB_numpy(FortranPythonPackage): + """Support for installing the numpy Python package as part of a Python installation.""" + + @staticmethod + def extra_options(): + """Easyconfig parameters specific to numpy.""" + extra_vars = ({ + 'blas_test_time_limit': [500, "Time limit (in ms) for 1000x1000 matrix dot product BLAS test", CUSTOM], + 'ignore_test_result': [True, "Run numpy test suite, but ignore test result (only log)", CUSTOM], + }) + return FortranPythonPackage.extra_options(extra_vars=extra_vars) + + def __init__(self, *args, **kwargs): + """Initialize numpy-specific class variables.""" + super(EB_numpy, self).__init__(*args, **kwargs) + + self.sitecfg = None + self.sitecfgfn = 'site.cfg' + self.testinstall = True + + def configure_step(self): + """Configure numpy build by composing site.cfg contents.""" + + # see e.g. https://github.com/numpy/numpy/pull/2809/files + self.sitecfg = '\n'.join([ + "[DEFAULT]", + "library_dirs = %(libs)s", + "include_dirs= %(includes)s", + "search_static_first=True", + ]) + + if get_software_root("imkl"): + + if self.toolchain.comp_family() == toolchain.GCC: + # see https://software.intel.com/en-us/articles/numpyscipy-with-intel-mkl, + # section Building with GNU Compiler chain + extrasiteconfig = '\n'.join([ + "[mkl]", + "lapack_libs = ", + "mkl_libs = mkl_rt", + ]) + else: + extrasiteconfig = '\n'.join([ + "[mkl]", + "lapack_libs = %(lapack)s", + "mkl_libs = %(blas)s", + ]) + + else: + # [atlas] the only real alternative, even for non-ATLAS BLAS libs (e.g., OpenBLAS, ACML, ...) + # using only the [blas] and [lapack] sections results in sub-optimal builds that don't provide _dotblas.so; + # it does require a CBLAS interface to be available for the BLAS library being used + # e.g. for ACML, the CBLAS module providing a C interface needs to be used + extrasiteconfig = '\n'.join([ + "[atlas]", + "atlas_libs = %(lapack)s", + "[lapack]", + "lapack_libs = %(lapack)s", # required by scipy, that uses numpy's site.cfg + ]) + + blas = None + lapack = None + fft = None + + if get_software_root("imkl"): + # with IMKL, no spaces and use '-Wl:' + # redefine 'Wl,' to 'Wl:' so that the patch file can do its job + def get_libs_for_mkl(varname): + """Get list of libraries as required for MKL patch file.""" + libs = self.toolchain.variables['LIB%s' % varname].copy() + libs.try_remove(['pthread', 'dl']) + tweaks = { + 'prefix': '', + 'prefix_begin_end': '-Wl:', + 'separator': ',', + 'separator_begin_end': ',', + } + libs.try_function_on_element('change', kwargs=tweaks) + libs.SEPARATOR = ',' + return str(libs) # str causes list concatenation and adding prefixes & separators + + blas = get_libs_for_mkl('BLAS_MT') + lapack = get_libs_for_mkl('LAPACK_MT') + fft = get_libs_for_mkl('FFT') + + # make sure the patch file is there + # we check for a typical characteristic of a patch file that cooperates with the above + # not fool-proof, but better than enforcing a particular patch filename + patch_found = False + patch_wl_regex = re.compile(r"replace\(':',\s*','\)") + for patch in self.patches: + # patches are either strings (extension) or dicts (easyblock) + if isinstance(patch, dict): + patch = patch['path'] + if patch_wl_regex.search(read_file(patch)): + patch_found = True + break + if not patch_found: + raise EasyBuildError("Building numpy on top of Intel MKL requires a patch to " + "handle -Wl linker flags correctly, which doesn't seem to be there.") + + else: + # unless Intel MKL is used, $ATLAS should be set to take full control, + # and to make sure a fully optimized version is built, including _dotblas.so + # which is critical for decent performance of the numpy.dot (matrix dot product) function! + env.setvar('ATLAS', '1') + + lapack = ', '.join([x for x in self.toolchain.get_variable('LIBLAPACK_MT', typ=list) if x != "pthread"]) + fft = ', '.join(self.toolchain.get_variable('LIBFFT', typ=list)) + + libs = ':'.join(self.toolchain.get_variable('LDFLAGS', typ=list)) + includes = ':'.join(self.toolchain.get_variable('CPPFLAGS', typ=list)) + + # CBLAS is required for ACML, because it doesn't offer a C interface to BLAS + if get_software_root('ACML'): + cblasroot = get_software_root('CBLAS') + if cblasroot: + lapack = ', '.join([lapack, "cblas"]) + cblaslib = os.path.join(cblasroot, 'lib') + # with numpy as extension, CBLAS might not be included in LDFLAGS because it's not part of a toolchain + if cblaslib not in libs: + libs = ':'.join([libs, cblaslib]) + else: + raise EasyBuildError("CBLAS is required next to ACML to provide a C interface to BLAS, " + "but it's not loaded.") + + if fft: + extrasiteconfig += "\n[fftw]\nlibraries = %s" % fft + + suitesparseroot = get_software_root('SuiteSparse') + if suitesparseroot: + amddir = os.path.join(suitesparseroot, 'AMD') + umfpackdir = os.path.join(suitesparseroot, 'UMFPACK') + + if not os.path.exists(amddir) or not os.path.exists(umfpackdir): + raise EasyBuildError("Expected SuiteSparse subdirectories are not both there: %s, %s", + amddir, umfpackdir) + else: + extrasiteconfig += '\n'.join([ + "[amd]", + "library_dirs = %s" % os.path.join(amddir, 'Lib'), + "include_dirs = %s" % os.path.join(amddir, 'Include'), + "amd_libs = amd", + "[umfpack]", + "library_dirs = %s" % os.path.join(umfpackdir, 'Lib'), + "include_dirs = %s" % os.path.join(umfpackdir, 'Include'), + "umfpack_libs = umfpack", + ]) + + self.sitecfg = '\n'.join([self.sitecfg, extrasiteconfig]) + + self.sitecfg = self.sitecfg % { + 'blas': blas, + 'lapack': lapack, + 'libs': libs, + 'includes': includes, + } + + super(EB_numpy, self).configure_step() + + # check configuration (for debugging purposes) + cmd = "%s setup.py config" % self.python_cmd + run_cmd(cmd, log_all=True, simple=True) + + def test_step(self): + """Run available numpy unit tests, and more.""" + + # determine command to use to run numpy test suite, + # and whether test results should be ignored or not + if self.cfg['ignore_test_result']: + test_code = 'numpy.test(verbose=2)' + else: + if LooseVersion(self.version) >= LooseVersion('1.15'): + # Numpy 1.15+ returns a True on success. Hence invert to get a failure value + test_code = 'sys.exit(not numpy.test(verbose=2))' + else: + # Return value is a TextTestResult. Check the errors member for any error + test_code = 'sys.exit(len(numpy.test(verbose=2).errors) > 0)' + + # Prepend imports + test_code = "import sys; import numpy; " + test_code + + # LDFLAGS should not be set when testing numpy/scipy, because it overwrites whatever numpy/scipy sets + # see http://projects.scipy.org/numpy/ticket/182 + self.testcmd = "unset LDFLAGS && cd .. && %%(python)s -c '%s'" % test_code + + super(EB_numpy, self).test_step() + + # temporarily install numpy, it doesn't alow to be used straight from the source dir + tmpdir = tempfile.mkdtemp() + abs_pylibdirs = [os.path.join(tmpdir, pylibdir) for pylibdir in self.all_pylibdirs] + for pylibdir in abs_pylibdirs: + mkdir(pylibdir, parents=True) + pythonpath = "export PYTHONPATH=%s &&" % os.pathsep.join(abs_pylibdirs + ['$PYTHONPATH']) + cmd = self.compose_install_command(tmpdir, extrapath=pythonpath) + run_cmd(cmd, log_all=True, simple=True, verbose=False) + + try: + pwd = os.getcwd() + os.chdir(tmpdir) + except OSError as err: + raise EasyBuildError("Faild to change to %s: %s", tmpdir, err) + + # evaluate performance of numpy.dot (3 runs, 3 loops each) + size = 1000 + cmd = ' '.join([ + pythonpath, + '%s -m timeit -n 3 -r 3' % self.python_cmd, + '-s "import numpy; x = numpy.random.random((%(size)d, %(size)d))"' % {'size': size}, + '"numpy.dot(x, x.T)"', + ]) + (out, ec) = run_cmd(cmd, simple=False) + self.log.debug("Test output: %s" % out) + + # fetch result + time_msec = None + msec_re = re.compile(r"\d+ loops, best of \d+: (?P<time>[0-9.]+) msec per loop") + res = msec_re.search(out) + if res: + time_msec = float(res.group('time')) + else: + sec_re = re.compile(r"\d+ loops, best of \d+: (?P<time>[0-9.]+) sec per loop") + res = sec_re.search(out) + if res: + time_msec = 1000 * float(res.group('time')) + elif self.dry_run: + # use fake value during dry run + time_msec = 123 + self.log.warning("Using fake value for time required for %dx%d matrix dot product under dry run: %s", + size, size, time_msec) + else: + raise EasyBuildError("Failed to determine time for numpy.dot test run.") + + # make sure we observe decent performance + if time_msec < self.cfg['blas_test_time_limit']: + self.log.info("Time for %dx%d matrix dot product: %d msec < %d msec => OK", + size, size, time_msec, self.cfg['blas_test_time_limit']) + else: + raise EasyBuildError("Time for %dx%d matrix dot product: %d msec >= %d msec => ERROR", + size, size, time_msec, self.cfg['blas_test_time_limit']) + try: + os.chdir(pwd) + remove_dir(tmpdir) + except OSError as err: + raise EasyBuildError("Failed to change back to %s: %s", pwd, err) + + def install_step(self): + """Install numpy and remove numpy build dir, so scipy doesn't find it by accident.""" + super(EB_numpy, self).install_step() + + builddir = os.path.join(self.builddir, "numpy") + try: + if os.path.isdir(builddir): + os.chdir(self.builddir) + remove_dir(builddir) + else: + self.log.debug("build dir %s already clean" % builddir) + + except OSError as err: + raise EasyBuildError("Failed to clean up numpy build dir %s: %s", builddir, err) + + def run(self): + """Install numpy as an extension""" + super(EB_numpy, self).run() + + return self.make_module_extra_numpy_include() + + def sanity_check_step(self, *args, **kwargs): + """Custom sanity check for numpy.""" + + # can't use self.pylibdir here, need to determine path on the fly using currently active 'python' command; + # this is important for numpy installations for multiple Python version (via multi_deps) + custom_paths = { + 'files': [], + 'dirs': [det_pylibdir()], + } + + custom_commands = [] + + if LooseVersion(self.version) >= LooseVersion("1.10"): + # generic check to see whether numpy v1.10.x and up was built against a CBLAS-enabled library + # cfr. https://github.com/numpy/numpy/issues/6675#issuecomment-162601149 + blas_check_pytxt = '; '.join([ + "import sys", + "import numpy", + "blas_ok = 'HAVE_CBLAS' in dict(numpy.__config__.blas_opt_info['define_macros'])", + "sys.exit((1, 0)[blas_ok])", + ]) + custom_commands.append('python -c "%s"' % blas_check_pytxt) + else: + # _dotblas is required for decent performance of numpy.dot(), but only there in numpy 1.9.x and older + custom_commands.append("python -c 'import numpy.core._dotblas'") + + return super(EB_numpy, self).sanity_check_step(custom_paths=custom_paths, custom_commands=custom_commands) + + def make_module_extra_numpy_include(self): + """ + Return update statements for $CPATH specifically for numpy + """ + numpy_core_subdir = os.path.join('numpy', 'core') + numpy_core_dirs = [] + cwd = change_dir(self.installdir) + for pylibdir in self.all_pylibdirs: + numpy_core_dirs.extend(glob.glob(os.path.join(pylibdir, numpy_core_subdir))) + numpy_core_dirs.extend(glob.glob(os.path.join(pylibdir, 'numpy*.egg', numpy_core_subdir))) + change_dir(cwd) + + txt = '' + for numpy_core_dir in numpy_core_dirs: + txt += self.module_generator.prepend_paths('CPATH', os.path.join(numpy_core_dir, 'include')) + for lib_env_var in ('LD_LIBRARY_PATH', 'LIBRARY_PATH'): + txt += self.module_generator.prepend_paths(lib_env_var, os.path.join(numpy_core_dir, 'lib')) + + return txt + + def make_module_extra(self): + """ + Add additional update statements in module file specific to numpy + """ + txt = super(EB_numpy, self).make_module_extra() + txt += self.make_module_extra_numpy_include() + return txt