Source code for glompo.interfaces.params

import warnings
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Tuple, Union

import numpy as np
import tables as tb
from scm.params.common._version import __version__ as PARAMS_VERSION
from scm.params.common.parallellevels import ParallelLevels
from scm.params.common.reaxff_converter import geo_to_params, trainset_to_params
from scm.params.core.dataset import DataSet, SSE
from scm.params.core.jobcollection import JobCollection
from scm.params.core.opt_components import _Step
from scm.params.optimizers.base import BaseOptimizer, MinimizeResult
from scm.params.parameterinterfaces.base import BaseParameters
from scm.params.parameterinterfaces.reaxff import ReaxParams  # Instead of ReaxFFParameters for backward compatibility
from scm.params.parameterinterfaces.xtb import XTBParams
from scm.plams.core.errors import ResultsError
from scm.plams.interfaces.adfsuite.reaxff import reaxff_control_to_settings

from ..core.manager import GloMPOManager
from ..opt_selectors.baseselector import BaseSelector

# DataSetEvaluationError cannot be imported from every ParAMS version. When the import fails the name is aliased to
# ResultsError so that the rest of the module can reference DataSetEvaluationError unconditionally in except clauses.
try:
    from scm.params.core.dataset import DataSetEvaluationError
except ImportError:
    # Different versions of ParAMSs raise different error types.
    DataSetEvaluationError = ResultsError

# Public names exported by a star-import of this module.
__all__ = ("GlompoParamsWrapper",
           "ReaxFFError",
           "XTBError",
           "setup_reax_from_classic",
           "setup_reax_from_params",
           "setup_xtb_from_params",)

def _parse_version(version: str) -> Tuple[int, ...]:
    """ Converts a dotted version string into a tuple of integers which can be compared against other tuples.

    Only the leading digits of every dot-separated component are used and parsing stops at the first component which
    does not start with a digit. Plain versions such as ``'0.5.1'`` give ``(0, 5, 1)`` while suffixed versions such as
    ``'0.5.1rc1'`` or ``'0.5.1.dev0'`` also give ``(0, 5, 1)`` instead of raising a :obj:`ValueError` at import time.
    """
    numbers = []
    for component in version.split('.'):
        digits = ''
        for char in component:
            if char not in '0123456789':
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
    return tuple(numbers)


# Numeric ParAMS version used to switch on features which only exist in some releases.
PARAMS_VERSION_INFO = _parse_version(PARAMS_VERSION)


class _FunctionWrapper:
    """ Adapter around the callable built by ParAMS internals (an instance of
    :class:`scm.params.core.opt_components._Step`) so that it can be handed to :attr:`.GloMPOManager.task`.
    Serves as the single place to adjust should compatibility with other optimizers be needed.
    """

    def __init__(self, func: _Step):
        self.func = func

        # Nothing more to do if no callbacks were attached to the wrapped function.
        if not self.func.cbs:
            return

        message = ("Callbacks provided through the Optimization class are ignored. Callbacks to individual "
                   "optimizers can be passed to GloMPO through BaseSelector objects. Callbacks to control the "
                   "manager itself are passed using GloMPO BaseChecker objects, some conditions should be sent "
                   "as BaseHunter objects.")
        warnings.warn(message, UserWarning)

        # The callbacks are discarded after the user has been told they will not be used.
        self.func.cbs = None

    def __call__(self, pars) -> float:
        """ Forwards the parameter vector to the wrapped function and returns its result. """
        value = self.func(pars)
        return value


class GlompoParamsWrapper(BaseOptimizer):
    """ Wraps the GloMPO manager into a ParAMS :class:`~scm.params.optimizers.base.BaseOptimizer`.
    This is not the recommended way to make use of the GloMPO interface, it is preferable to make use of the
    :class:`.BaseParamsError` classes. This class is only applicable in cases where the ParAMS
    :class:`~scm.params.core.parameteroptimization.Optimization` class interface is preferred.

    Parameters
    ----------
    opt_selector
        Initialised :class:`.BaseSelector` object which specifies how optimizers are selected and initialised.
    **manager_kwargs
        Optional arguments to the :class:`.GloMPOManager` initialisation function.

    Notes
    -----
    `manager_kwargs` accepts all arguments of :meth:`.GloMPOManager.setup` but required GloMPO arguments
    :attr:`~.GloMPOManager.task` and :attr:`~.GloMPOManager.bounds` will be overwritten as they are passed by the
    :meth:`minimize` function in accordance with ParAMS API.
    """

    def __init__(self, opt_selector: BaseSelector, **manager_kwargs):
        self.manager = GloMPOManager()

        # task and bounds are always supplied by minimize; any user supplied values are dropped here so that they
        # are not sent to GloMPOManager.setup twice.
        self.manager_kwargs = {key: val for key, val in manager_kwargs.items() if key not in ('task', 'bounds')}

        self.selector = opt_selector

    def minimize(self,
                 function: _Step,
                 x0: Sequence[float],
                 bounds: Sequence[Tuple[float, float]],
                 workers: int = 1) -> MinimizeResult:
        """
        Passes 'function' to GloMPO to be minimized. Returns an instance of MinimizeResult.

        Parameters
        ----------
        function
            Function to be minimized, this is passed as GloMPO's :attr:`~.GloMPOManager.task` parameter.
        x0
            Ignored by GloMPO, the correct way to control the optimizer starting points is by using GloMPO
            :class:`.BaseGenerator` objects.
        bounds
            Sequence of (min, max) pairs used to bound the search area for every parameter. The 'bounds' parameter is
            passed to GloMPO as its :attr:`~.GloMPOManager.bounds` parameter.
        workers
            Represents the maximum number of optimizers run in parallel. Passed to GloMPO as its
            :attr:`~.GloMPOManager.max_jobs` parameter if it has not been sent during initialisation via
            `manager_kwargs` otherwise ignored. If allowed to default this will usually result in as many
            optimizers as there are cores available.

        Returns
        -------
        MinimizeResult
            Holds the best parameter vector (``x``), its function value (``fx``) and a ``success`` flag which is
            only :obj:`True` if the manager reports convergence and a non-empty solution was found.

        Warns
        -----
        RuntimeWarning
            Always, to signal that `x0` is not used.

        Notes
        -----
        GloMPO is not currently compatible with using multiple :class:`~scm.params.core.dataset.DataSet` and only the
        first one will be considered.

        By default ParAMS shifts and scales all parameters to the interval (0, 1). GloMPO will work in this space and be
        blind to the true bounds, thus results from the GloMPO logs cannot be applied directly to the function.
        """

        warnings.warn("The x0 parameter is ignored by GloMPO. To control the starting locations of optimizers within "
                      "GloMPO make use of its BaseGenerator objects.", RuntimeWarning)

        # A max_jobs value given at initialisation takes priority over workers.
        self.manager_kwargs.setdefault('max_jobs', workers)

        # Silence function printing
        function.v = False

        self.manager.setup(task=_FunctionWrapper(function), bounds=bounds, opt_selector=self.selector,
                           **self.manager_kwargs)

        result = self.manager.start_manager()

        # Reshape glompo.common.namedtuples.Result into scm.params.optimizers.base.MinimizeResult
        params_res = MinimizeResult()
        params_res.x = result.x
        params_res.fx = result.fx
        # Guard against a missing solution (x is None) so that a failed run is reported rather than raising.
        params_res.success = bool(self.manager.converged and result.x is not None and len(result.x) > 0)

        return params_res


class BaseParamsError:
    """ Base error function instance from which other classes derive depending on the engine used e.g. ReaxFF, xTB etc.
    Primarily initialized from ParAMS objects. To initialize from files see the class methods
    :meth:`~.ReaxFFError.from_classic_files` or :meth:`~.ReaxFFError.from_params_files`.

    Parameters
    ----------
    data_set
        Reference data used to compare against force field results.

    job_collection
        AMS jobs from which the data can be extracted for comparison to the :class:`~scm.params.core.dataset.DataSet`

    parameters
        :class:`~scm.params.parameterinterfaces.base.BaseParameters` object which holds the force field values, ranges,
        engine and which parameters are active or not.

    validation_dataset
        If a validation set is being used and evaluated along with the training set, it may be added here.
        Jobs for the validation set are expected to be included in `job_collection`.

    scale_residuals
        See :attr:`scale_residuals`.

    Notes
    -----
    The class provides several convenience functions to access/read/modify the force field parameters (for example:
    :attr:`n_parms`, :attr:`active_names`, :meth:`set_parameters`, :meth:`reweigh_residuals` etc.). These are typically
    light wrappers around various :attr:`par_eng` commands. Not all forms of interface have been provided and, in
    general, the user may access the :attr:`par_eng` directly for fine control.

    Attributes
    ----------
    dat_set : ~scm.params.core.dataset.DataSet
        Represents the training set.

    job_col : ~scm.params.core.jobcollection.JobCollection
        Represents the jobs from which model results will be extracted and compared to the training set.

    loss : Union[str, ~scm.params.core.dataset.Loss]
        Method by which individual errors are grouped into a single error function value.

    par_eng : ~scm.params.parameterinterfaces.base.BaseParameters
        Parameter engine interface representing the model and its parameters to tune.

    par_levels : ~scm.params.common.parallellevels.ParallelLevels
        The layers of parallelism possible within the evaluation of the jobs.

    scale_residuals : bool
        If :obj:`True` then the raw residuals (i.e. the differences between engine evaluation and training data)
        will be scaled by the weight and sigma values in the datasets i.e. :code:`r_scaled = weight * (r / sigma) ** 2`.
        Otherwise the raw residual is returned. This setting effects :meth:`resids` and :meth:`detailed_call`.

    val_set : ~scm.params.core.dataset.DataSet
        Optional validation set to evaluate in parallel to the training set.
    """

    def __init__(self, data_set: DataSet, job_collection: JobCollection, parameters: BaseParameters,
                 validation_dataset: Optional[DataSet] = None,
                 scale_residuals: bool = False):
        self.dat_set = data_set
        self.job_col = job_collection
        self.par_eng = parameters
        self.val_set = validation_dataset

        self.scale_residuals = scale_residuals

        self.loss = SSE()
        self.par_levels = ParallelLevels(jobs=1)

    @property
    def n_parms(self) -> int:
        """ Returns the number of active parameters.

        See Also
        --------
        :attr:`n_all_parms`
        """
        return len(self.par_eng.active.x)

    @property
    def n_all_parms(self) -> int:
        """ Returns the total number of active and inactive parameters.

        See Also
        --------
        :attr:`n_parms`
        """
        return len(self.par_eng.is_active)

    @property
    def active_abs_indices(self) -> List[int]:
        """ Returns the absolute index number of the active parameters.

        See Also
        --------
        :attr:`active_names`
        :meth:`convert_indices_abs2rel`
        :meth:`convert_indices_rel2abs`
        """
        return [p._id for p in self.par_eng.active]

    @property
    def active_names(self) -> List[str]:
        """ Returns the names of the active parameters.

        See Also
        --------
        :attr:`active_abs_indices`
        :meth:`convert_indices_abs2rel`
        :meth:`convert_indices_rel2abs`
        """
        return self.par_eng.active.names

    @property
    def bounds(self) -> Sequence[Tuple[float, float]]:
        """ Returns the min, max bounds in each dimension in **scaled space** i.e. a list of :code:`(0, 1)` tuples for
        each parameter.

        See Also
        --------
        :meth:`convert_parms_real2scaled`
        :meth:`convert_parms_scaled2real`
        """
        return [(0, 1)] * self.n_parms

    def __call__(self, x: Sequence[float]) -> float:
        """ Returns the error value between the force field with the given parameters and the training values.

        Notes
        -----
        Optimizations are done in scaled space to improve the numerics of the problem. Thus `x` is expected to be given
        in **scaled space**. To transform from one space to another see :meth:`convert_parms_real2scaled` and
        :meth:`convert_parms_scaled2real`.
        """
        return self._calculate(x)[0][0]

    def detailed_call(self, x: Sequence[float]) -> Union[Tuple[float, np.ndarray],
                                                         Tuple[float, np.ndarray, float, np.ndarray]]:
        """ A full return of the error results.
        Returns a tuple of:

        .. code-block:: python

            training_set_error, [training_set_residual_1, ..., training_set_residual_N]

        If a validation set is included then returned tuple is:

        .. code-block:: python

           training_set_error, [training_set_residual_1, ..., training_set_residual_N],
           validation_set_error, [validation_set_residual_1, ..., validation_set_residual_N]

        See Also
        --------
        :meth:`__call__`
        """
        calc = self._calculate(x)
        ts_fx = calc[0][0]
        ts_resids = calc[0][1]
        ts_resids = self._scale_residuals(ts_resids, self.dat_set) if self.scale_residuals else ts_resids

        if self.val_set is not None:
            vs_fx = calc[1][0]
            vs_resids = calc[1][1]
            vs_resids = self._scale_residuals(vs_resids, self.val_set) if self.scale_residuals else vs_resids
            return ts_fx, ts_resids, vs_fx, vs_resids

        return ts_fx, ts_resids

    def headers(self) -> Dict[str, tb.Col]:
        """ Returns the column headers for the :meth:`detailed_call` return.
        See :meth:`.BaseFunction.headers`.
        """
        heads = {'fx': tb.Float64Col(pos=0),
                 'resids_ts': tb.Float64Col((1, len(self.dat_set)), pos=1)}

        # Same condition as detailed_call so that the number of headers always matches the number of returns, even
        # if the validation set evaluates as falsy.
        if self.val_set is not None:
            heads['fx_vs'] = tb.Float64Col(pos=2)
            heads['resids_vs'] = tb.Float64Col((1, len(self.val_set)), pos=3)

        return heads

    def resids(self, x: Sequence[float]) -> np.ndarray:
        """ Method for compatibility with GFLS optimizer.
        Returns the signed differences between the force field and training set residuals. Will be scaled by sigma and
        weight if :attr:`scale_residuals` is :obj:`True`, otherwise not.
        """
        residuals = self._calculate(x)[0][1]
        if self.scale_residuals:
            residuals = self._scale_residuals(residuals, self.dat_set)

        return residuals

    def save(self, path: Union[Path, str], filenames: Optional[Dict[str, str]] = None,
             parameters: Optional[Sequence[float]] = None):
        """ Writes the :attr:`dat_set` and :attr:`job_col` to YAML files.
        Writes the engine object to an appropriate parameter file.

        Parameters
        ----------
        path
            Path to directory in which files will be saved.
        filenames
            Custom filenames for the written files. The dictionary may include any/all of the keys in the example
            below. This example contains the default names used if not given::

                {'ds': 'data_set.yml', 'jc': 'job_collection.yml', 'ff': 'ffield'}

        parameters
            Optional parameters to be written into the force field file. If not given, the parameters currently
            therein will be used.
        """
        path = Path(path).resolve(True)

        if not filenames:
            filenames = {}

        names = {'ds': filenames.get('ds', 'data_set.yml'),
                 'jc': filenames.get('jc', 'job_collection.yml'),
                 'ff': filenames.get('ff', 'ffield')}

        self.dat_set.store(str(path / names['ds']))
        self.job_col.store(str(path / names['jc']))
        self.par_eng.write(str(path / names['ff']), parameters)

    def set_parameters(self, x: Sequence[float], space: str, full: bool = False):
        """ Store parameters in the class.

        Parameters
        ----------
        x
            Parameters to store in :class:`~scm.params.parameterinterfaces.base.BaseParameters`.
        space
            Represents the space in which `x` is given. Accepts:

            #. :code:`'real'`: Actual parameter values

            #. :code:`'scaled'`: Transformed parameter values, bounded by 0 and 1 according to their ranges
               (see :meth:`convert_parms_real2scaled` and :meth:`convert_parms_scaled2real`).
        full
            If :obj:`True`, `x` is expected to be an array of ALL parameters in the force field, otherwise `x` is
            expected to be an array of active parameters only.

        Raises
        ------
        ValueError
            If `space` is neither :code:`'real'` nor :code:`'scaled'`.

        Warns
        -----
        UserWarning
            If any value in `x` is outside of the bounds associated with that parameter.

        See Also
        --------
        :meth:`convert_parms_real2scaled`
        :meth:`convert_parms_scaled2real`
        """
        if space == 'scaled':
            x = self.convert_parms_scaled2real(x)
        elif space != 'real':
            raise ValueError(f"Cannot parse space='{space}', 'real' or 'scaled' expected.")

        # The ranges must describe the same parameter set as x: all parameters if full, otherwise the active subset.
        ranges = self.par_eng.range if full else self.par_eng.active.range
        # Values exactly on a bound are still within bounds and do not trigger the warning.
        if any(not (min_ <= x_ <= max_) for x_, (min_, max_) in zip(x, ranges)):
            warnings.warn("x contains parameters which are outside their bounds.", UserWarning)

        if full:
            self.par_eng.x = x
        else:
            self.par_eng.active.x = x

    def convert_indices_abs2rel(self, indices: List[int]) -> List[Optional[int]]:
        """ Converts a sequence of absolute indices to relative indices pointing to the corresponding parameter in the
        active subset.

        Parameters
        ----------
        indices
            Sequence of absolute indices for *active* parameters.

        Returns
        -------
        List[Optional[int]]
            List of the same length as `indices` with corresponding elements giving the index of the parameters in the
            smaller active subset. :obj:`None` is returned for any index which refers to an inactive parameter.

        Examples
        --------
        Suppose :attr:`par_eng` has 100 parameters of which 5 are active. The absolute index numbers of these five are:

        >>> active = [23, 57, 78, 10, 98]

        Converting to the relative indices in the active subset:

        >>> err.convert_indices_abs2rel(active)
        [1, 2, 3, 0, 4]

        Note that this method correctly accounts for the ordering of the parameters given to `indices`.

        Suppose you attempted to convert a parameter which was not active:

        >>> err.convert_indices_abs2rel([23, 57, 1])
        [1, 2, None]
        """
        asked_names = [self.par_eng[i].name for i in indices]
        name_rel_map = {n: i for i, n in enumerate(self.par_eng.active.names)}
        return [name_rel_map.get(n) for n in asked_names]

    def convert_indices_rel2abs(self, indices: List[int]) -> List[int]:
        """ Converts a sequence of relative indices in the active parameter subset to absolute indices in the
        :attr:`par_eng`.

        Parameters
        ----------
        indices
            Sequence of relative indices in the active parameter subset.

        Returns
        -------
        List[int]
            List of the same length as `indices` with corresponding elements giving the index of the parameters in the
            :attr:`par_eng`.

        Examples
        --------
        Suppose :attr:`par_eng` has 100 parameters of which 5 are active. To find the absolute index numbers of all of
        them:

        >>> err.convert_indices_rel2abs(range(5))
        [10, 23, 57, 78, 98]
        """
        rel_name_map = {i: n for i, n in enumerate(self.par_eng.active.names)}
        asked_names = [rel_name_map[i] for i in indices]
        return [self.par_eng._name_to_allidx[n] for n in asked_names]

    def convert_parms_real2scaled(self, x: List[float]) -> np.ndarray:
        """ Transforms parameters from their actual values, to values between 0 and 1 where 0 and 1 represent the lower
        and upper bounds of the parameter respectively.

        .. important::

           Active parameter values exist in two spaces:

           #. The real and actual parameter values which appear in the force field.

           #. A scaled space between 0 and 1 in all dimensions where 0 and 1 represent the lower bound and upper bounds
              of the active parameters respectively.

           Optimizations are done in scaled space to improve the numerics of the problem.

        Parameters
        ----------
        x
            Sequence of parameter values to transform. May be the same length as the number of active parameters, or the
            length of the total number of parameters in the set.

        Raises
        ------
        ValueError
            If the length of `x` does not match the number of active or total parameters
        """
        min_, max_ = self._convert_parms_core(x)
        return (np.array(x) - min_) / (max_ - min_)

    def convert_parms_scaled2real(self, x: List[float]) -> np.ndarray:
        """ Transforms parameters from their [0, 1] scaled values, to actual parameter values.
        Exact opposite transformation of :meth:`convert_parms_real2scaled`.

        Raises
        ------
        ValueError
            If the length of `x` does not match the number of active or total parameters
        """
        min_, max_ = self._convert_parms_core(x)
        return (max_ - min_) * np.array(x) + min_

    def toggle_parameter(self, parameters: Union[Sequence[int], Sequence[str]], toggle: Union[str, bool] = None):
        """ De/Activate parameters.
        This means either allowing them to be changed during an optimization, or fixing their value so that they are not
        changed.

        Parameters
        ----------
        parameters
            Sequence of integers (which refer to the parameters' indices in
            :class:`~scm.params.parameterinterfaces.base.BaseParameters`) or parameter name strings which should be
            de/activated. A mix of integers and strings is not supported. A single integer or string is also accepted
            and treated as a sequence of one element.
        toggle
            Accepts :code:`'on'`, :code:`'off'`, :obj:`True` or :obj:`False`. Specifies how the toggle should be
            applied. Must be supplied. :code:`'on'` means the parameters will be optimized and changed during the
            optimization. :code:`'off'` means the parameters will be fixed. To set the parameter values see
            :meth:`set_parameters`.

        Raises
        ------
        ValueError
            If `toggle` is not one of the accepted values.

        Warns
        -----
        UserWarning
            If `parameters` contains a name or index which does not match any parameter. Such elements are ignored.

        Notes
        -----
        If using integers in `parameters` these are the absolute index numbers of the full parameter set. **Not** the
        parameter indices of the already activated subset. This may lead to unexpected results. For example, if you
        have a field with five activated parameters, attempting :code:`err.toggle_parameter(4, 'off')` will not
        deactivate the fifth active parameter but rather the parameter indexed 4 in the overall set. See
        :meth:`convert_indices_abs2rel` and :meth:`convert_indices_rel2abs` to be able to convert between the reference
        systems.

        Warnings
        --------
        When toggling parameters on, make sure that their associated bounds are sensible!

        See Also
        --------
        :attr:`active_abs_indices`
        :meth:`convert_indices_abs2rel`
        :meth:`convert_indices_rel2abs`
        :meth:`set_parameters`
        """

        if (toggle == 'on') or (toggle is True):
            toggle = True
        elif (toggle == 'off') or (toggle is False):
            toggle = False
        else:
            raise ValueError("Must specify toggle parameter. Do you want these parameters turned 'on' or 'off'?")

        # A lone name or index is treated as a one element sequence.
        if isinstance(parameters, (str, int, np.integer)):
            parameters = [parameters]

        # Nothing to toggle, also protects the type sniffing on the first element below.
        if len(parameters) == 0:
            return

        if isinstance(parameters[0], str):
            mapping = dict(zip(self.par_eng.names, self.par_eng.is_active))
        else:
            mapping = dict(enumerate(self.par_eng.is_active))

        for p in parameters:
            # Unknown keys (names or out-of-range indices) must never be inserted into the mapping, otherwise the
            # list assigned to is_active below would no longer match the number of parameters.
            if p not in mapping:
                kind = 'name' if isinstance(p, str) else 'index'
                warnings.warn(f"Parameter {kind} '{p}' not recognised, ignoring.", UserWarning)
                continue

            mapping[p] = toggle

        self.par_eng.is_active = [*mapping.values()]

    def reweigh_residuals(self, resids: Union[Sequence[str], Sequence[int], Dict[Union[str, int], float]],
                          new_weight: Optional[float] = None):
        """ Changes weights for elements in the :class:`~scm.params.core.dataset.DataSet`.
        Can be used to `deactivate` contributions to the training set by setting their weight to zero.

        .. note::

           Deactivating a residual does not stop its associated jobs from still being calculated.

        Parameters
        ----------
        resids
            Sequence of integers (which refer to the :class:`~scm.params.core.dataset.DataSetEntry` indices in the
            :class:`~scm.params.core.dataset.DataSet`) or strings corresponding to
            :class:`~scm.params.core.dataset.DataSet` keys. A mix of integers and strings is not supported. May also be
            a dictionary mapping the above to new weight values.
        new_weight
            New weight to apply to all elements in `resids`. Ignored if `resids` is a dictionary, must be supplied
            otherwise.

        Raises
        ------
        ValueError
            If `resids` is not a dictionary and `new_weight` is not given.
        """
        per_entry_weights = isinstance(resids, dict)
        if not per_entry_weights and new_weight is None:
            raise ValueError("new_weight cannot be None if resids is a sequence of names or indices.")

        for r in resids:
            self.dat_set[r].weight = resids[r] if per_entry_weights else new_weight

    def _calculate(self, x: Sequence[float]) -> Sequence[Tuple[float, np.ndarray, np.ndarray]]:
        """ Core calculation function, returns both the error function value and the residuals.

        Returns a pair of 3-tuples, the first for the training set and the second for the validation set. The first
        element of each is the error value and the second the residuals. If no validation set is present or the
        evaluation raises a ResultsError/DataSetEvaluationError, infinite placeholder values are returned instead.
        """
        default = (float('inf'), np.array([float('inf')]), np.array([float('inf')]))
        try:
            engine = self.par_eng.get_engine(self.convert_parms_scaled2real(x))
            ff_results = self.job_col.run(engine.settings, parallel=self.par_levels)
            ts_result = self.dat_set.evaluate(ff_results, self.loss, True)
            vs_result = self.val_set.evaluate(ff_results, self.loss, True) if self.val_set is not None else default
            return (ts_result[0], np.squeeze(ts_result[1]), np.squeeze(ts_result[2])), \
                   (vs_result[0], np.squeeze(vs_result[1]), np.squeeze(vs_result[2]))
        except (ResultsError, DataSetEvaluationError):
            return default, default

    @staticmethod
    def _scale_residuals(resids: np.ndarray, data_set: DataSet) -> np.ndarray:
        """ Scales a sequence of residuals by weight and sigma values in the associated
        :class:`scm.params.core.dataset.DataSet`.

        .. math::

            r_i = w_i \\left(\\frac{f'-f}{\\sigma}\\right)^2

        """
        return np.array(data_set.get('weight')) * (resids / np.array(data_set.get('sigma'))) ** 2

    def _convert_parms_core(self, x) -> Tuple[np.ndarray, np.ndarray]:
        """ Core conversion code used in both directions. Returns the appropriate min and max bounds.

        The bounds of the active parameters are returned if `x` has as many elements as there are active parameters,
        the bounds of all parameters if it has as many elements as there are parameters in total. The active subset
        takes priority if both counts are equal.

        Raises
        ------
        ValueError
            If the length of `x` matches neither the number of active nor the total number of parameters.
        """
        lenx = len(x)
        if lenx == self.n_parms:
            min_, max_ = np.array(self.par_eng.active.range).T
        elif lenx == self.n_all_parms:
            min_, max_ = np.array(self.par_eng.range).T
        else:
            raise ValueError(f"Cannot parse x with length {lenx}. Must contain values for all parameters or values for "
                             f"active parameters.")

        return min_, max_


class ReaxFFError(BaseParamsError):
    """ ReaxFF error function. """

    @classmethod
    def from_classic_files(cls, path: Union[Path, str], **kwargs) -> 'ReaxFFError':
        """ Initializes the error function from classic ReaxFF files.

        Parameters
        ----------
        path
            Path to classic ReaxFF files, passed to :func:`.setup_reax_from_classic`.
        **kwargs
            Passed on to the class initialisation.
        """
        dat_set, job_col, rxf_eng = setup_reax_from_classic(path)
        return cls(dat_set, job_col, rxf_eng, **kwargs)

    @classmethod
    def from_params_files(cls, path: Union[Path, str], **kwargs) -> 'ReaxFFError':
        """ Initializes the error function from ParAMS data files.

        Parameters
        ----------
        path
            Path to directory containing ParAMS data set, job collection and ReaxFF engine files (see
            :func:`.setup_reax_from_params`).
        **kwargs
            Passed on to the class initialisation.
        """
        dat_set, job_col, rxf_eng = setup_reax_from_params(path)
        return cls(dat_set, job_col, rxf_eng, **kwargs)

    def toggle_parameter(self, parameters: Union[Sequence[int], Sequence[str]], toggle: Union[str, bool] = None,
                         force: bool = False):
        """ De/Activate parameters.
        This means either allowing them to be changed during an optimization, or fixing their value so that they are not
        changed.

        See :meth:`.BaseParamsError.toggle_parameter`.

        Parameters
        ----------
        force
            If :obj:`True`, the sense checks which verify that certain parameters are not activated will be bypassed.

        Warns
        -----
        UserWarning
            If `parameters` contains a parameter which should never be activated and `toggle` is :obj:`True` or
            :code:`'on'`.

        Notes
        -----
        Certain parameters should never be activated. For example, some represent two- or three-way toggles for certain
        behaviours. Others can only take very specific values based on which atoms are present. This method will ignore
        and warn about attempts to activate such parameters unless `force` is used.
        """
        # A lone name or index is treated as a one element sequence.
        if isinstance(parameters, (str, int, np.integer)):
            parameters = [parameters]

        if toggle is True or toggle == 'on':
            # Mask of the parameters which the engine allows to be activated.
            allowed = np.asarray(self.par_eng._get_active(), dtype=bool)

            # Mask of the parameters which the caller is attempting to activate.
            activating = np.full(self.n_all_parms, False)
            activating[[self.par_eng[i]._id for i in parameters]] = True

            invalid_act = np.flatnonzero(~allowed & activating)
            if invalid_act.size > 0:
                warnings.warn(f"The following parameters should never be activated: {invalid_act}.", UserWarning)

            if force:
                valid_parameters = parameters
            else:
                # Plain list of absolute indices so that the parent method receives an ordinary sequence.
                valid_parameters = np.flatnonzero(allowed & activating).tolist()

            # Every request was filtered out (or none was made); the parent method cannot handle an empty sequence.
            if len(valid_parameters) == 0:
                return
        else:
            valid_parameters = parameters
        super().toggle_parameter(valid_parameters, toggle)

    def checkpoint_save(self, path: Union[Path, str]):
        """ Used to store files into a GloMPO checkpoint (at path) suitable to reconstruct the task when the checkpoint
        is loaded.
        """
        path = Path(path).resolve(True)
        self.dat_set.pickle_dump(str(path / 'data_set.pkl'))
        self.job_col.pickle_dump(str(path / 'job_collection.pkl'))
        self.par_eng.pickle_dump(str(path / 'reax_params.pkl'))


class XTBError(BaseParamsError):
    """ GFN-xTB error function. """

    @classmethod
    def from_params_files(cls, path: Union[Path, str], **kwargs) -> 'XTBError':
        """ Initializes the error function from ParAMS data files.

        Parameters
        ----------
        path
            Path to directory containing ParAMS data set, job collection and xTB engine files (see
            :func:`.setup_xtb_from_params`).
        **kwargs
            Passed on to the class initialisation.
        """
        dat_set, job_col, xtb_eng = setup_xtb_from_params(path)
        return cls(dat_set, job_col, xtb_eng, **kwargs)

    def checkpoint_save(self, path: Union[Path, str]):
        """ Used to store files into a GloMPO checkpoint (at path) suitable to reconstruct the task when the checkpoint
        is loaded.
        """
        path = Path(path).resolve(True)
        self.dat_set.pickle_dump(str(path / 'data_set.pkl'))
        self.job_col.pickle_dump(str(path / 'job_collection.pkl'))
        # Unlike the data set and job collection, the parameters are written through the engine's own writer into
        # the checkpoint directory itself.
        self.par_eng.write(str(path))


def setup_reax_from_classic(path: Union[Path, str]) -> Tuple[DataSet, JobCollection, ReaxParams]:
    """ Parses classic ReaxFF force field and configuration files into instances which can be evaluated by AMS.

    Parameters
    ----------
    path
        Path to directory containing classic ReaxFF configuration files.

    Notes
    -----
    `path` must contain:

        ``trainset.in``: Contains the description of the items in the training set.

        ``control``: Contains ReaxFF settings.

        ``geo``: Contains the geometries of the items used in the training set, will make the
        :class:`~scm.params.core.jobcollection.JobCollection` along with the ``control`` file.

        ``ffield``: A force field file which contains values for all the parameters. By default almost all parameters
        are activated and given ranges of :math:`\\pm 20%` if non-zero and [-1, 1] otherwise. See
        :class:`~scm.params.parameterinterfaces.reaxff.ReaxParams` for details.

    Optionally, the directory may contain:

        ``params``: File which describes which parameters to optimize and their ranges.

    Or, alternatively:

        ``ffield_bool``: A force field file with all parameters set to 0 or 1. 1 indicates it will be adjusted during
        optimization. 0 indicates it will not be changed during optimization.

        ``ffield_max``: A force field file where the active parameters are set to their maximum value (value of other
        parameters is ignored).

        ``ffield_min``: A force field file where the active parameters are set to their minimum value (value of other
        parameters is ignored).

    The method will ignore ``ffield_bool``, ``ffield_min`` and ``ffield_max`` if ``params`` is also present.

    .. caution::

       ``params`` files are not supported in ParAMS <v0.5.1. In this case the file will be ignored and the method will
       directly look for ``ffield_bool``, ``ffield_min`` and ``ffield_max``.

    Returns
    -------
    Tuple[~scm.params.core.dataset.DataSet, ~scm.params.core.jobcollection.JobCollection, ~scm.params.parameterinterfaces.reaxff.ReaxParams]
        ParAMS reparameterization objects: training set, job collection and engine.

    Warns
    -----
    UserWarning
        If the starting value of an active parameter is not strictly between its bounds. The value is moved to the
        midpoint of the range.
    """

    path = Path(path).resolve(True)

    # Setup the dataset
    dat_set = trainset_to_params(str(path / 'trainset.in'))

    # Setup the job collection depending on the types of data in the training set
    settings = reaxff_control_to_settings(str(path / 'control'))
    if dat_set.forces():
        settings.input.ams.properties.gradients = True
    job_col = geo_to_params(str(path / 'geo'), settings)

    # Setup the Engine and parameters
    rxf_eng = ReaxParams(str(path / 'ffield'), bounds_scale=1.2)

    # Look for optional extras files
    params_path = path / 'params'
    bool_path = path / 'ffield_bool'
    min_path = path / 'ffield_min'
    max_path = path / 'ffield_max'

    if params_path.exists() and PARAMS_VERSION_INFO > (0, 5, 0):
        rxf_eng.read_paramsfile(str(params_path))
    elif all(extra.exists() for extra in (bool_path, min_path, max_path)):
        bool_eng = ReaxParams(str(bool_path))
        max_eng = ReaxParams(str(max_path))
        min_eng = ReaxParams(str(min_path))

        rxf_eng.is_active = [bool(val) for val in bool_eng.x]

        for p in rxf_eng:
            if p.is_active:
                lower = min_eng[p.name].value
                upper = max_eng[p.name].value
                if lower < upper:
                    p.range = (lower, upper)
                else:
                    # A degenerate range cannot be optimized over: pin the parameter value and deactivate it.
                    p.value = lower
                    p.is_active = False
                    print(f"WARNING: '{p.name}' deactivated due to bounds min >= max.")

    # Consistency Checks

    # Parameter value between bounds
    for p in rxf_eng.active:
        if not p.range[0] < p.value < p.range[1]:
            p.value = (p.range[0] + p.range[1]) / 2
            warnings.warn(f"'{p.name}' starting value out of bounds moving to midpoint.")

    # Remove training set entries not in job collection
    remove_ids = dat_set.check_consistency(job_col)
    if remove_ids:
        print('The following jobIDs are not in the JobCollection, their respective training set entries will be '
              'removed:')
        # Sorted so that the report is printed in a reproducible order.
        print('\n'.join(sorted({s for e in [dat_set[i] for i in remove_ids] for s in e.jobids})))
        del dat_set[remove_ids]

    return dat_set, job_col, rxf_eng


def _setup_collections_from_params(path: Union[Path, str]) -> Tuple[DataSet, JobCollection]:
    """ Loads a ParAMS produced data set and job collection into ParAMS objects.

    Parameters
    ----------
    path
        Path to folder containing:
            ``data_set.yml`` OR ``data_set.pkl``
                Contains the description of the items in the training set. A YAML file must be of the form produced by
                :meth:`~scm.params.core.dataset.DataSet.store`, a pickle file must be of the form produced by
                :meth:`~scm.params.core.dataset.DataSet.pickle_dump`. If both files are present, the pickle is given
                priority.
            ``job_collection.yml`` OR ``job_collection.pkl``
                Contains descriptions of the AMS jobs to evaluate. A YAML file must be of the form produced by
                :meth:`~scm.params.core.jobcollection.JobCollection.store`, a pickle file must be of the form produced
                by :meth:`~scm.params.core.jobcollection.JobCollection.pickle_dump`.  If both files are present, the
                pickle is given priority.

    Returns
    -------
    Tuple[DataSet, JobCollection]
        The data set and the job collection (in that order) after loading from the files found in `path`.

    Raises
    ------
    FileNotFoundError
        If neither the pickle nor the YAML file exists in `path` for the data set or for the job collection.
    """
    dat_set = DataSet()
    job_col = JobCollection()

    # (file suffix, name of the loading method) pairs listed in priority order: pickle first, YAML as fallback.
    # NOTE(review): 'pickle_load' presumably unpickles the file; only point this function at trusted folders.
    loaders = (('.pkl', 'pickle_load'), ('.yml', 'load'))

    for name, params_obj in (('data_set', dat_set), ('job_collection', job_col)):
        for suffix, loader in loaders:
            file = Path(path, name + suffix)
            if file.exists():
                getattr(params_obj, loader)(str(file))
                # Stop at the first file found, otherwise a lower priority file would be loaded on top of it.
                break
        else:
            # The inner loop finished without finding any file for this object.
            raise FileNotFoundError(f"No {name.replace('_', ' ')} data found")

    return dat_set, job_col


def setup_reax_from_params(path: Union[Path, str]) -> Tuple[DataSet, JobCollection, ReaxParams]:
    """ Loads ParAMS produced ReaxFF files into ParAMS objects.

    Parameters
    ----------
    path
        Path to folder containing:
            ``data_set.yml`` OR ``data_set.pkl``
                Contains the description of the items in the training set. A YAML file must be of the form produced by
                :meth:`~scm.params.core.dataset.DataSet.store`, a pickle file must be of the form produced by
                :meth:`~scm.params.core.dataset.DataSet.pickle_dump`. If both files are present, the pickle is given
                priority.
            ``job_collection.yml`` OR ``job_collection.pkl``
                Contains descriptions of the AMS jobs to evaluate. A YAML file must be of the form produced by
                :meth:`~scm.params.core.jobcollection.JobCollection.store`, a pickle file must be of the form produced
                by :meth:`~scm.params.core.jobcollection.JobCollection.pickle_dump`.  If both files are present, the
                pickle is given priority.
            ``reax_params.pkl``
                Pickle produced by :meth:`~scm.params.parameterinterfaces.base.BaseParameters.pickle_dump`, representing
                the force field, active parameters and their ranges.

    Returns
    -------
    Tuple[DataSet, JobCollection, ReaxParams]
        The data set, the job collection and the ReaxFF parameter interface (in that order) loaded from `path`.
    """
    # The training set and job collection are loaded by the helper shared with the other 'from_params' setups.
    dat_set, job_col = _setup_collections_from_params(path)

    # The parameter interface is only read from its pickle, always named 'reax_params.pkl' inside the folder.
    rxf_eng = ReaxParams.pickle_load(str(Path(path, 'reax_params.pkl')))

    return dat_set, job_col, rxf_eng


def setup_xtb_from_params(path: Union[Path, str]) -> Tuple[DataSet, JobCollection, XTBParams]:
    """ Loads ParAMS produced xTB files into ParAMS objects.

    Parameters
    ----------
    path
        Path to folder containing:
            ``data_set.yml`` OR ``data_set.pkl``
                Contains the description of the items in the training set. A YAML file must be of the form produced by
                :meth:`~scm.params.core.dataset.DataSet.store`, a pickle file must be of the form produced by
                :meth:`~scm.params.core.dataset.DataSet.pickle_dump`. If both files are present, the pickle is given
                priority.
            ``job_collection.yml`` OR ``job_collection.pkl``
                Contains descriptions of the AMS jobs to evaluate. A YAML file must be of the form produced by
                :meth:`~scm.params.core.jobcollection.JobCollection.store`, a pickle file must be of the form produced
                by :meth:`~scm.params.core.jobcollection.JobCollection.pickle_dump`.  If both files are present, the
                pickle is given priority.
            ``elements.xtbpar``, ``basis.xtbpar``, ``globals.xtbpar``, ``additional_parameters.yaml``, ``metainfo.yaml``, ``atomic_configurations.xtbpar``, ``metals.xtbpar``
                Classic xTB parameter files.

    Returns
    -------
    Tuple[DataSet, JobCollection, XTBParams]
        The data set, the job collection and the xTB parameter interface (in that order) built from `path`.
    """
    # The training set and job collection are loaded by the helper shared with the other 'from_params' setups.
    dat_set, job_col = _setup_collections_from_params(path)

    # The parameter interface is constructed from the folder itself rather than from a single file.
    xtb_eng = XTBParams(str(path))

    return dat_set, job_col, xtb_eng