# Source code for pde.solvers.controller

"""Defines a class controlling the simulations of PDEs.

.. codeauthor:: David Zwicker <david.zwicker@ds.mpg.de>
"""

from __future__ import annotations

import datetime
import logging
import math
import time
from typing import TYPE_CHECKING, Any

from .. import __version__
from ..trackers.base import (
    FinishedSimulation,
    TrackerCollection,
    TrackerCollectionDataType,
)

if TYPE_CHECKING:
    from collections.abc import Callable

    from ..tools.typing import TField
    from .base import SolverBase

_logger = logging.getLogger(__name__)
""":class:`logging.Logger`: Logger for controller."""

TRangeType = float | tuple[float, float]



class Controller:
    """Class controlling a simulation.

    The controller calls a solver to advance the simulation into the future and it takes
    care of trackers that analyze and modify the state periodically. The controller also
    handles errors in the simulations and the trackers, as well as user-induced
    interrupts, e.g., by hitting Ctrl-C or Cmd-C to cause a :class:`KeyboardInterrupt`.
    In case of problems, the Controller writes additional information into
    :attr:`~Controller.diagnostics`, which can help to diagnose problems.

    Attributes:
        solver: The solver instance passed to the constructor.
        t_range: Tuple `(t_start, t_end)` giving the simulated time range.
        trackers: The tracker collection built from the `tracker` argument.
        info (dict): Information about the run gathered by the controller itself.
            The same dictionary is available as `diagnostics["controller"]`.
        diagnostics (dict): Contains the keys `"controller"` and
            `"package_version"` after construction. Running a simulation adds
            `"solver"`, and, when the run is interrupted or fails, also
            `"last_tracker_time"` and `"last_state"`.
    """

    diagnostics: dict[str, Any]
    """dict: diagnostic information (available after simulation finished)"""

    # Stored on the class so the timer used for profiling can be replaced.
    _get_current_time: Callable = time.process_time
    """callable: function to determine the current time for profiling purposes. We
    generally use the more accurate :func:`time.process_time`, but better performance
    may be obtained by the faster :func:`time.time`. This will only affect simulations
    with many iterations."""

    def __init__(
        self,
        solver: SolverBase,
        t_range: TRangeType,
        tracker: TrackerCollectionDataType = "auto",
    ):
        """Set up a controller for a solver, a time range, and a set of trackers.

        Args:
            solver (:class:`~pde.solvers.base.SolverBase`):
                Solver instance whose stepper advances the simulation in time
            t_range (float or tuple):
                Time range of the simulation. A single value `t_end` is interpreted
                as the range `[0, t_end]`; a pair is used as start and end time.
            tracker:
                Description of the trackers that process the state at specified
                times. This can be an instance of
                :class:`~pde.trackers.base.TrackerBase`, a string identifying a
                tracker (see :func:`~pde.trackers.registered_trackers` for possible
                identifiers), or a list of these. The default value `auto` checks
                the state for consistency (tracker 'consistency') and displays a
                progress bar (tracker 'progress') when :mod:`tqdm` is installed.
                All trackers and their options, including the time points at which
                they act, are described in :mod:`~pde.trackers`.
        """
        self.solver = solver
        self.t_range = t_range  # normalized to a pair by the property setter
        self.trackers = TrackerCollection.from_data(tracker)

        # The controller's own information is exposed under the key "controller" of
        # the diagnostics, so later updates of `self.info` are visible there, too.
        controller_info: dict[str, Any] = {}
        self.info = controller_info
        self.diagnostics = dict(
            controller=controller_info,
            package_version=__version__,
        )

    @property
    def t_range(self) -> tuple[float, float]:
        """tuple: start and end time of the simulation.

        This returns the pair stored by the setter, which is `(0, t_end)` when the
        range was given as a single number.
        """
        return self._t_range

    @t_range.setter
    def t_range(self, value: TRangeType):
        """Set start and end time of the simulation.

        Args:
            value (float or tuple):
                Set the time range of the simulation. If a single number is given, it
                specifies the final time and the start time is set to zero. If a tuple
                of two numbers is given they are used as start and end time.
        """
        # determine time range
        try:
            self._t_range: tuple[float, float] = (0, float(value))  # type: ignore
        except TypeError as err:  # assume a single number was given
            if len(value) == 2:  # type: ignore
                self._t_range = tuple(value)  # type: ignore
            else:
                msg = "t_range must be set to a single number or a tuple of two numbers"
                raise ValueError(msg) from err

    def _get_stop_handler(self) -> Callable[[Exception, float], tuple[int, str]]:
        """Return function that handles messaging."""

        def _handle_stop_iteration(err: Exception, t: float) -> tuple[int, str]:
            """Helper function for handling interrupts raised by trackers."""
            if isinstance(err, FinishedSimulation):
                # tracker determined that the simulation finished
                self.info["successful"] = True
                msg = f"Simulation finished at t={t}"
                msg_level = logging.INFO
                if hasattr(err, "value") and err.value:
                    self.info["stop_reason"] = err.value
                    msg += f" ({err.value})"
                else:
                    self.info["stop_reason"] = "Tracker raised FinishedSimulation"

            else:
                # tracker determined that there was a problem
                self.info["successful"] = False
                msg = f"Simulation aborted at t={t}"
                msg_level = logging.WARNING
                if hasattr(err, "value") and err.value:
                    self.info["stop_reason"] = err.value
                    msg += f" ({err.value})"
                else:
                    self.info["stop_reason"] = "Tracker raised StopIteration"

            return msg_level, msg

        return _handle_stop_iteration

    def _run_main_process(self, state: TField, dt: float | None = None) -> None:
        """Run the main part of the simulation.

        This is either a serial run or the main node of an MPI run. Diagnostic
        information about the solver procedure is available in the `diagnostics`
        property of the instance after this function has been called.

        The function initializes the trackers, builds the stepper, and then
        alternates between handling trackers and advancing the state until the final
        time is reached or the run is interrupted. Interrupts by trackers
        (:class:`StopIteration`) and by the user (:class:`KeyboardInterrupt`) are
        recorded in `self.info` and logged; any other exception is re-raised after
        the last time and state have been stored in `self.diagnostics`.

        Args:
            state:
                The initial state, which will be updated during the simulation.
            dt (float):
                Initial time step of the chosen stepping scheme. If `None`, a default
                value based on the solver configuration will be chosen.
        """
        if self.solver.mpi_run:
            # `mpi` is only bound in MPI runs; all later uses have the same guard
            from ..tools import mpi

            assert mpi.is_main  # this is the main process (and there can be others)

        # gather basic information
        t_start, t_end = self.t_range
        get_time = self._get_current_time

        # initialize solver information
        self.info["mpi_run"] = self.solver.mpi_run
        self.info["t_start"] = t_start
        self.info["t_end"] = t_end
        # This stores the solver's own `info` object (not a copy), or an empty
        # dictionary if the solver does not have this attribute.
        self.diagnostics["solver"] = getattr(self.solver, "info", {})

        # initialize profilers
        profiler = {"solver": 0.0, "tracker": 0.0}
        self.info["profiler"] = profiler
        prof_start_compile = get_time()

        # initialize trackers and handlers
        self.trackers.initialize(state, info=self.diagnostics)
        handle_stop_iteration = self._get_stop_handler()

        # Build the executable stepping function from the solver. This call also
        # inherently selects the backend for this simulation (if it was not set
        # explicitly). The backend is available via `self.solver.backend`.
        stepper = self.solver.make_stepper(state=state, dt=dt)

        # store intermediate profiling information before starting simulation
        # (the "compilation" time thus includes initializing the trackers)
        prof_start_tracker = get_time()
        profiler["compilation"] = prof_start_tracker - prof_start_compile
        solver_start = datetime.datetime.now(datetime.timezone.utc)
        self.info["solver_start"] = str(solver_start)

        if dt is None:
            # use self.solver.info['dt'] if it is present
            dt = self.diagnostics["solver"].get("dt")
        # add absolute tolerance for time to account for inaccurate float point math
        if dt is None:  # self.solver.info['dt'] might be None
            # use conservative default values if time step is unknown
            stepper_atol = 1e-12
            tracker_atol = 1e-8
        else:
            # adapt tolerances to time step
            stepper_atol = 1e-6 * dt  # control loop termination and min advance
            tracker_atol = 0.5 * dt  # allow firing within half a step of the interrupt

        # evolve the system from t_start to t_end
        t = t_start
        _logger.debug("Start simulation at t=%g", t)
        try:
            while t < t_end - stepper_atol:
                # determine next time point with an action
                t_next_action = self.trackers.handle(state, t, atol=tracker_atol)
                t_next_action = min(t_next_action, t_end)  # stop at t_end
                if self.solver.mpi_run:
                    mpi.mpi_bcast(t_next_action, root=0)  # send time to all other nodes

                # track runtime of trackers and solver
                prof_start_solve = get_time()
                profiler["tracker"] += prof_start_solve - prof_start_tracker

                # advance system to next time point with an action
                t = stepper(state, t, t_next_action)

                # track runtime of trackers and solver
                prof_start_tracker = get_time()
                profiler["solver"] += prof_start_tracker - prof_start_solve

                # update the tolerances to reflect changes in time step `dt`
                # (skipped when the solver information has no usable `dt` entry)
                if dt := self.diagnostics["solver"].get("dt"):
                    stepper_atol = 1e-6 * dt
                    tracker_atol = 0.5 * dt

        except StopIteration as err:
            # iteration has been interrupted by a tracker
            # NOTE: the handler returned by `_get_stop_handler` sets "stop_reason" in
            # both of its branches, so the value assigned here is only a placeholder.
            self.info["stop_reason"] = "Tracker raised StopIteration"
            msg_level, msg = handle_stop_iteration(err, t)
            self.diagnostics["last_tracker_time"] = t
            self.diagnostics["last_state"] = state

        except KeyboardInterrupt:
            # iteration has been interrupted by the user
            self.info["successful"] = False
            self.info["stop_reason"] = "User interrupted simulation"
            msg = f"Simulation interrupted at t={t}"
            msg_level = logging.INFO
            self.diagnostics["last_tracker_time"] = t
            self.diagnostics["last_state"] = state

        except Exception:
            # any other exception: keep the last time and state for debugging and
            # let the error propagate to the caller
            self.diagnostics["last_tracker_time"] = t
            self.diagnostics["last_state"] = state
            raise

        else:
            # reached final time
            self.info["successful"] = True
            self.info["stop_reason"] = "Reached final time"
            msg = f"Simulation finished at t={t_end}."
            msg_level = logging.INFO

            # handle trackers one more time when t_end is reached
            try:
                # We here use stepper_atol to make sure the final time point is only
                # added when the tracker actually requested exactly this time point.
                # If we used tracker_atol, many trackers would analyze an additional
                # time point.
                self.trackers.handle(state, t, atol=stepper_atol)
            except StopIteration as err:
                # error detected in the final handling of the tracker
                msg_level, msg = handle_stop_iteration(err, t)

        # signal that client nodes should exit (the client loop in
        # `_run_client_process` stops once it receives a time before `t_start`)
        if self.solver.mpi_run:
            mpi.mpi_bcast(-math.inf, root=0)

        # calculate final statistics
        profiler["tracker"] += get_time() - prof_start_tracker
        duration = datetime.datetime.now(datetime.timezone.utc) - solver_start
        self.info["solver_duration"] = str(duration)
        self.info["t_final"] = t
        self.trackers.finalize(info=self.diagnostics)
        if "dt_statistics" in getattr(self.solver, "info", {}):
            # replace the statistics object by a plain dictionary of its values
            dt_statistics = dict(self.solver.info["dt_statistics"].to_dict())
            self.solver.info["dt_statistics"] = dt_statistics

        # show information after a potential progress bar has been deleted to not mess
        # up the display
        _logger.log(msg_level, msg)
        # only warn when the trackers took more than one unit of the profiling clock
        if profiler["tracker"] > max(profiler["solver"], 1):
            _logger.warning(
                "Spent more time on handling trackers (%.3g) than on the actual "
                "simulation (%.3g)",
                profiler["tracker"],
                profiler["solver"],
            )

    def _run_client_process(self, state: TField, dt: float | None = None) -> None:
        """Run the simulation on a client node during an MPI run.

        A client node only advances its part of the state to the time points that
        it receives from the main node. Trackers and all other logic are handled
        by the main node. The function returns nothing; `state` is updated in place
        by the stepper.

        Args:
            state:
                The initial state, which will be updated during the simulation.
            dt (float):
                Initial time step of the chosen stepping scheme. If `None`, a default
                value based on the solver configuration will be chosen.

        Raises:
            RuntimeError: If the solver does not indicate an MPI run after the
                stepper has been created.
        """
        from ..tools import mpi

        assert mpi.size > 1  # there are multiple processes
        assert not mpi.is_main  # this is not the main node

        # The stepper is created before `mpi_run` is checked, keeping the order of
        # the original implementation.
        advance = self.solver.make_stepper(state=state, dt=dt)

        if not self.solver.mpi_run:
            raise RuntimeError(
                "Started multiprocessing run without a solver/backend combination "
                "that supports MPI stepping. Use `ExplicitMPISolver` to profit from "
                "multiple cores"
            )

        # Follow the time points broadcast by the main node. A time point before the
        # start time signals that this client should stop.
        t_start, _ = self.t_range
        t_now = t_start
        while not ((t_target := mpi.mpi_bcast(0.0, root=0)) < t_start):
            t_now = advance(state, t_now, t_target)

    def _run_serial(self, state: TField, dt: float | None = None) -> TField | None:
        """Run the simulation in serial mode.

        This delegates all work to :meth:`_run_main_process`. Diagnostic information
        about the solver is available in the :attr:`~Controller.diagnostics`
        property after this function has been called.

        Args:
            state (:class:`~pde.fields.base.FieldBase`):
                The initial state of the simulation. The very same object is handed
                to the main process and returned afterwards.
            dt (float):
                Initial time step of the chosen stepping scheme. If `None`, a default
                value based on the solver configuration will be chosen.

        Returns:
            The object passed as `state`, after the simulation has run on it.
        """
        self._run_main_process(state, dt)
        return state

    def _run_parallel(self, state: TField, dt: float | None = None) -> TField | None:
        """Run the simulation in MPI mode.

        The main node runs :meth:`_run_main_process`, while all other nodes run
        :meth:`_run_client_process`. An exception on any node is logged, passed to
        `mpi.mpi_excepthook`, and then re-raised. Diagnostic information about the
        solver is available in the :attr:`~Controller.diagnostics` property after
        this function has been called.

        Args:
            state (:class:`~pde.fields.base.FieldBase`):
                The initial state of the simulation.
            dt (float):
                Initial time step of the chosen stepping scheme. If `None`, a default
                value based on the solver configuration will be chosen.

        Returns:
            The state at the final time point on the main node and `None` on all
            other nodes.
        """
        from ..tools import mpi

        self.info["mpi_count"] = mpi.size
        self.info["mpi_rank"] = mpi.rank

        if not mpi.is_main:
            # this node is a secondary node and must thus run the client process
            try:
                self._run_client_process(state, dt)
            except Exception as exc:
                _logger.exception("Error in node %d", mpi.rank, exc_info=exc)
                mpi.mpi_excepthook(type(exc), exc, exc.__traceback__)
                raise
            _logger.info("MPI client process finished")
            return None  # do not return anything in client processes

        # this node is the primary one and must thus run the main process
        try:
            self._run_main_process(state, dt)
        except Exception as exc:
            _logger.exception("Error in main node", exc_info=exc)
            mpi.mpi_excepthook(type(exc), exc, exc.__traceback__)
            raise
        _logger.info("MPI main process finished")
        return state  # return final state only in main process


    def run(self, initial_state: TField, dt: float | None = None) -> TField | None:
        """Run the simulation.

        Diagnostic information about the solver are available in the
        :attr:`~Controller.diagnostics` property after this function has been called.

        Args:
            initial_state (:class:`~pde.fields.base.FieldBase`):
                The initial state of the simulation. This state will be copied and thus
                not modified by the simulation. Instead, the final state will be
                returned and trackers can be used to record intermediate states.
            dt (float):
                Initial time step of the chosen stepping scheme. If `None`, a default
                value based on the solver configuration will be chosen.

        Returns:
            The state at the final time point. If multiprocessing is used, only the main
            node will return the state. All other nodes return None.
        """
        # copy the initial state to not modify the supplied one
        if getattr(self.solver, "pde", None) and self.solver.pde.complex_valued:
            _logger.info("Convert state to complex numbers")
            state: TField = initial_state.copy(dtype=complex)
        else:
            state = initial_state.copy()

        if self.solver.mpi_run:
            # run the simulation on multiple nodes
            return self._run_parallel(state, dt)
        return self._run_serial(state, dt)