Index: trunk/src/vegas/vegas.nw
===================================================================
--- trunk/src/vegas/vegas.nw	(revision 8798)
+++ trunk/src/vegas/vegas.nw	(revision 8799)
@@ -1,8428 +1,9889 @@
 %% -*- ess-noweb-default-code-mode: f90-mode; noweb-default-code-mode: f90-mode; noweb-code-mode: f90-mode -*-
 % WHIZARD code as NOWEB source: VEGAS algorithm
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \chapter{VEGAS Integration}
 \label{cha:vegas-integration}
 
 @ The backbone integrator of WHIZARD is a object-oriented implemetation of the VEGAS algorithm.
 
 <<[[vegas.f90]]>>=
 <<File header>>
 
 module vegas
 <<Use kinds>>
 <<Vegas: modules>>
 
 <<Standard module head>>
 
 <<Vegas: public>>
 
 <<Vegas: parameters>>
 
 <<Vegas: types>>
 
 <<Vegas: interfaces>>
 
+  interface
+<<Vegas: sub interfaces>>
+  end interface
+
 contains
 
-<<Vegas: procedures>>
+<<Vegas: main procedures>>
 
 end module vegas
 @ %def vegas
+@
+<<[[vegas_sub.f90]]>>=
+<<File header>>
+
+submodule (vegas) vegas_s
+
+  implicit none
+
+contains
+
+<<Vegas: procedures>>
+
+end submodule vegas_s
+
+@ %def vegas_sub_s
+@
 <<Vegas: modules>>=
   use diagnostics
   use io_units
   use format_utils, only: write_indent
   use format_defs, only: FMT_17
   use rng_base, only: rng_t
   use rng_stream, only: rng_stream_t
 @
 @ MPI Module.
 <<MPI: Vegas: modules>>=
   use request_callback, only: request_handler_t
   use mpi_f08 !NODEP!
 @
 
 \section{Integration modes}
 \label{sec:integration-modes}
 
-@ VEGAS operates in three different modes: [[vegas_mode_importance_only]], [[vegas_mode_importance]] or [[vegas_mode_stratified]].
-The default mode is [[vegas_mode_importance]], where the algorithm decides whether if it is possible to use importance sampling or stratified sampling.
-In low dimensions VEGAS uses strict stratified sampling.
+VEGAS operates in three different modes:
+[[vegas_mode_importance_only]], [[vegas_mode_importance]] or
+[[vegas_mode_stratified]]. The default mode is
+[[vegas_mode_importance]], where the algorithm decides whether if it
+is possible to use importance sampling or stratified sampling. In low
+dimensions VEGAS uses strict stratified sampling.
 
 <<Vegas: parameters>>=
   integer, parameter, public :: VEGAS_MODE_IMPORTANCE = 0, &
        & VEGAS_MODE_STRATIFIED = 1, VEGAS_MODE_IMPORTANCE_ONLY = 2
 @ %def vegas_mode_importance vegas_mode_stratified vegas_mode_importance_only
 @
 \section{Type: vegas\_func\_t}
 \label{sec:type:vegas_func_t}
 
 We define a abstract [[func]] type which only gives an interface to an [[evaluate]] procedure.
 The inside of implementation and also the optimization of are not a concern of the [[vegas]] implementation.
 <<Vegas: public>>=
   public :: vegas_func_t
 <<Vegas: types>>=
   type, abstract :: vegas_func_t
      !
    contains
      procedure(vegas_func_evaluate), deferred, pass, public :: evaluate
   end type vegas_func_t
 
 @ %def vegas_func_t
 @ The only procedure called in [[vegas]] is [[vegas_func_evaluate]].
 It takes a real value [[x]] and returns a real value [[f]].
 <<Vegas: interfaces>>=
   abstract interface
      real(default) function vegas_func_evaluate (self, x) result (f)
        import :: default, vegas_func_t
        class(vegas_func_t), intent(inout) :: self
        real(default), dimension(:), intent(in) :: x
      end function vegas_func_evaluate
   end interface
 
 @ %def vegas_func_evaluate
 @
 \section{Type: vegas\_config\_t}
 \label{sec:type:vegas_config_t}
 
 We store the complete configuration in a transparent container.
 The [[vegas_config_t]] object inside VEGAS must not be directly accesible.
 We provide a get method which returns a copy of the [[vegas_config_t]] object.
 
 Apart from the options which can be set by the constructor of [[vegas_t]] object, we store the run-time configuration [[n_calls]], [[calls_per_box]], [[n_bins]] and [[n_boxes]].
 Those are calculated and set accordingly by VEGAS.
 <<Vegas: public>>=
   public :: vegas_config_t
 <<Vegas: types>>=
   type :: vegas_config_t
      integer :: n_dim = 0
-     real(default) :: alpha = 1.5
+     real(default) :: alpha = 1.5_default
      integer :: n_bins_max = 50
      integer :: iterations = 5
      integer :: mode = VEGAS_MODE_STRATIFIED
      integer :: calls_per_box = 0
      integer :: n_calls = 0
      integer :: n_calls_min = 20
      integer :: n_boxes = 1
      integer :: n_bins = 1
    contains
    <<Vegas: vegas config: TBP>>
   end type vegas_config_t
 
 @ %def vegas_config_t, n_calls, calls_per_box, n_bins, n_boxes
 @ Write out the configuration of the grid.
 <<Vegas: vegas config: TBP>>=
   procedure, public :: write => vegas_config_write
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_config_write (self, unit, indent)
+      class(vegas_config_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+      integer, intent(in), optional :: indent
+    end subroutine vegas_config_write
 <<Vegas: procedures>>=
-  subroutine vegas_config_write (self, unit, indent)
+  module subroutine vegas_config_write (self, unit, indent)
     class(vegas_config_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of dimensions            = ", self%n_dim
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Adaption power (alpha)          = ", self%alpha
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Max. number of bins (per dim.)  = ", self%n_bins_max
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of iterations            = ", self%iterations
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Mode (stratified or importance) = ", self%mode
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Calls per box                   = ", self%calls_per_box
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of calls                 = ", self%n_calls
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Min. number of calls            = ", self%n_calls_min
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of bins                  = ", self%n_bins
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of boxes                 = ", self%n_boxes
   end subroutine vegas_config_write
 
 @ %def vegas_config_write
 @
 \section{Type: vegas\_grid\_t}
 \label{sec:type:-vegas_g}
 
 We provide a simple and transparent grid container. The container can then later
 be used, to export the actual grid.
 <<Vegas: public>>=
   public :: vegas_grid_t
 <<Vegas: types>>=
   type :: vegas_grid_t
      integer :: n_dim = 1
      integer :: n_bins = 1
      real(default), dimension(:), allocatable :: x_lower
      real(default), dimension(:), allocatable :: x_upper
      real(default), dimension(:), allocatable :: delta_x
      real(default), dimension(:,:), allocatable :: xi
    contains
    <<Vegas: vegas grid: TBP>>
   end type vegas_grid_t
 
 @ %def vegas_grid_t
 @ Initialise grid.
 <<Vegas: interfaces>>=
   interface vegas_grid_t
      module procedure vegas_grid_init
   end interface vegas_grid_t
 
+<<Vegas: sub interfaces>>=
+    module function vegas_grid_init (n_dim, n_bins_max) result (self)
+      type(vegas_grid_t) :: self
+      integer, intent(in) :: n_dim
+      integer, intent(in) :: n_bins_max
+    end function vegas_grid_init
 <<Vegas: procedures>>=
-  type(vegas_grid_t) function vegas_grid_init (n_dim, n_bins_max) result (self)
+  module function vegas_grid_init (n_dim, n_bins_max) result (self)
+    type(vegas_grid_t) :: self
     integer, intent(in) :: n_dim
     integer, intent(in) :: n_bins_max
     self%n_dim = n_dim
     self%n_bins = 1
     allocate (self%x_upper(n_dim), source=1.0_default)
     allocate (self%x_lower(n_dim), source=0.0_default)
     allocate (self%delta_x(n_dim), source=1.0_default)
     allocate (self%xi((n_bins_max + 1), n_dim), source=0.0_default)
   end function vegas_grid_init
 
 @ %def vegas_grid_init
 @ Output.
 <<Vegas: vegas grid: TBP>>=
   procedure, public :: write => vegas_grid_write
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_grid_write (self, unit, pacify)
+      class(vegas_grid_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+      logical, intent(in), optional :: pacify
+    end subroutine vegas_grid_write
 <<Vegas: procedures>>=
-  subroutine vegas_grid_write (self, unit, pacify)
+  module subroutine vegas_grid_write (self, unit, pacify)
     class(vegas_grid_t), intent(in) :: self
     integer, intent(in), optional :: unit
     logical, intent(in), optional :: pacify
     logical :: pac
     integer :: u, i, j
     pac = .false.; if (present (pacify))  pac = pacify
     u = given_output_unit (unit)
     write (u, descr_fmt) "begin vegas_grid_t"
     write (u, integer_fmt) "n_dim = ", self%n_dim
     write (u, integer_fmt) "n_bins = ", self%n_bins
     write (u, descr_fmt) "begin x_lower"
     do j = 1, self%n_dim
        if (pac) then
           write (u, double_array_pac_fmt)  j, self%x_lower(j)
        else
           write (u, double_array_fmt)  j, self%x_lower(j)
        end if
     end do
     write (u, descr_fmt) "end x_lower"
     write (u, descr_fmt) "begin x_upper"
     do j = 1, self%n_dim
        if (pac) then
           write (u, double_array_pac_fmt) j, self%x_upper(j)
        else
           write (u, double_array_fmt) j, self%x_upper(j)
        end if
     end do
     write (u, descr_fmt) "end x_upper"
     write (u, descr_fmt) "begin delta_x"
     do j = 1, self%n_dim
        if (pac) then
           write (u, double_array_pac_fmt)  j, self%delta_x(j)
        else
           write (u, double_array_fmt)  j, self%delta_x(j)
        end if
     end do
     write (u, descr_fmt) "end delta_x"
     write (u, descr_fmt) "begin xi"
     do j = 1, self%n_dim
        do i = 1, self%n_bins + 1
           if (pac) then
              write (u, double_array2_pac_fmt) i, j, self%xi(i, j)
           else
              write (u, double_array2_fmt) i, j, self%xi(i, j)
           end if
        end do
     end do
     write (u, descr_fmt) "end xi"
     write (u, descr_fmt) "end vegas_grid_t"
   end subroutine vegas_grid_write
 
 @ %def vegas_grid_write
 @ Compare two grids, if they match up to an given precision.
 <<Vegas: public>>=
   public :: operator (==)
 <<Vegas: interfaces>>=
   interface operator (==)
      module procedure vegas_grid_equal
   end interface operator (==)
+<<Vegas: sub interfaces>>=
+    module function vegas_grid_equal (grid_a, grid_b) result (yorn)
+      logical :: yorn
+      type(vegas_grid_t), intent(in) :: grid_a, grid_b
+    end function vegas_grid_equal
 <<Vegas: procedures>>=
-  logical function vegas_grid_equal (grid_a, grid_b) result (yorn)
+  module function vegas_grid_equal (grid_a, grid_b) result (yorn)
+    logical :: yorn
     type(vegas_grid_t), intent(in) :: grid_a, grid_b
     yorn = .true.
     yorn = yorn .and. (grid_a%n_dim == grid_b%n_dim)
     yorn = yorn .and. (grid_a%n_bins == grid_b%n_bins)
     yorn = yorn .and. all (grid_a%x_lower == grid_b%x_lower)
     yorn = yorn .and. all (grid_a%x_upper == grid_b%x_upper)
     yorn = yorn .and. all (grid_a%delta_x == grid_b%delta_x)
     yorn = yorn .and. all (grid_a%xi == grid_b%xi)
   end function vegas_grid_equal
 
 @ %def vegas_grid_equal
 @ Resize each bin accordingly to its corresponding weight [[w]]. Can be used to
 resize the grid to a new size of bins or refinement.
 
 The procedure expects two arguments, firstly, [[n_bins]] and, secondly, the
 refinement weights [[w]]. If [[n_bins]] differs from the internally stored one,
 we resize the grid under consideration of [[w]]. If each element of [[w]] equals
 one, then the bins are resized preserving their original bin density.
 
 Anytime else, we refine the grid accordingly to [[w]].
 <<Vegas: vegas grid: TBP>>=
   procedure, private :: resize => vegas_grid_resize
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_grid_resize (self, n_bins, w)
+      class(vegas_grid_t), intent(inout) :: self
+      integer, intent(in) :: n_bins
+      real(default), dimension(:, :), intent(in) :: w
+    end subroutine vegas_grid_resize
 <<Vegas: procedures>>=
-  subroutine vegas_grid_resize (self, n_bins, w)
+  module subroutine vegas_grid_resize (self, n_bins, w)
     class(vegas_grid_t), intent(inout) :: self
     integer, intent(in) :: n_bins
     real(default), dimension(:, :), intent(in) :: w
     real(default), dimension(size(self%xi)) :: xi_new
     integer :: i, j, k
     real(default) :: pts_per_bin
     real(default) :: d_width
     do j = 1, self%n_dim
        if (self%n_bins /= n_bins) then
           pts_per_bin = real(self%n_bins, default) / real(n_bins, default)
           self%n_bins = n_bins
        else
           if (all (w(:, j) == 0.)) then
              call msg_bug ("[VEGAS] grid_resize: resize weights are zero.")
           end if
           pts_per_bin = sum(w(:, j)) / self%n_bins
        end if
        d_width = 0.
        k = 0
        do i = 2, self%n_bins
           do while (pts_per_bin > d_width)
              k = k + 1
              d_width = d_width + w(k, j)
           end do
           d_width = d_width - pts_per_bin
           associate (x_upper => self%xi(k + 1, j), x_lower => self%xi(k, j))
             xi_new(i) = x_upper - (x_upper - x_lower) * d_width / w(k, j)
           end associate
        end do
        self%xi(:, j) = 0. ! Reset grid explicitly
        self%xi(2:n_bins, j) = xi_new(2:n_bins)
        self%xi(n_bins + 1, j) = 1.
     end do
   end subroutine vegas_grid_resize
 
 @ %def vegas_grid_resize
 @ Find the probability for a given [[x]] in the unit hypercube.
 
-For the case [[n_bins < N_BINARY_SEARCH]], we utilize linear search which is faster for short arrays.
-Else we make use of a binary search. Furthermore, we calculate the inverse of
-the probability and invert the result only at the end (saving some time on division).
+For the case [[n_bins < N_BINARY_SEARCH]], we utilize linear search
+which is faster for short arrays. Else we make use of a binary
+search. Furthermore, we calculate the inverse of the probability and
+invert the result only at the end (saving some time on division).
 <<Vegas: vegas grid: TBP>>=
   procedure, public :: get_probability => vegas_grid_get_probability
+<<Vegas: sub interfaces>>=
+    module function vegas_grid_get_probability (self, x) result (g)
+      class(vegas_grid_t), intent(in) :: self
+      real(default), dimension(:), intent(in) :: x
+      real(default) :: g
+    end function vegas_grid_get_probability
 <<Vegas: procedures>>=
-  function vegas_grid_get_probability (self, x) result (g)
+  module function vegas_grid_get_probability (self, x) result (g)
     class(vegas_grid_t), intent(in) :: self
     real(default), dimension(:), intent(in) :: x
     real(default) :: g
     integer :: j, i_lower, i_higher, i_mid
     real(default), dimension(size(x)) :: y
     g = 1
     y = (x - self%x_lower) / self%delta_x
     if (any (y < 0 .or. y > 1)) then
        g = 0; return
     end if
     ndim: do j = 1, self%n_dim
        i_lower = 1
        i_higher = self%n_bins + 1
        !! Left-most search
        search: do while (i_lower < i_higher - 1)
           i_mid = floor ((i_higher + i_lower) / 2.)
           if (y(j) > self%xi(i_mid, j)) then
              i_lower = i_mid
           else
              i_higher = i_mid
           end if
        end do search
        g = g * (self%delta_x(j) * &
             & self%n_bins * (self%xi(i_lower + 1, j) - self%xi(i_lower, j)))
     end do ndim
     ! Move division to the end, which is more efficient.
     if (g /= 0) g = 1 / g
   end function vegas_grid_get_probability
 
 @ %def vegas_grid_get_probability
 @ Broadcast the grid information. As safety measure, we get the actual grid
 object from VEGAS (correclty allocated, but for non-root unfilled) and broadcast
 the root object. On success, we set grid into VEGAS.
 
 We use the non-blocking broadcast routine, because we have to send quite a bunch
 of integers and reals.
 
 We have to be very careful with [[n_bins]], the number of bins can actually
 change during different iterations. If we reuse a grid, we have to take that,
 every grid uses the same [[n_bins]]. We expect, that the number of dimension
 does not change, which is in principle possible, but will be checked onto in
 [[vegas_set_grid]].
 <<MPI: Vegas: vegas grid: TBP>>=
   procedure, public :: broadcast => vegas_grid_broadcast
+<<MPI: Vegas: sub interfaces>>=
+    module subroutine vegas_grid_broadcast (self, comm)
+      class(vegas_grid_t), intent(inout) :: self
+      type(MPI_COMM), intent(in) :: comm
+    end subroutine vegas_grid_broadcast
 <<MPI: Vegas: procedures>>=
-  subroutine vegas_grid_broadcast (self, comm)
+  module subroutine vegas_grid_broadcast (self, comm)
     class(vegas_grid_t), intent(inout) :: self
     type(MPI_COMM), intent(in) :: comm
     integer :: j, ierror
     type(MPI_Request), dimension(self%n_dim + 4) :: status
     ! Blocking
     call MPI_Bcast (self%n_bins, 1, MPI_INTEGER, 0, comm)
     ! Non blocking
     call MPI_Ibcast (self%n_dim, 1, MPI_INTEGER, 0, comm, status(1))
     call MPI_Ibcast (self%x_lower, self%n_dim, &
          & MPI_DOUBLE_PRECISION, 0, comm, status(2))
     call MPI_Ibcast (self%x_upper, self%n_dim, &
          & MPI_DOUBLE_PRECISION, 0, comm, status(3))
     call MPI_Ibcast (self%delta_x, self%n_dim, &
          & MPI_DOUBLE_PRECISION, 0, comm, status(4))
     ndim: do j = 1, self%n_dim
        call MPI_Ibcast (self%xi(1:self%n_bins + 1, j), self%n_bins + 1,&
             & MPI_DOUBLE_PRECISION, 0, comm, status(4 + j))
     end do ndim
     call MPI_Waitall (self%n_dim + 4, status, MPI_STATUSES_IGNORE)
   end subroutine vegas_grid_broadcast
 
 @ %def vegas_grid_broadcast
 @
 \section{Type: vegas\_result\_t}
 \label{sec:type:-vegas_r}
 
 We store the result of the latest iteration(s) in a transparent container.
 The [[vegas_result_t]] object inside VEGAS must not be directly accessible.
 We export the a copy of the result via a get-method of the [[vegas_t]] object.
 
 We store latest event weight in [[evt_weight]] and a (possible) evebt weight
 excess in [[evt_weight_excess]], if the event weight is larger than
 [[max_abs_f]].
 <<Vegas: public>>=
   public :: vegas_result_t
 <<Vegas: types>>=
   type :: vegas_result_t
      integer :: it_start = 0
      integer :: it_num = 0
      integer :: samples = 0
-     real(default) :: sum_int_wgtd = 0.
+     real(default) :: sum_int_wgtd = 0._default
      real(default) :: sum_wgts
-     real(default) :: sum_chi = 0.
-     real(default) :: chi2 = 0.
-     real(default) :: efficiency = 0.
-     real(default) :: efficiency_pos = 0.
-     real(default) :: efficiency_neg = 0.
-     real(default) :: max_abs_f = 0.
-     real(default) :: max_abs_f_pos = 0.
-     real(default) :: max_abs_f_neg = 0.
-     real(default) :: result = 0.
-     real(default) :: std = 0.
-     real(default) :: evt_weight = 0.
-     real(default) :: evt_weight_excess = 0.
+     real(default) :: sum_chi = 0._default
+     real(default) :: chi2 = 0._default
+     real(default) :: efficiency = 0._default
+     real(default) :: efficiency_pos = 0._default
+     real(default) :: efficiency_neg = 0._default
+     real(default) :: max_abs_f = 0._default
+     real(default) :: max_abs_f_pos = 0._default
+     real(default) :: max_abs_f_neg = 0._default
+     real(default) :: result = 0._default
+     real(default) :: std = 0._default
+     real(default) :: evt_weight = 0._default
+     real(default) :: evt_weight_excess = 0._default
    contains
    <<Vegas: vegas result: TBP>>
   end type vegas_result_t
 
 @ %def vegas_results_t
 @ Write out the current status of the integration result.
 <<Vegas: vegas result: TBP>>=
   procedure, public :: write => vegas_result_write
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_result_write (self, unit, indent)
+      class(vegas_result_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+      integer, intent(in), optional :: indent
+    end subroutine vegas_result_write
 <<Vegas: procedures>>=
-  subroutine vegas_result_write (self, unit, indent)
+  module subroutine vegas_result_write (self, unit, indent)
     class(vegas_result_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Start iteration                 = ", self%it_start
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Iteration number                = ", self%it_num
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Sample number                   = ", self%samples
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 //")") &
          & "Sum of weighted integrals       = ", self%sum_int_wgtd
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")")  &
          & "Sum of weights                  = ", self%sum_wgts
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Sum of chi                      = ", self%sum_chi
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "chi2                            = ", self%chi2
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Overall efficiency              = ", self%efficiency
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "f-positive efficiency           = ", self%efficiency_pos
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "f-negative efficiency           = ", self%efficiency_neg
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Maximum absolute overall value  = ", self%max_abs_f
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Maximum absolute positive value = ", self%max_abs_f_pos
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Maximum absolute negative value = ", self%max_abs_f_neg
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Integral (of latest iteration)  = ", self%result
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Standard deviation              = ", self%std
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Event weight                    = ", self%evt_weight
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Event weight excess             = ", self%evt_weight_excess
   end subroutine vegas_result_write
 
 @  %def vegas_results_write
 @ Update results and efficiency.
 
 We define
 \begin{equation}
 \operatorname*{max}_{x} w(x) = \frac{f(x)}{p(x)} \Delta_{\text{jac}}.
 \end{equation}
 In the implementation we have to factor out [[n_calls]] in the jacobian.
 Also, during event generation.
 <<Vegas: vegas result: TBP>>=
   procedure, public :: update => vegas_result_update
   procedure, public :: update_efficiency => vegas_result_update_efficiency
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_result_update (result, integral, variance)
+      class(vegas_result_t), intent(inout) :: result
+      real(default), intent(in) :: integral
+      real(default), intent(in) :: variance
+    end subroutine vegas_result_update
+    module subroutine vegas_result_update_efficiency (result, &
+         n_calls, max_pos, max_neg, sum_pos, sum_neg)
+      class(vegas_result_t), intent(inout) :: result
+      integer, intent(in) :: n_calls
+      real(default), intent(in) :: max_pos
+      real(default), intent(in) :: max_neg
+      real(default), intent(in) :: sum_pos
+      real(default), intent(in) :: sum_neg
+    end subroutine vegas_result_update_efficiency
 <<Vegas: procedures>>=
-  subroutine vegas_result_update (result, integral, variance)
+  module subroutine vegas_result_update (result, integral, variance)
     class(vegas_result_t), intent(inout) :: result
     real(default), intent(in) :: integral
     real(default), intent(in) :: variance
     real(default) :: guarded_variance, sq_integral
     real(default) :: wgt, chi
     !! Guard against zero (or negative) variance.
     !! \Delta = I * \epsilon -> I = \Delta.
     if (variance < &
          max ((epsilon (integral) * integral)**2, tiny(integral))) then
        guarded_variance = &
             max ((epsilon (integral) * integral)**2, &
             tiny (integral))
     else
       guarded_variance = variance
     end if
     wgt = 1._default / guarded_variance
     sq_integral = integral**2
     result%result = integral
     result%std = sqrt (guarded_variance)
     result%samples = result%samples + 1
     if (result%samples == 1) then
        result%chi2 = 0._default
     else
        chi = integral
        if (result%sum_wgts > 0) then
           chi = chi - result%sum_int_wgtd / result%sum_wgts
        end if
        result%chi2 = result%chi2 * (result%samples - 2.0_default)
        result%chi2 = (wgt / (1._default + (wgt / result%sum_wgts))) &
             & * chi**2
        result%chi2 = result%chi2 / (result%samples - 1._default)
     end if
     result%sum_wgts = result%sum_wgts + wgt
     result%sum_int_wgtd = result%sum_int_wgtd + (integral * wgt)
     result%sum_chi = result%sum_chi + (sq_integral * wgt)
   end subroutine vegas_result_update
 
-  subroutine vegas_result_update_efficiency (result, &
+  module subroutine vegas_result_update_efficiency (result, &
        n_calls, max_pos, max_neg, sum_pos, sum_neg)
     class(vegas_result_t), intent(inout) :: result
     integer, intent(in) :: n_calls
     real(default), intent(in) :: max_pos
     real(default), intent(in) :: max_neg
     real(default), intent(in) :: sum_pos
     real(default), intent(in) :: sum_neg
     result%max_abs_f_pos = n_calls * max_pos
     result%max_abs_f_neg = n_calls * max_neg
     result%max_abs_f = &
          & max (result%max_abs_f_pos, result%max_abs_f_neg)
     result%efficiency_pos = 0
     if (max_pos > 0) then
        result%efficiency_pos = &
             & sum_pos / max_pos
     end if
     result%efficiency_neg = 0
     if (max_neg > 0) then
        result%efficiency_neg = &
             & sum_neg / max_neg
     end if
     result%efficiency = 0
     if (result%max_abs_f > 0.) then
        result%efficiency = (sum_pos + sum_neg) &
             & / result%max_abs_f
     end if
   end subroutine vegas_result_update_efficiency
 
 @ %def vegas_result_update, vegas_result_update_efficiency
 @ Reset results.
 <<Vegas: vegas result: TBP>>=
   procedure, public :: reset => vegas_result_reset
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_result_reset (result)
+      class(vegas_result_t), intent(inout) :: result
+    end subroutine vegas_result_reset
 <<Vegas: procedures>>=
-  subroutine vegas_result_reset (result)
+  module subroutine vegas_result_reset (result)
     class(vegas_result_t), intent(inout) :: result
     result%sum_int_wgtd = 0
     result%sum_wgts = 0
     result%sum_chi = 0
     result%it_num = 0
     result%samples = 0
     result%chi2 = 0
     result%efficiency = 0
     result%efficiency_pos = 0
     result%efficiency_neg = 0
     result%max_abs_f = 0
     result%max_abs_f_pos = 0
     result%max_abs_f_neg = 0
   end subroutine vegas_result_reset
 
 @ %def vegas_result_reset
 @ Send the result object to specified rank, internally in a non-blocking way.
 
 We do not need to handle the event results, because each event result is atomic.
 <<MPI: Vegas: vegas result: TBP>>=
   procedure, public :: send => vegas_result_send
+<<MPI: Vegas: sub interfaces>>=
+    module subroutine vegas_result_send (self, receiver, tag, comm, request)
+      class(vegas_result_t), intent(in) :: self
+      integer, intent(in) :: receiver
+      integer, intent(in) :: tag
+      type(MPI_COMM), intent(in) :: comm
+      type(MPI_Request), dimension(:), intent(inout) :: request
+    end subroutine vegas_result_send
 <<MPI: Vegas: procedures>>=
-  subroutine vegas_result_send (self, receiver, tag, comm, request)
+  module subroutine vegas_result_send (self, receiver, tag, comm, request)
     class(vegas_result_t), intent(in) :: self
     integer, intent(in) :: receiver
     integer, intent(in) :: tag
     type(MPI_COMM), intent(in) :: comm
     type(MPI_Request), dimension(:), intent(inout) :: request
     if (size (request) /= self%get_n_requests ()) &
          call msg_bug ("VEGAS: number of requests does not match.")
     call MPI_Isend (self%it_start, 1, MPI_INTEGER, receiver, tag + 1,&
          & comm, request(1))
     call MPI_Isend (self%it_num, 1, MPI_INTEGER, receiver , tag + 2,&
          & comm, request(2))
     call MPI_Isend (self%samples, 1, MPI_INTEGER, receiver, tag + 3,&
          & comm, request(3))
     call MPI_Isend (self%sum_int_wgtd, 1, MPI_DOUBLE_PRECISION, receiver, tag + 4,&
          & comm, request(4))
     call MPI_Isend (self%sum_wgts, 1, MPI_DOUBLE_PRECISION, receiver, tag + 5,&
          & comm, request(5))
     call MPI_Isend (self%sum_chi, 1, MPI_DOUBLE_PRECISION, receiver, tag + 6,&
          & comm, request(6))
     call MPI_Isend (self%chi2, 1, MPI_DOUBLE_PRECISION, receiver, tag + 7,&
          & comm, request(7))
     call MPI_Isend (self%efficiency, 1, MPI_DOUBLE_PRECISION, receiver, tag + 8,&
          & comm, request(8))
     call MPI_Isend (self%efficiency_pos, 1, MPI_DOUBLE_PRECISION, receiver, tag + 9,&
          & comm, request(9))
     call MPI_Isend (self%efficiency_neg, 1, MPI_DOUBLE_PRECISION, receiver, tag + 10,&
          & comm, request(10))
     call MPI_Isend (self%max_abs_f, 1, MPI_DOUBLE_PRECISION, receiver, tag + 11,&
          & comm, request(11))
     call MPI_Isend (self%max_abs_f_pos, 1, MPI_DOUBLE_PRECISION, receiver, tag + 12,&
          & comm, request(12))
     call MPI_Isend (self%max_abs_f_neg, 1, MPI_DOUBLE_PRECISION, receiver, tag + 13,&
          & comm, request(13))
     call MPI_Isend (self%result, 1, MPI_DOUBLE_PRECISION, receiver, tag + 14,&
          & comm, request(14))
     call MPI_Isend (self%std, 1, MPI_DOUBLE_PRECISION, receiver, tag + 15,&
          & comm, request(15))
   end subroutine vegas_result_send
 
 @ %def vegas_result_communicate
 @ Receive the result object from a specified rank, internally in a non-blocking way.
 <<MPI: Vegas: vegas result: TBP>>=
   procedure, public :: receive => vegas_result_receive
+<<MPI: Vegas: sub interfaces>>=
+    module subroutine vegas_result_receive (self, sender, tag, comm, request)
+      class(vegas_result_t), intent(inout) :: self
+      integer, intent(in) :: sender
+      integer, intent(in) :: tag
+      type(MPI_COMM), intent(in) :: comm
+      type(MPI_REQUEST), dimension(:), intent(inout) :: request
+    end subroutine vegas_result_receive
 <<MPI: Vegas: procedures>>=
-  subroutine vegas_result_receive (self, sender, tag, comm, request)
+  module subroutine vegas_result_receive (self, sender, tag, comm, request)
     class(vegas_result_t), intent(inout) :: self
     integer, intent(in) :: sender
     integer, intent(in) :: tag
     type(MPI_COMM), intent(in) :: comm
     type(MPI_REQUEST), dimension(:), intent(inout) :: request
     if (size (request) /= self%get_n_requests ()) &
          call msg_bug ("VEGAS: number of requests does not match.")
     call MPI_Irecv (self%it_start, 1, MPI_INTEGER, sender, tag + 1,&
          & comm, request(1))
     call MPI_Irecv (self%it_num, 1, MPI_INTEGER, sender , tag + 2,&
          & comm, request(2))
     call MPI_Irecv (self%samples, 1, MPI_INTEGER, sender, tag + 3,&
          & comm, request(3))
     call MPI_Irecv (self%sum_int_wgtd, 1, MPI_DOUBLE_PRECISION, sender, tag + 4,&
          & comm, request(4))
     call MPI_Irecv (self%sum_wgts, 1, MPI_DOUBLE_PRECISION, sender, tag + 5,&
          & comm, request(5))
     call MPI_Irecv (self%sum_chi, 1, MPI_DOUBLE_PRECISION, sender, tag + 6,&
          & comm, request(6))
     call MPI_Irecv (self%chi2, 1, MPI_DOUBLE_PRECISION, sender, tag + 7,&
          & comm, request(7))
     call MPI_Irecv (self%efficiency, 1, MPI_DOUBLE_PRECISION, sender, tag + 8,&
          & comm, request(8))
     call MPI_Irecv (self%efficiency_pos, 1, MPI_DOUBLE_PRECISION, sender, tag + 9,&
          & comm, request(9))
     call MPI_Irecv (self%efficiency_neg, 1, MPI_DOUBLE_PRECISION, sender, tag + 10,&
          & comm, request(10))
     call MPI_Irecv (self%max_abs_f, 1, MPI_DOUBLE_PRECISION, sender, tag + 11,&
          & comm, request(11))
     call MPI_Irecv (self%max_abs_f_pos, 1, MPI_DOUBLE_PRECISION, sender, tag + 12,&
          & comm, request(12))
     call MPI_Irecv (self%max_abs_f_neg, 1, MPI_DOUBLE_PRECISION, sender, tag + 13,&
          & comm, request(13))
     call MPI_Irecv (self%result, 1, MPI_DOUBLE_PRECISION, sender, tag + 14,&
          & comm, request(14))
     call MPI_Irecv (self%std, 1, MPI_DOUBLE_PRECISION, sender, tag + 15,&
          & comm, request(15))
   end subroutine vegas_result_receive
 
 @ %def vegas_result_receive
 @
 <<MPI: Vegas: vegas result: TBP>>=
   procedure, private :: get_n_requests => vegas_result_get_n_requests
+<<MPI: Vegas: sub interfaces>>=
+    pure module function vegas_result_get_n_requests (result) result (n_requests)
+      class(vegas_result_t), intent(in) :: result
+      integer :: n_requests
+    end function vegas_result_get_n_requests
 <<MPI: Vegas: procedures>>=
-  pure integer function vegas_result_get_n_requests (result) result (n_requests)
+  pure module  function vegas_result_get_n_requests (result) result (n_requests)
     class(vegas_result_t), intent(in) :: result
+    integer :: n_requests
     n_requests = 15
   end function vegas_result_get_n_requests
 @ %def vegas_result_get_n_requests
 @
 \section{Type: vegas\_handler\_t}
 \label{sec:vegas_handler_t}
 
 Callback handler for VEGAS result and grid.
 
 <<MPI: Vegas: public>>=
   public :: vegas_handler_t
 <<MPI: Vegas: types>>=
   type, extends(request_handler_t) :: vegas_handler_t
      type(vegas_result_t), pointer :: result => null ()
      real(default), dimension(:, :), pointer :: d => null ()
    contains
    <<Vegas: vegas handler: TBP>>
   end type vegas_handler_t
 @ %def vegas_handler_t
 @ Provide the actual communication between master and client.
 
 The communication for [[vegas_result_t]] is done in a separate procedure, where we only have to pass the MPI requests.
 Collecting of the distribution array is done directly.
 <<MPI: Vegas: vegas handler: TBP>>=
   procedure :: init => vegas_handler_init
   procedure :: write => vegas_handler_write
   procedure :: handle => vegas_handler_handle
   procedure :: client_handle => vegas_handler_client_handle
   final :: vegas_handler_final
+<<MPI: Vegas: sub interfaces>>=
+    module subroutine vegas_handler_init (handler, handler_id, result, d)
+      class(vegas_handler_t), intent(inout) :: handler
+      integer, intent(in) :: handler_id
+      type(vegas_result_t), intent(in), target :: result
+      real(default), dimension(:, :), intent(in), target :: d
+    end subroutine vegas_handler_init
+    module subroutine vegas_handler_write (handler, unit)
+      class(vegas_handler_t), intent(in) :: handler
+      integer, intent(in), optional :: unit
+    end subroutine vegas_handler_write
+    module subroutine vegas_handler_client_handle (handler, dest_rank, comm)
+      class(vegas_handler_t), intent(inout) :: handler
+      integer, intent(in) :: dest_rank
+      type(MPI_COMM), intent(in) :: comm
+    end subroutine vegas_handler_client_handle
+    module subroutine vegas_handler_handle (handler, source_rank, comm)
+      class(vegas_handler_t), intent(inout) :: handler
+      integer, intent(in) :: source_rank
+      type(MPI_COMM), intent(in) :: comm
+    end subroutine vegas_handler_handle
+    module subroutine vegas_handler_final (handler)
+      type(vegas_handler_t), intent(inout) :: handler
+    end subroutine vegas_handler_final
 <<MPI: Vegas: procedures>>=
-  subroutine vegas_handler_init (handler, handler_id, result, d)
+  module subroutine vegas_handler_init (handler, handler_id, result, d)
     class(vegas_handler_t), intent(inout) :: handler
     integer, intent(in) :: handler_id
     type(vegas_result_t), intent(in), target :: result
     real(default), dimension(:, :), intent(in), target :: d
     integer :: n_requests, tag_offset
     handler%result => result
     handler%d => d
     handler%finished = .false.
     !! Add one request for handling of the distribution d.
     n_requests = result%get_n_requests () + 1
     tag_offset = max(handler_id - 1, 0) * n_requests
     call handler%allocate (n_requests, tag_offset)
   end subroutine vegas_handler_init
 
-  subroutine vegas_handler_write (handler, unit)
+  module subroutine vegas_handler_write (handler, unit)
     class(vegas_handler_t), intent(in) :: handler
     integer, intent(in), optional :: unit
     integer :: u, j
     u = given_output_unit (unit)
     write (u, "(A)") "[VEGAS_HANDLER]"
     call handler%base_write (unit)
     call handler%result%write (u)
     write (u, "(A)") "BEGIN D"
     do j = 1, size (handler%d, dim=2)
        write (u, "(1X,I3,999(1X," // FMT_17 // "))") j, handler%d(:, j)
     end do
     write (u, "(A)") "END D"
   end subroutine vegas_handler_write
 
-  subroutine vegas_handler_handle (handler, source_rank, comm)
+  module subroutine vegas_handler_handle (handler, source_rank, comm)
     class(vegas_handler_t), intent(inout) :: handler
     integer, intent(in) :: source_rank
     type(MPI_COMM), intent(in) :: comm
     !! Take the complete contiguous array memory.
     call MPI_Irecv (handler%d, size (handler%d),&
             & MPI_DOUBLE_PRECISION, source_rank, handler%tag_offset, comm,&
             & handler%request(1))
     call handler%result%receive (source_rank, handler%tag_offset, comm, &
          handler%request(2:))
     handler%activated = .true.
     handler%finished = .false.
   end subroutine vegas_handler_handle
 
-  subroutine vegas_handler_client_handle (handler, dest_rank, comm)
+  module subroutine vegas_handler_client_handle (handler, dest_rank, comm)
     class(vegas_handler_t), intent(inout) :: handler
     integer, intent(in) :: dest_rank
     type(MPI_COMM), intent(in) :: comm
     !! Take the complete contiguous array memory.
     call MPI_Isend (handler%d, size (handler%d),&
          & MPI_DOUBLE_PRECISION, dest_rank, handler%tag_offset, comm,&
          & handler%request(1))
     call handler%result%send (dest_rank, handler%tag_offset, comm, &
          handler%request(2:))
     handler%activated = .true.
     handler%finished = .false.
   end subroutine vegas_handler_client_handle
 
   !> Finalize vegas_handler_t.
   !!
   !! Nullify pointer to object.
-  subroutine vegas_handler_final (handler)
+  module subroutine vegas_handler_final (handler)
     type(vegas_handler_t), intent(inout) :: handler
     nullify (handler%result)
     nullify (handler%d)
   end subroutine vegas_handler_final
 
 @ %def vegas_handler_init, vegas_handler_write, vegas_handler_handle,
 @ %def vegas_handler_client_handle, vegas_handler_final
 @
 \section{Type: vegas\_t}
 \label{sec:type:-vegas_t}
 
 The VEGAS object contains the methods for integration and grid resize- and
 refinement. We store the grid configuration and the (current) result in
 transparent containers alongside with the actual grid and the distribution.
 
 The values of the distribution depend on the chosen mode whether the function
 value or the variance is stored. The distribution is used after each iteration
 to refine the grid.
 <<Vegas: public>>=
   public :: vegas_t
 <<Vegas: types>>=
   type :: vegas_t
      private
      type(vegas_config_t) :: config
-     real(default) :: hypercube_volume = 0.
-     real(default) :: jacobian = 0.
+     real(default) :: hypercube_volume = 0._default
+     real(default) :: jacobian = 0._default
      real(default), dimension(:, :), allocatable :: d
      type(vegas_grid_t) :: grid
      integer, dimension(:), allocatable :: bin
      integer, dimension(:), allocatable :: box
      type(vegas_result_t) :: result
    <<Vegas: vegas: type>>
    contains
    <<Vegas: vegas: TBP>>
   end type vegas_t
 
 @ %def vegas_t
 @ Add members for MPI communication and parallel mode.
 Must be set before calling to [[vegas_integrate]], likewise [[vegas_set_calls]].
 <<MPI: Vegas: vegas: type>>=
   type(MPI_COMM) :: comm
   logical :: parallel_mode = .false.
 @ We overload the type constructor of [[vegas_t]] which initialises the
 mandatory argument [[n_dim]] and allocate memory for the grid.
 <<Vegas: interfaces>>=
   interface vegas_t
      module procedure vegas_init
   end interface vegas_t
-<<Vegas: procedures>>=
-  type(vegas_t) function vegas_init (n_dim, alpha, n_bins_max, iterations, mode) result (self)
+<<Vegas: sub interfaces>>=
+    module function vegas_init &
+         (n_dim, alpha, n_bins_max, iterations, mode) result (self)
+      type(vegas_t) :: self
+      integer, intent(in) :: n_dim
+      integer, intent(in), optional :: n_bins_max
+      real(default), intent(in), optional :: alpha
+      integer, intent(in), optional :: iterations
+      integer, intent(in), optional :: mode
+    end function vegas_init
+<<Vegas: procedures>>=
+  module function vegas_init &
+       (n_dim, alpha, n_bins_max, iterations, mode) result (self)
+    type(vegas_t) :: self
     integer, intent(in) :: n_dim
     integer, intent(in), optional :: n_bins_max
     real(default), intent(in), optional :: alpha
     integer, intent(in), optional :: iterations
     integer, intent(in), optional :: mode
     self%config%n_dim = n_dim
     if (present (alpha)) self%config%alpha = alpha
     if (present (n_bins_max)) self%config%n_bins_max = n_bins_max
     if (present (iterations)) self%config%iterations = iterations
     if (present (mode)) self%config%mode = mode
     self%grid = vegas_grid_t (n_dim, self%config%n_bins_max)
     allocate (self%d(self%config%n_bins_max, n_dim), source=0.0_default)
     allocate (self%box(n_dim), source=1)
     allocate (self%bin(n_dim), source=1)
     self%config%n_bins = 1
     self%config%n_boxes = 1
     call self%set_limits (self%grid%x_lower, self%grid%x_upper)
     call self%reset_grid ()
     call self%reset_result ()
   <<Vegas: vegas init>>
   end function vegas_init
 
 @ %def vegas_init
 <<Vegas: vegas init>>=
 @
 @ Prepare VEGAS to be run in parallel modues (when compiled with MPI).
 <<MPI: Vegas: vegas init>>=
   call self%prepare_parallel_integrate (MPI_COMM_WORLD, &
        duplicate_comm = .false., &
        parallel_mode = .true.)
 @
 @ Finalize the grid. Deallocate grid memory.
 <<Vegas: vegas: TBP>>=
   procedure, public :: final => vegas_final
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_final (self)
+      class(vegas_t), intent(inout) :: self
+    end subroutine vegas_final
 <<Vegas: procedures>>=
-  subroutine vegas_final (self)
+  module subroutine vegas_final (self)
     class(vegas_t), intent(inout) :: self
     deallocate (self%grid%x_upper)
     deallocate (self%grid%x_lower)
     deallocate (self%grid%delta_x)
     deallocate (self%d)
     deallocate (self%grid%xi)
     deallocate (self%box)
     deallocate (self%bin)
   end subroutine vegas_final
 @ %def vegas_final
 
 \section{Get-/Set-methods}
 \label{sec:set-get-methods}
 
 @ The VEGAS object prohibits direct access from outside. Communication is handle
 via get- or set-methods. Set the limits of integration. The defaults limits
 correspong the $n$-dimensionl unit hypercube.
 
 \textit{Remark:} After setting the limits, the grid is initialised, again.
 Previous results are lost due to recalculation of the overall jacobian.
 <<Vegas: vegas: TBP>>=
   procedure, public :: set_limits => vegas_set_limits
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_set_limits (self, x_lower, x_upper)
+      class(vegas_t), intent(inout) :: self
+      real(default), dimension(:), intent(in) :: x_lower
+      real(default), dimension(:), intent(in) :: x_upper
+    end subroutine vegas_set_limits
 <<Vegas: procedures>>=
-  subroutine vegas_set_limits (self, x_lower, x_upper)
+  module subroutine vegas_set_limits (self, x_lower, x_upper)
     class(vegas_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x_lower
     real(default), dimension(:), intent(in) :: x_upper
     if (size (x_lower) /= self%config%n_dim &
          & .or. size (x_upper) /= self%config%n_dim) then
        write (msg_buffer, "(A, I5, A, I5, A, I5)") &
             "VEGAS: [set_limits] n_dim of new lower/upper integration limit&
             & does not match previously set n_dim. ", self%config%n_dim, " =/=&
             & ", size (x_lower), " =/= ", size (x_upper)
        call msg_fatal ()
     end if
     if (any(x_upper < x_lower)) then
        call msg_fatal ("VEGAS: [set_limits] upper limits are smaller than lower limits.")
     end if
     if (any((x_upper - x_lower) > huge(0._default))) then
        call msg_fatal ("VEGAS: [set_limits] upper and lower limits exceed rendering.")
     end if
     self%grid%x_upper = x_upper
     self%grid%x_lower = x_lower
     self%grid%delta_x = self%grid%x_upper - self%grid%x_lower
     self%hypercube_volume = product (self%grid%delta_x)
     call self%reset_result ()
   end subroutine vegas_set_limits
 
 @ %def vegas_set_limits
 @ Set the number of calls. If the number of calls changed during different
 passes, we resize the grid preserving the probability density. We should reset
 the results after changing the number of calls which change the size of the grid
 and the running mode of VEGAS. But, this is a set method only for the number of
 calls.
 <<Vegas: vegas: TBP>>=
   procedure, public :: set_calls => vegas_set_n_calls
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_set_n_calls (self, n_calls)
+      class(vegas_t), intent(inout) :: self
+      integer, intent(in) :: n_calls
+    end subroutine vegas_set_n_calls
 <<Vegas: procedures>>=
-  subroutine vegas_set_n_calls (self, n_calls)
+  module subroutine vegas_set_n_calls (self, n_calls)
     class(vegas_t), intent(inout) :: self
     integer, intent(in) :: n_calls
     if (.not. (n_calls > 0)) then
        write (msg_buffer, "(A, I5)") &
             "VEGAS: [set_calls] number of calls must be a positive number. Keep&
             & number of calls = ", self%config%n_calls
        call msg_warning ()
     else
        self%config%n_calls = max (n_calls, self%config%n_calls_min)
        if (self%config%n_calls /= n_calls) then
           write (msg_buffer, "(A,I5)") &
                "VEGAS: [set calls] number of calls is too few, reset to ", self%config%n_calls
           call msg_warning ()
        end if
        call self%init_grid ()
     end if
   end subroutine vegas_set_n_calls
 
 @ %def vegas_set_n_calls
 @ Get the grid object and set [[n_bins]], [[n_dim]] inside grid container.
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_grid => vegas_get_grid
+<<Vegas: sub interfaces>>=
+    module function vegas_get_grid (self) result (grid)
+      class(vegas_t), intent(in) :: self
+      type(vegas_grid_t) :: grid
+    end function vegas_get_grid
 <<Vegas: procedures>>=
-  type(vegas_grid_t) function vegas_get_grid (self) result (grid)
+  module function vegas_get_grid (self) result (grid)
     class(vegas_t), intent(in) :: self
+    type(vegas_grid_t) :: grid
     grid = self%grid
     grid%n_dim = self%config%n_dim
     grid%n_bins = self%config%n_bins
   end function vegas_get_grid
 
 @ %def vegas_get_grid
 @ Set grid. We need a set method for the parallelisation. We do some additional
 checks before copying the object. Be careful, we do not check on [[n_bins]],
 because the number of bins can change after setting [[n_calls]]. We remind you,
 that you will loose all your current progress, if you use set the grid. Hence,
 it will only be used when compiled with [[MPI]].
 <<MPI: Vegas: vegas: TBP>>=
   procedure, public :: set_grid => vegas_set_grid
+<<MPI: Vegas: sub interfaces>>=
+    module subroutine vegas_set_grid (self, grid)
+      class(vegas_t), intent(inout) :: self
+      type(vegas_grid_t), intent(in) :: grid
+    end subroutine vegas_set_grid
 <<MPI: Vegas: procedures>>=
-  subroutine vegas_set_grid (self, grid)
+  module subroutine vegas_set_grid (self, grid)
     class(vegas_t), intent(inout) :: self
     type(vegas_grid_t), intent(in) :: grid
     integer :: j
     logical :: success
     success = .true.
     success = (success .and. (grid%n_dim .eq. self%config%n_dim))
     success = (success .and. all (grid%x_lower .eq. self%grid%x_lower))
     success = (success .and. all (grid%x_upper .eq. self%grid%x_upper))
     success = (success .and. all (grid%delta_x .eq. self%grid%delta_x))
     if (success) then
        self%config%n_bins = grid%n_bins
        do j = 1, self%config%n_dim
           self%grid%xi(1, j) = 0._default
           self%grid%xi(2:self%config%n_bins, j) = grid%xi(2:grid%n_bins, j)
           self%grid%xi(self%config%n_bins + 1, j) = 1._default
        end do
     else
        call msg_bug ("VEGAS: set grid: boundary conditions do not match.")
     end if
   end subroutine vegas_set_grid
 
 @ %def vegas_set_grid
-@ Prepare a parallel integration.
+@
+Prepare a parallel integration.
 
-A parallel integration requires a communicator, which may be duplicated in order to provide a safe communication context for the current VEGAS instance.
-Furthermore, given an optional parameter, the behavior with regards to the parallel evaluation can be changed (e.g. embbed integration).
+A parallel integration requires a communicator, which may be
+duplicated in order to provide a safe communication context for the
+current VEGAS instance. Furthermore, given an optional parameter, the
+behavior with regards to the parallel evaluation can be changed
+(e.g. embbed integration).
 <<MPI: Vegas: vegas: TBP>>=
-  procedure, public :: prepare_parallel_integrate => vegas_prepare_parallel_integrate
+  procedure, public :: prepare_parallel_integrate => &
+       vegas_prepare_parallel_integrate
+<<MPI: Vegas: sub interfaces>>=
+    module subroutine vegas_prepare_parallel_integrate &
+         (self, comm, duplicate_comm, parallel_mode)
+      class(vegas_t), intent(inout) :: self
+      type(MPI_COMM), intent(in) :: comm
+      logical, intent(in), optional :: duplicate_comm
+      logical, intent(in), optional :: parallel_mode
+    end subroutine vegas_prepare_parallel_integrate
 <<MPI: Vegas: procedures>>=
-  subroutine vegas_prepare_parallel_integrate (self, comm, duplicate_comm, parallel_mode)
+  module subroutine vegas_prepare_parallel_integrate &
+       (self, comm, duplicate_comm, parallel_mode)
     class(vegas_t), intent(inout) :: self
     type(MPI_COMM), intent(in) :: comm
     logical, intent(in), optional :: duplicate_comm
     logical, intent(in), optional :: parallel_mode
     logical :: flag
     flag = .true.; if (present (duplicate_comm)) flag = duplicate_comm
     if (duplicate_comm) then
        call MPI_COMM_DUP (comm, self%comm)
     else
        self%comm = comm
     end if
     self%parallel_mode = .true.; if (present (parallel_mode)) &
          self%parallel_mode = parallel_mode
   end subroutine vegas_prepare_parallel_integrate
 
 @ %def vegas_prepare_parallel_integrate
 @ We check if it is senseful to parallelize the actual grid. In simple, this
 means that [[n_boxes]] has to be larger than 2. With the result that we could
 have an actual superimposed stratified grid. In advance, we can give the size of
 communicator [[n_size]] and check whether we have enough boxes to distribute.
 <<MPI: Vegas: vegas: TBP>>=
   procedure, public :: is_parallelizable => vegas_is_parallelizable
+<<MPI: Vegas: sub interfaces>>=
+    elemental module function vegas_is_parallelizable &
+         (self, opt_n_size) result (flag)
+      class(vegas_t), intent(in) :: self
+      logical :: flag
+      integer, intent(in), optional :: opt_n_size
+    end function vegas_is_parallelizable
 <<MPI: Vegas: procedures>>=
-  elemental logical function vegas_is_parallelizable (self, opt_n_size) result (flag)
+  elemental module function vegas_is_parallelizable &
+       (self, opt_n_size) result (flag)
     class(vegas_t), intent(in) :: self
+    logical :: flag
     integer, intent(in), optional :: opt_n_size
     integer :: n_size
     n_size = 2; if (present (opt_n_size)) n_size = opt_n_size
     flag = (self%config%n_boxes**floor (self%config%n_dim / 2.) >= n_size)
   end function vegas_is_parallelizable
 
 @ %def vegas_is_parallelizable
 @ Get the config object.
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_config => vegas_get_config
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_get_config (self, config)
+      class(vegas_t), intent(in) :: self
+      type(vegas_config_t), intent(out) :: config
+    end subroutine vegas_get_config
 <<Vegas: procedures>>=
-  subroutine vegas_get_config (self, config)
+  module subroutine vegas_get_config (self, config)
     class(vegas_t), intent(in) :: self
     type(vegas_config_t), intent(out) :: config
     config = self%config
   end subroutine vegas_get_config
 
 @ %def vegas_get_config
 @ Set non-runtime dependent configuration. It will no be possible to change
 [[n_bins_max]].
 <<Vegas: vegas: TBP>>=
   procedure, public :: set_config => vegas_set_config
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_set_config (self, config)
+      class(vegas_t), intent(inout) :: self
+      class(vegas_config_t), intent(in) :: config
+    end subroutine vegas_set_config
 <<Vegas: procedures>>=
-  subroutine vegas_set_config (self, config)
+  module subroutine vegas_set_config (self, config)
     class(vegas_t), intent(inout) :: self
     class(vegas_config_t), intent(in) :: config
     self%config%alpha = config%alpha
     self%config%iterations = config%iterations
     self%config%mode = config%mode
     self%config%n_calls_min = config%n_calls_min
    end subroutine vegas_set_config
 
 @ %def vegas_set_config
 @ Get the result object.
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_result => vegas_get_result
+<<Vegas: sub interfaces>>=
+    module function vegas_get_result (self) result (result)
+      type(vegas_result_t) :: result
+      class(vegas_t), intent(in) :: self
+    end function vegas_get_result
 <<Vegas: procedures>>=
-  type(vegas_result_t) function vegas_get_result (self) result (result)
+  module function vegas_get_result (self) result (result)
+    type(vegas_result_t) :: result
     class(vegas_t), intent(in) :: self
     result = self%result
   end function vegas_get_result
 
 @ %def vegas_get_result
 @ Set the result object. Be reminded, that you will loose your current results,
 if you are not careful! Hence, it will only be avaible during usage with
 [[MPI]].
 <<MPI: Vegas: vegas: TBP>>=
   procedure, public :: set_result => vegas_set_result
+<<MPI: Vegas: sub interfaces>>=
+    module subroutine vegas_set_result (self, result)
+      class(vegas_t), intent(inout) :: self
+      type(vegas_result_t), intent(in) :: result
+    end subroutine vegas_set_result
 <<MPI: Vegas: procedures>>=
-  subroutine vegas_set_result (self, result)
+  module subroutine vegas_set_result (self, result)
     class(vegas_t), intent(inout) :: self
     type(vegas_result_t), intent(in) :: result
     self%result = result
   end subroutine vegas_set_result
 
 @ %def vegas_set_result
 @ Get (actual) number of calls.
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_calls => vegas_get_n_calls
+<<Vegas: sub interfaces>>=
+    elemental module function vegas_get_n_calls (self) result (n_calls)
+      class(vegas_t), intent(in) :: self
+      real(default) :: n_calls
+    end function vegas_get_n_calls
 <<Vegas: procedures>>=
-  elemental real(default) function vegas_get_n_calls (self) result (n_calls)
+  elemental module function vegas_get_n_calls (self) result (n_calls)
     class(vegas_t), intent(in) :: self
+    real(default) :: n_calls
     n_calls = self%config%n_calls
   end function vegas_get_n_calls
 
 @ %def vegas_get_n_calls
 @ Get the cumulative result of the integration. Recalculate weighted average of the integration.
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_integral => vegas_get_integral
+<<Vegas: sub interfaces>>=
+    elemental module function vegas_get_integral (self) result (integral)
+      class(vegas_t), intent(in) :: self
+      real(default) :: integral
+    end function vegas_get_integral
 <<Vegas: procedures>>=
-  elemental real(default) function vegas_get_integral (self) result (integral)
+  elemental module function vegas_get_integral (self) result (integral)
     class(vegas_t), intent(in) :: self
+    real(default) :: integral
     integral = 0.
     if (self%result%sum_wgts > 0.) then
        integral = self%result%sum_int_wgtd / self%result%sum_wgts
     end if
   end function vegas_get_integral
 
 @ %def vegas_get_integral
 @ Get the cumulative variance of the integration. Recalculate the variance.
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_variance => vegas_get_variance
+<<Vegas: sub interfaces>>=
+    elemental module function vegas_get_variance (self) result (variance)
+      class(vegas_t), intent(in) :: self
+      real(default) :: variance
+    end function vegas_get_variance
 <<Vegas: procedures>>=
-  elemental real(default) function vegas_get_variance (self) result (variance)
+  elemental module function vegas_get_variance (self) result (variance)
     class(vegas_t), intent(in) :: self
+    real(default) :: variance
     variance = 0.
     if (self%result%sum_wgts > 0.) then
        variance = 1.0 / self%result%sum_wgts
     end if
   end function vegas_get_variance
 
 @ %def vegas_get_variance
 @ Get efficiency.
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_efficiency => vegas_get_efficiency
+<<Vegas: sub interfaces>>=
+    elemental module function vegas_get_efficiency (self) result (efficiency)
+      class(vegas_t), intent(in) :: self
+      real(default) :: efficiency
+    end function vegas_get_efficiency
 <<Vegas: procedures>>=
-  elemental real(default) function vegas_get_efficiency (self) result (efficiency)
+  elemental module function vegas_get_efficiency (self) result (efficiency)
     class(vegas_t), intent(in) :: self
+    real(default) :: efficiency
     efficiency = 0.
     if (self%result%efficiency > 0. ) then
        efficiency = self%result%efficiency
     end if
   end function vegas_get_efficiency
 
 @ %def vegas_get_efficiency
 @ Get [[f_max]].
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_max_abs_f => vegas_get_max_abs_f
+<<Vegas: sub interfaces>>=
+    elemental module function vegas_get_max_abs_f (self) result (max_abs_f)
+      class(vegas_t), intent(in) :: self
+      real(default) :: max_abs_f
+    end function vegas_get_max_abs_f
 <<Vegas: procedures>>=
-  elemental real(default) function vegas_get_max_abs_f (self) result (max_abs_f)
+  elemental module function vegas_get_max_abs_f (self) result (max_abs_f)
     class(vegas_t), intent(in) :: self
+    real(default) :: max_abs_f
     max_abs_f = 0.
     if (self%result%max_abs_f > 0.) then
        max_abs_f = self%result%max_abs_f
     end if
   end function vegas_get_max_abs_f
 
 @ %def vegas_get_max_abs_f
 @ Get sum of absolute (positive and negative) values.
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_sum_abs_f_pos => vegas_get_sum_abs_f_pos
   procedure, public :: get_sum_abs_f_neg => vegas_get_sum_abs_f_neg
+<<Vegas: sub interfaces>>=
+    elemental module function vegas_get_sum_abs_f_pos (self) result (sum_abs_f)
+      class(vegas_t), intent(in) :: self
+      real(default) :: sum_abs_f
+    end function vegas_get_sum_abs_f_pos
+    elemental module function vegas_get_sum_abs_f_neg (self) result (sum_abs_f)    
+      class(vegas_t), intent(in) :: self
+      real(default) :: sum_abs_f
+    end function vegas_get_sum_abs_f_neg
 <<Vegas: procedures>>=
-  elemental real(default) function vegas_get_sum_abs_f_pos (self) result (sum_abs_f)
+  elemental module function vegas_get_sum_abs_f_pos (self) result (sum_abs_f)
     class(vegas_t), intent(in) :: self
+    real(default) :: sum_abs_f
     sum_abs_f = self%result%efficiency_pos * self%result%max_abs_f_pos
   end function vegas_get_sum_abs_f_pos
 
-  elemental real(default) function vegas_get_sum_abs_f_neg (self) result (sum_abs_f)
+  elemental module function vegas_get_sum_abs_f_neg (self) result (sum_abs_f)    
     class(vegas_t), intent(in) :: self
+    real(default) :: sum_abs_f
     sum_abs_f = self%result%efficiency_neg * self%result%max_abs_f_neg
   end function vegas_get_sum_abs_f_neg
 
 @ %def vegas_get_sum_abs_f_pos, vegas_get_sum_abs_f_neg
 @ Get [[f_max_pos]].
+<<Vegas: sub interfaces>>=
 <<Vegas: vegas: TBP>>=
-procedure, public :: get_max_abs_f_pos => vegas_get_max_abs_f_pos
+  procedure, public :: get_max_abs_f_pos => vegas_get_max_abs_f_pos
+<<Vegas: sub interfaces>>=
+    elemental module function vegas_get_max_abs_f_pos (self) result (max_abs_f)
+      class(vegas_t), intent(in) :: self
+      real(default) :: max_abs_f
+    end function vegas_get_max_abs_f_pos
 <<Vegas: procedures>>=
-elemental real(default) function vegas_get_max_abs_f_pos (self) result (max_abs_f)
-  class(vegas_t), intent(in) :: self
-  max_abs_f = 0.
-  if (self%result%max_abs_f_pos > 0.) then
-     max_abs_f = self%result%max_abs_f_pos
-  end if
-end function vegas_get_max_abs_f_pos
+  elemental module function vegas_get_max_abs_f_pos (self) result (max_abs_f)
+    class(vegas_t), intent(in) :: self
+    real(default) :: max_abs_f
+    max_abs_f = 0.
+    if (self%result%max_abs_f_pos > 0.) then
+       max_abs_f = self%result%max_abs_f_pos
+    end if
+  end function vegas_get_max_abs_f_pos
 
 @ %def vegas_get_max_abs_f_pos
 @ Get [[f_max_neg]].
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_max_abs_f_neg => vegas_get_max_abs_f_neg
+<<Vegas: sub interfaces>>=
+    elemental module function vegas_get_max_abs_f_neg (self) result (max_abs_f)
+      class(vegas_t), intent(in) :: self
+      real(default) :: max_abs_f
+    end function vegas_get_max_abs_f_neg
 <<Vegas: procedures>>=
-  elemental real(default) function vegas_get_max_abs_f_neg (self) result (max_abs_f)
+  elemental module function vegas_get_max_abs_f_neg (self) result (max_abs_f)
     class(vegas_t), intent(in) :: self
+    real(default) :: max_abs_f
     max_abs_f = 0.
     if (self%result%max_abs_f_neg > 0.) then
        max_abs_f = self%result%max_abs_f_neg
     end if
   end function vegas_get_max_abs_f_neg
 
 @ %def vegas_get_max_abs_f_neg
 @ Get event weight and excess.
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_evt_weight => vegas_get_evt_weight
   procedure, public :: get_evt_weight_excess => vegas_get_evt_weight_excess
+<<Vegas: sub interfaces>>=
+    module function vegas_get_evt_weight (self) result (evt_weight)
+      class(vegas_t), intent(in) :: self
+      real(default) :: evt_weight
+    end function vegas_get_evt_weight
+    module function vegas_get_evt_weight_excess (self) result (evt_weight_excess)
+      class(vegas_t), intent(in) :: self
+      real(default) :: evt_weight_excess
+    end function vegas_get_evt_weight_excess
 <<Vegas: procedures>>=
-  real(default) function vegas_get_evt_weight (self) result (evt_weight)
+  module function vegas_get_evt_weight (self) result (evt_weight)
     class(vegas_t), intent(in) :: self
+    real(default) :: evt_weight
     evt_weight = self%result%evt_weight
   end function vegas_get_evt_weight
 
-  real(default) function vegas_get_evt_weight_excess (self) result (evt_weight_excess)
+  module function vegas_get_evt_weight_excess (self) result (evt_weight_excess)
     class(vegas_t), intent(in) :: self
+    real(default) :: evt_weight_excess
     evt_weight_excess = self%result%evt_weight_excess
   end function vegas_get_evt_weight_excess
 
 @ %def vegas_get_evt_weight, vegas_get_evt_weight_excess
 @ Get and set distribution.
 
 We allow the distribution to be manipulated by an external call.
 The integration result cannot be changed by this, however, the error behavior may worsen and the efficiency may fall pretty low.
 But, also, the opposite is possible, see [[vamp2_equivalences]].
 <<Vegas: vegas: TBP>>=
   procedure, public :: get_distribution => vegas_get_distribution
   procedure, public :: set_distribution => vegas_set_distribution
+<<Vegas: sub interfaces>>=
+    module function vegas_get_distribution (self) result (d)
+      class(vegas_t), intent(in) :: self
+      real(default), dimension(:, :), allocatable :: d
+    end function vegas_get_distribution
+    module subroutine vegas_set_distribution (self, d)
+      class(vegas_t), intent(inout) :: self
+      real(default), dimension(:, :), intent(in) :: d
+    end subroutine vegas_set_distribution
 <<Vegas: procedures>>=
-  function vegas_get_distribution (self) result (d)
+  module function vegas_get_distribution (self) result (d)
     class(vegas_t), intent(in) :: self
     real(default), dimension(:, :), allocatable :: d
     d = self%d
   end function vegas_get_distribution
 
-  subroutine vegas_set_distribution (self, d)
+  module subroutine vegas_set_distribution (self, d)
     class(vegas_t), intent(inout) :: self
     real(default), dimension(:, :), intent(in) :: d
     if (size (d, dim = 2) /= self%config%n_dim) then
        call msg_bug ("[VEGAS] set_distribution: new distribution has wrong size of dimension")
     end if
     if (size (d, dim = 1) /= self%config%n_bins_max) then
        call msg_bug ("[VEGAS] set_distribution: new distribution has wrong number of bins")
     end if
     self%d = d
   end subroutine vegas_set_distribution
 
 @ %def vegas_get_distribution, vegas_set_distribution
 @ Allocate result handler from VEGAS integrator object.
+Gfortran 7/8/9 bug, has to remain in the main module:
 <<MPI: Vegas: vegas: TBP>>=
   procedure, public :: allocate_handler => vegas_allocate_handler
-<<MPI: Vegas: procedures>>=
+<<MPI: Vegas: main procedures>>=
   subroutine vegas_allocate_handler (self, handler_id, handler)
     class(vegas_t), intent(in), target :: self
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer, intent(out) :: handler
     allocate (vegas_handler_t :: handler)
     select type (handler)
     type is (vegas_handler_t)
        call handler%init (handler_id, result = self%result, d = self%d)
     end select
   end subroutine vegas_allocate_handler
 
 @ %def vegas_allocate_handler
 @ \section{Grid resize- and refinement}
 \label{sec:grid-resize-refin}
 
 Before integration the grid itself must be initialised. Given the number of
 [[n_calls]] and [[n_dim]] we prepare the grid for the integration.
 
 The grid is binned according to the VEGAS mode and [[n_calls]]. If the mode is
 not set to [[vegas_importance_only]], the grid is divided in to equal boxes. We
 try for 2 calls per box
 \begin{equation}
   boxes = \sqrt[n_{dim}]{\frac{calls}{2}}.
 \end{equation}
 If the numbers of boxes exceeds the number of bins, which is the case for low
 dimensions, the algorithm switches to stratified sampling. Otherwise, we are
 still using importance sampling, but keep the boxes for book keeping. If the
 number of bins changes from the previous invocation, bins are expanded or
 contracted accordingly, while preserving bin density.
 <<Vegas: vegas: TBP>>=
   procedure, private :: init_grid => vegas_init_grid
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_init_grid (self)
+      class(vegas_t), intent(inout) :: self
+    end subroutine vegas_init_grid
 <<Vegas: procedures>>=
-  subroutine vegas_init_grid (self)
+  module subroutine vegas_init_grid (self)
     class(vegas_t), intent(inout) :: self
     integer :: n_bins, n_boxes, box_per_bin, n_total_boxes
     real(default), dimension(:, :), allocatable :: w
     n_bins = self%config%n_bins_max
     n_boxes = 1
     if (self%config%mode /= VEGAS_MODE_IMPORTANCE_ONLY) then
        ! We try for 2 calls per box
        n_boxes = max (floor ((self%config%n_calls / 2.)**(1. / self%config%n_dim)), 1)
        self%config%mode = VEGAS_MODE_IMPORTANCE
        if (2 * n_boxes >= self%config%n_bins_max) then
           ! if n_bins/box < 2
           box_per_bin = max (n_boxes / self%config%n_bins_max, 1)
           n_bins =  min (n_boxes / box_per_bin, self%config%n_bins_max)
           n_boxes = box_per_bin * n_bins
           self%config%mode = VEGAS_MODE_STRATIFIED
        end if
     end if
     n_total_boxes = n_boxes**self%config%n_dim
     self%config%calls_per_box = max (floor (real (self%config%n_calls) / n_total_boxes), 2)
     self%config%n_calls = self%config%calls_per_box * n_total_boxes
     !  Total volume of x-space/(average n_calls per bin)
     self%jacobian = self%hypercube_volume * real(n_bins, default)&
          &**self%config%n_dim / real(self%config%n_calls, default)
     self%config%n_boxes = n_boxes
     if (n_bins /= self%config%n_bins) then
        allocate (w(self%config%n_bins, self%config%n_dim), source=1.0_default)
        call self%grid%resize (n_bins, w)
        self%config%n_bins = n_bins
     end if
   end subroutine vegas_init_grid
 
 @ %def vegas_init_grid
 @ Reset the cumulative result, and efficiency and max. grid values.
 <<Vegas: vegas: TBP>>=
   procedure, public :: reset_result => vegas_reset_result
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_reset_result (self)
+      class(vegas_t), intent(inout) :: self
+    end subroutine vegas_reset_result
 <<Vegas: procedures>>=
-  subroutine vegas_reset_result (self)
+  module subroutine vegas_reset_result (self)
     class(vegas_t), intent(inout) :: self
     call self%result%reset ()
   end subroutine vegas_reset_result
 
 @ %def vegas_reset_results
 @ Reset the grid. Purge the adapted grid and the distribution.
 Furthermore, reset the results. The maximal size of the grid remains.
 
 Note: Handle [[vegas_reset_grid]] with great care! Instead of reusing an old
 object, create a new one.
 <<Vegas: vegas: TBP>>=
   procedure, public :: reset_grid => vegas_reset_grid
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_reset_grid (self)
+      class(vegas_t), intent(inout) :: self
+    end subroutine vegas_reset_grid
 <<Vegas: procedures>>=
-  subroutine vegas_reset_grid (self)
+  module subroutine vegas_reset_grid (self)
     class(vegas_t), intent(inout) :: self
     self%config%n_bins = 1
     self%d = 0._default
     self%grid%xi = 0._default
     self%grid%xi(1, :) = 0._default
     self%grid%xi(2, :) = 1._default
     call self%reset_result ()
   end subroutine vegas_reset_grid
 
 @ %def vegas_reset_grid
 @ Refine the grid to match the distribution [[d]].
 
 Average the distribution over neighbouring bins, then contract or expand the bins.
 The averaging dampens high fluctuations amog the integrand or the variance.
 
 We make the type-bound procedure public accessible because the multi-channel
 integration refines each grid after integration over all grids.
 <<Vegas: vegas: TBP>>=
   procedure, public :: refine => vegas_refine_grid
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_refine_grid (self, average)
+      class(vegas_t), intent(inout) :: self
+      logical, intent(in), optional :: average
+    end subroutine vegas_refine_grid
 <<Vegas: procedures>>=
-  subroutine vegas_refine_grid (self, average)
+  module subroutine vegas_refine_grid (self, average)
     class(vegas_t), intent(inout) :: self
     logical, intent(in), optional :: average
     logical :: opt_average
     opt_average = .true.; if (present (average)) opt_average = average
     if (opt_average) call self%average_distribution ()
     call self%grid%resize (self%config%n_bins, self%d(:self%config%n_bins, :))
   end subroutine vegas_refine_grid
 
 @ %def vegas_refine_grid
 @ We average the collected values [[d]] of the (sq.) weighted [[f]] over
 neighbouring bins. The averaged [[d]] are then agian damped by a logarithm to
 enhance numerical stability. The results are then the refinement weights [[w]].
 
 We have to take care of the special case where we have a very low sampling rate.
 In those cases we can not be sure that the distribution is satisfying filled,
 although we have already averaged over neighbouring bins. This will lead to a
 squashing of the unfilled bins and such the boundaries of those will be pushed
 together. We circumvent this problem by setting those unfilled bins to the
 smallest representable value of a default real.
 
 The problem becomes very annoying in the multi-channel formualae where have to
 look up via binary search the corresponding probability of [[x]] and if the
 width is zero, the point will be neglected.
 
-Another issue arises when [[NaN]]s appear during sampling.
-A single [[NaN]] may spoil the complete adaption as the refinement depends on all weights for a single axis, c.f. [[vegas_grid_resize]] for the details of the mechanism.
-Therefore, if we find a single (or more) [[NaN]]s in the distribution of an axis, then we won't adapt that specific axis by setting the weights to one.
-Hence, we skip the adaption for parts of the grid, where [[NaN]] appeared during evaluation.
+Another issue arises when [[NaN]]s appear during sampling. A single
+[[NaN]] may spoil the complete adaption as the refinement depends on
+all weights for a single axis, c.f. [[vegas_grid_resize]] for the
+details of the mechanism. Therefore, if we find a single (or more)
+[[NaN]]s in the distribution of an axis, then we won't adapt that
+specific axis by setting the weights to one. Hence, we skip the
+adaption for parts of the grid, where [[NaN]] appeared during
+evaluation.
 <<Vegas: vegas: TBP>>=
   procedure, private :: average_distribution => vegas_average_distribution
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_average_distribution (self)
+      class(vegas_t), intent(inout) :: self
+    end subroutine vegas_average_distribution
 <<Vegas: procedures>>=
-  subroutine vegas_average_distribution (self)
+  module subroutine vegas_average_distribution (self)
     !! use, intrinsic :: ieee_arithmetic, only: ieee_is_nan
     class(vegas_t), intent(inout) :: self
     integer :: j
     ndim: do j = 1, self%config%n_dim
        associate (d => self%d(:self%config%n_bins, j), &
             n_bins => self%config%n_bins)
          !! Non-portable and compiler-flag sensistive test, may replace with ieee_is_nan
          if (any (d /= d)) then
             d = 1.0_default
             cycle ndim
          end if
          if (n_bins > 2) then
             d(1) = (d(1) + d(2)) / 2.0_default
             d(2:n_bins - 1) = (d(1:n_bins - 2) + d(2:n_bins - 1) + d(3:n_bins)) /&
                  & 3.0_default
             d(n_bins) = d(n_bins - 1) + d(n_bins) / 2.0_default
          end if
          if (all (d < tiny (d))) then
             d = 1.0_default; cycle ndim
          end if
          d = d / sum (d)
          where (d < tiny (1.0_default))
             d = tiny (1.0_default)
          end where
          where (d /= 1.0_default)
             d = ((d - 1.0_default) / log(d))**self%config%alpha
          elsewhere
             ! Analytic limes for d -> 1
             d = 1.0_default
          end where
        end associate
     end do ndim
   end subroutine vegas_average_distribution
 
 @ %def vegas_average_distribution
 @
 \section{Integration}
 \label{sec:integration}
 
 Integrate [[func]], in the previous set bounds [[x_lower]] to [[x_upper]], with [[n_calls]].
 Use results from previous invocations of [[integrate]] with [[opt_reset_result =
 .false.]] and better with subsequent calls.
 
 Before we walk through the hybercube, we initialise the grid (at a central position).
 
 We step through the (equidistant) boxes which ensure we do not miss any place in the n-dim. hypercube.
 In each box we sample [[calls_per_box]] random points and transform them to bin coordinates.
 The total integral and the total (sample) variance over each box $i$ is then calculated by
 \begin{align*}
   E(I)_{i} = \sum_{j}^{\text{calls per box}} I_{i, j}, \\
   V(I)_{i} = \text{calls per box} \frac{\sum_{j}^{\text{calls per box}}} I_{i, j}^{2} - (\sum_{j}^{\text{calls per box}} I_{i, j})**2 \frac{\text{calls per box}}{\text{calls per box} - 1}.
 \end{align*}
 The stratification of the $n$-dimensional hybercube allows a simple
 parallelisation of the algorithm (R. Kreckel, "Parallelization of adaptive MC integrators", Computer Physics Communications, vol. 106, no. 3, pp. 258–266, Nov. 1997.).
 We have to ensure that all boxes are sampled, but the number of boxes to
 distribute is too large. We allow each thread to sample a fraction $r$ of all
 boxes $k$ such that $r \ll k$. Furthermore, we constrain that the number of
 process $p$ is much smaller than $r$.
 
 The overall constraint is
 \begin{equation}
 p \ll r \ll k.
 \end{equation}
 We divide the intgeration into a parallel and
 a perpendicular subspace. The number of parallel dimensions is $D_{\parallel} =
 \lfloor \frac{D}{2} \rfloor$.
 <<Vegas: vegas: TBP>>=
   procedure, public :: integrate => vegas_integrate
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_integrate (self, func, rng, iterations, reset_result, &
+         refine_grid, verbose, result, abserr)
+      class(vegas_t), intent(inout) :: self
+      class(vegas_func_t), intent(inout) :: func
+      class(rng_t), intent(inout) :: rng
+      integer, intent(in), optional :: iterations
+      logical, intent(in), optional :: reset_result
+      logical, intent(in), optional :: refine_grid
+      logical, intent(in), optional :: verbose
+      real(default), optional, intent(out) :: result, abserr
+    end subroutine vegas_integrate
 <<Vegas: procedures>>=
-  subroutine vegas_integrate (self, func, rng, iterations, reset_result,&
-       & refine_grid, verbose, result, abserr)
+  module subroutine vegas_integrate (self, func, rng, iterations, reset_result, &
+       refine_grid, verbose, result, abserr)
     class(vegas_t), intent(inout) :: self
     class(vegas_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     integer, intent(in), optional :: iterations
     logical, intent(in), optional :: reset_result
     logical, intent(in), optional :: refine_grid
     logical, intent(in), optional :: verbose
     real(default), optional, intent(out) :: result, abserr
     integer :: it, k
     real(default), dimension(self%config%n_dim) :: x
     real(default) :: fval, fval_sq, bin_volume
     real(default) :: fval_box, fval_sq_box
     real(default) :: total_integral, total_sq_integral
     real(default) :: cumulative_int, cumulative_std
     real(default) :: sum_abs_f_pos, max_abs_f_pos
     real(default) :: sum_abs_f_neg, max_abs_f_neg
     logical :: opt_reset_result
     logical :: opt_refine_grid
     logical :: opt_verbose
     integer :: n_size
     integer :: n_dim_par
     logical :: box_success
   <<Vegas: vegas integrate: variables>>
     call set_options ()
     call self%init_grid ()
     if (opt_reset_result) call self%result%reset ()
     self%result%it_start = self%result%it_num
     cumulative_int = 0.
     cumulative_std = 0.
     n_dim_par = floor (self%config%n_dim / 2.)
     n_size = 1
   <<Vegas: vegas integrate: init>>
     if (opt_verbose) then
        call msg_message ("Results: [it, calls, integral, error, chi^2, eff.]")
     end if
     iteration: do it = 1, self%config%iterations
        self%result%it_num = self%result%it_start + it
        self%d = 0.
        self%box = 1
        self%bin = 1
        total_integral = 0.
        total_sq_integral = 0.
        sum_abs_f_pos = 0.
        max_abs_f_pos = 0.
        sum_abs_f_neg = 0.
        max_abs_f_neg = 0.
        box_success = .true.
        select type (rng)
        type is (rng_stream_t)
           call rng%next_substream ()
        end select
      <<Vegas: vegas integrate: presampling>>
        loop_over_par_boxes: do while (box_success)
           loop_over_perp_boxes: do while (box_success)
              fval_box = 0._default
              fval_sq_box = 0._default
              do k = 1, self%config%calls_per_box
                 call self%random_point (rng, x, bin_volume)
                 ! Call the function, yeah, call it...
                 fval = self%jacobian * bin_volume * func%evaluate (x)
                 fval_sq = fval**2
                 fval_box = fval_box + fval
                 fval_sq_box = fval_sq_box + fval_sq
                 if (fval > 0.) then
                    max_abs_f_pos = max(abs (fval), max_abs_f_pos)
                    sum_abs_f_pos = sum_abs_f_pos + abs(fval)
                 else
                    max_abs_f_neg = max(abs (fval), max_abs_f_neg)
                    sum_abs_f_neg = sum_abs_f_neg + abs(fval)
                 end if
                 if (self%config%mode /= VEGAS_MODE_STRATIFIED) then
                    call self%accumulate_distribution (fval_sq)
                 end if
              end do
              fval_sq_box = sqrt (fval_sq_box * self%config%calls_per_box)
              ! (a - b) * (a + b) = a**2 - b**2
              fval_sq_box = (fval_sq_box - fval_box) * (fval_sq_box + fval_box)
              if (fval_sq_box <= 0.0) fval_sq_box = fval_box**2 * epsilon (1.0_default)
              total_integral = total_integral + fval_box
              total_sq_integral = total_sq_integral + fval_sq_box
              if (self%config%mode == VEGAS_MODE_STRATIFIED) then
                 call self%accumulate_distribution (fval_sq_box)
              end if
              call increment_box_coord (self%box(n_dim_par + 1:self%config&
                   &%n_dim), box_success)
           end do loop_over_perp_boxes
           shift: do k = 1, n_size
              call increment_box_coord (self%box(1:n_dim_par), box_success)
              if (.not. box_success) exit shift
           end do shift
         <<Vegas: vegas integrate: loop>>
        end do loop_over_par_boxes
      <<Vegas: vegas integrate: postsampling>>
        associate (result => self%result)
          ! Compute final results for this iterations
          call result%update (total_integral, variance = &
                total_sq_integral / (self%config%calls_per_box - 1._default))
          call result%update_efficiency (n_calls = self%config%n_calls, &
               max_pos = max_abs_f_pos, max_neg = max_abs_f_neg, &
               sum_pos = sum_abs_f_pos, sum_neg = sum_abs_f_neg)
          cumulative_int = result%sum_int_wgtd / result%sum_wgts
          cumulative_std = sqrt (1 / result%sum_wgts)
        end associate
        if (opt_verbose) then
           write (msg_buffer, "(I0,1x,I0,1x, 4(E24.16E4,1x))") &
                & it, self%config%n_calls, cumulative_int, cumulative_std, &
                & self%result%chi2, self%result%efficiency
           call msg_message ()
        end if
        if (opt_refine_grid) then
           call self%refine (average = .true.)
        else
           !! Skip grid refinement, but average the (grid) distribution.
           !! \note Now, we always average and dampen the distribution,
           !!       even when not adapting (e.g. final pass).
           call self%average_distribution ()
        end if
     end do iteration
     if (present(result)) result = cumulative_int
     if (present(abserr)) abserr = abs(cumulative_std)
   contains
   <<Vegas: vegas integrate: procedures>>
   end subroutine vegas_integrate
 
 @ %def vegas_integrate
 @ Set optional arguments of [[vegas_integrate]].
 <<Vegas: vegas integrate: procedures>>=
   subroutine set_options ()
     if (present (iterations)) self%config%iterations = iterations
     opt_reset_result = .true.
     if (present (reset_result)) opt_reset_result = reset_result
     opt_refine_grid = .true.
     if (present (refine_grid)) opt_refine_grid = refine_grid
     opt_verbose = .false.
     if (present (verbose)) opt_verbose = verbose
   end subroutine set_options
 
 @ We define additional chunk, which will be used later on for inserting
 MPI/general MPI code. The code is then removed by additional noweb filter if
 not compiled with the correspondig compiler flag.
 
 Overall variables, some additionally introduced due to the MPI parallelization
 and needed in sequentiell verison.
 <<Vegas: vegas integrate: variables>>=
 @
 @ Overall initialization.
 <<Vegas: vegas integrate: init>>=
 @
 @ Reset all last-iteration results before sampling.
 <<Vegas: vegas integrate: presampling>>=
 @
 @ Adjust rng between parallel and perpendicular loops.
 <<Vegas: vegas integrate: loop>>=
 @
 <<Vegas: vegas integrate: postsampling>>=
 @ Increment the box coordinates by 1. If we reach the largest value for the
 current axis (starting with the largest dimension number), we reset the counter
 to 1 and increment the next axis counter by 1. And so on, until we reach the
 maximum counter value of the axis with the lowest dimension, then we set
 [[success]] to false and the box coord is set to 1.
 <<Vegas: vegas integrate: procedures>>=
   subroutine increment_box_coord (box, success)
     integer, dimension(:), intent(inout) :: box
     logical, intent(out) :: success
     integer :: j
     success = .true.
     do j = size (box), 1, -1
        box(j) = box(j) + 1
        if (box(j) <= self%config%n_boxes) return
        box(j) = 1
     end do
     success = .false.
   end subroutine increment_box_coord
 
 @ %def increment_box_coord
 @ We parallelize [[VEGAS]] in simple forward manner. The hyper-cube is dissambled
 in to equidistant boxes in which we sample the integrand [[calls_per_box]]
 times. The workload of calculating those boxes is distributed along the worker.
 
 The number of dimensions which will be parallelised are $\lfloor \frac{D}{2}
 \rfloor$, such MPI Variables for [[vegas_integrate]]. We have to duplicate all
 buffers for [[MPI_Ireduce]], because we cannot use the same send or receive
 buffer.
 
 We temporarily store a (empty) grid, before communicating.
 <<MPI: Vegas: vegas integrate: variables>>=
   integer :: rank
   logical :: parallel_mode
   type(vegas_grid_t) :: grid
 @ MPI procedure-specific initialization.
 
 Allow for (external) veto on parallel mode.
 <<MPI: Vegas: vegas integrate: init>>=
   parallel_mode = self%parallel_mode .and. self%is_parallelizable ()
   if (parallel_mode) then
      call MPI_Comm_size (self%comm, n_size)
      call MPI_Comm_rank (self%comm, rank)
   else
      n_size = 1
      rank = 0
   end if
 @ Pre-sampling communication.
 
 We make a copy of the (actual) grid, which is unfilled when non-root.
 The actual grid is then broadcasted among the workers and inserted into the
 [[VEGAS]] object.
 <<MPI: Vegas: vegas integrate: presampling>>=
   if (parallel_mode) then
      grid = self%get_grid ()
      call grid%broadcast (self%comm)
      call self%set_grid (grid)
 @ Start index of the boxes for different ranks. If the random number generator
 is RngStream, we can advance the current stream in such a way, that we will
 getting matching numbers. Iff [[n_boxes]] is larger than 2, otherwise
 parallelization is useless.
 <<MPI: Vegas: vegas integrate: presampling>>=
      do k = 1, rank
         call increment_box_coord (self%box(1:n_dim_par), box_success)
         if (.not. box_success) exit
      end do
      select type (rng)
      type is (rng_stream_t)
         call rng%advance_state (self%config%n_dim * self%config%calls_per_box&
              & * self%config%n_boxes**(self%config%n_dim - n_dim_par) * rank)
      end select
   end if
 @ Increment [[n_size]] times the box coordinates.
 <<MPI: Vegas: vegas integrate: loop>>=
   if (parallel_mode) then
      select type (rng)
      type is (rng_stream_t)
         call rng%advance_state (self%config%n_dim * self%config%calls_per_box&
              & * self%config%n_boxes**(self%config%n_dim - n_dim_par) * (n_size - 1))
      end select
   end if
 @ Call to [[vegas_integrate_collect]].
 <<MPI: Vegas: vegas integrate: postsampling>>=
   if (parallel_mode) then
      call vegas_integrate_collect ()
      if (rank /= 0) cycle iteration
   end if
 @ Reduce (in an non-blocking fashion) all sampled information via [[MPI_SUM]] or [[MPI_MAX]].
 <<MPI: Vegas: vegas integrate: procedures>>=
   subroutine vegas_integrate_collect ()
     integer :: j
     real(default) :: root_total_integral, root_total_sq_integral
     real(default) :: root_sum_abs_f_pos, root_max_abs_f_pos
     real(default) :: root_sum_abs_f_neg, root_max_abs_f_neg
     real(default), dimension(self%config%n_bins_max, self%config%n_dim) :: root_d
     type(MPI_Request), dimension(self%config%n_dim + 6) :: status
     root_d = 0._default
     root_sum_abs_f_pos = 0._default
     root_sum_abs_f_neg = 0._default
     root_max_abs_f_pos = 0._default
     root_sum_abs_f_neg = 0._default
     root_total_integral = 0._default
     root_total_sq_integral = 0._default
     call MPI_Ireduce (sum_abs_f_pos, root_sum_abs_f_pos, 1, MPI_DOUBLE_PRECISION,&
          & MPI_SUM, 0, self%comm, status(1))
     call MPI_Ireduce (sum_abs_f_neg, root_sum_abs_f_neg, 1, MPI_DOUBLE_PRECISION,&
          & MPI_SUM, 0, self%comm, status(2))
     call MPI_Ireduce (max_abs_f_pos, root_max_abs_f_pos, 1, MPI_DOUBLE_PRECISION,&
          & MPI_MAX, 0, self%comm, status(3))
     call MPI_Ireduce (max_abs_f_neg, root_max_abs_f_neg, 1, MPI_DOUBLE_PRECISION,&
          & MPI_MAX, 0, self%comm, status(4))
     call MPI_Ireduce (total_integral, root_total_integral, 1, MPI_DOUBLE_PRECISION,&
          & MPI_SUM, 0, self%comm, status(5))
     call MPI_Ireduce (total_sq_integral, root_total_sq_integral, 1,&
          & MPI_DOUBLE_PRECISION, MPI_SUM, 0, self%comm, status(6))
     do j = 1, self%config%n_dim
        call MPI_Ireduce (self%d(1:self%config%n_bins, j), root_d(1:self%config&
             &%n_bins, j), self%config%n_bins, MPI_DOUBLE_PRECISION, MPI_SUM, 0,&
             & self%comm, status(6 + j))
     end do
     call MPI_Waitall (self%config%n_dim + 6, status, MPI_STATUSES_IGNORE)
     if (rank == 0) sum_abs_f_pos = root_sum_abs_f_pos
     if (rank == 0) sum_abs_f_neg = root_sum_abs_f_neg
     if (rank == 0) max_abs_f_pos = root_max_abs_f_pos
     if (rank == 0) max_abs_f_neg = root_max_abs_f_neg
     if (rank == 0) total_integral = root_total_integral
     if (rank == 0) total_sq_integral = root_total_sq_integral
     if (rank == 0) self%d = root_d
   end subroutine vegas_integrate_collect
 
 @ %def vegas_integrate_collect
-@ Obtain a random point inside the $n$-dimensional hypercube, transform onto the correct interval and calculate the bin volume. The additional factor [[n_bins]] is already applied to the [[jacobian]] (per dimension).
+@ 
+Obtain a random point inside the $n$-dimensional hypercube, transform
+onto the correct interval and calculate the bin volume. The additional
+factor [[n_bins]] is already applied to the [[jacobian]] (per
+dimension).
 <<Vegas: vegas: TBP>>=
   procedure, private :: random_point => vegas_random_point
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_random_point (self, rng, x, bin_volume)
+      class(vegas_t), intent(inout) :: self
+      class(rng_t), intent(inout) :: rng
+      real(default), dimension(self%config%n_dim), intent(out) :: x
+      real(default), intent(out) :: bin_volume
+    end subroutine vegas_random_point
 <<Vegas: procedures>>=
-  subroutine vegas_random_point (self, rng, x, bin_volume)
+  module subroutine vegas_random_point (self, rng, x, bin_volume)
     class(vegas_t), intent(inout) :: self
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out) :: x
     real(default), intent(out) :: bin_volume
     integer :: j
     real(default) :: r, y, z, bin_width
     bin_volume = 1.
     ndim: do j = 1, self%config%n_dim
        call rng%generate (r)
        z = ((self%box(j) - 1 + r) / self%config%n_boxes) * self%config%n_bins + 1
        self%bin(j) = max (min (int (z), self%config%n_bins), 1)
        if (self%bin(j) == 1) then
           bin_width = self%grid%xi(2, j)
           y = (z - self%bin(j)) * bin_width
        else
           bin_width = self%grid%xi(self%bin(j) + 1, j) - self%grid%xi(self%bin(j), j)
           y = self%grid%xi(self%bin(j), j) + (z - self%bin(j)) * bin_width
        end if
        x(j) = self%grid%x_lower(j) + y * self%grid%delta_x(j)
        bin_volume = bin_volume * bin_width
     end do ndim
   end subroutine vegas_random_point
 
 @ %def vegas_random_point
 @ Obtain a random point inside the $n$-dimensional hyper-cube. We neglect
 stratification and generate the random point in the most simple way. Hence, we
 do not need to know in which box we are actually sampling. This is useful for
 only for event generation.
 <<Vegas: vegas: TBP>>=
   procedure, private :: simple_random_point => vegas_simple_random_point
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_simple_random_point (self, rng, x, bin_volume)
+      class(vegas_t), intent(inout) :: self
+      class(rng_t), intent(inout) :: rng
+      real(default), dimension(self%config%n_dim), intent(out) :: x
+      real(default), intent(out) :: bin_volume
+    end subroutine vegas_simple_random_point
 <<Vegas: procedures>>=
-  subroutine vegas_simple_random_point (self, rng, x, bin_volume)
+  module subroutine vegas_simple_random_point (self, rng, x, bin_volume)
     class(vegas_t), intent(inout) :: self
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out) :: x
     real(default), intent(out) :: bin_volume
     integer :: j, k
     real(default) :: r, y, z, bin_width
     bin_volume = 1.
     ndim: do j = 1, self%config%n_dim
        call rng%generate (r)
        z = r * self%config%n_bins + 1
        k = max (min (int (z), self%config%n_bins), 1)
        if (k == 1) then
           bin_width = self%grid%xi(2, j)
           y = (z - 1) * bin_width
        else
           bin_width = self%grid%xi(k + 1, j) - self%grid%xi(k, j)
           y = self%grid%xi(k, j) + (z - k) * bin_width
        end if
        x(j) = self%grid%x_lower(j) + y * self%grid%delta_x(j)
        bin_volume = bin_volume * bin_width
     end do ndim
   end subroutine vegas_simple_random_point
 
 @ %def vegas_simple_random_point
 @
 <<Vegas: vegas: TBP>>=
   procedure, private :: accumulate_distribution => vegas_accumulate_distribution
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_accumulate_distribution (self, y)
+      class(vegas_t), intent(inout) :: self
+      real(default), intent(in) :: y
+    end subroutine vegas_accumulate_distribution
 <<Vegas: procedures>>=
-  subroutine vegas_accumulate_distribution (self, y)
+  module subroutine vegas_accumulate_distribution (self, y)
     class(vegas_t), intent(inout) :: self
     real(default), intent(in) :: y
     integer :: j
     do j = 1, self%config%n_dim
        self%d(self%bin(j), j) = self%d(self%bin(j), j) + y
     end do
   end subroutine vegas_accumulate_distribution
 
 @ %def vegas_accumulate_distribution
 @ Generate weighted random event.
 
 The weight given by the overall jacobian
 \begin{equation}
- \Delta_{\text{jac}} = \prod_{j=1}^{d} \left( x_j^+ - x_j^- \right) \frac{N_{\text{bins}}^d}{N_{\text{calls}}}
+ \Delta_{\text{jac}} = \prod_{j=1}^{d} \left( x_j^+ - x_j^- \right)
+ \frac{N_{\text{bins}}^d}{N_{\text{calls}}}
 \end{equation}
-includes the overall non-changing factors $\frac{1}{N_{\text{calls}}}$-factor (divisions are
-expensive) and $N_{\text{bins}}^{d}$, the latter combined with [[bin_volume]]
-gives rise to the probability, see [[vegas_init_grid]] for details.
-We have to factor out $N_{\text{calls}}$ to retrieve the correct weight.
+includes the overall non-changing factors
+$\frac{1}{N_{\text{calls}}}$-factor (divisions are expensive) and
+$N_{\text{bins}}^{d}$, the latter combined with [[bin_volume]] gives
+rise to the probability, see [[vegas_init_grid]] for details. We have
+to factor out $N_{\text{calls}}$ to retrieve the correct weight.
 <<Vegas: vegas: TBP>>=
   procedure :: generate_weighted => vegas_generate_weighted_event
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_generate_weighted_event (self, func, rng, x)
+      class(vegas_t), intent(inout) :: self
+      class(vegas_func_t), intent(inout) :: func
+      class(rng_t), intent(inout) :: rng
+      real(default), dimension(self%config%n_dim), intent(inout) :: x
+    end subroutine vegas_generate_weighted_event
 <<Vegas: procedures>>=
-  subroutine vegas_generate_weighted_event (self, func, rng, x)
+  module subroutine vegas_generate_weighted_event (self, func, rng, x)
     class(vegas_t), intent(inout) :: self
     class(vegas_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(inout) :: x
     real(default) :: bin_volume
     call self%simple_random_point (rng, x, bin_volume)
     ! Cancel n_calls from jacobian with n_calls
     self%result%evt_weight = self%config%n_calls * self%jacobian * bin_volume &
          & * func%evaluate (x)
   end subroutine vegas_generate_weighted_event
 
 @ %def vegas_generate_weighted_event
 @ Generate random event. We accept on the rate
 \begin{equation}
   \frac{|f(x)|}{\underset{x}{\max} |f(x)|}.
 \end{equation}
-We keep separate maximum weights for positive and negative weights, and use
-them, accordingly.
-In the case of unweighted event generation, if the current weight exceeds the
-the maximum weight, we update the maximum, accordingly.
+We keep separate maximum weights for positive and negative weights,
+and use them, accordingly. In the case of unweighted event generation,
+if the current weight exceeds the the maximum weight, we update the
+maximum, accordingly.
 <<Vegas: vegas: TBP>>=
   procedure, public :: generate_unweighted=> vegas_generate_unweighted_event
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_generate_unweighted_event (self, func, rng, x)
+      class(vegas_t), intent(inout) :: self
+      class(vegas_func_t), intent(inout) :: func
+      class(rng_t), intent(inout) :: rng
+      real(default), dimension(self%config%n_dim), intent(out) :: x
+    end subroutine vegas_generate_unweighted_event
 <<Vegas: procedures>>=
-  subroutine vegas_generate_unweighted_event (self, func, rng, x)
+  module subroutine vegas_generate_unweighted_event (self, func, rng, x)
     class(vegas_t), intent(inout) :: self
     class(vegas_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out) :: x
     real(default) :: bin_volume
     real(default) :: max_abs_f
     real(default) :: r
     associate (result => self%result)
       generate: do
          call self%generate_weighted (func, rng, x)
          max_abs_f = merge (result%max_abs_f_pos, result%max_abs_f_neg, &
               & result%evt_weight > 0.)
          if (result%evt_weight > max_abs_f) then
             result%evt_weight_excess = &
                  & result%evt_weight / max_abs_f - 1._default
             exit generate
          end if
          call rng%generate (r)
          ! Do not use division, because max_abs_f could be zero.
          if (max_abs_f * r <= abs(result%evt_weight)) then
           exit generate
          end if
       end do generate
     end associate
   end subroutine vegas_generate_unweighted_event
 
 @ %def vegas_random_event
 \section{I/0 operation}
 \label{sec:i0-operation}
 @ Write grid to file. We use the original VAMP formater.
 <<Vegas: parameters>>=
   character(len=*), parameter, private :: &
      descr_fmt =         "(1X,A)", &
      integer_fmt =       "(1X,A18,1X,I15)", &
      integer_array_fmt = "(1X,I18,1X,I15)", &
      logical_fmt =       "(1X,A18,1X,L1)", &
      double_fmt =        "(1X,A18,1X,E24.16E4)", &
      double_array_fmt =  "(1X,I18,1X,E24.16E4)", &
      double_array_pac_fmt = "(1X,I18,1X,E16.8E4)", &
      double_array2_fmt =  "(1X,2(1X,I8),1X,E24.16E4)", &
      double_array2_pac_fmt = "(1X,2(1X,I8),1X,E16.8E4)"
 
 @ %def descr_fmt integer_fmt integer_array_fmt logical_fmt
 @ %def double_fmt double_array_fmt double_array2_fmt
 <<Vegas: vegas: TBP>>=
   procedure, public :: write_grid => vegas_write_grid
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_write_grid (self, unit)
+      class(vegas_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+    end subroutine vegas_write_grid
 <<Vegas: procedures>>=
-  subroutine vegas_write_grid (self, unit)
+  module subroutine vegas_write_grid (self, unit)
     class(vegas_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer :: u
     integer :: i, j
     u = given_output_unit (unit)
     write (u, descr_fmt) "begin type(vegas_t)"
     write (u, integer_fmt) "n_dim =", self%config%n_dim
     write (u, integer_fmt) "n_bins_max =", self%config%n_bins_max
     write (u, double_fmt) "alpha =", self%config%alpha
     write (u, integer_fmt) "iterations =", self%config%iterations
     write (u, integer_fmt) "mode =", self%config%mode
     write (u, integer_fmt) "calls_per_box =", self%config%calls_per_box
     write (u, integer_fmt) "n_calls =", self%config%n_calls
     write (u, integer_fmt) "n_calls_min =", self%config%n_calls_min
     write (u, integer_fmt) "n_boxes =", self%config%n_boxes
     write (u, integer_fmt) "n_bins =", self%config%n_bins
     write (u, integer_fmt) "it_start =", self%result%it_start
     write (u, integer_fmt) "it_num =", self%result%it_num
     write (u, integer_fmt) "samples =", self%result%samples
     write (u, double_fmt) "sum_int_wgtd =", self%result%sum_int_wgtd
     write (u, double_fmt) "sum_wgts =", self%result%sum_wgts
     write (u, double_fmt) "sum_chi =", self%result%sum_chi
     write (u, double_fmt) "chi2 =", self%result%chi2
     write (u, double_fmt) "efficiency =", self%result%efficiency
     write (u, double_fmt) "efficiency =", self%result%efficiency_pos
     write (u, double_fmt) "efficiency =", self%result%efficiency_neg
     write (u, double_fmt) "max_abs_f =", self%result%max_abs_f
     write (u, double_fmt) "max_abs_f_pos =", self%result%max_abs_f_pos
     write (u, double_fmt) "max_abs_f_neg =", self%result%max_abs_f_neg
     write (u, double_fmt) "result =", self%result%result
     write (u, double_fmt) "std =", self%result%std
     write (u, double_fmt) "hypercube_volume =", self%hypercube_volume
     write (u, double_fmt) "jacobian =", self%jacobian
     write (u, descr_fmt) "begin x_lower"
     do j = 1, self%config%n_dim
        write (u, double_array_fmt) j, self%grid%x_lower(j)
     end do
     write (u, descr_fmt) "end x_lower"
     write (u, descr_fmt) "begin x_upper"
     do j = 1, self%config%n_dim
        write (u, double_array_fmt) j, self%grid%x_upper(j)
     end do
     write (u, descr_fmt) "end x_upper"
     write (u, descr_fmt) "begin delta_x"
     do j = 1, self%config%n_dim
        write (u, double_array_fmt) j, self%grid%delta_x(j)
     end do
     write (u, descr_fmt) "end delta_x"
     write (u, integer_fmt) "n_bins =", self%config%n_bins
     write (u, descr_fmt) "begin bin"
     do j = 1, self%config%n_dim
        write (u, integer_array_fmt) j, self%bin(j)
     end do
     write (u, descr_fmt) "end n_bin"
     write (u, integer_fmt) "n_boxes =", self%config%n_boxes
     write (u, descr_fmt) "begin box"
     do j = 1, self%config%n_dim
        write (u, integer_array_fmt) j, self%box(j)
     end do
     write (u, descr_fmt) "end box"
     write (u, descr_fmt) "begin d"
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max
           write (u, double_array2_fmt) i, j, self%d(i, j)
        end do
     end do
     write (u, descr_fmt) "end d"
     write (u, descr_fmt) "begin xi"
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max + 1
           write (u,  double_array2_fmt) i, j, self%grid%xi(i, j)
        end do
     end do
     write (u, descr_fmt) "end xi"
     write (u, descr_fmt) "end type(vegas_t)"
   end subroutine vegas_write_grid
 
 @ %def vegas_write_grid
 @ Read grid configuration from file.
 <<Vegas: vegas: TBP>>=
   procedure, public :: read_grid => vegas_read_grid
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_read_grid (self, unit)
+      class(vegas_t), intent(out) :: self
+      integer, intent(in) :: unit
+    end subroutine vegas_read_grid
 <<Vegas: procedures>>=
-  subroutine vegas_read_grid (self, unit)
+  module subroutine vegas_read_grid (self, unit)
     class(vegas_t), intent(out) :: self
     integer, intent(in) :: unit
     integer :: i, j
     character(len=80) :: buffer
     integer :: ibuffer, jbuffer
     read (unit, descr_fmt) buffer
     read (unit, integer_fmt) buffer, ibuffer
     read (unit, integer_fmt) buffer, jbuffer
     select type(self)
     type is (vegas_t)
        self = vegas_t (n_dim = ibuffer, n_bins_max = jbuffer)
     end select
     read (unit, double_fmt) buffer, self%config%alpha
     read (unit, integer_fmt) buffer, self%config%iterations
     read (unit, integer_fmt) buffer, self%config%mode
     read (unit, integer_fmt) buffer, self%config%calls_per_box
     read (unit, integer_fmt) buffer, self%config%n_calls
     read (unit, integer_fmt) buffer, self%config%n_calls_min
     read (unit, integer_fmt) buffer, self%config%n_boxes
     read (unit, integer_fmt) buffer, self%config%n_bins
     self%grid%n_bins = self%config%n_bins
     read (unit, integer_fmt) buffer, self%result%it_start
     read (unit, integer_fmt) buffer, self%result%it_num
     read (unit, integer_fmt) buffer, self%result%samples
     read (unit, double_fmt) buffer, self%result%sum_int_wgtd
     read (unit, double_fmt) buffer, self%result%sum_wgts
     read (unit, double_fmt) buffer, self%result%sum_chi
     read (unit, double_fmt) buffer, self%result%chi2
     read (unit, double_fmt) buffer, self%result%efficiency
     read (unit, double_fmt) buffer, self%result%efficiency_pos
     read (unit, double_fmt) buffer, self%result%efficiency_neg
     read (unit, double_fmt) buffer, self%result%max_abs_f
     read (unit, double_fmt) buffer, self%result%max_abs_f_pos
     read (unit, double_fmt) buffer, self%result%max_abs_f_neg
     read (unit, double_fmt) buffer, self%result%result
     read (unit, double_fmt) buffer, self%result%std
     read (unit, double_fmt) buffer, self%hypercube_volume
     read (unit, double_fmt) buffer, self%jacobian
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, double_array_fmt) jbuffer, self%grid%x_lower(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, double_array_fmt) jbuffer, self%grid%x_upper(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, double_array_fmt) jbuffer, self%grid%delta_x(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, integer_fmt) buffer, self%config%n_bins
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, integer_array_fmt) jbuffer, self%bin(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, integer_fmt) buffer, self%config%n_boxes
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, integer_array_fmt) jbuffer, self%box(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max
           read (unit, double_array2_fmt) ibuffer, jbuffer, self%d(i, j)
        end do
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max + 1
           read (unit,  double_array2_fmt) ibuffer, jbuffer, self%grid%xi(i, j)
        end do
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
   end subroutine vegas_read_grid
 
 @ %def vegas_read_grid
 Read and write a grid from an unformatted file.
 <<Vegas: vegas: TBP>>=
   procedure :: write_binary_grid => vegas_write_binary_grid
   procedure :: read_binary_grid => vegas_read_binary_grid
+<<Vegas: sub interfaces>>=
+    module subroutine vegas_write_binary_grid (self, unit)
+      class(vegas_t), intent(in) :: self
+      integer, intent(in) :: unit
+    end subroutine vegas_write_binary_grid
+    module subroutine vegas_read_binary_grid (self, unit)
+      class(vegas_t), intent(out) :: self
+      integer, intent(in) :: unit
+    end subroutine vegas_read_binary_grid
 <<Vegas: procedures>>=
-  subroutine vegas_write_binary_grid (self, unit)
+  module subroutine vegas_write_binary_grid (self, unit)
     class(vegas_t), intent(in) :: self
     integer, intent(in) :: unit
     integer :: i, j
     write (unit) self%config%n_dim
     write (unit) self%config%n_bins_max
     write (unit) self%config%alpha
     write (unit) self%config%iterations
     write (unit) self%config%mode
     write (unit) self%config%calls_per_box
     write (unit) self%config%n_calls
     write (unit) self%config%n_calls_min
     write (unit) self%config%n_boxes
     write (unit) self%config%n_bins
     write (unit) self%result%it_start
     write (unit) self%result%it_num
     write (unit) self%result%samples
     write (unit) self%result%sum_int_wgtd
     write (unit) self%result%sum_wgts
     write (unit) self%result%sum_chi
     write (unit) self%result%chi2
     write (unit) self%result%efficiency
     write (unit) self%result%efficiency_pos
     write (unit) self%result%efficiency_neg
     write (unit) self%result%max_abs_f
     write (unit) self%result%max_abs_f_pos
     write (unit) self%result%max_abs_f_neg
     write (unit) self%result%result
     write (unit) self%result%std
     write (unit) self%hypercube_volume
     write (unit) self%jacobian
     do j = 1, self%config%n_dim
        write (unit) j, self%grid%x_lower(j)
     end do
     do j = 1, self%config%n_dim
        write (unit) j, self%grid%x_upper(j)
     end do
     do j = 1, self%config%n_dim
        write (unit) j, self%grid%delta_x(j)
     end do
     write (unit) self%config%n_bins
     do j = 1, self%config%n_dim
        write (unit) j, self%bin(j)
     end do
     write (unit) self%config%n_boxes
     do j = 1, self%config%n_dim
        write (unit) j, self%box(j)
     end do
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max
           write (unit) i, j, self%d(i, j)
        end do
     end do
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max + 1
           write (unit) i, j, self%grid%xi(i, j)
        end do
     end do
   end subroutine vegas_write_binary_grid
 
-  subroutine vegas_read_binary_grid (self, unit)
+  module subroutine vegas_read_binary_grid (self, unit)
     class(vegas_t), intent(out) :: self
     integer, intent(in) :: unit
     integer :: i, j
     integer :: ibuffer, jbuffer
     read (unit) ibuffer
     read (unit) jbuffer
     select type(self)
     type is (vegas_t)
        self = vegas_t (n_dim = ibuffer, n_bins_max = jbuffer)
     end select
     read (unit) self%config%alpha
     read (unit) self%config%iterations
     read (unit) self%config%mode
     read (unit) self%config%calls_per_box
     read (unit) self%config%n_calls
     read (unit) self%config%n_calls_min
     read (unit) self%config%n_boxes
     read (unit) self%config%n_bins
     self%grid%n_bins = self%config%n_bins
     read (unit) self%result%it_start
     read (unit) self%result%it_num
     read (unit) self%result%samples
     read (unit) self%result%sum_int_wgtd
     read (unit) self%result%sum_wgts
     read (unit) self%result%sum_chi
     read (unit) self%result%chi2
     read (unit) self%result%efficiency
     read (unit) self%result%efficiency_pos
     read (unit) self%result%efficiency_neg
     read (unit) self%result%max_abs_f
     read (unit) self%result%max_abs_f_pos
     read (unit) self%result%max_abs_f_neg
     read (unit) self%result%result
     read (unit) self%result%std
     read (unit) self%hypercube_volume
     read (unit) self%jacobian
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%grid%x_lower(j)
     end do
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%grid%x_upper(j)
     end do
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%grid%delta_x(j)
     end do
     read (unit) self%config%n_bins
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%bin(j)
     end do
     read (unit) self%config%n_boxes
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%box(j)
     end do
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max
           read (unit) ibuffer, jbuffer, self%d(i, j)
        end do
     end do
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max + 1
           read (unit) ibuffer, jbuffer, self%grid%xi(i, j)
        end do
     end do
   end subroutine vegas_read_binary_grid
 
 @ %def vegas_write_binary_grid, vegas_read_binary_grid
 
 \section{Unit tests}
 \label{sec:unit-tests}
 
 Test module, followed by the corresponding implementation module.
 
 <<[[vegas_ut.f90]]>>=
 <<File header>>
 
 module vegas_ut
   use unit_tests
   use vegas_uti
 
 <<Standard module head>>
 
 <<Vegas: public test>>
 
 contains
 <<Vegas: test driver>>
 end module vegas_ut
 @ %def vegas_ut
 @
 <<[[vegas_uti.f90]]>>=
 <<File header>>
 
 module vegas_uti
 <<Use kinds>>
   use io_units
   use constants, only: pi
   use format_defs, only: FMT_10, FMT_12
   use rng_base
   use rng_stream
   use vegas
 
 <<Standard module head>>
 
 <<Vegas: test declaration>>
 
 <<Vegas: test types>>
 
 contains
 <<Vegas: tests>>
 end module vegas_uti
 @ %def vegas_uti
 @ API: driver for the unit tests below.
 <<Vegas: public test>>=
   public :: vegas_test
 <<Vegas: test driver>>=
   subroutine vegas_test (u, results)
     integer, intent(in) :: u
     type(test_results_t), intent(inout) :: results
     <<Vegas: execute tests>>
   end subroutine vegas_test
 
 @ %def vegas_test
 @
 \subsubsection{Test function}
 \label{sec:test-function}
 
 We use the example from the Monte Carlo Examples of the GSL library
 \begin{equation}
   I = \int_{-pi}^{+pi} {dk_x/(2 pi)}  \int_{-pi}^{+pi} {dk_y/(2 pi)}  \int_{-pi}^{+pi} {dk_z/(2 pi)}  1 / (1 - cos(k_x)cos(k_y)cos(k_z)).
 \end{equation}
 The integral is reduced to region (0,0,0) $\rightarrow$ ($\pi$, $\pi$, $\pi$) and multiplied by 8.
 <<Vegas: test types>>=
   type, extends (vegas_func_t) :: vegas_test_func_t
      !
    contains
    <<Vegas: vegas test func: TBP>>
   end type vegas_test_func_t
 
 @ %def vegas_test_func_t
 @ Evaluate the integrand.
 <<Vegas: vegas test func: TBP>>=
   procedure, public :: evaluate => vegas_test_func_evaluate
 <<Vegas: tests>>=
   real(default) function vegas_test_func_evaluate (self, x) result (f)
     class(vegas_test_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     f = 1.0 / (pi**3)
     f = f / ( 1.0 - cos (x(1)) * cos (x(2)) * cos (x(3)))
   end function vegas_test_func_evaluate
 
 @ %def vegas_test_func_evaluate
 @ The second test function is the normalised n-dim.\@ gaussian distribution.
 <<Vegas: test types>>=
   type, extends (vegas_func_t) :: vegas_gaussian_test_func_t
      !
    contains
    <<Vegas: vegas gaussian test func: TBP>>
   end type vegas_gaussian_test_func_t
 
 @ %def vegas_gaussian_test_func_t
 @ Evaluate the integrand.
 <<Vegas: vegas gaussian test func: TBP>>=
   procedure, public :: evaluate => vegas_gaussian_evaluate
 <<Vegas: tests>>=
   real(default) function vegas_gaussian_evaluate (self, x) result (f)
     class(vegas_gaussian_test_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     real(default), parameter :: inv_sqrt_pi = 1._default / sqrt(pi)
     f = inv_sqrt_pi**size (x)
     f = f * exp (- dot_product(x, x))
   end function vegas_gaussian_evaluate
 
 @ %def vegas_gaussian_evaluate
 @ The third test function is a three-dimensional polynomial function which
 factories. The function is defined in such a way that the integral in the unit
 range is normalised to zero.
 \begin{equation}
   f(x) = - \frac{8}{3} (x + 1)*(y-1)*z
 \end{equation}
 <<Vegas: test types>>=
   type, extends (vegas_func_t) :: vegas_polynomial_func_t
      !
    contains
    <<Vegas: vegas polynomial func: TBP>>
    end type vegas_polynomial_func_t
 
 @ %def vegas_polynomial_func_t
 <<Vegas: vegas polynomial func: TBP>>=
   procedure, public :: evaluate => vegas_polynomial_evaluate
 <<Vegas: tests>>=
   real(default) function vegas_polynomial_evaluate (self, x) result (f)
     class(vegas_polynomial_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     f = - 8. / 3. * (x(1) + 1.) * (x(2) - 1.) * x(3)
   end function vegas_polynomial_evaluate
 
 @ %def vegas_polynomial_evaluate
 @
 \subsubsection{MC Integrator check}
 \label{sec:mc-integrator-check}
 
 Initialise the VEGAS MC integrator and call to [[vegas_init_grid]] for the
 initialisation of the grid.
 <<Vegas: execute tests>>=
   call test (vegas_1, "vegas_1", "VEGAS initialisation and&
        & grid preparation", u, results)
 <<Vegas: test declaration>>=
   public :: vegas_1
 <<Vegas: tests>>=
   subroutine vegas_1 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower = 0., &
          x_upper = pi
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_1"
     write (u, "(A)") "*   Purpose: initialise the VEGAS MC integrator and the grid"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_test_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vegas_1
 @ %def vegas_1
 @
 \subsubsection{Configuration and result check}
 \label{sec:conf-result-check}
 
 Initialise the MC integrator. Get and write the config object, also the (empty) result object.
 <<Vegas: execute tests>>=
   call test (vegas_2, "vegas_2", "VEGAS configuration and result object", u, results)
 <<Vegas: test declaration>>=
   public :: vegas_2
 <<Vegas: tests>>=
   subroutine vegas_2 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_result_t) :: mc_integrator_result
 
     write (u, "(A)") "* Test output: vegas_2"
     write (u, "(A)") "*   Purpose: use transparent containers for&
          & configuration and result."
     write (u, "(A)")
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 10"
     write (u, "(A)")
 
     mc_integrator = vegas_t (10)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000 (Importance Sampling)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS config object and write out"
     write (u, "(A)")
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS empty result object and write out"
     write (u, "(A)")
 
     mc_integrator_result = mc_integrator%get_result ()
     call mc_integrator_result%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_2
 @ %def vegas_2
 @
 \subsubsection{Grid check}
 \label{sec:conf-result-check}
 
 Initialise the MC integrator. Get and write the config object. Integrate the gaussian distribution.
 Get and write the result object. Before and after integration get the grid
 object and output both.
 Repeat with different number of dimensions.
 <<Vegas: execute tests>>=
   call test (vegas_3, "vegas_3", "VEGAS integration of multi-dimensional gaussian", u, results)
 <<Vegas: test declaration>>=
   public :: vegas_3
 <<Vegas: tests>>=
   subroutine vegas_3 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower_3 = -10._default, &
          x_upper_3 = 10._default
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_grid_t) :: mc_integrator_grid
     type(vegas_result_t) :: mc_integrator_result
 
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_3"
     write (u, "(A)") "*   Purpose: Integrate gaussian distribution."
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_gaussian_test_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower_3, x_upper_3)
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS config object and write out"
     write (u, "(A)")
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS grid object and write out"
     write (u, "(A)")
 
     mc_integrator_grid = mc_integrator%get_grid ()
     call mc_integrator_grid%write (u, pacify = .true.)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 20000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS result object and write out"
     write (u, "(A)")
 
     mc_integrator_result = mc_integrator%get_result ()
     call mc_integrator_result%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS grid object and write out"
     write (u, "(A)")
 
     mc_integrator_grid = mc_integrator%get_grid ()
     call mc_integrator_grid%write (u, pacify = .true.)
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_3
 @ %def vegas_3
 \subsubsection{Three-dimensional integration with polynomial function}
 \label{sec:conf-result-check}
 
 Initialise the MC integrator. Get and write the config object. Integrate the
 factorisable polynomial function. Get and write the result object. Repeat with
 different number of dimensions.
 <<Vegas: execute tests>>=
   call test (vegas_4, "vegas_4", "VEGAS integration of three&
        &-dimensional factorisable polynomial function", u, results)
 <<Vegas: test declaration>>=
   public :: vegas_4
 <<Vegas: tests>>=
   subroutine vegas_4 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower_3 = 0._default, &
          x_upper_3 = 1._default
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_result_t) :: mc_integrator_result
 
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_4"
     write (u, "(A)") "*   Purpose: Integrate gaussian distribution."
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_polynomial_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 2000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower_3, x_upper_3)
     call mc_integrator%set_calls (2000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
 
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 20000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (20000)
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_4
 
 @ %def vegas_4
 @
 \subsubsection{Event generation}
 
 Initialise the MC integrator. Integrate the gaussian distribution.
 Get and write the result object. Finally, generate events in accordance to the
 adapted grid and print them out.
 <<Vegas: execute tests>>=
   call test (vegas_5, "vegas_5", "VEGAS integration and event&
        & generation of multi-dimensional gaussian", u, results)
 <<Vegas: test declaration>>=
   public :: vegas_5
 <<Vegas: tests>>=
   subroutine vegas_5 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(1), parameter :: x_lower_1 = -10._default, &
          x_upper_1 = 10._default
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_result_t) :: mc_integrator_result
 
     integer :: i, u_event
     real(default), dimension(1) :: event, mean, delta, M2
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_5"
     write (u, "(A)") "*   Purpose: Integrate gaussian distribution."
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 1"
     write (u, "(A)")
 
     allocate (vegas_gaussian_test_func_t :: func)
     mc_integrator = vegas_t (1)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 20000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower_1, x_upper_1)
     call mc_integrator%set_calls (20000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, verbose=.true., result=result, abserr=abserr)
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") &
          & "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%integrate (func, rng, 3, verbose=.true., result=result, abserr=abserr)
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") &
          & "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Generate 10000 events based on the adaptation and&
          & calculate mean and variance"
     write (u, "(A)")
 
     mean = 0._default
     M2 = 0._default
     do i = 1, 10000
        call mc_integrator%generate_unweighted (func, rng, event)
        delta = event - mean
        mean = mean + delta / i
        M2 = M2 + delta * (event - mean)
     end do
 
     write (u, "(2X,A)") "Result:"
     write (u, "(4X,A," // FMT_12 //")") &
          & "mean               = ", mean
     write (u, "(4X,A," // FMT_12 //")") &
          & "(sample) std. dev. = ", sqrt (M2 / (9999))
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_5
 
 @ %def vegas_5
 @
 \subsubsection{Grid I/O}
 \label{sec:grid-io}
 
 Initialise the MC integrator. Get and write the config object. Integrate the
 factorisable polynomial function. Get and write the result object. Write grid to
 file and start with fresh grid.
 <<Vegas: execute tests>>=
   call test (vegas_6, "vegas_6", "VEGAS integrate and write grid, &
        & read grid and continue", u, results)
 <<Vegas: test declaration>>=
   public :: vegas_6
 <<Vegas: tests>>=
   subroutine vegas_6 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower_3 = 0._default, &
          x_upper_3 = 1._default
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_result_t) :: mc_integrator_result
 
     real(default) :: result, abserr
     integer :: unit
 
     write (u, "(A)") "* Test output: vegas_6"
     write (u, "(A)") "*   Purpose: Write and read grid, and continue."
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_polynomial_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 2000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower_3, x_upper_3)
     call mc_integrator%set_calls (2000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
 
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Write grid to file vegas_io.grid"
     write (u, "(A)")
 
     unit = free_unit ()
     open (unit, file = "vegas_io.grid", &
          action = "write", status = "replace")
     call mc_integrator%write_grid (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Read grid from file vegas_io.grid"
     write (u, "(A)")
 
     call mc_integrator%final ()
     open (unit, file = "vegas_io.grid", &
          action = "read", status = "old")
     call mc_integrator%read_grid (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 20000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (20000)
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_6
 
 @ %def vegas_6
 @
 \subsubsection{Numeric stability}
 \label{sec:numeric-stability}
 
 We wrap a previous testing function to produce a single [[NaN]].
 <<Vegas: test types>>=
   type, extends (vegas_test_func_t) :: vegas_nan_test_func_t
     private
     logical :: evaluate_to_nan = .true.
    contains
    <<Vegas: vegas nan test func: TBP>>
   end type vegas_nan_test_func_t
 
 @ %def vegas_nan_test_func_t
 @ Evaluate the integrand.
 <<Vegas: vegas nan test func: TBP>>=
   procedure, public :: evaluate => vegas_nan_test_func_evaluate
 <<Vegas: tests>>=
   real(default) function vegas_nan_test_func_evaluate (self, x) result (f)
     use, intrinsic :: ieee_arithmetic, only: ieee_value, ieee_quiet_nan
     class(vegas_nan_test_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     if (self%evaluate_to_nan) then
         f = ieee_value(1.0_default, ieee_quiet_nan)
         self%evaluate_to_nan = .false.
     else
         f = self%vegas_test_func_t%evaluate (x)
     end if
   end function vegas_nan_test_func_evaluate
 
 @ %def vegas_test_func_evaluate
 
 Initialise the VEGAS MC integrator. Run a integration pass and insert a single
 [[NaN]] that leads to a [[NaN]] result. However, the integration grid will be
 fine.
 
 Proceed with a second pass, and get the correct result.
 <<Vegas: execute tests>>=
   !!! Disabled for the moment as NAGFOR stops execution on NaNs as intended
   ! call test (vegas_7, "vegas_7", "VEGAS NaN stability test", u, results)
 <<Vegas: test declaration>>=
   public :: vegas_7
 <<Vegas: tests>>=
   subroutine vegas_7 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower = 0., &
          x_upper = pi
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_7"
     write (u, "(A)") "*   Purpose: initialise the VEGAS MC integrator and the grid"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_nan_test_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vegas_7
 
 @ %def vegas_7
 \clearpage
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{VAMP2}
 \label{sec:vamp2}
 
 We concentrate all configuration and run-time data in a derived-type, such that,
 [[mci_t]] can spwan each time a distinctive MCI VEGAS integrator object.
 
 <<[[vamp2.f90]]>>=
 <<File header>>
 
 module vamp2
 
 <<Use kinds>>
-  use io_units
-  use format_utils, only: pac_fmt
-  use format_utils, only: write_separator, write_indent
-  use format_defs, only: FMT_17
   use diagnostics
-  use iterator
   use rng_base
-  use rng_stream, only: rng_stream_t
 
   use vegas
 
 <<VAMP2: modules>>
 
 <<Standard module head>>
 
 <<VAMP2: public>>
 
 <<VAMP2: parameters>>
 
 <<VAMP2: types>>
 
 <<VAMP2: interfaces>>
 
+  interface
+<<VAMP2: sub interfaces>>
+  end interface
+
 contains
 
-<<VAMP2: procedures>>
+<<VAMP2: main procedures>>
 
 end module vamp2
 @ %def vamp2
+@
+<<[[vamp2_sub.f90]]>>=
+<<File header>>
+
+submodule (vamp2) vamp2_s
+
+  use io_units
+  use format_utils, only: pac_fmt
+  use format_utils, only: write_separator, write_indent
+  use format_defs, only: FMT_17
+  use iterator
+  use rng_stream, only: rng_stream_t
+
+  implicit none
+
+contains
+
+<<VAMP2: procedures>>
+
+end submodule vamp2_s
+
+@ %def vamp2_s
+@
 <<VAMP2: modules>>=
 @
 <<MPI: VAMP2: modules>>=
   use request_base
   use request_simple
   use request_caller
   use balancer_base
   use request_callback
   use mpi_f08 !NODEP!
 @
 \subsection{Type: vamp2\_func\_t}
 \label{sec:vamp2-func}
 
 We extend [[vegas_func_t]] with the multi-channel weights and the
 [[vegas_grid_t]], such that, the overall multi-channel weight can be calculated
 by the function itself.
 
 We add an additional logicial [[valid_x]], if it is set to [[.false.]], we do
 not compute weighted function and just set the weighted integrand to zero.
 This behavior is in particular very useful, if a mapping is prohibited or fails.
 Or in the case of WHIZARD, a phase cut is applied.
 <<VAMP2: public>>=
   public :: vamp2_func_t
 <<VAMP2: types>>=
   type, abstract, extends(vegas_func_t) :: vamp2_func_t
      integer :: current_channel = 0
      integer :: n_dim = 0
      integer :: n_channel = 0
      integer :: n_calls = 0
      logical :: valid_x = .false.
      real(default), dimension(:, :), allocatable :: xi
      real(default), dimension(:), allocatable :: det
      real(default), dimension(:), allocatable :: wi
      real(default), dimension(:), allocatable :: gi
      type(vegas_grid_t), dimension(:), allocatable :: grids
-     real(default) :: g = 0.
+     real(default) :: g = 0._default
    contains
    <<VAMP2: vamp2 func: TBP>>
   end type vamp2_func_t
 
 @ %def vamp2_func_t
 @ Init.
 <<VAMP2: vamp2 func: TBP>>=
   procedure, public :: init => vamp2_func_init
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_func_init (self, n_dim, n_channel)
+      class(vamp2_func_t), intent(out) :: self
+      integer, intent(in) :: n_dim
+      integer, intent(in) :: n_channel
+    end subroutine vamp2_func_init
 <<VAMP2: procedures>>=
-  subroutine vamp2_func_init (self, n_dim, n_channel)
+  module subroutine vamp2_func_init (self, n_dim, n_channel)
     class(vamp2_func_t), intent(out) :: self
     integer, intent(in) :: n_dim
     integer, intent(in) :: n_channel
     self%n_dim = n_dim
     self%n_channel = n_channel
     allocate (self%xi(n_dim, n_channel), source=0._default)
     allocate (self%det(n_channel), source=1._default)
     allocate (self%wi(n_channel), source=0._default)
     allocate (self%gi(n_channel), source=0._default)
     allocate (self%grids(n_channel))
   end subroutine vamp2_func_init
 
 @ %def vamp2_func_init
 @ Set current channel.
 <<VAMP2: vamp2 func: TBP>>=
   procedure, public :: set_channel => vamp2_func_set_channel
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_func_set_channel (self, channel)
+      class(vamp2_func_t), intent(inout) :: self
+      integer, intent(in) :: channel
+    end subroutine vamp2_func_set_channel
 <<VAMP2: procedures>>=
-  subroutine vamp2_func_set_channel (self, channel)
+  module subroutine vamp2_func_set_channel (self, channel)
     class(vamp2_func_t), intent(inout) :: self
     integer, intent(in) :: channel
     self%current_channel = channel
   end subroutine vamp2_func_set_channel
 
 @ %def vamp2_func_set_channel
 @ Get number of function calls for which $f \neq 0$.
 <<VAMP2: vamp2 func: TBP>>=
   procedure, public :: get_n_calls => vamp2_func_get_n_calls
+<<VAMP2: sub interfaces>>=
+    module function vamp2_func_get_n_calls (self) result (n_calls)
+      class(vamp2_func_t), intent(in) :: self
+      integer :: n_calls
+    end function vamp2_func_get_n_calls
 <<VAMP2: procedures>>=
-  integer function vamp2_func_get_n_calls (self) result (n_calls)
+  module function vamp2_func_get_n_calls (self) result (n_calls)
     class(vamp2_func_t), intent(in) :: self
+    integer :: n_calls
     n_calls = self%n_calls
   end function vamp2_func_get_n_calls
 
 @ %def vamp2_func_get_func_calls
 @ Reset number of calls.
 <<VAMP2: vamp2 func: TBP>>=
   procedure, public :: reset_n_calls => vamp2_func_reset_n_calls
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_func_reset_n_calls (self)
+      class(vamp2_func_t), intent(inout) :: self
+    end subroutine vamp2_func_reset_n_calls
 <<VAMP2: procedures>>=
-  subroutine vamp2_func_reset_n_calls (self)
+  module subroutine vamp2_func_reset_n_calls (self)
     class(vamp2_func_t), intent(inout) :: self
     self%n_calls = 0
   end subroutine vamp2_func_reset_n_calls
 
 @ %def vamp2_func_reset_n_calls
 @ Evaluate mappings. We defer this method to be implemented by the user.
 The result must be written to [[xi]] and [[det]].
 
 The mapping is defined by $\phi : U \rightarrow M$. We map $x \in M$ to the
 different mappings of the hypercube $U_{i}$, such that $x_{i} \in U_{i}$.
 
 The mapping should determine, whether [[x]] is a valid point, e.g. can be
 mapped, or is restricted otherwise.
 <<VAMP2: vamp2 func: TBP>>=
   procedure(vamp2_func_evaluate_maps), deferred :: evaluate_maps
 <<VAMP2: interfaces>>=
  abstract interface
     subroutine vamp2_func_evaluate_maps (self, x)
       import :: vamp2_func_t, default
       class(vamp2_func_t), intent(inout) :: self
       real(default), dimension(:), intent(in) :: x
     end subroutine vamp2_func_evaluate_maps
  end interface
 
 @ %def vamp2_evaluate_func
 @ Evaluate channel weights.
 
 The calling procedure must handle the case of a vanishing overall probability
 density where either a channel weight or a channel probability vanishes.
 <<VAMP2: vamp2 func: TBP>>=
   procedure, private :: evaluate_weight => vamp2_func_evaluate_weight
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_func_evaluate_weight (self)
+      class(vamp2_func_t), intent(inout) :: self
+    end subroutine vamp2_func_evaluate_weight
 <<VAMP2: procedures>>=
-  subroutine vamp2_func_evaluate_weight (self)
+  module subroutine vamp2_func_evaluate_weight (self)
     class(vamp2_func_t), intent(inout) :: self
     integer :: ch
-    self%g = 0.
-    self%gi = 0.
+    self%g = 0
+    self%gi = 0
     !$OMP PARALLEL DO PRIVATE(ch) SHARED(self)
     do ch = 1, self%n_channel
        if (self%wi(ch) /= 0) then
           self%gi(ch) = self%grids(ch)%get_probability (self%xi(:, ch))
        end if
     end do
     !$OMP END PARALLEL DO
     if (self%gi(self%current_channel) /= 0) then
        do ch = 1, self%n_channel
           if (self%wi(ch) /= 0 .and. self%det(ch) /= 0) then
              self%g = self%g + self%wi(ch) * self%gi(ch) / self%det(ch)
           end if
        end do
        self%g = self%g / self%gi(self%current_channel)
     end if
   end subroutine vamp2_func_evaluate_weight
 
 @ %def vamp2_func_evaluate_weight
 @ Evaluate function at [[x]]. We call this procedure in [[vamp2_func_evaluate]].
 <<VAMP2: vamp2 func: TBP>>=
   procedure(vamp2_func_evaluate_func), deferred :: evaluate_func
 <<VAMP2: interfaces>>=
   abstract interface
      real(default) function vamp2_func_evaluate_func (self, x) result (f)
        import :: vamp2_func_t, default
        class(vamp2_func_t), intent(in) :: self
        real(default), dimension(:), intent(in) :: x
      end function vamp2_func_evaluate_func
   end interface
 
 @ %def vamp2_func_evaluate_func
 <<VAMP2: vamp2 func: TBP>>=
   procedure, public :: evaluate => vamp2_func_evaluate
+<<VAMP2: sub interfaces>>=
+    module function vamp2_func_evaluate (self, x) result (f)
+      class(vamp2_func_t), intent(inout) :: self
+      real(default) :: f
+      real(default), dimension(:), intent(in) :: x
+    end function vamp2_func_evaluate
 <<VAMP2: procedures>>=
-  real(default) function vamp2_func_evaluate (self, x) result (f)
+  module function vamp2_func_evaluate (self, x) result (f)
     class(vamp2_func_t), intent(inout) :: self
+    real(default) :: f
     real(default), dimension(:), intent(in) :: x
     call self%evaluate_maps (x)
     f = 0.
     self%gi = 0.
     self%g = 1
     if (self%valid_x) then
        call self%evaluate_weight ()
        if (self%g /= 0) then
           f = self%evaluate_func (x) / self%g
           self%n_calls = self%n_calls + 1
        end if
     end if
   end function vamp2_func_evaluate
 
 @ %def vamp2_func_evaluate
 \subsection{Type: vamp2\_config\_t}
 \label{sec:vamp2-config}
 
 This is a transparent container which incorporates and extends the definitions
 in [[vegas_config]]. The parent object can then be used to parametrise the VEGAS
 grids directly, where the new parameters are exclusively used in the
 multi-channel implementation of VAMP2.
 [[n_calls_min]] is calculated by [[n_calls_min_per_channel]] and [[n_channel]].
 The channels weights (and the result [[n_calls]] for each channel) are
 calculated regarding [[n_calls_threshold]].
 <<VAMP2: public>>=
   public :: vamp2_config_t
 <<VAMP2: types>>=
   type, extends(vegas_config_t) :: vamp2_config_t
      integer :: n_channel = 0
      integer :: n_calls_min_per_channel = 20
      integer :: n_calls_threshold = 10
      integer :: n_chains = 0
      logical :: stratified = .true.
      logical :: equivalences = .false.
      real(default) :: beta = 0.5_default
      real(default) :: accuracy_goal = 0._default
      real(default) :: error_goal = 0._default
      real(default) :: rel_error_goal = 0._default
    contains
    <<VAMP2: vamp2 config: TBP>>
   end type vamp2_config_t
 
 @ %def vamp2_config_t
 @ Write.
 <<VAMP2: vamp2 config: TBP>>=
   procedure, public :: write => vamp2_config_write
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_config_write (self, unit, indent)
+      class(vamp2_config_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+      integer, intent(in), optional :: indent
+    end subroutine vamp2_config_write
 <<VAMP2: procedures>>=
-  subroutine vamp2_config_write (self, unit, indent)
+  module subroutine vamp2_config_write (self, unit, indent)
     class(vamp2_config_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call self%vegas_config_t%write (unit, indent)
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of channels                               = ", self%n_channel
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Min. number of calls per channel (setting calls) = ", &
          & self%n_calls_min_per_channel
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Threshold number of calls (adapting weights)     = ", &
          & self%n_calls_threshold
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of chains                                 = ", self%n_chains
     call write_indent (u, ind)
     write (u, "(2x,A,L1)") &
          & "Stratified                                       = ", self%stratified
     call write_indent (u, ind)
     write (u, "(2x,A,L1)") &
          & "Equivalences                                     = ", self%equivalences
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Adaption power (beta)                            = ", self%beta
     if (self%accuracy_goal > 0) then
        call write_indent (u, ind)
        write (u, "(2x,A," // FMT_17 // ")") &
             & "accuracy_goal                                 = ", self%accuracy_goal
     end if
     if (self%error_goal > 0) then
        call write_indent (u, ind)
        write (u, "(2x,A," // FMT_17 // ")") &
             & "error_goal                                    = ", self%error_goal
     end if
     if (self%rel_error_goal > 0) then
        call write_indent (u, ind)
        write (u, "(2x,A," // FMT_17 // ")") &
             & "rel_error_goal                                = ", self%rel_error_goal
     end if
   end subroutine vamp2_config_write
 
 @ %def vamp2_config_write
 @
 \subsection{Type: vamp2\_result\_t}
 \label{sec:vamp2-result}
 
 This is a transparent container which incorporates and extends the definitions
 of [[vegas_result_t]].
 
 <<VAMP2: public>>=
   public :: vamp2_result_t
 <<VAMP2: types>>=
   type, extends(vegas_result_t) :: vamp2_result_t
    contains
    <<VAMP2: vamp2 result: TBP>>
   end type vamp2_result_t
 
 @ %def vamp2_result_t
 @ Output.
 <<VAMP2: vamp2 result: TBP>>=
   procedure, public :: write => vamp2_result_write
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_result_write (self, unit, indent)
+      class(vamp2_result_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+      integer, intent(in), optional :: indent
+    end subroutine vamp2_result_write
 <<VAMP2: procedures>>=
-  subroutine vamp2_result_write (self, unit, indent)
+  module subroutine vamp2_result_write (self, unit, indent)
     class(vamp2_result_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call self%vegas_result_t%write (unit, indent)
   end subroutine vamp2_result_write
 
 @ %def vamp2_result_write
 @
 \subsection{Type: vamp2\_equivalences\_t}
 \label{sec:vamp2-eqv}
 
 <<VAMP2: parameters>>=
 integer, parameter, public :: &
      VEQ_IDENTITY = 0, VEQ_INVERT = 1, VEQ_SYMMETRIC = 2, VEQ_INVARIANT = 3
 @
 @ Channel equivalences. Store retrieving and sourcing channel.
 <<VAMP2: types>>=
   type :: vamp2_equi_t
      integer :: ch
      integer :: ch_src
      integer, dimension(:), allocatable :: perm
      integer, dimension(:), allocatable :: mode
    contains
    <<VAMP2: vamp2 equi: TBP>>
   end type vamp2_equi_t
 
 @ %def vamp2_equi_t
 @ Write equivalence.
 <<VAMP2: vamp2 equi: TBP>>=
   procedure :: write => vamp2_equi_write
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_equi_write (self, unit, indent)
+      class(vamp2_equi_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+      integer, intent(in), optional :: indent
+    end subroutine vamp2_equi_write
 <<VAMP2: procedures>>=
-  subroutine vamp2_equi_write (self, unit, indent)
+  module subroutine vamp2_equi_write (self, unit, indent)
     class(vamp2_equi_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call write_indent (u, ind)
     write (u, "(2(A,1X,I0))") "src:", self%ch_src, "-> dest:", self%ch
     call write_indent (u, ind)
     write (u, "(A,99(1X,I0))") "Perm: ", self%perm
     call write_indent (u, ind)
     write (u, "(A,99(1X,I0))") "Mode: ", self%mode
   end subroutine vamp2_equi_write
 
 @ %def vamp2_equi_write
 @
 <<VAMP2: public>>=
   public :: vamp2_equivalences_t
 <<VAMP2: types>>=
   type :: vamp2_equivalences_t
       private
       integer :: n_eqv = 0
       integer :: n_channel = 0
       integer :: n_dim = 0
       type(vamp2_equi_t), dimension(:), allocatable :: eqv
       integer, dimension(:), allocatable :: map
       integer, dimension(:), allocatable :: multiplicity
       integer, dimension(:), allocatable :: symmetry
       logical, dimension(:), allocatable :: independent
       integer, dimension(:), allocatable :: equivalent_to_ch
-      logical, dimension(:, :), allocatable :: dim_is_invariant
+      logical, dimension(:,:), allocatable :: dim_is_invariant
    contains
    <<VAMP2: vamp2 equivalences: TBP>>
   end type vamp2_equivalences_t
 
 @ %def vamp2_equivalences_t
 @ Constructor.
 <<VAMP2: interfaces>>=
   interface vamp2_equivalences_t
      module procedure vamp2_equivalences_init
   end interface vamp2_equivalences_t
 
+<<VAMP2: sub interfaces>>=
+    module function vamp2_equivalences_init (n_eqv, n_channel, &
+         n_dim) result (eqv)
+      type(vamp2_equivalences_t) :: eqv
+      integer, intent(in) :: n_eqv, n_channel, n_dim
+    end function vamp2_equivalences_init
 <<VAMP2: procedures>>=
-  type(vamp2_equivalences_t) function vamp2_equivalences_init (&
-       n_eqv, n_channel, n_dim) result (eqv)
+  module function vamp2_equivalences_init (n_eqv, n_channel, &
+       n_dim) result (eqv)
+    type(vamp2_equivalences_t) :: eqv
     integer, intent(in) :: n_eqv, n_channel, n_dim
     eqv%n_eqv = n_eqv
     eqv%n_channel = n_channel
     eqv%n_dim = n_dim
     allocate (eqv%eqv(n_eqv))
     allocate (eqv%map(n_channel), source = 0)
     allocate (eqv%multiplicity(n_channel), source = 0)
     allocate (eqv%symmetry(n_channel), source = 0)
     allocate (eqv%independent(n_channel), source = .true.)
     allocate (eqv%equivalent_to_ch(n_channel), source = 0)
     allocate (eqv%dim_is_invariant(n_dim, n_channel), source = .false.)
   end function vamp2_equivalences_init
 
 @ %def vamp2_equivlences_init
 @ Write equivalences.
 <<VAMP2: vamp2 equivalences: TBP>>=
   procedure :: write => vamp2_equivalences_write
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_equivalences_write (self, unit, indent)
+      class(vamp2_equivalences_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+      integer, intent(in), optional :: indent
+    end subroutine vamp2_equivalences_write
 <<VAMP2: procedures>>=
-  subroutine vamp2_equivalences_write (self, unit, indent)
+  module subroutine vamp2_equivalences_write (self, unit, indent)
     class(vamp2_equivalences_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind, i_eqv, ch
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     write (u, "(A)") "Inequivalent channels:"
     if (allocated (self%independent)) then
        do ch = 1, self%n_channel
           if (self%independent(ch)) then
              write (u, "(2X,A,1x,I0,A,4x,A,I0,4x,A,I0,4x,A,999(L1))") &
                   "Channel", ch, ":", &
                   "Mult. = ", self%multiplicity(ch), &
                   "Symm. = ", self%symmetry(ch), &
                   "Invar.: ", self%dim_is_invariant(:, ch)
           end if
        end do
     else
        write (u, "(A)") "[not allocated]"
     end if
     write (u, "(A)") "Equivalence list:"
     if (allocated (self%eqv)) then
        do i_eqv = 1, self%n_eqv
           write (u, "(2X,A,1X,I0)") "i_eqv:", i_eqv
           call self%eqv(i_eqv)%write (unit, indent = ind + 4)
        end do
     else
        write (u, "(A)") "[not allocated]"
     end if
   end subroutine vamp2_equivalences_write
 
 @ %def vamp2_equivalences_write
 @ Is allocated.
 <<VAMP2: vamp2 equivalences: TBP>>=
   procedure, public :: is_allocated => vamp2_equivalences_is_allocated
+<<VAMP2: sub interfaces>>=
+    module function vamp2_equivalences_is_allocated (self) result (yorn)
+      class(vamp2_equivalences_t), intent(in) :: self
+      logical :: yorn
+    end function vamp2_equivalences_is_allocated
 <<VAMP2: procedures>>=
-  logical function vamp2_equivalences_is_allocated (self) result (yorn)
+  module function vamp2_equivalences_is_allocated (self) result (yorn)
     class(vamp2_equivalences_t), intent(in) :: self
+    logical :: yorn
     yorn = allocated (self%eqv)
   end function vamp2_equivalences_is_allocated
 
 @ %def vamp2_equivalences_is_allocated
 @ Get source channel and destination channel for given equivalence.
 <<VAMP2: vamp2 equivalences: TBP>>=
   procedure, public :: get_channels => vamp2_equivalences_get_channels
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_equivalences_get_channels (eqv, i_eqv, dest, src)
+      class(vamp2_equivalences_t), intent(in) :: eqv
+      integer, intent(in) :: i_eqv
+      integer, intent(out) :: dest, src
+    end subroutine vamp2_equivalences_get_channels
 <<VAMP2: procedures>>=
-  subroutine vamp2_equivalences_get_channels (eqv, i_eqv, dest, src)
-   class(vamp2_equivalences_t), intent(in) :: eqv
-   integer, intent(in) :: i_eqv
-   integer, intent(out) :: dest, src
-   dest = eqv%eqv(i_eqv)%ch
-   src = eqv%eqv(i_eqv)%ch_src
+  module subroutine vamp2_equivalences_get_channels (eqv, i_eqv, dest, src)
+    class(vamp2_equivalences_t), intent(in) :: eqv
+    integer, intent(in) :: i_eqv
+    integer, intent(out) :: dest, src
+    dest = eqv%eqv(i_eqv)%ch
+    src = eqv%eqv(i_eqv)%ch_src
   end subroutine vamp2_equivalences_get_channels
 
 @ %def vamp2_equivalences_get_channels
 @
 <<VAMP2: vamp2 equivalences: TBP>>=
   procedure, public :: get_mode => vamp2_equivalences_get_mode
   procedure, public :: get_perm => vamp2_equivalences_get_perm
+<<VAMP2: sub interfaces>>=
+    module function vamp2_equivalences_get_mode (eqv, i_eqv) result (mode)
+      class(vamp2_equivalences_t), intent(in) :: eqv
+      integer, intent(in) :: i_eqv
+      integer, dimension(:), allocatable :: mode
+    end function vamp2_equivalences_get_mode
+    module function vamp2_equivalences_get_perm (eqv, i_eqv) result (perm)
+      class(vamp2_equivalences_t), intent(in) :: eqv
+      integer, intent(in) :: i_eqv
+      integer, dimension(:), allocatable :: perm
+    end function vamp2_equivalences_get_perm
 <<VAMP2: procedures>>=
-  function vamp2_equivalences_get_mode (eqv, i_eqv) result (mode)
+  module function vamp2_equivalences_get_mode (eqv, i_eqv) result (mode)
     class(vamp2_equivalences_t), intent(in) :: eqv
     integer, intent(in) :: i_eqv
     integer, dimension(:), allocatable :: mode
     mode = eqv%eqv(i_eqv)%mode
   end function vamp2_equivalences_get_mode
 
-  function vamp2_equivalences_get_perm (eqv, i_eqv) result (perm)
+  module function vamp2_equivalences_get_perm (eqv, i_eqv) result (perm)
     class(vamp2_equivalences_t), intent(in) :: eqv
     integer, intent(in) :: i_eqv
     integer, dimension(:), allocatable :: perm
     perm = eqv%eqv(i_eqv)%perm
   end function vamp2_equivalences_get_perm
 
 @ %def vamp2_equivalences_get_perm, vamp2_equivalences_get_mode
 @
 <<VAMP2: vamp2 equivalences: TBP>>=
-  procedure, public :: set_equivalence => vamp2_equivalences_set_equivalence
+  procedure, public :: set_equivalence => &
+       vamp2_equivalences_set_equivalence
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_equivalences_set_equivalence &
+         (eqv, i_eqv, dest, src, perm, mode)
+      class(vamp2_equivalences_t), intent(inout) :: eqv
+      integer, intent(in) :: i_eqv
+      integer, intent(in) :: dest, src
+      integer, dimension(:), intent(in) :: perm, mode
+    end subroutine vamp2_equivalences_set_equivalence
 <<VAMP2: procedures>>=
-  subroutine vamp2_equivalences_set_equivalence &
+  module subroutine vamp2_equivalences_set_equivalence &
        (eqv, i_eqv, dest, src, perm, mode)
     class(vamp2_equivalences_t), intent(inout) :: eqv
     integer, intent(in) :: i_eqv
     integer, intent(in) :: dest, src
     integer, dimension(:), intent(in) :: perm, mode
     integer :: i
     if (dest < 1 .or. dest > eqv%n_channel)  call msg_bug &
        ("VAMP2: set_equivalences: destination channel out of range.")
     if (src < 1 .or. src > eqv%n_channel)  call msg_bug &
          ("VAMP2: set_equivalences: source channel out of range.")
     if (size(perm) /= eqv%n_dim)  call msg_bug &
          ("VAMP2: set_equivalences: size(perm) does not match n_dim.")
     if (size(mode) /= eqv%n_dim)  call msg_bug &
          ("VAMP2: set_equivalences: size(mode) does not match n_dim.")
     eqv%eqv(i_eqv)%ch = dest
     eqv%eqv(i_eqv)%ch_src = src
     allocate (eqv%eqv(i_eqv)%perm (size (perm)))
     do i = 1, size (perm)
        eqv%eqv(i_eqv)%perm(i) = perm(i)
     end do
     allocate (eqv%eqv(i_eqv)%mode (size (mode)))
     do i = 1, size (mode)
        eqv%eqv(i_eqv)%mode(i) = mode(i)
     end do
   end subroutine vamp2_equivalences_set_equivalence
 
 @ %def vamp2_equivalences_set_equivalence
 @ Freeze equivalences.
 <<VAMP2: vamp2 equivalences: TBP>>=
   procedure, public :: freeze => vamp2_equivalences_freeze
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_equivalences_freeze (self)
+      class(vamp2_equivalences_t), intent(inout) :: self
+    end subroutine vamp2_equivalences_freeze
 <<VAMP2: procedures>>=
-  subroutine vamp2_equivalences_freeze (self)
+  module subroutine vamp2_equivalences_freeze (self)
     class(vamp2_equivalences_t), intent(inout) :: self
     integer :: i_eqv, ch, upper, lower
     ch = 0
     do i_eqv = 1, self%n_eqv
        if (ch /= self%eqv(i_eqv)%ch) then
           ch = self%eqv(i_eqv)%ch
           self%map(ch) = i_eqv
        end if
     end do
     do ch = 1, self%n_channel
        lower = self%map(ch)
        if (ch == self%n_channel) then
           upper = self%n_eqv
        else
           upper = self%map(ch + 1) - 1
        end if
        associate (eqv => self%eqv, n_eqv => size (self%eqv(lower:upper)))
          if (.not. all(eqv(lower:upper)%ch == ch) .or. &
               eqv(lower)%ch_src > ch) then
             do i_eqv = lower, upper
                call self%eqv(i_eqv)%write ()
             end do
             call msg_bug ("VAMP2: vamp2_equivalences_freeze: &
                  &equivalence order is not correct.")
          end if
          self%symmetry(ch) = count (eqv(lower:upper)%ch_src == ch)
          if (mod (n_eqv, self%symmetry(ch)) /= 0) then
             do i_eqv = lower, upper
                call self%eqv(i_eqv)%write ()
             end do
             call msg_bug ("VAMP2: vamp2_equivalences_freeze: &
                  &permutation count is not correct.")
          end if
          self%multiplicity(ch) = n_eqv / self%symmetry(ch)
          self%independent(ch) = all (eqv(lower:upper)%ch_src >= ch)
          self%equivalent_to_ch(ch) = eqv(lower)%ch_src
          self%dim_is_invariant(:, ch) = eqv(lower)%mode == VEQ_INVARIANT
        end associate
     end do
   end subroutine vamp2_equivalences_freeze
 
 @ %def vamp2_equivalences_freeze
 @
 \subsection{Type: vamp2\_t}
 \label{sec:vamp2-t}
 
 <<VAMP2: public>>=
   public :: vamp2_t
 <<VAMP2: types>>=
   type :: vamp2_t
      private
      type(vamp2_config_t) :: config
      type(vegas_t), dimension(:), allocatable :: integrator
      integer, dimension(:), allocatable :: chain
      real(default), dimension(:), allocatable :: weight
      real(default), dimension(:), allocatable :: integral
      real(default), dimension(:), allocatable :: variance
      real(default), dimension(:), allocatable :: efficiency
      type(vamp2_result_t) :: result
      type(vamp2_equivalences_t) :: equivalences
      logical :: event_prepared
      real(default), dimension(:), allocatable :: event_weight
    <<VAMP2: vamp2: type>>
    contains
    <<VAMP2: vamp2: TBP>>
   end type vamp2_t
 <<MPI: VAMP2: vamp2: type>>=
   class(request_base_t), allocatable :: request
 <<VAMP2: interfaces>>=
   interface vamp2_t
      module procedure vamp2_init
   end interface vamp2_t
 
 @ %def vamp2_t
 @ Constructor.
+<<VAMP2: sub interfaces>>=
+    module function vamp2_init (n_channel, n_dim, alpha, beta, n_bins_max,&
+         & n_calls_min_per_channel, iterations, mode) result (self)
+      type(vamp2_t) :: self
+      integer, intent(in) :: n_channel
+      integer, intent(in) :: n_dim
+      integer, intent(in), optional :: n_bins_max
+      integer, intent(in), optional :: n_calls_min_per_channel
+      real(default), intent(in), optional :: alpha
+      real(default), intent(in), optional :: beta
+      integer, intent(in), optional :: iterations
+      integer, intent(in), optional :: mode
+    end function vamp2_init
 <<VAMP2: procedures>>=
-  type(vamp2_t) function vamp2_init (n_channel, n_dim, alpha, beta, n_bins_max,&
+  module function vamp2_init (n_channel, n_dim, alpha, beta, n_bins_max,&
        & n_calls_min_per_channel, iterations, mode) result (self)
+    type(vamp2_t) :: self
     integer, intent(in) :: n_channel
     integer, intent(in) :: n_dim
     integer, intent(in), optional :: n_bins_max
     integer, intent(in), optional :: n_calls_min_per_channel
     real(default), intent(in), optional :: alpha
     real(default), intent(in), optional :: beta
     integer, intent(in), optional :: iterations
     integer, intent(in), optional :: mode
     integer :: ch
     self%config%n_dim = n_dim
     self%config%n_channel = n_channel
     call set_options ()
     allocate (self%chain(n_channel), source=0)
     allocate (self%integrator(n_channel))
     allocate (self%weight(n_channel), source=0._default)
     do ch = 1, n_channel
        self%integrator(ch) = vegas_t (n_dim, alpha, n_bins_max, 1, mode)
     end do
     self%weight = 1._default / self%config%n_channel
     call self%reset_result ()
     allocate (self%event_weight(self%config%n_channel), source = 0._default)
     self%event_prepared = .false.
   contains
     subroutine set_options ()
       if (present (n_bins_max)) self%config%n_bins_max = n_bins_max
-      if (present (n_calls_min_per_channel)) self%config%n_calls_min_per_channel = n_calls_min_per_channel
+      if (present (n_calls_min_per_channel)) &
+           self%config%n_calls_min_per_channel = n_calls_min_per_channel
       if (present (alpha)) self%config%alpha = alpha
       if (present (beta)) self%config%beta = beta
       if (present (iterations)) self%config%iterations = iterations
       if (present (mode)) self%config%mode = mode
     end subroutine set_options
   end function vamp2_init
 
 @ %def vamp2_init
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: final => vamp2_final
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_final (self)
+      class(vamp2_t), intent(inout) :: self
+    end subroutine vamp2_final
 <<VAMP2: procedures>>=
-  subroutine vamp2_final (self)
+  module subroutine vamp2_final (self)
     class(vamp2_t), intent(inout) :: self
     integer :: ch
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%final ()
     end do
   end subroutine vamp2_final
 
 @ %def vamp2_final
 @ Output.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: write => vamp2_write
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_write (self, unit, indent)
+      class(vamp2_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+      integer, intent(in), optional :: indent
+    end subroutine vamp2_write
 <<VAMP2: procedures>>=
-  subroutine vamp2_write (self, unit, indent)
+  module subroutine vamp2_write (self, unit, indent)
     class(vamp2_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind, ch
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call write_indent (u, ind)
     write (u, "(A)") "VAMP2: VEGAS AMPlified 2"
     call write_indent (u, ind)
     call self%config%write (unit, indent)
     call self%result%write (unit, indent)
   end subroutine vamp2_write
 
 @ %def vamp2_write
 @ Get the config object.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: get_config => vamp2_get_config
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_get_config (self, config)
+      class(vamp2_t), intent(in) :: self
+      type(vamp2_config_t), intent(out) :: config
+    end subroutine vamp2_get_config
 <<VAMP2: procedures>>=
-  subroutine vamp2_get_config (self, config)
+  module subroutine vamp2_get_config (self, config)
     class(vamp2_t), intent(in) :: self
     type(vamp2_config_t), intent(out) :: config
     config = self%config
   end subroutine vamp2_get_config
 
 @ %def vamp2_get_config
 @ Set non-runtime dependent configuration. It will no be possible to change
 [[n_bins_max]].
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: set_config => vamp2_set_config
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_set_config (self, config)
+      class(vamp2_t), intent(inout) :: self
+      class(vamp2_config_t), intent(in) :: config
+    end subroutine vamp2_set_config
 <<VAMP2: procedures>>=
-  subroutine vamp2_set_config (self, config)
+  module subroutine vamp2_set_config (self, config)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_config_t), intent(in) :: config
     integer :: ch
     self%config%equivalences = config%equivalences
     self%config%n_calls_min_per_channel = config%n_calls_min_per_channel
     self%config%n_calls_threshold = config%n_calls_threshold
     self%config%n_calls_min = config%n_calls_min
     self%config%beta = config%beta
     self%config%accuracy_goal = config%accuracy_goal
     self%config%error_goal = config%error_goal
     self%config%rel_error_goal = config%rel_error_goal
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%set_config (config)
     end do
   end subroutine vamp2_set_config
 
 @ %def vamp2_set_config
 @ Set the overall number of calls. The number of calls each channel is scaled by
 the channel weights
 \begin{equation}
   N_i = \alpha_i N.
 \end{equation}
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: set_calls => vamp2_set_n_calls
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_set_n_calls (self, n_calls)
+      class(vamp2_t), intent(inout) :: self
+      integer, intent(in) :: n_calls
+    end subroutine vamp2_set_n_calls
 <<VAMP2: procedures>>=
-  subroutine vamp2_set_n_calls (self, n_calls)
+  module subroutine vamp2_set_n_calls (self, n_calls)
     class(vamp2_t), intent(inout) :: self
     integer, intent(in) :: n_calls
     integer :: ch
     self%config%n_calls_min = self%config%n_calls_min_per_channel &
          & * self%config%n_channel
     self%config%n_calls = max(n_calls, self%config%n_calls_min)
     if (self%config%n_calls > n_calls) then
        write (msg_buffer, "(A,I0)") "VAMP2: [set_calls] number of calls too few,&
             & reset to = ", self%config%n_calls
        call msg_message ()
     end if
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%set_calls (max (nint (self%config%n_calls *&
             & self%weight(ch)), self%config%n_calls_min_per_channel))
     end do
   end subroutine vamp2_set_n_calls
 
 @ %def vamp2_set_n_calls
 @ Set limits. We only support same limits for all channels.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: set_limits => vamp2_set_limits
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_set_limits (self, x_upper, x_lower)
+      class(vamp2_t), intent(inout) :: self
+      real(default), dimension(:), intent(in) :: x_upper
+      real(default), dimension(:), intent(in) :: x_lower
+    end subroutine vamp2_set_limits
 <<VAMP2: procedures>>=
-  subroutine vamp2_set_limits (self, x_upper, x_lower)
+  module subroutine vamp2_set_limits (self, x_upper, x_lower)
     class(vamp2_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x_upper
     real(default), dimension(:), intent(in) :: x_lower
     integer :: ch
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%set_limits (x_upper, x_lower)
     end do
   end subroutine vamp2_set_limits
 
 @ %def vamp2_set_limits
 @ Set [[n_chains]] and the (actual) chains. [[chain]] must have size
 [[n_channels]] and each elements must store an index to a corresponding chain.
 This means, that channels with equal index correspond to the same chain, and we
 refer to those as chained weights, where we average the contributions of the
 chained weights in [[vamp2_adapt_weights]].
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: set_chain => vamp2_set_chain
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_set_chain (self, n_chains, chain)
+      class(vamp2_t), intent(inout) :: self
+      integer, intent(in) :: n_chains
+      integer, dimension(:), intent(in) :: chain
+    end subroutine vamp2_set_chain
 <<VAMP2: procedures>>=
-  subroutine vamp2_set_chain (self, n_chains, chain)
+  module subroutine vamp2_set_chain (self, n_chains, chain)
     class(vamp2_t), intent(inout) :: self
     integer, intent(in) :: n_chains
     integer, dimension(:), intent(in) :: chain
     if (size (chain) /= self%config%n_channel) then
        call msg_bug ("VAMP2: set chain: size of chain array does not match n_channel.")
     else
        call msg_message ("VAMP2: set chain: use chained weights.")
     end if
     self%config%n_chains = n_chains
     self%chain = chain
   end subroutine vamp2_set_chain
 
 @ %def vamp2_set_chain
 @ Set channel equivalences.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: set_equivalences => vamp2_set_equivalences
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_set_equivalences (self, equivalences)
+      class(vamp2_t), intent(inout) :: self
+      type(vamp2_equivalences_t), intent(in) :: equivalences
+    end subroutine vamp2_set_equivalences
 <<VAMP2: procedures>>=
-  subroutine vamp2_set_equivalences (self, equivalences)
+  module subroutine vamp2_set_equivalences (self, equivalences)
     class(vamp2_t), intent(inout) :: self
     type(vamp2_equivalences_t), intent(in) :: equivalences
     self%equivalences = equivalences
   end subroutine vamp2_set_equivalences
 
 @ %def vamp2_set_equivalences
 @ Move allocated (and prepared!) request object into VAMP2.
+Gfortran 7/8/9 bug, has to remain in the main module.
 <<MPI: VAMP2: vamp2: TBP>>=
   generic, public :: allocate_request => allocate_request_by_method, &
      allocate_request_by_object
   procedure, private :: allocate_request_by_method => vamp2_allocate_request_by_method
   procedure, private :: allocate_request_by_object => vamp2_allocate_request_by_object
-<<MPI: VAMP2: procedures>>=
+<<MPI: VAMP2: main procedures>>=
   subroutine vamp2_allocate_request_by_method (self, method)
     class(vamp2_t), intent(inout) :: self
     character(len=*), intent(in) :: method
     class(request_base_t), allocatable :: request
     select case (trim(method))
     case("simple", "Simple", "SIMPLE")
        allocate (request_simple_t :: request)
     case("load", "Load", "LOAD")
        allocate (request_caller_t :: request)
     case default
        call msg_bug ("VAMP2: Unknown method for MPI request module.")
     end select
     select type (request)
     type is (request_simple_t)
        call request%init (MPI_COMM_WORLD, n_channels = self%config%n_channel)
     type is (request_caller_t)
        call request%init (MPI_COMM_WORLD, n_channels = self%config%n_channel)
     end select
     call self%allocate_request_by_object (request)
   end subroutine vamp2_allocate_request_by_method
 
   subroutine vamp2_allocate_request_by_object (self, request)
     class(vamp2_t), intent(inout) :: self
     class(request_base_t), allocatable, intent(inout) :: request
     !! Only output in case of "parallel" integration.
     if (request%has_workers ()) then
        select type (request)
        type is (request_simple_t)
           call msg_message ("VAMP2: Simple Request Balancing.")
        type is (request_caller_t)
           call msg_message ("VAMP2: Request with load balancing.")
        class default
           call msg_bug ("VAMP2: Unknown extension of request_base.")
        end select
     end if
     call move_alloc (request, self%request)
   end subroutine vamp2_allocate_request_by_object
 
 @ %def vamp2_allocate_request
 @ Get [[n_calls]] calculated by [[VEGAS]].
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: get_n_calls => vamp2_get_n_calls
+<<VAMP2: sub interfaces>>=
+    elemental module function vamp2_get_n_calls (self) result (n_calls)
+      class(vamp2_t), intent(in) :: self
+      real(default) :: n_calls
+    end function vamp2_get_n_calls
 <<VAMP2: procedures>>=
-  elemental real(default) function vamp2_get_n_calls (self) result (n_calls)
+  elemental module function vamp2_get_n_calls (self) result (n_calls)
     class(vamp2_t), intent(in) :: self
+    real(default) :: n_calls
     n_calls = sum (self%integrator%get_calls ())
   end function vamp2_get_n_calls
 
 @ %def vamp2_get_n_calls
 @ Get the cumulative result of the integration. Recalculate weighted average of the integration.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: get_integral => vamp2_get_integral
+<<VAMP2: sub interfaces>>=
+    elemental module function vamp2_get_integral (self) result (integral)
+      class(vamp2_t), intent(in) :: self
+      real(default) :: integral
+    end function vamp2_get_integral
 <<VAMP2: procedures>>=
-  elemental real(default) function vamp2_get_integral (self) result (integral)
+  elemental module function vamp2_get_integral (self) result (integral)
     class(vamp2_t), intent(in) :: self
+    real(default) :: integral
     integral = 0.
     if (self%result%sum_wgts > 0.) then
        integral = self%result%sum_int_wgtd / self%result%sum_wgts
     end if
   end function vamp2_get_integral
 
 @ %def vamp2_get_integral
 @ Get the cumulative variance of the integration. Recalculate the variance.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: get_variance => vamp2_get_variance
+<<VAMP2: sub interfaces>>=
+    elemental module function vamp2_get_variance (self) result (variance)
+      class(vamp2_t), intent(in) :: self
+      real(default) :: variance
+    end function vamp2_get_variance
 <<VAMP2: procedures>>=
-  elemental real(default) function vamp2_get_variance (self) result (variance)
+  elemental module function vamp2_get_variance (self) result (variance)
     class(vamp2_t), intent(in) :: self
-    variance = 0.
+    real(default) :: variance
+    variance = 0
     if (self%result%sum_wgts > 0.) then
        variance = 1.0 / self%result%sum_wgts
     end if
   end function vamp2_get_variance
 
 @ %def vamp2_get_variance
 @ Get efficiency.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: get_efficiency => vamp2_get_efficiency
+<<VAMP2: sub interfaces>>=
+    elemental module function vamp2_get_efficiency (self) result (efficiency)
+      class(vamp2_t), intent(in) :: self
+      real(default) :: efficiency
+    end function vamp2_get_efficiency
 <<VAMP2: procedures>>=
-  elemental real(default) function vamp2_get_efficiency (self) result (efficiency)
+  elemental module function vamp2_get_efficiency (self) result (efficiency)
     class(vamp2_t), intent(in) :: self
+    real(default) :: efficiency
     efficiency = 0.
     if (self%result%efficiency > 0.) then
        efficiency = self%result%efficiency
     end if
   end function vamp2_get_efficiency
 
 @ %def vamp2_get_efficiency
 @ Get event weight and event weight excess.
 <<VAMP2: vamp2: TBP>>=
   procedure :: get_evt_weight => vamp2_get_evt_weight
   procedure :: get_evt_weight_excess => vamp2_get_evt_weight_excess
+<<VAMP2: sub interfaces>>=
+    module function vamp2_get_evt_weight (self) result (evt_weight)
+      class(vamp2_t), intent(in) :: self
+      real(default) :: evt_weight
+    end function vamp2_get_evt_weight
+    module function vamp2_get_evt_weight_excess (self) result (evt_weight_excess)
+      class(vamp2_t), intent(in) :: self
+      real(default) :: evt_weight_excess
+    end function vamp2_get_evt_weight_excess
 <<VAMP2: procedures>>=
-  real(default) function vamp2_get_evt_weight (self) result (evt_weight)
+  module function vamp2_get_evt_weight (self) result (evt_weight)
     class(vamp2_t), intent(in) :: self
+    real(default) :: evt_weight
     evt_weight = self%result%evt_weight
   end function vamp2_get_evt_weight
 
-  real(default) function vamp2_get_evt_weight_excess (self) result (evt_weight_excess)
+  module function vamp2_get_evt_weight_excess (self) result (evt_weight_excess)
     class(vamp2_t), intent(in) :: self
+    real(default) :: evt_weight_excess
     evt_weight_excess = self%result%evt_weight_excess
   end function vamp2_get_evt_weight_excess
 
 @ %def vamp2_get_evt_weight, vamp2_get_evt_weight_excess
 @ Get procedure to retrieve channel-th grid.
 <<VAMP2: vamp2: TBP>>=
   procedure :: get_grid => vamp2_get_grid
+<<VAMP2: sub interfaces>>=
+    module function vamp2_get_grid (self, channel) result (grid)
+      class(vamp2_t), intent(in) :: self
+      type(vegas_grid_t) :: grid
+      integer, intent(in) :: channel
+    end function vamp2_get_grid
 <<VAMP2: procedures>>=
-  type(vegas_grid_t) function vamp2_get_grid (self, channel) result (grid)
+  module function vamp2_get_grid (self, channel) result (grid)
     class(vamp2_t), intent(in) :: self
+    type(vegas_grid_t) :: grid
     integer, intent(in) :: channel
     if (channel < 1 .or. channel > self%config%n_channel) &
          call msg_bug ("VAMP2: vamp2_get_grid: channel index < 1 or > n_channel.")
     grid = self%integrator(channel)%get_grid ()
   end function vamp2_get_grid
 
 @ %def vamp2_get_grid
 @ Adapt. We adapt the weights due the contribution of variances with $\beta >
 0$.
 \begin{equation}
   \alpha_i = \frac{\alpha_i V_i^\beta}{\sum_i \alpha_i V_i^\beta}
 \end{equation}
-If [[n_calls_threshold]] is set, we rescale the weights in such a way, that the
-[[n_calls]] for each channel are greater than [[n_calls_threshold]].
-We calculate the distance of the weights to the [[weight_min]] and reset those weights
-which are less than [[weight_mins]] to this value. The other values are
-accordingly resized to fit the boundary condition of the partition of unity.
+If [[n_calls_threshold]] is set, we rescale the weights in such a way,
+that the [[n_calls]] for each channel are greater than
+[[n_calls_threshold]]. We calculate the distance of the weights to the
+[[weight_min]] and reset those weights which are less than
+[[weight_mins]] to this value. The other values are accordingly
+resized to fit the boundary condition of the partition of unity.
 <<VAMP2: vamp2: TBP>>=
   procedure, private :: adapt_weights => vamp2_adapt_weights
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_adapt_weights (self)
+      class(vamp2_t), intent(inout) :: self
+    end subroutine vamp2_adapt_weights
 <<VAMP2: procedures>>=
-  subroutine vamp2_adapt_weights (self)
+  module subroutine vamp2_adapt_weights (self)
     class(vamp2_t), intent(inout) :: self
     integer :: n_weights_underflow
     real(default) :: weight_min, sum_weights_underflow
     self%weight = self%weight * self%integrator%get_variance ()**self%config%beta
     if (sum (self%weight) == 0) self%weight = real(self%config%n_calls, default)
     if (self%config%n_chains > 0) then
        call chain_weights ()
     end if
     self%weight = self%weight / sum(self%weight)
     if (self%config%n_calls_threshold /= 0) then
        weight_min = real(self%config%n_calls_threshold, default) &
             & / self%config%n_calls
        sum_weights_underflow = sum (self%weight, self%weight < weight_min)
        n_weights_underflow = count (self%weight < weight_min)
        where (self%weight < weight_min)
           self%weight = weight_min
        elsewhere
           self%weight = self%weight * (1. - n_weights_underflow * weight_min) &
                & / (1. - sum_weights_underflow)
        end where
     end if
     call self%set_calls (self%config%n_calls)
   contains
   <<VAMP2: vamp2 adapt weights: procedures>>
   end subroutine vamp2_adapt_weights
 
 @ %def vamp2_adapt_weights
 @ We average the weights over their respective chain members.
 <<VAMP2: vamp2 adapt weights: procedures>>=
   subroutine chain_weights ()
     integer :: ch
     real(default) :: average
     do ch = 1, self%config%n_chains
        average = max (sum (self%weight, self%chain == ch), 0._default)
        if (average /= 0) then
           average = average / count (self%chain == ch)
           where (self%chain == ch)
              self%weight = average
           end where
        end if
     end do
   end subroutine chain_weights
 
 @ %def chain_weights
 <<VAMP2: vamp2: TBP>>=
   procedure, private :: apply_equivalences => vamp2_apply_equivalences
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_apply_equivalences (self)
+      class(vamp2_t), intent(inout) :: self
+    end subroutine vamp2_apply_equivalences
 <<VAMP2: procedures>>=
-  subroutine vamp2_apply_equivalences (self)
+  module subroutine vamp2_apply_equivalences (self)
     class(vamp2_t), intent(inout) :: self
     integer :: ch, ch_src, j, j_src, i_eqv
     real(default), dimension(:, :, :), allocatable :: d
     real(default), dimension(:, :), allocatable :: d_src
     integer, dimension(:), allocatable :: mode, perm
     if (.not. self%equivalences%is_allocated ()) then
        call msg_bug ("VAMP2: vamp2_apply_equivalences: &
             &cannot apply not-allocated equivalences.")
     end if
     allocate (d(self%config%n_bins_max, self%config%n_dim, &
          self%config%n_channel), source=0._default)
     associate (eqv => self%equivalences, nb => self%config%n_bins_max)
       do i_eqv = 1, self%equivalences%n_eqv
          call eqv%get_channels (i_eqv, ch, ch_src)
          d_src = self%integrator(ch_src)%get_distribution ()
          mode = eqv%get_mode (i_eqv)
          perm = eqv%get_perm (i_eqv)
          do j = 1, self%config%n_dim
             select case (mode (j))
             case (VEQ_IDENTITY)
                d(:, j, ch) = d(:, j, ch) + &
                     d_src(:, perm(j))
             case (VEQ_INVERT)
                d(:, j, ch) = d(:, j, ch) + &
                     d_src(nb:1:-1, perm(j))
             case (VEQ_SYMMETRIC)
                d(:, j, ch) = d(:, j, ch) + &
                     d_src(:, perm(j)) / 2. + &
                     d_src(nb:1:-1, perm(j)) / 2.
             case (VEQ_INVARIANT)
                d(:, j, ch) = 1._default
             end select
          end do
       end do
     end associate
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%set_distribution (d(:, :, ch))
     end do
   end subroutine vamp2_apply_equivalences
 
 @ %def vamp2_apply_equivalences
 @ Reset the cumulative result.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: reset_result => vamp2_reset_result
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_reset_result (self)
+      class(vamp2_t), intent(inout) :: self
+    end subroutine vamp2_reset_result
 <<VAMP2: procedures>>=
-  subroutine vamp2_reset_result (self)
+  module subroutine vamp2_reset_result (self)
     class(vamp2_t), intent(inout) :: self
     call self%result%reset ()
   end subroutine vamp2_reset_result
 
 @ %def vamp2_reset_result
 @ Integrate. We integrate each channel separately and combine the results
 \begin{align}
   I & = \sum_i \alpha_i I_i, \\
   \sigma^2 & = \sum_i \alpha_i^2 \sigma^2_i.
 \end{align}
 Although, the (population) variance is given by
 \begin{equation}
   \begin{split}
     \sigma^2 & = \frac{1}{N} \left( \sum_i \alpha_i I^2_i - I^2 \right) \\
     & = \frac{1}{N - 1} \left( \sum_i \left( N_i \sigma^2_i + I^2_i \right) -I^2
     \right) \\
     & = \frac{1}{N - 1} \left( \sum_i \alpha_i \sigma^2_i + \alpha_i I^2_i - I^2
     \right),
   \end{split}
 \end{equation}
 where we used $\sigma^2_i = \frac{1}{N} \left( \langle I^2_i \rangle - \langle
   I_i \rangle^2 \right)$, we use the approximation for numeric stability.
 The population variance relates to sample variance
 \begin{equation}
   s^2 = \frac{n}{n - 1} \sigma^2,
 \end{equation}
 which gives an unbiased error estimate.
 
 Beside those adaption to multichannel, the overall processing of
 [[total_integral]], [[total_sq_integral]] and [[total_variance]] is the same as
 in [[vegas_integrate]].
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: integrate => vamp2_integrate
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_integrate (self, func, rng, iterations, &
+         reset_result, refine_grids, adapt_weights, verbose, result, abserr)
+      class(vamp2_t), intent(inout) :: self
+      class(vamp2_func_t), intent(inout) :: func
+      class(rng_t), intent(inout) :: rng
+      integer, intent(in), optional :: iterations
+      logical, intent(in), optional :: reset_result
+      logical, intent(in), optional :: refine_grids
+      logical, intent(in), optional :: adapt_weights
+      logical, intent(in), optional :: verbose
+      real(default), optional, intent(out) :: result, abserr
+    end subroutine vamp2_integrate
 <<VAMP2: procedures>>=
-  subroutine vamp2_integrate (self, func, rng, iterations, reset_result,&
-       & refine_grids, adapt_weights, verbose, result, abserr)
+  module subroutine vamp2_integrate (self, func, rng, iterations, &
+       reset_result, refine_grids, adapt_weights, verbose, result, abserr)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     integer, intent(in), optional :: iterations
     logical, intent(in), optional :: reset_result
     logical, intent(in), optional :: refine_grids
     logical, intent(in), optional :: adapt_weights
     logical, intent(in), optional :: verbose
     real(default), optional, intent(out) :: result, abserr
     integer :: it, ch
     type(iterator_t) :: channel_iterator
     real(default) :: cumulative_int, cumulative_std
     logical :: opt_reset_result
     logical :: opt_adapt_weights
     logical :: opt_refine_grids
     logical :: opt_verbose
   <<VAMP2: vamp2 integrate: variables>>
     call set_options ()
     if (opt_verbose) then
        call msg_message ("Results: [it, calls, integral, error, chi^2, eff.]")
     end if
     if (opt_reset_result) call self%reset_result ()
   <<VAMP2: vamp2 integrate: init>>
     iteration: do it = 1, self%config%iterations
        call channel_iterator%init (1, self%config%n_channel)
        call self%prepare_integrate_iteration (func)
      <<VAMP2: vamp2 integrate: presampling>>
      channel: do
         <<VAMP2: vamp2 integrate: sampling>>
           call func%set_channel (ch)
           call self%integrator(ch)%integrate ( &
                & func, rng, iterations, refine_grid = .false., verbose = .false.)
         <<VAMP2: vamp2 integrate: callback>>
           call channel_iterator%next_step ()
        end do channel
      <<VAMP2: vamp2 integrate: postsampling>>
        call self%compute_result_and_efficiency ()
        associate (result => self%result)
          cumulative_int = result%sum_int_wgtd / result%sum_wgts
          cumulative_std = sqrt (1 / result%sum_wgts)
          if (opt_verbose) then
             write (msg_buffer, "(I0,1x,I0,1x, 4(E24.16E4,1x))") &
                  & it, self%config%n_calls, cumulative_int, cumulative_std, &
                  & result%chi2, result%efficiency
             call msg_message ()
          end if
        end associate
        if (opt_adapt_weights) then
           call self%adapt_weights ()
        end if
        if (opt_refine_grids) then
           if (self%config%equivalences .and. self%equivalences%is_allocated ()) then
              call self%apply_equivalences ()
           end if
           do ch = 1, self%config%n_channel
              !! When we apply the grid refinement outside of VEGAS, then we do not average over distribution
              !! as VEGAS averaged the distribution internally.
              call self%integrator(ch)%refine (average = .false.)
           end do
        end if
     end do iteration
     if (present (result)) result = cumulative_int
     if (present (abserr)) abserr = abs (cumulative_std)
   contains
   <<VAMP2: vamp2 integrate: procedures>>
   end subroutine vamp2_integrate
 
 @ %def vamp2_integrate
 @ Set optional parameters.
 <<VAMP2: vamp2 integrate: procedures>>=
   subroutine set_options ()
     if (present (iterations)) self%config%iterations = iterations
     opt_reset_result = .true.
     if (present (reset_result)) opt_reset_result = reset_result
     opt_adapt_weights = .true.
     if (present (adapt_weights)) opt_adapt_weights = adapt_weights
     opt_refine_grids = .true.
     if (present (refine_grids)) opt_refine_grids = refine_grids
     opt_verbose = .false.
     if (present (verbose)) opt_verbose = verbose
   end subroutine set_options
 
 @ %def set_options
 @
 We define additional chunks, which we use to insert parallel/MPI code.
 <<VAMP2: vamp2 integrate: variables>>=
 @
 <<VAMP2: vamp2 integrate: init>>=
 @
 <<VAMP2: vamp2 integrate: presampling>>=
 @ Conditional handling.
 We introduce a different behavior for the MPI-/non-MPI variant.
 <<NOMPI: VAMP2: vamp2 integrate: sampling>>=
   if (.not. channel_iterator%is_iterable ()) exit channel
   ch = channel_iterator%get_current ()
 @
 <<VAMP2: vamp2 integrate: callback>>=
 @
 <<VAMP2: vamp2 integrate: postsampling>>=
 @
 <<MPI: VAMP2: vamp2 integrate: variables>>=
   type(request_t) :: request
 @
 Verify that we have an allocated request object, else fallback to simple method.
 Be aware that we keep the fallback silent (!) in order to keep any possible testing output uninterrupted.
 <<MPI: VAMP2: vamp2 integrate: init>>=
   if (.not. allocated (self%request)) then
      call self%allocate_request_by_method ("simple")
   end if
 @
 <<MPI: VAMP2: vamp2 integrate: presampling>>=
   if (self%request%is_master ()) then
      select type (req => self%request)
      type is (request_caller_t)
         request%terminate = .true.
         call update_iter_and_rng (request, channel_iterator, rng)
         !! channel_iter is already drained for master.
         !! Do not descent into channel integration (later on).
         call req%handle_workload ()
      end select
   end if
 @ Conditional handling for the MPI-version of sampling.
 
 We have a request object and an advanced [[channel_iterator]] and have to consider three cases:
 \begin{enumerate}
 \item [[channel_iterator]] drained,
 \item [[request]] terminated,
 \item [[request]] received.
 \end{enumerate}
 When the [[channel_iterator]] is drained by [[update_iter_and_rng]], we send a terminate request to the master
 and await in the next cycle of channel loop the terminated request.
 When the request is terminated, we can gracefully exit the channel loop as [[channel_iterator]] and [[rng]] are already advanced by [[update_iter_and_rng]].
 When we received a genuine request, then, we proceed as prophisied.
 <<MPI: VAMP2: vamp2 integrate: sampling>>=
   select type (req => self%request)
   type is (request_caller_t)
      if (self%request%is_master ()) exit channel
   end select
   call self%request%request_workload (request)
   call update_iter_and_rng (request, channel_iterator, rng)
   if (request%terminate) then
      exit channel
   else if (.not. channel_iterator%is_iterable ()) then
      select type (req => self%request)
      type is (request_caller_t)
         call req%request_terminate ()
         cycle channel
      class is (request_base_t)
         exit channel
      end select
   end if
   if (request%group) call MPI_BARRIER (request%comm)
   ch = request%handler_id
   call self%integrator(ch)%prepare_parallel_integrate(request%comm, &
        duplicate_comm = .false., &
        parallel_mode = request%group)
 @
 <<MPI: VAMP2: vamp2 integrate: callback>>=
   if (request%group_master) then
      if (.not. self%request%is_master ())  &
           call allocate_handler (self%request, ch)
      call self%request%handle_and_release_workload (request)
   else
      call self%request%release_workload (request)
   end if
 @
 <<MPI: VAMP2: vamp2 integrate: postsampling>>=
   call reduce_func_calls (func)
   call self%request%await_handler ()
   call self%request%barrier ()
   if (.not. self%request%is_master ()) cycle
 @
 <<MPI: VAMP2: vamp2 integrate: procedures>>=
   subroutine allocate_handler (req, ch)
     class(request_base_t), intent(inout) :: req
     integer, intent(in) :: ch
     class(request_handler_t), pointer :: vegas_result_handler
     call self%integrator(ch)%allocate_handler (&
          handler_id = ch,&
          handler = vegas_result_handler)
     call req%add_handler (ch, vegas_result_handler)
   end subroutine allocate_handler
 
   !! Advance the random number generator for the skipped channels.
   !!
   !! We set current_channel = request%handler_id, hence, we need to advance
   !! the random number generator until th iterator returns the same channel.
   subroutine update_iter_and_rng (request, iter, rng)
     type(request_t), intent(in) :: request
     type(iterator_t), intent(inout) :: iter
     class(rng_t), intent(inout) :: rng
     advance: do while (iter%is_iterable ())
        !! Advance up to iterator%end when in terminate mode,
        !! else advance until we hit the previous channel (i.e. request%handler_id - 1):
        !! Proof: current_channel <= request%handler_id - 1
        if (.not. request%terminate) then
           if (iter%get_current () >= request%handler_id) &
                exit advance
        end if
        select type (rng)
        type is (rng_stream_t)
           call rng%next_substream ()
        end select
        call iter%next_step ()
     end do advance
   end subroutine update_iter_and_rng
 
   subroutine reduce_func_calls (func)
     class(vamp2_func_t), intent(inout) :: func
     type(MPI_COMM) :: comm
     integer :: root_n_calls
     call self%request%get_external_comm (comm)
     call MPI_reduce (func%n_calls, root_n_calls, 1, MPI_INTEGER, &
          MPI_SUM, 0, comm)
     if (self%request%is_master ()) then
        func%n_calls = root_n_calls
     else
        call func%reset_n_calls ()
     end if
   end subroutine reduce_func_calls
 @
 @ Prepeare current integration's iteration.
 
 Prepare iteration, i.e. provide weights and grids to function object.
+Gfortran 7/8/9 bug, only for the MPI version, has to remain in the
+main module:
 <<VAMP2: vamp2: TBP>>=
   procedure, private :: prepare_integrate_iteration => &
        vamp2_prepare_integrate_iteration
-<<VAMP2: procedures>>=
+<<VAMP2: main procedures>>=
   subroutine vamp2_prepare_integrate_iteration (self, func)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_func_t), intent(inout) :: func
   <<VAMP2: vamp2 prepare integrate iteration: body>>
     call fill_func_with_weights_and_grids (func)
   contains
     subroutine fill_func_with_weights_and_grids (func)
       class(vamp2_func_t), intent(inout) :: func
       integer :: ch
       do ch = 1, self%config%n_channel
          func%wi(ch) = self%weight(ch)
          !! \todo Use pointers instead of a deep copy.
          func%grids(ch) = self%integrator(ch)%get_grid ()
       end do
     end subroutine fill_func_with_weights_and_grids
   <<VAMP2: vamp2 prepare integrate iteration: procedures>>
   end subroutine vamp2_prepare_integrate_iteration
 
 @ %def vamp2_prepare_integrate_iteration
 <<VAMP2: vamp2 prepare integrate iteration: body>>=
 @
 <<VAMP2: vamp2 prepare integrate iteration: procedures>>=
 @
 <<MPI: VAMP2: vamp2 prepare integrate iteration: body>>=
   if (.not. allocated (self%request)) then
      call msg_bug ("VAMP2: prepare integration iteration failed: unallocated request.")
   end if
   call broadcast_weights_and_grids ()
   select type (req => self%request)
   type is (request_simple_t)
      call req%update (self%integrator%is_parallelizable ())
      call init_all_handler (req)
      call call_all_handler (req)
      !! Add all handlers, call all handlers.
   type is (request_caller_t)
      if (req%is_master ()) then
         call req%update_balancer (self%weight, self%integrator%is_parallelizable ())
         call init_all_handler (req)
      else
         call self%request%reset ()
      end if
   class default
      call msg_bug ("VAMP2: prepare integration iteration failed: unknown request type.")
   end select
 @
 <<MPI: VAMP2: vamp2 prepare integrate iteration: procedures>>=
   subroutine broadcast_weights_and_grids ()
     type(vegas_grid_t) :: grid
     type(MPI_COMM) :: comm
     integer :: ch
     call self%request%get_external_comm (comm)
     call MPI_BCAST (self%weight, self%config%n_channel, &
          MPI_DOUBLE_PRECISION, 0, comm)
     do ch = 1, self%config%n_channel
        grid = self%integrator(ch)%get_grid ()
        call grid%broadcast (comm)
        call self%integrator(ch)%set_grid (grid)
     end do
     call self%set_calls (self%config%n_calls)
   end subroutine broadcast_weights_and_grids
 
   subroutine init_all_handler (req)
     class(request_base_t), intent(inout) :: req
     class(request_handler_t), pointer :: vegas_result_handler
     integer :: ch
     !! The master worker needs always all handler (callback objects)
     !! in order to perform the communication to the client handler (callbacks).
     if (.not. req%is_master ()) return
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%allocate_handler (&
             handler_id = ch,&
             handler = vegas_result_handler)
        call req%add_handler (ch, vegas_result_handler)
     end do
   end subroutine init_all_handler
 
   subroutine call_all_handler (req)
     class(request_base_t), intent(inout) :: req
     integer :: ch
     if (.not. req%is_master ()) return
     do ch = 1, self%config%n_channel
        select type (req)
        type is (request_simple_t)
           call req%call_handler (handler_id = ch, &
                source_rank = req%get_request_master (ch))
        end select
     end do
   end subroutine call_all_handler
 @
 @ Compute the result and efficiency of the current status of the integrator.
 <<VAMP2: vamp2: TBP>>=
   procedure, private :: compute_result_and_efficiency => &
        vamp2_compute_result_and_efficiency
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_compute_result_and_efficiency (self)
+      class(vamp2_t), intent(inout) :: self
+    end subroutine vamp2_compute_result_and_efficiency
 <<VAMP2: procedures>>=
-  subroutine vamp2_compute_result_and_efficiency (self)
+  module subroutine vamp2_compute_result_and_efficiency (self)
     class(vamp2_t), intent(inout) :: self
     real(default) :: total_integral, total_variance
     real(default) :: max_abs_f_pos, max_abs_f_neg, &
          sum_abs_f_pos, sum_abs_f_neg
     call compute_integral_and_variance (total_integral, total_variance)
     call self%result%update (total_integral, total_variance)
     call compute_efficiency (max_pos = max_abs_f_pos, max_neg = max_abs_f_neg, &
          sum_pos = sum_abs_f_pos, sum_neg = sum_abs_f_neg)
     !! Do not average of number of calls, we have already averaged the efficiencies of all channels.
     call self%result%update_efficiency (n_calls  = 1, &
          max_pos = max_abs_f_pos, max_neg = max_abs_f_neg, &
          sum_pos = sum_abs_f_pos, sum_neg = sum_abs_f_neg)
   contains
     subroutine compute_integral_and_variance (integral, variance)
       real(default), intent(out) :: integral, variance
       real(default) :: sq_integral
       integral = dot_product (self%weight, self%integrator%get_integral ())
       sq_integral = dot_product (self%weight, self%integrator%get_integral ()**2)
       variance = self%config%n_calls * dot_product (self%weight**2, self%integrator%get_variance ())
       variance = sqrt (variance + sq_integral)
       variance = 1._default / self%config%n_calls * &
            & (variance + integral) * (variance - integral)
     end subroutine compute_integral_and_variance
 
     !> We compute the weight-averaged sum and maximum of the channel (integration) weights \f$w_{i,c}\f$.
     !!
     !! The averaged integration weight and maximum are
     !! \f[
     !!  \langle w \rangle = \sum_i \alpha_i \frac{\sum_j w_{i, j}}{N_i},
     !! \f]
     !! \f[
     !!  \langle \max w \rangle = \sum_i \alpha_i |\max_j w_{i, j}|.
     !! \f]
     subroutine compute_efficiency (max_pos, max_neg, &
          sum_pos, sum_neg)
       real(default), intent(out) :: max_pos, max_neg
       real(default), intent(out) :: sum_pos, sum_neg
       max_abs_f_pos = dot_product (self%weight, self%integrator%get_max_abs_f_pos ())
       max_abs_f_neg = dot_product (self%weight, self%integrator%get_max_abs_f_neg ())
       sum_abs_f_pos = dot_product (self%weight, &
            self%integrator%get_sum_abs_f_pos () / self%integrator%get_calls ())
       sum_abs_f_neg = dot_product (self%weight, &
            self%integrator%get_sum_abs_f_neg () / self%integrator%get_calls ())
     end subroutine compute_efficiency
   end subroutine vamp2_compute_result_and_efficiency
 
 @ %def vamp2_compute_result_and_efficiency
-@ Generate event from multi-channel weight $w(x) = f(x) / g(x)$.
+@ 
+Generate event from multi-channel weight $w(x) = f(x) / g(x)$.
 
-We select a channel using the a-priori weights and $f_{i}^{\text{max}}$, to
-flatten possible unbalanced channel weight(s).
+We select a channel using the a-priori weights and
+$f_{i}^{\text{max}}$, to flatten possible unbalanced channel
+weight(s).
 
-An additional rescale factor [[opt_event_rescale]] is applied to [[f_max]], iff set.
+An additional rescale factor [[opt_event_rescale]] is applied to
+[[f_max]], iff set.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: generate_weighted => vamp2_generate_weighted_event
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_generate_weighted_event (self, func, rng, x)
+      class(vamp2_t), intent(inout) :: self
+      class(vamp2_func_t), intent(inout) :: func
+      class(rng_t), intent(inout) :: rng
+      real(default), dimension(self%config%n_dim), intent(out)  :: x
+    end subroutine vamp2_generate_weighted_event
 <<VAMP2: procedures>>=
-  subroutine vamp2_generate_weighted_event (&
-       self, func, rng, x)
+  module subroutine vamp2_generate_weighted_event (self, func, rng, x)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out)  :: x
     integer :: ch, i
     real(default) :: r
     if (.not. self%event_prepared) then
        call prepare_event ()
     end if
     call rng%generate (r)
     nchannel: do ch = 1, self%config%n_channel
        r = r - self%event_weight(ch)
        if (r <= 0._default) exit nchannel
     end do nchannel
     ch = min (ch, self%config%n_channel)
     call func%set_channel (ch)
     call self%integrator(ch)%generate_weighted (func, rng, x)
     ! Norm weight by f_max, hidden in event_weight(ch), else by 1
     self%result%evt_weight = self%integrator(ch)%get_evt_weight () &
          * self%weight(ch) / self%event_weight(ch)
   contains
   <<VAMP2: vamp2 generate event: procedures>>
   end subroutine vamp2_generate_weighted_event
 
 @ %def vamp2_generate_weighted_event
 @ Generate unweighted events.
 
-After selecting a channel $ch$ by the inversion method using a random number $r \in [0, 1]$
+After selecting a channel $ch$ by the inversion method using a random
+number $r \in [0, 1]$
 \begin{align}
   F^{-1}(r) := \operatorname{inf} \{ c \in \{1, \dots, N_c\} | F(c) \geq r \},
   F(c) = \sum_{c^\prime \leq c} \alpha_c^\prime,
 \end{align}
 we try for an event from the previously selected channel.
 If the event is rejected, we also reject the selected channel.
 
-The inversion method is implemented as a loop over the channel weights \(\alpha_i\) until \(\sum_{c}^{c^prime} \alpha_c - r \leq 0\), the last value of the loop index [[ch]] is then \(c^\prime\).
+The inversion method is implemented as a loop over the channel weights
+\(\alpha_i\) until \(\sum_{c}^{c^prime} \alpha_c - r \leq 0\), the
+last value of the loop index [[ch]] is then \(c^\prime\).
 <<VAMP2: vamp2: TBP>>=
-  procedure, public :: generate_unweighted => vamp2_generate_unweighted_event
+  procedure, public :: generate_unweighted => &
+       vamp2_generate_unweighted_event
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_generate_unweighted_event (self, func, rng, &
+         x, opt_event_rescale)
+      class(vamp2_t), intent(inout) :: self
+      class(vamp2_func_t), intent(inout) :: func
+      class(rng_t), intent(inout) :: rng
+      real(default), dimension(self%config%n_dim), intent(out)  :: x
+      real(default), intent(in), optional :: opt_event_rescale
+    end subroutine vamp2_generate_unweighted_event
 <<VAMP2: procedures>>=
-  subroutine vamp2_generate_unweighted_event ( &
-       & self, func, rng, x, opt_event_rescale)
+  module subroutine vamp2_generate_unweighted_event (self, func, rng, &
+       x, opt_event_rescale)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out)  :: x
     real(default), intent(in), optional :: opt_event_rescale
     integer :: ch, i
     real(default) :: r, max_abs_f, event_rescale
     event_rescale = 1._default
     if (present (opt_event_rescale)) then
        event_rescale = opt_event_rescale
     end if
     if (.not. self%event_prepared) then
        call prepare_event ()
     end if
     generate: do
        call rng%generate (r)
        nchannel: do ch = 1, self%config%n_channel
           r = r - self%event_weight(ch)
           if (r <= 0._default) exit nchannel
        end do nchannel
        ch = min (ch, self%config%n_channel)
        call func%set_channel (ch)
        call self%integrator(ch)%generate_weighted (func, rng, x)
        self%result%evt_weight = self%integrator(ch)%get_evt_weight ()
        max_abs_f = merge ( &
             self%integrator(ch)%get_max_abs_f_pos (), &
             self%integrator(ch)%get_max_abs_f_neg (), &
             self%result%evt_weight > 0.)
        self%result%evt_weight_excess = 0._default
        if (self%result%evt_weight > max_abs_f) then
           self%result%evt_weight_excess = self%result%evt_weight / max_abs_f - 1._default
           exit generate
        end if
        call rng%generate (r)
        ! Do not use division, because max_abs_f could be zero.
        if (event_rescale * max_abs_f * r <= abs(self%result%evt_weight)) then
           exit generate
        end if
     end do generate
   contains
   <<VAMP2: vamp2 generate event: procedures>>
   end subroutine vamp2_generate_unweighted_event
 
 @ %def vamp2_generate_event
 Prepare event generation. We have to set the channel weights and the grids for
 the integrand's object.
 
 We use an ansatz proposed by T. Ohl in the original VAMP code where we do not have to accept on
 \begin{equation*}
    \frac{w_i(x)}{\sum_i \alpha_i \operatorname*{max}_{x} w_i(x)},
 \end{equation*}
 after we have selected a channel by the weights $\alpha_i$. But rather, we use a
 more efficient way where we rescale the channel weights $\alpha_i$
 \begin{equation*}
    \alpha_i \rightarrow \alpha_i \frac{\operatorname*{max}_x w_i(x)}{\sum_i \alpha_i \operatorname*{max}_{x} w_i(x)}.
 \end{equation*}
 The overall magic is to insert a "1" and to  move the uneasy part into the channel selection,
 such that we can generate events likewise in the single channel mode.
 We generate an unweighted event by
 \begin{equation*}
    \frac{w_i(x)}{\operatorname*{max}_{x} w_i{x}},
 \end{equation*}
 after we have selected a channel by the rescaled event channel weights.
 The overall normalization $\operatorname*{max}_{i, x}$ is not needed because we normalize the event channel weights
 to one and therefore the overall normalization cancels.
 <<VAMP2: vamp2 generate event: procedures>>=
   subroutine prepare_event ()
     integer :: i
     self%event_prepared = .false.
     do i = 1, self%config%n_channel
        func%wi(i) = self%weight(i)
        func%grids(i) = self%integrator(i)%get_grid ()
     end do
     if (any (self%integrator%get_max_abs_f () > 0)) then
        self%event_weight = self%weight * self%integrator%get_max_abs_f ()
     else
        self%event_weight = self%weight
     end if
     self%event_weight = self%event_weight / sum (self%event_weight)
     self%event_prepared = .true.
   end subroutine prepare_event
 
 @ %def prepare_event
 @ Write grids to unit.
 <<VAMP2: parameters>>=
   character(len=*), parameter, private :: &
      descr_fmt =         "(1X,A)", &
      integer_fmt =       "(1X,A18,1X,I15)", &
      integer_array_fmt = "(1X,I18,1X,I15)", &
      logical_fmt =       "(1X,A18,1X,L1)", &
      double_fmt =        "(1X,A18,1X,E24.16E4)", &
      double_array_fmt =  "(1X,I18,1X,E24.16E4)", &
      double_array_pac_fmt = "(1X,I18,1X,E16.8E4)", &
      double_array2_fmt = "(1X,2(1X,I8),1X,E24.16E4)", &
      double_array2_pac_fmt = "(1X,2(1X,I8),1X,E16.8E4)"
 @ %def descr_fmt integer_fmt integer_array_fmt logical_fmt
 @ %def double_fmt double_array_fmt double_array2_fmt
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: write_grids => vamp2_write_grids
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_write_grids (self, unit)
+      class(vamp2_t), intent(in) :: self
+      integer, intent(in), optional :: unit
+    end subroutine vamp2_write_grids
 <<VAMP2: procedures>>=
-  subroutine vamp2_write_grids (self, unit)
+  module subroutine vamp2_write_grids (self, unit)
     class(vamp2_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer :: u
     integer :: ch
     u = given_output_unit (unit)
     write (u, descr_fmt) "begin type(vamp2_t)"
     write (u, integer_fmt) "n_channel =", self%config%n_channel
     write (u, integer_fmt) "n_dim =", self%config%n_dim
     write (u, integer_fmt) "n_calls_min_ch =", self%config%n_calls_min_per_channel
     write (u, integer_fmt) "n_calls_thres =", self%config%n_calls_threshold
     write (u, integer_fmt) "n_chains =", self%config%n_chains
     write (u, logical_fmt) "stratified =", self%config%stratified
     write (u, double_fmt) "alpha =", self%config%alpha
     write (u, double_fmt) "beta =", self%config%beta
     write (u, integer_fmt) "n_bins_max =", self%config%n_bins_max
     write (u, integer_fmt) "iterations =", self%config%iterations
     write (u, integer_fmt) "n_calls =", self%config%n_calls
     write (u, integer_fmt) "it_start =", self%result%it_start
     write (u, integer_fmt) "it_num =", self%result%it_num
     write (u, integer_fmt) "samples =", self%result%samples
     write (u, double_fmt) "sum_int_wgtd =", self%result%sum_int_wgtd
     write (u, double_fmt) "sum_wgts =", self%result%sum_wgts
     write (u, double_fmt) "sum_chi =", self%result%sum_chi
     write (u, double_fmt) "chi2 =", self%result%chi2
     write (u, double_fmt) "efficiency =", self%result%efficiency
     write (u, double_fmt) "efficiency_pos =", self%result%efficiency_pos
     write (u, double_fmt) "efficiency_neg =", self%result%efficiency_neg
     write (u, double_fmt) "max_abs_f =", self%result%max_abs_f
     write (u, double_fmt) "max_abs_f_pos =", self%result%max_abs_f_pos
     write (u, double_fmt) "max_abs_f_neg =", self%result%max_abs_f_neg
     write (u, double_fmt) "result =", self%result%result
     write (u, double_fmt) "std =", self%result%std
     write (u, descr_fmt) "begin weight"
     do ch = 1, self%config%n_channel
        write (u, double_array_fmt) ch, self%weight(ch)
     end do
     write (u, descr_fmt) "end weight"
     if (self%config%n_chains > 0) then
        write (u, descr_fmt) "begin chain"
        do ch = 1, self%config%n_channel
           write (u, integer_array_fmt) ch, self%chain(ch)
        end do
        write (u, descr_fmt) "end chain"
     end if
     write (u, descr_fmt) "begin integrator"
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%write_grid (unit)
     end do
     write (u, descr_fmt) "end integrator"
     write (u, descr_fmt) "end type(vamp2_t)"
   end subroutine vamp2_write_grids
 
 @ %def vamp2_write_grids
 @ Read grids from unit.
 <<VAMP2: vamp2: TBP>>=
   procedure, public :: read_grids => vamp2_read_grids
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_read_grids (self, unit)
+      class(vamp2_t), intent(out) :: self
+      integer, intent(in), optional :: unit
+    end subroutine vamp2_read_grids
 <<VAMP2: procedures>>=
-  subroutine vamp2_read_grids (self, unit)
+  module subroutine vamp2_read_grids (self, unit)
     class(vamp2_t), intent(out) :: self
     integer, intent(in), optional :: unit
     integer :: u
     integer :: ibuffer, jbuffer, ch
     character(len=80) :: buffer
     read (unit, descr_fmt) buffer
     read (unit, integer_fmt) buffer, ibuffer
     read (unit, integer_fmt) buffer, jbuffer
     select type (self)
     type is (vamp2_t)
        self = vamp2_t (n_channel = ibuffer, n_dim = jbuffer)
     end select
     read (unit, integer_fmt) buffer, self%config%n_calls_min_per_channel
     read (unit, integer_fmt) buffer, self%config%n_calls_threshold
     read (unit, integer_fmt) buffer, self%config%n_chains
     read (unit, logical_fmt) buffer, self%config%stratified
     read (unit, double_fmt) buffer, self%config%alpha
     read (unit, double_fmt) buffer, self%config%beta
     read (unit, integer_fmt) buffer, self%config%n_bins_max
     read (unit, integer_fmt) buffer, self%config%iterations
     read (unit, integer_fmt) buffer, self%config%n_calls
     read (unit, integer_fmt) buffer, self%result%it_start
     read (unit, integer_fmt) buffer, self%result%it_num
     read (unit, integer_fmt) buffer, self%result%samples
     read (unit, double_fmt) buffer, self%result%sum_int_wgtd
     read (unit, double_fmt) buffer, self%result%sum_wgts
     read (unit, double_fmt) buffer, self%result%sum_chi
     read (unit, double_fmt) buffer, self%result%chi2
     read (unit, double_fmt) buffer, self%result%efficiency
     read (unit, double_fmt) buffer, self%result%efficiency_pos
     read (unit, double_fmt) buffer, self%result%efficiency_neg
     read (unit, double_fmt) buffer, self%result%max_abs_f
     read (unit, double_fmt) buffer, self%result%max_abs_f_pos
     read (unit, double_fmt) buffer, self%result%max_abs_f_neg
     read (unit, double_fmt) buffer, self%result%result
     read (unit, double_fmt) buffer, self%result%std
     read (unit, descr_fmt) buffer
     do ch = 1, self%config%n_channel
        read (unit, double_array_fmt) ibuffer, self%weight(ch)
     end do
     read (unit, descr_fmt) buffer
     if (self%config%n_chains > 0) then
        read (unit, descr_fmt) buffer
        do ch = 1, self%config%n_channel
           read (unit, integer_array_fmt) ibuffer, self%chain(ch)
        end do
        read (unit, descr_fmt) buffer
     end if
     read (unit, descr_fmt) buffer
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%read_grid (unit)
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
   end subroutine vamp2_read_grids
 
 @ %def vamp2_read_grids
 @ Read and write grids from an unformatted file.
 <<VAMP2: vamp2: TBP>>=
 procedure :: write_binary_grids => vamp2_write_binary_grids
 procedure :: read_binary_grids => vamp2_read_binary_grids
+<<VAMP2: sub interfaces>>=
+    module subroutine vamp2_write_binary_grids (self, unit)
+      class(vamp2_t), intent(in) :: self
+      integer, intent(in) :: unit
+    end subroutine vamp2_write_binary_grids
+    module subroutine vamp2_read_binary_grids (self, unit)
+      class(vamp2_t), intent(out) :: self
+      integer, intent(in) :: unit
+    end subroutine vamp2_read_binary_grids
 <<VAMP2: procedures>>=
-  subroutine vamp2_write_binary_grids (self, unit)
+  module subroutine vamp2_write_binary_grids (self, unit)
     class(vamp2_t), intent(in) :: self
     integer, intent(in) :: unit
     integer :: ch
     write (unit)
     write (unit) self%config%n_channel
     write (unit) self%config%n_dim
     write (unit) self%config%n_calls_min_per_channel
     write (unit) self%config%n_calls_threshold
     write (unit) self%config%n_chains
     write (unit) self%config%stratified
     write (unit) self%config%alpha
     write (unit) self%config%beta
     write (unit) self%config%n_bins_max
     write (unit) self%config%iterations
     write (unit) self%config%n_calls
     write (unit) self%result%it_start
     write (unit) self%result%it_num
     write (unit) self%result%samples
     write (unit) self%result%sum_int_wgtd
     write (unit) self%result%sum_wgts
     write (unit) self%result%sum_chi
     write (unit) self%result%chi2
     write (unit) self%result%efficiency
     write (unit) self%result%efficiency_pos
     write (unit) self%result%efficiency_neg
     write (unit) self%result%max_abs_f
     write (unit) self%result%max_abs_f_pos
     write (unit) self%result%max_abs_f_neg
     write (unit) self%result%result
     write (unit) self%result%std
     do ch = 1, self%config%n_channel
        write (unit) ch, self%weight(ch)
     end do
     if (self%config%n_chains > 0) then
        do ch = 1, self%config%n_channel
           write (unit) ch, self%chain(ch)
        end do
     end if
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%write_binary_grid (unit)
     end do
   end subroutine vamp2_write_binary_grids
 
-  subroutine vamp2_read_binary_grids (self, unit)
+  module subroutine vamp2_read_binary_grids (self, unit)
     class(vamp2_t), intent(out) :: self
     integer, intent(in) :: unit
     integer :: ch, ibuffer, jbuffer
     read (unit)
     read (unit) ibuffer
     read (unit) jbuffer
     select type (self)
     type is (vamp2_t)
        self = vamp2_t (n_channel = ibuffer, n_dim = jbuffer)
     end select
     read (unit) self%config%n_calls_min_per_channel
     read (unit) self%config%n_calls_threshold
     read (unit) self%config%n_chains
     read (unit) self%config%stratified
     read (unit) self%config%alpha
     read (unit) self%config%beta
     read (unit) self%config%n_bins_max
     read (unit) self%config%iterations
     read (unit) self%config%n_calls
     read (unit) self%result%it_start
     read (unit) self%result%it_num
     read (unit) self%result%samples
     read (unit) self%result%sum_int_wgtd
     read (unit) self%result%sum_wgts
     read (unit) self%result%sum_chi
     read (unit) self%result%chi2
     read (unit) self%result%efficiency
     read (unit) self%result%efficiency_pos
     read (unit) self%result%efficiency_neg
     read (unit) self%result%max_abs_f
     read (unit) self%result%max_abs_f_pos
     read (unit) self%result%max_abs_f_neg
     read (unit) self%result%result
     read (unit) self%result%std
     do ch = 1, self%config%n_channel
        read (unit) ibuffer, self%weight(ch)
     end do
     if (self%config%n_chains > 0) then
        do ch = 1, self%config%n_channel
           read (unit) ibuffer, self%chain(ch)
        end do
     end if
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%read_binary_grid (unit)
     end do
   end subroutine vamp2_read_binary_grids
 
 @ %def vamp2_write_binary_grids, vamp2_read_binary_grids
 @
 \section{Unit tests}
 \label{sec:unit-tests}
 
 Test module, followed by the corresponding implementation module.
 
 <<[[vamp2_ut.f90]]>>=
 <<File header>>
 
 module vamp2_ut
   use unit_tests
   use vamp2_uti
 
 <<Standard module head>>
 
 <<VAMP2: public test>>
 
 contains
 <<VAMP2: test driver>>
 end module vamp2_ut
 @ %def vamp2_ut
 @
 <<[[vamp2_uti.f90]]>>=
 <<File header>>
 
 module vamp2_uti
+
 <<Use kinds>>
   use io_units
   use constants, only: pi
   use numeric_utils, only: nearly_equal
   use format_defs, only: FMT_12
   use rng_base
   use rng_stream
   use vegas, only: vegas_func_t, vegas_grid_t, operator(==)
   use vamp2
 
 <<Standard module head>>
 
 <<VAMP2: test declaration>>
 
 <<VAMP2: test types>>
 
 contains
 <<VAMP2: tests>>
 end module vamp2_uti
 @ %def vamp2_uti
 @ API: driver for the unit tests below.
 <<VAMP2: public test>>=
   public :: vamp2_test
 <<VAMP2: test driver>>=
   subroutine vamp2_test (u, results)
     integer, intent(in) :: u
     type(test_results_t), intent(inout) :: results
     <<VAMP2: execute tests>>
   end subroutine vamp2_test
 
 @ %def vamp2_test
 @
 \subsubsection{Test function}
 \label{sec:test-function}
 
 We use the example from the Monte Carlo Examples of the GSL library
 \begin{equation}
   I = \int_{-pi}^{+pi} {dk_x/(2 pi)}  \int_{-pi}^{+pi} {dk_y/(2 pi)}  \int_{-pi}^{+pi} {dk_z/(2 pi)}  1 / (1 - cos(k_x)cos(k_y)cos(k_z)).
 \end{equation}
 The integral is reduced to region (0,0,0) $\rightarrow$ ($\pi$, $\pi$, $\pi$) and multiplied by 8.
 <<VAMP2: test types>>=
    type, extends (vamp2_func_t) :: vamp2_test_func_t
      !
    contains
    <<VAMP2: vamp2 test func: TBP>>
 end type vamp2_test_func_t
 
 @ %def vegas_test_func_t
 @
 <<VAMP2: vamp2 test func: TBP>>=
   procedure, public :: evaluate_maps => vamp2_test_func_evaluate_maps
 <<VAMP2: tests>>=
   subroutine vamp2_test_func_evaluate_maps (self, x)
     class(vamp2_test_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     self%xi(:, 1) = x
     self%det(1) = 1
     self%valid_x = .true.
   end subroutine vamp2_test_func_evaluate_maps
 
 @ %def vamp2_test_func_evaluate_maps
 @ Evaluate the integrand.
 <<VAMP2: vamp2 test func: TBP>>=
   procedure, public :: evaluate_func => vamp2_test_func_evaluate
 <<VAMP2: tests>>=
   real(default) function vamp2_test_func_evaluate (self, x) result (f)
     class(vamp2_test_func_t), intent(in) :: self
     real(default), dimension(:), intent(in) :: x
     f = 1.0 / (pi**3)
     f = f / ( 1.0 - cos (x(1)) * cos (x(2)) * cos (x(3)))
   end function vamp2_test_func_evaluate
 
 @ %def vamp2_test_func_evaluate
 @ The second test function implements
 \begin{equation}
   f(\vec{x}) = 4 \sin^{2}(\pi x_{1})\sin^{2}(\pi x_{2}) + 2\sin^2(\pi v),
 \end{equation}
 where
 \begin{align}
   x = u^{v} & y = u^{1 - v} \\
   u = xy & v = \frac{1}{2} \left( 1 + \frac{\log(x/y}{\log(xy)} \right).
 \end{align}
 The jacobian is $\frac{\partial (x, y)}{\partial (u, v)}$.
 
 <<VAMP2: test types>>=
   type, extends(vamp2_func_t) :: vamp2_test_func_2_t
      !
    contains
    <<VAMP2: vamp2 test func 2: TBP>>
   end type vamp2_test_func_2_t
 
 @ %def vamp2_test_func_2_t
 @ Evaluate maps.
 <<VAMP2: vamp2 test func 2: TBP>>=
   procedure :: evaluate_maps => vamp2_test_func_2_evaluate_maps
 <<VAMP2: tests>>=
   subroutine vamp2_test_func_2_evaluate_maps (self, x)
     class(vamp2_test_func_2_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     select case (self%current_channel)
     case (1)
        self%xi(:, 1) = x
        self%xi(1, 2) = x(1) * x(2)
        self%xi(2, 2) = 0.5 * ( 1. + log(x(1) / x(2)) / log(x(1) * x(2)))
     case (2)
        self%xi(1, 1) = x(1)**x(2)
        self%xi(2, 1) = x(1)**(1. - x(2))
        self%xi(:, 2) = x
     end select
     self%det(1) = 1.
     self%det(2) = abs (log(self%xi(1, 2)))
     self%valid_x = .true.
   end subroutine vamp2_test_func_2_evaluate_maps
 
 @ %def vamp2_test_func_2_evaluate_maps
 @ Evaluate func.
 <<VAMP2: vamp2 test func 2: TBP>>=
   procedure :: evaluate_func => vamp2_test_func_2_evaluate_func
 <<VAMP2: tests>>=
   real(default) function vamp2_test_func_2_evaluate_func (self, x) result (f)
     class(vamp2_test_func_2_t), intent(in) :: self
     real(default), dimension(:), intent(in) :: x
     f = 4. * sin(pi * self%xi(1, 1))**2 * sin(pi * self%xi(2, 1))**2 &
          + 2. * sin(pi * self%xi(2, 2))**2
   end function vamp2_test_func_2_evaluate_func
 
 @ %def vamp2_test_func_2_evaluate_func
 @ The third test function implements
 \begin{equation}
   f(\vec{x}) = 5 x_{1}^4 + 5 (1 - x_{1})^4,
 \end{equation}
 where
 \begin{equation}
   x_1 = u^{1 / 5} \quad \vee \quad  x_1 =  1 - v^{1 / 5}
 \end{equation}
 The jacobians are $\frac{\partial x_1}{\partial u} = \frac{1}{5}
 u^{-\frac{4}{5}}$ and $\frac{\partial x_1}{\partial v} = \frac{1}{5}
 v^{-\frac{4}{5}}$.
 <<VAMP2: test types>>=
   type, extends(vamp2_func_t) :: vamp2_test_func_3_t
      !
    contains
    <<VAMP2: vamp2 test func 3: TBP>>
   end type vamp2_test_func_3_t
 
 @ %def vamp2_test_func_3_t
 @ Evaluate maps.
 <<VAMP2: vamp2 test func 3: TBP>>=
   procedure :: evaluate_maps => vamp2_test_func_3_evaluate_maps
 <<VAMP2: tests>>=
   subroutine vamp2_test_func_3_evaluate_maps (self, x)
     class(vamp2_test_func_3_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     real(default) :: u, v, xx
     select case (self%current_channel)
     case (1)
        u = x(1)
        xx = u**0.2_default
        v = (1 - xx)**5._default
     case (2)
        v = x(1)
        xx = 1 - v**0.2_default
        u = xx**5._default
     end select
     self%det(1) = 0.2_default * u**(-0.8_default)
     self%det(2) = 0.2_default * v**(-0.8_default)
     self%xi(:, 1) = [u]
     self%xi(:, 2) = [v]
     self%valid_x = .true.
   end subroutine vamp2_test_func_3_evaluate_maps
 
 @ %def vamp2_test_func_3_evaluate_maps
 @ Evaluate func.
 <<VAMP2: vamp2 test func 3: TBP>>=
   procedure :: evaluate_func => vamp2_test_func_3_evaluate_func
 <<VAMP2: tests>>=
   real(default) function vamp2_test_func_3_evaluate_func (self, x) result (f)
     class(vamp2_test_func_3_t), intent(in) :: self
     real(default), dimension(:), intent(in) :: x
     real(default) :: xx
     select case (self%current_channel)
     case (1)
        xx = x(1)**0.2_default
     case (2)
        xx = 1 - x(1)**0.2_default
     end select
     f = 5 * xx**4 + 5 * (1 - xx)**4
   end function vamp2_test_func_3_evaluate_func
 
 @ %def vamp2_test_func_3_evaluate_func
 @
 \subsubsection{MC Integrator check}
 \label{sec:mc-integrator-check}
 
 We reproduce the first test case of VEGAS.
 Initialise the VAMP2 MC integrator and call to [[vamp2_init_grid]] for the initialisation of the grid.
 <<VAMP2: execute tests>>=
   call test (vamp2_1, "vamp2_1", "VAMP2 initialisation and&
        & grid preparation", u, results)
 <<VAMP2: test declaration>>=
   public :: vamp2_1
 <<VAMP2: tests>>=
   subroutine vamp2_1 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower = 0., &
          x_upper = pi
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vamp2_1"
     write (u, "(A)") "*   Purpose: initialise the VAMP2 MC integrator and the grid"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 1 and n_dim = 3"
     write (u, "(A)")
 
     allocate (vamp2_test_func_t :: func)
     call func%init (n_dim = 3, n_channel = 1)
     mc_integrator = vamp2_t (1, 3)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_1
 
 @ %def vamp2_1
 @ Integrate a function with two-dimensional argument and two channels.
 <<VAMP2: execute tests>>=
   call test (vamp2_2, "vamp2_2", "VAMP2 intgeration of two-dimensional &
        & function with two channels", u, results)
 <<VAMP2: test declaration>>=
   public :: vamp2_2
 <<VAMP2: tests>>=
   subroutine vamp2_2 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(2), parameter :: x_lower = 0., &
          x_upper = 1.
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vamp2_2"
     write (u, "(A)") "*   Purpose:  intgeration of two-dimensional &
        & function with two channels"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 1 and n_dim = 3"
     write (u, "(A)")
 
     allocate (vamp2_test_func_2_t :: func)
     call func%init (n_dim = 2, n_channel = 2)
     mc_integrator = vamp2_t (2, 2)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (1000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, verbose = .true., result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (200)
     call mc_integrator%integrate (func, rng, 3, verbose = .true., result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_2
 
 @ %def vamp2_2
 @ Integrate a function with two-dimensional argument and two channels.
 <<VAMP2: execute tests>>=
   call test (vamp2_3, "vamp2_3", "VAMP2 intgeration of two-dimensional &
        & function with two channels", u, results)
 <<VAMP2: test declaration>>=
   public :: vamp2_3
 <<VAMP2: tests>>=
   subroutine vamp2_3 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(2), parameter :: x_lower = 0., &
          x_upper = 1.
     real(default) :: result, abserr
     integer :: unit
 
     write (u, "(A)") "* Test output: vamp2_3"
     write (u, "(A)") "*   Purpose:  intgeration of two-dimensional &
        & function with two channels"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 1 and n_dim = 3"
     write (u, "(A)")
 
     allocate (vamp2_test_func_2_t :: func)
     call func%init (n_dim = 2, n_channel = 2)
     mc_integrator = vamp2_t (2, 2)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 20000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (20000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 20000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Write grid to file vamp2_3.grids"
     write (u, "(A)")
 
     unit = free_unit ()
     open (unit, file = "vamp2_3.grids", &
          action = "write", status = "replace")
     call mc_integrator%write_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Read grid from file vamp2_3.grids"
     write (u, "(A)")
 
     call mc_integrator%final ()
 
     unit = free_unit ()
     open (unit, file = "vamp2_3.grids", &
          action = "read", status = "old")
     call mc_integrator%read_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 5000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (5000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_3
 
 @ %def vamp2_3
 @ Integrate a function with two-dimensional argument and two channels.
 Use chained weights, although we average over each weight itself.
 <<VAMP2: execute tests>>=
   call test (vamp2_4, "vamp2_4", "VAMP2 intgeration of two-dimensional &
        & function with two channels with chains", u, results)
 <<VAMP2: test declaration>>=
   public :: vamp2_4
 <<VAMP2: tests>>=
   subroutine vamp2_4 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(2), parameter :: x_lower = 0., &
          x_upper = 1.
     real(default) :: result, abserr
     integer :: unit
 
     write (u, "(A)") "* Test output: vamp2_4"
     write (u, "(A)") "*   Purpose:  intgeration of two-dimensional &
        & function with two channels with chains"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 2 and n_dim = 2"
     write (u, "(A)")
 
     allocate (vamp2_test_func_2_t :: func)
     call func%init (n_dim = 2, n_channel = 2)
     mc_integrator = vamp2_t (2, 2)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 20000 and set chains"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (20000)
     call mc_integrator%set_chain (2, [1, 2])
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Write grid to file vamp2_4.grids"
     write (u, "(A)")
 
     unit = free_unit ()
     open (unit, file = "vamp2_4.grids", &
          action = "write", status = "replace")
     call mc_integrator%write_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Read grid from file vamp2_4.grids"
     write (u, "(A)")
 
     call mc_integrator%final ()
 
     unit = free_unit ()
     open (unit, file = "vamp2_4.grids", &
          action = "read", status = "old")
     call mc_integrator%read_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 5000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (5000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_4
 
 @ %def vamp2_4
 @
 <<VAMP2: execute tests>>=
   call test (vamp2_5, "vamp2_5", "VAMP2 intgeration of two-dimensional &
        & function with two channels with equivalences", u, results)
 <<VAMP2: test declaration>>=
   public :: vamp2_5
 <<VAMP2: tests>>=
   subroutine vamp2_5 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(1), parameter :: x_lower = 0., &
          x_upper = 1.
     real(default) :: result, abserr
     integer :: unit
     type(vamp2_config_t) :: config
     type(vamp2_equivalences_t) :: eqv
     type(vegas_grid_t), dimension(2) :: grid
 
     write (u, "(A)") "* Test output: vamp2_5"
     write (u, "(A)") "*   Purpose:  intgeration of two-dimensional &
        & function with two channels with equivalences"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 2 and n_dim = 1"
     write (u, "(A)")
 
     allocate (vamp2_test_func_3_t :: func)
     call func%init (n_dim = 1, n_channel = 2)
     config%equivalences = .true.
     mc_integrator = vamp2_t (n_channel = 2, n_dim = 1)
     call mc_integrator%set_config (config)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 20000 and set chains"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (20000)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise equivalences"
     write (u, "(A)")
 
     eqv = vamp2_equivalences_t (n_eqv = 4, n_channel = 2, n_dim = 1)
     call eqv%set_equivalence &
          (i_eqv = 1, dest = 2, src = 1, perm = [1], mode = [VEQ_IDENTITY])
     call eqv%set_equivalence &
          (i_eqv = 2, dest = 1, src = 2, perm = [1], mode = [VEQ_IDENTITY])
     call eqv%set_equivalence &
          (i_eqv = 3, dest = 1, src = 1, perm = [1], mode = [VEQ_IDENTITY])
     call eqv%set_equivalence &
          (i_eqv = 4, dest = 2, src = 2, perm = [1], mode = [VEQ_IDENTITY])
     call eqv%write (u)
     call mc_integrator%set_equivalences (eqv)
 
     write (u, "(A)")
     write (u, "(A)") &
          "* Integrate with n_it = 3 and n_calls = 10000 (Grid-only Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, &
          adapt_weights = .false., result=result, abserr=abserr)
     if (nearly_equal &
          (result, 2.000_default, rel_smallness = 0.003_default)) then
        write (u,  "(2x,A)") "Result: 2.000 [ok]"
     else
        write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ",A)") &
             "Result: ", result, " +/- ", abserr, " [not ok]"
     end if
 
     write (u, "(A)")
     write (u, "(A)") "* Compare the grids of both channels"
     write (u, "(A)")
 
     grid(1) = mc_integrator%get_grid(channel = 1)
     grid(2) = mc_integrator%get_grid(channel = 2)
 
     write (u, "(2X,A,1X,L1)") "Equal grids =", (grid(1) == grid(2))
 
     write (u, "(A)")
     write (u, "(A)") "* Write grid to file vamp2_5.grids"
     write (u, "(A)")
 
     unit = free_unit ()
     open (unit, file = "vamp2_5.grids", &
          action = "write", status = "replace")
     call mc_integrator%write_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 5000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (5000)
     call mc_integrator%integrate (func, rng, 3, adapt_weights = .false., &
          refine_grids = .false., result=result, abserr=abserr)
     if (nearly_equal &
          (result, 2.000_default, rel_smallness = 0.003_default)) then
        write (u,  "(2x,A)") "Result: 2.000 [ok]"
     else
        write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ",A)") &
             "Result: ", result, " +/- ", abserr, " [not ok]"
     end if
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_5
 
 @
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{MPI Request}
 
 \begin{description}
 \item[Request] Direct interface for requests.
 \item[Balancer] Sublidiary interfaces and types for implementation
   into [[request]].
 \item[Callback] Direct interface for non-blocking communication in
   callback fashion used in subsidiary in [[request]].
 \end{description}
 
 \subsection{Request Base}
 
 \begin{description}
 \item[request] Container type for current request, contains handler
   index and status of request.
 \item[request group cache] Caching for commnunicator groups, tries to
   reduce the costly calls to [[MPI_CREATE_COMM_GROUP]].
 \item[request base] Base type for request handling, defines procedures
   and basic request scheme for direct implementation into [[VAMP2]].
 \end{description}
 
 When a data type has an associated communicator field, it always must
 duplicate the communicator in order to ensure communication
 encapsulation. Only in special case (e.g. outside of object
 communication is required) the parent communicator should be directly
 used. For example, that is the case for the group cache, as it exports
 its newly created communicator to the outside of the request library.
 
 <<[[request_base.f90]]>>=
 <<File header>>
 module request_base
-  use io_units
-
-  use diagnostics
 
   use balancer_base
   use request_callback
   use mpi_f08 !NODEP!
 
 <<Standard module head>>
 
 <<Request base: public>>
 
 <<Request base: types>>
 
 <<Request base: interfaces>>
 
+  interface
+<<Request base: sub interfaces>>
+  end interface
+
+end module request_base
+@ %def request_base
+@
+<<[[request_base_sub.f90]]>>=
+<<File header>>
+
+submodule (request_base) request_base_s
+
+  use io_units
+  use diagnostics
+
+  implicit none
+
 contains
 
 <<Request base: procedures>>
 
-end module request_base
-@ %def request_base
+end submodule request_base_s
+
+@ %def request_base_s
 @
 <<Request base: public>>=
   public :: request_t
 <<Request base: types>>=
   type :: request_t
      integer :: handler_id = 0
      logical :: terminate = .false.
      logical :: group = .false.
      logical :: group_master = .false.
      logical :: callback = .false.
      type(MPI_COMM) :: comm
   end type request_t
 
 @ %def request_t
 @
 <<Request base: types>>=
   type :: request_group_cache_t
      private
      type(MPI_COMM) :: parent_comm
      type(MPI_GROUP) :: parent_group
      type(MPI_COMM) :: comm
      type(MPI_GROUP) :: group
      integer, dimension(:), allocatable :: rank
    contains
    <<Request base: group cache: TBP>>
   end type request_group_cache_t
 
 @ %def request_group_cache_t
 @
 Base type for requesting and back calling.
 
 A short remark on the semantics of point-to-point communication (as
 its intended use in this implemenation): MPI is strict regarding the
 order of the messages (for blocking communication), as those can not
 overtake each other, p. 41. For non-blocking communication, the
 standard extends it such that the order depends on "the execution
 order of the calls that initiate the communication". The
 non-overtaking requirement is then extended to this definition of
 order. 
 
 Color method for tracking communication? We restrict each slave to a
 single callback (at once), which can communicate during the next
 request computation. Before handling the next (current) callback, the
 former one has to finish (with a wait call on the client-side). On the
 server-side, we only test on the finishing, reporting a communication
 failure. 
 
 Furthermore, the implementation has to take care that the order of
 the communication calls on the master and slave code always matches!
 
 Therefore, we need to secure the order of communication calls. First,
 we let the master initiate all callback communication *before*
 polling. This fixiates the order. Second, we require that the
 implementation of the polling honors this order.
 <<Request base: public>>=
   public :: request_base_t
 <<Request base: types>>=
   type, abstract :: request_base_t
      type(MPI_COMM) :: comm
      type(MPI_COMM) :: external_comm !! communicator for use outside of request, however, just a duplicate of comm.
      class(balancer_base_t), allocatable :: balancer
      type(request_group_cache_t) :: cache
      type(request_handler_manager_t) :: handler
    contains
    <<Request base: base: TBP>>
   end type request_base_t
 
 @ %def request_base_t
 @
 <<Request base: interfaces>>=
   !> The basic idea behind the request mechanism is that each associated worker can request a workload either from a predefined local stack, from local stealing or from a global queue.
   !!
   !! We note that it is not necessary to differentiate between master and worker on this level of abstraction.
   !! Hence, the request interface ignores any notion regarding a possible parallelization concept.
   abstract interface
      subroutine request_base_deferred_write (req, unit)
        import :: request_base_t
        class(request_base_t), intent(in) :: req
        integer, intent(in), optional :: unit
      end subroutine request_base_deferred_write
 
      !> Verify if request object has workers.
      !!
      !! An implementation shall return if there at least two workers, or otherwisely stated,
      !! one master and one slave at least, when both are used as computing ranks.
      logical function request_base_has_workers (req) result (flag)
        import :: request_base_t
        class(request_base_t), intent(in) :: req
      end function request_base_has_workers
   end interface
 
 @ %def request_base_has_workers
 @
 <<Request base: interfaces>>=
      !> Request workload and returns an request_t object.
      !!
      !! The request_t object has an associated handler_id and provide several ways
      !! to indicate whether the execution is to be terminated, or the request has an associated communictor.
      !! Finally, whether we expect that the handler id will be connected to an callback.
      !!
      !! \param[out] request Request container.
   abstract interface
      subroutine request_base_request_workload (req, request)
        import :: request_base_t, request_t
        class(request_base_t), intent(inout) :: req
        type(request_t), intent(out) :: request
      end subroutine request_base_request_workload
   end interface
 
 @ %def request_base_request_workload
 @
 <<Request base: interfaces>>=
      !> Release workload with the information from the request container.
      !!
      !! The release procedure may notify the master about the finishing of the workload associated with the handler_id.
      !! Or, it may just bookkeep whether the workload has finished.
      !! Additionally, if request%callback was true, it could handle the callback (from client side.)
   abstract interface
      subroutine request_base_release_workload (req, request)
        import :: request_base_t, request_t
        class(request_base_t), intent(inout) :: req
        type(request_t), intent(in) :: request
      end subroutine request_base_release_workload
   end interface
 
 @ %def request_base_release_workload
 @ 
 <<Request base: interfaces>>=
      !> Handle associated callback and release workload with the information from the request container.
      !!
      !! The procedure must call the associated callback handler using the handler_id.
      !! Remark: The callback manager is quite squishy regarding a missing handler (silent failure).
      !!         The procedure has to take care whether the callback was actually successful.
      !! The further release of the workload can then be deferred to the release_workload procedure.
      !! \param[in] request.
   abstract interface
      subroutine request_base_handle_and_release_workload (req, request)
        import :: request_base_t, request_t
        class(request_base_t), intent(inout) :: req
        type(request_t), intent(in) :: request
      end subroutine request_base_handle_and_release_workload
   end interface
 
 @ %def request_base_handle_and_release_workload
 @
 <<Request base: group cache: TBP>>=
   procedure :: init => request_group_cache_init
+<<Request base: sub interfaces>>=
+    module subroutine request_group_cache_init (cache, comm)
+      class(request_group_cache_t), intent(inout) :: cache
+      type(MPI_COMM), intent(in) :: comm
+    end subroutine request_group_cache_init
 <<Request base: procedures>>=
-  subroutine request_group_cache_init (cache, comm)
+  module subroutine request_group_cache_init (cache, comm)
     class(request_group_cache_t), intent(inout) :: cache
     type(MPI_COMM), intent(in) :: comm
     call MPI_COMM_DUP (comm, cache%parent_comm)
     !! Local operation.
     call MPI_COMM_GROUP (cache%parent_comm, cache%parent_group)
     cache%group = MPI_GROUP_EMPTY
     cache%comm = MPI_COMM_NULL
   end subroutine request_group_cache_init
 
 @ %def request_group_cache_init
 @
 <<Request base: group cache: TBP>>=
   procedure :: reset => request_group_cache_reset
+<<Request base: sub interfaces>>=
+    module subroutine request_group_cache_reset (cache)
+      class(request_group_cache_t), intent(inout) :: cache
+    end subroutine request_group_cache_reset
 <<Request base: procedures>>=
-  subroutine request_group_cache_reset (cache)
+  module subroutine request_group_cache_reset (cache)
     class(request_group_cache_t), intent(inout) :: cache
     cache%group = MPI_GROUP_EMPTY
     cache%comm = MPI_COMM_NULL
   end subroutine request_group_cache_reset
 
 @ %def request_group_cache_reset
 @
 <<Request base: group cache: TBP>>=
   procedure :: update => request_group_cache_update
+<<Request base: sub interfaces>>=
+    module subroutine request_group_cache_update (cache, tag, rank)
+      class(request_group_cache_t), intent(inout) :: cache
+      integer, intent(in) :: tag
+      integer, dimension(:), allocatable, intent(inout) :: rank
+    end subroutine request_group_cache_update
 <<Request base: procedures>>=
-  subroutine request_group_cache_update (cache, tag, rank)
+  module subroutine request_group_cache_update (cache, tag, rank)
     class(request_group_cache_t), intent(inout) :: cache
     integer, intent(in) :: tag
     integer, dimension(:), allocatable, intent(inout) :: rank
     type(MPI_GROUP) :: group
     integer :: result, error
     call move_alloc (rank, cache%rank)
     call MPI_GROUP_INCL (cache%parent_group, size (cache%rank), cache%rank, group)
     call MPI_GROUP_COMPARE (cache%group, group, result)
     if (result /= MPI_IDENT) then
        cache%group = group
        if (cache%comm /= MPI_COMM_NULL) call MPI_COMM_FREE (cache%comm)
        !! Group-local operation. However, time consuming.
        call MPI_COMM_CREATE_GROUP (cache%parent_comm, cache%group, tag, &
             cache%comm, error)
        if (error /= 0) then
           call msg_bug ("Error occured during communicator creation...")
        end if
     ! else
        ! call msg_message ("CACHE UPDATE: GROUPS ARE (NEARLY) IDENTICAL")
     end if
   end subroutine request_group_cache_update
 
 @ %def request_group_cache_update
 @
 <<Request base: group cache: TBP>>=
   procedure :: get_comm => request_group_cache_get_comm
+<<Request base: sub interfaces>>=
+    module subroutine request_group_cache_get_comm (cache, comm)
+      class(request_group_cache_t), intent(in) :: cache
+      type(MPI_COMM), intent(out) :: comm
+    end subroutine request_group_cache_get_comm
 <<Request base: procedures>>=
-  subroutine request_group_cache_get_comm (cache, comm)
+  module subroutine request_group_cache_get_comm (cache, comm)
     class(request_group_cache_t), intent(in) :: cache
     type(MPI_COMM), intent(out) :: comm
     comm = cache%comm
   end subroutine request_group_cache_get_comm
 
 @ %def request_group_cache_get_comm
 @
 <<Request base: group cache: TBP>>=
   procedure :: is_master => request_group_cache_is_master
+<<Request base: sub interfaces>>=
+    module function request_group_cache_is_master (cache) result (flag)
+      class(request_group_cache_t), intent(in) :: cache
+      integer :: rank, error
+      logical :: flag
+    end function request_group_cache_is_master
 <<Request base: procedures>>=
-  logical function request_group_cache_is_master (cache) result (flag)
+  module function request_group_cache_is_master (cache) result (flag)
     class(request_group_cache_t), intent(in) :: cache
     integer :: rank, error
+    logical :: flag
     call MPI_COMM_RANK (cache%comm, rank, error)
     if (error /= 0) then
        call msg_bug ("Error: Could not retrieve group rank.")
     end if
     flag = (rank == 0)
   end function request_group_cache_is_master
 
 @ %def request_group_cache_is_master
 @
 <<Request base: base: TBP>>=
   procedure :: base_init => request_base_init
+<<Request base: sub interfaces>>=
+    module subroutine request_base_init (req, comm)
+      class(request_base_t), intent(out) :: req
+      type(MPI_COMM), intent(in) :: comm
+    end subroutine request_base_init
 <<Request base: procedures>>=
   !! =================================================
   !! request_base_t
   !! =================================================
 
   !> Initialize request base with parent communicator.
   !!
   !! In order to separate the communication between different parts of the request library,
   !! duplicate the parent communicator using MPI_COMM_DUP, also done by cache and handler objects.
   !!
   !! \param[in] comm Parent MPI communicator for overall library.
-  subroutine request_base_init (req, comm)
+  module subroutine request_base_init (req, comm)
     class(request_base_t), intent(out) :: req
     type(MPI_COMM), intent(in) :: comm
     call MPI_COMM_DUP (comm, req%comm)
     call MPI_COMM_DUP (comm, req%external_comm)
     call req%cache%init (comm)
     call req%handler%init (comm)
   end subroutine request_base_init
 
 @ %def request_base_init
 @
 <<Request base: base: TBP>>=
   procedure :: base_write => request_base_write
   procedure(request_base_deferred_write), deferred :: write
+<<Request base: sub interfaces>>=
+    module subroutine request_base_write (req, unit)
+      class(request_base_t), intent(in) :: req
+      integer, intent(in), optional :: unit
+    end subroutine request_base_write
 <<Request base: procedures>>=
-  subroutine request_base_write (req, unit)
+  module subroutine request_base_write (req, unit)
     class(request_base_t), intent(in) :: req
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     if (allocated (req%balancer)) then
        call req%balancer%write (u)
     else
        write (u, "(A)") "[BALANCER]"
        write (u, "(A)") "=> Not allocated"
     end if
     call req%handler%write (u)
   end subroutine request_base_write
 
 @ %def request_base_write
 @
 <<Request base: base: TBP>>=
   procedure :: is_master => request_base_is_master
   procedure(request_base_has_workers), deferred :: has_workers
+<<Request base: sub interfaces>>=
+    module function request_base_is_master (req) result (flag)
+      class(request_base_t), intent(in) :: req
+      integer :: rank, ierr
+      logical :: flag
+    end function request_base_is_master
 <<Request base: procedures>>=
   !> Check whether current worker is master rank in object communicator.
   !!
   !! Do not confuse with a group's master !!!
   !! Proof: rank == 0
-  logical function request_base_is_master (req) result (flag)
+  module function request_base_is_master (req) result (flag)
     class(request_base_t), intent(in) :: req
     integer :: rank, ierr
+    logical :: flag
     call MPI_COMM_RANK (req%comm, rank, ierr)
     if (ierr /= 0) then
        write (*, "(A,1X,I0)") "MPI Error: request_base_is_master", ierr
        stop 1
     end if
     flag = (rank == 0)
   end function request_base_is_master
 
 @ %def request_base_is_master
 @
 <<Request base: base: TBP>>=
   procedure :: get_external_comm => request_base_get_external_comm
+<<Request base: sub interfaces>>=
+    module subroutine request_base_get_external_comm (req, comm)
+      class(request_base_t), intent(in) :: req
+      type(MPI_COMM), intent(out) :: comm
+    end subroutine request_base_get_external_comm
 <<Request base: procedures>>=
   !> Provide external communicator.
   !!
   !! The external communicator is just a duplicate of request%comm,
   !! in order to provide the same group of workers to external communication,
   !! however, in a different context, such that communication from outside does not interfere with request.
-  subroutine request_base_get_external_comm (req, comm)
+  module subroutine request_base_get_external_comm (req, comm)
     class(request_base_t), intent(in) :: req
     type(MPI_COMM), intent(out) :: comm
     comm = req%external_comm
   end subroutine request_base_get_external_comm
 
 @ %def request_base_get_external_comm
 @
 <<Request base: base: TBP>>=
   procedure :: add_balancer => request_base_add_balancer
+<<Request base: sub interfaces>>=
+    module subroutine request_base_add_balancer (req, balancer)
+      class(request_base_t), intent(inout) :: req
+      class(balancer_base_t), allocatable, intent(inout) :: balancer
+    end subroutine request_base_add_balancer
 <<Request base: procedures>>=
   !> Add balancer to request.
   !!
   !! \param[inout] balancer
-  subroutine request_base_add_balancer (req, balancer)
+  module subroutine request_base_add_balancer (req, balancer)
     class(request_base_t), intent(inout) :: req
     class(balancer_base_t), allocatable, intent(inout) :: balancer
     if (allocated (req%balancer)) deallocate (req%balancer)
     call move_alloc (balancer, req%balancer)
   end subroutine request_base_add_balancer
 
 @ %def request_base_add_balancer
 @
 <<Request base: base: TBP>>=
   procedure :: add_handler => request_base_add_handler
+<<Request base: sub interfaces>>=
+    module subroutine request_base_add_handler (req, handler_id, handler)
+      class(request_base_t), intent(inout) :: req
+      integer, intent(in) :: handler_id
+      class(request_handler_t), pointer, intent(in) :: handler
+    end subroutine request_base_add_handler
 <<Request base: procedures>>=
   !> Add request handler with handler_id.
   !!
   !! \param[in] handler_id
   !! \param[in] handler Pointer to handler object.
-  subroutine request_base_add_handler (req, handler_id, handler)
+  module subroutine request_base_add_handler (req, handler_id, handler)
     class(request_base_t), intent(inout) :: req
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer, intent(in) :: handler
     call req%handler%add (handler_id, handler)
   end subroutine request_base_add_handler
 
 @ %def request_base_add_handler
 @
 <<Request base: base: TBP>>=
   procedure :: reset => request_base_reset
+<<Request base: sub interfaces>>=
+    module subroutine request_base_reset (req, deallocate_balancer)
+      class(request_base_t), intent(inout) :: req
+      logical, intent(in), optional :: deallocate_balancer
+      logical :: flag
+    end subroutine request_base_reset
 <<Request base: procedures>>=
   !> Reset request.
   !! Clear handler manager from associated callbacks,
   !! deallocate balancer, iff allocated, and reset communicator cache.
-  subroutine request_base_reset (req, deallocate_balancer)
+  module subroutine request_base_reset (req, deallocate_balancer)
     class(request_base_t), intent(inout) :: req
     logical, intent(in), optional :: deallocate_balancer
     logical :: flag
     flag = .false.; if (present (deallocate_balancer)) &
          flag = deallocate_balancer
     if (flag .and. allocated (req%balancer)) then
        deallocate (req%balancer)
     end if
     call req%handler%clear ()
     call req%cache%reset ()
   end subroutine request_base_reset
 
 @ %def request_base_reset
 @
 <<Request base: base: TBP>>=
   procedure :: call_handler => request_base_call_handler
+<<Request base: sub interfaces>>=
+    module subroutine request_base_call_handler &
+         (req, handler_id, source_rank)
+      class(request_base_t), intent(inout) :: req
+      integer, intent(in) :: handler_id
+      integer, intent(in) :: source_rank
+    end subroutine request_base_call_handler
 <<Request base: procedures>>=
   !> Call handler for master communication for handler_id.
   !!
   !! \param[in] handler_id The associated key of the callback object.
   !! \param[in] source_rank The rank of the result's source.
-  subroutine request_base_call_handler (req, handler_id, source_rank)
+  module subroutine request_base_call_handler &
+       (req, handler_id, source_rank)
     class(request_base_t), intent(inout) :: req
     integer, intent(in) :: handler_id
     integer, intent(in) :: source_rank
     call req%handler%callback (handler_id, source_rank)
   end subroutine request_base_call_handler
 
 @ %def request_base_call_handler
 @
 <<Request base: base: TBP>>=
   procedure :: call_client_handler => request_base_call_client_handler
+<<Request base: sub interfaces>>=
+    module subroutine request_base_call_client_handler (req, handler_id)
+      class(request_base_t), intent(inout) :: req
+      integer, intent(in) :: handler_id
+    end subroutine request_base_call_client_handler
 <<Request base: procedures>>=
   !> Call handler for slave communication for handler_id.
   !!
   !! \param[in] handler_id The associated key of the callback object.
-  subroutine request_base_call_client_handler (req, handler_id)
+  module subroutine request_base_call_client_handler (req, handler_id)
     class(request_base_t), intent(inout) :: req
     integer, intent(in) :: handler_id
     call req%handler%client_callback (handler_id, 0)
   end subroutine request_base_call_client_handler
 
 @ %def request_base_call_client_handler
 @
 <<Request base: base: TBP>>=
   procedure :: await_handler => request_base_await_handler
+<<Request base: sub interfaces>>=
+    module subroutine request_base_await_handler (req)
+      class(request_base_t), intent(inout) :: req
+    end subroutine request_base_await_handler
 <<Request base: procedures>>=
   !> Wait on all handler in request handler manager to finish communication.
-  subroutine request_base_await_handler (req)
+  module subroutine request_base_await_handler (req)
     class(request_base_t), intent(inout) :: req
     call req%handler%waitall ()
   end subroutine request_base_await_handler
 
 @ %def request_base_await_handler
 @
 <<Request base: base: TBP>>=
   procedure :: barrier => request_base_barrier
   procedure(request_base_request_workload), deferred :: request_workload
   procedure(request_base_release_workload), deferred :: release_workload
   procedure(request_base_handle_and_release_workload), deferred :: handle_and_release_workload
+<<Request base: sub interfaces>>=
+    module subroutine request_base_barrier (req)
+      class(request_base_t), intent(in) :: req
+      integer :: error
+    end subroutine request_base_barrier
 <<Request base: procedures>>=
-  subroutine request_base_barrier (req)
+  module subroutine request_base_barrier (req)
     class(request_base_t), intent(in) :: req
     integer :: error
     call MPI_BARRIER (req%comm, error)
     if (error /= MPI_SUCCESS) then
        call msg_fatal ("Request: Error occured during MPI_BARRIER synchronisation.")
     end if
   end subroutine request_base_barrier
 
 @ %def request_base_barrier
 @
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Request Simple}
 We implement a local request scheme, without any additional communication, except the callback communication.
 <<[[request_simple.f90]]>>=
 <<File header>>
 module request_simple
-  use io_units
-  use diagnostics
 
   use array_list
 
   use balancer_base
   use balancer_simple
   use request_base
 
   use mpi_f08 !NODEP!
 
 <<Standard module head>>
 
 <<Request simple: public>>
 
 <<Request simple: types>>
 
+  interface
+<<Request simple: sub interfaces>>
+  end interface
+
 contains
 
-<<Request simple: procedures>>
+<<Request simple: main procedures>>
 
 end module request_simple
 @ %def request_simple
 @
+<<[[request_simple_sub.f90]]>>=
+<<File header>>
+
+submodule (request_simple) request_simple_s
+
+  use io_units
+  use diagnostics
+
+  implicit none
+
+contains
+
+<<Request simple: procedures>>
+
+end submodule request_simple_s
+
+@ %def request_simple_s
+@
 <<Request simple: public>>=
   public :: request_simple_t
 <<Request simple: types>>=
   type, extends (request_base_t) :: request_simple_t
      integer :: n_workers = 0
      integer :: n_channels = 0
      logical, dimension(:), allocatable :: parallel_grid
    contains
    <<Request simple: simple: TBP>>
   end type request_simple_t
 
 @ %def request_simple_t
 @
+Gfortran 7/8/9 bug, has to remain in main module:
 <<Request simple: simple: TBP>>=
   procedure :: init => request_simple_init
-<<Request simple: procedures>>=
-  subroutine request_simple_init (req, comm, n_channels)
+<<Request simple: main procedures>>=
+  module subroutine request_simple_init (req, comm, n_channels)
     class(request_simple_t), intent(out) :: req
     type(MPI_COMM), intent(in) :: comm
     integer, intent(in) :: n_channels
     integer :: n_workers
     call req%base_init (comm)
     call MPI_COMM_SIZE (req%comm, req%n_workers)
     req%n_channels = n_channels
     allocate (req%parallel_grid (n_channels), source = .false.)
     call allocate_balancer ()
   contains
     subroutine allocate_balancer ()
       class(balancer_base_t), allocatable :: balancer
       allocate (balancer_simple_t :: balancer)
       select type (balancer)
       type is (balancer_simple_t)
          call balancer%init (n_workers = req%n_workers, n_resources = req%n_channels)
       end select
       call req%add_balancer (balancer)
     end subroutine allocate_balancer
   end subroutine request_simple_init
 
 @ %def request_simple_init
-@
+@ Update number of channels and parallel grids. The simple request
+object does not utilize the request balancer, as the complexity of the
+request balancer is not required for the simple approach. The simple
+approach assigns each worker several channel by a modular mapping from
+the set of workers $\left\{0,\ldots, N\right\}$ to the set of channels
+$\left\{1, \ldots , N_c \right\}$. Vetoing on those channel which have
+a parallel grid (check the definition in [[vegas.f90]]), where all
+workers are assigned.
+\begin{equation*}
+  w = \phi(c) = (c - 1) \mod{N}, \text{if not}\; P(c), \text{else}\;
+  \forall w \to c.
+\end{equation*}
+The information is stored in a dynamic-sized array list, which is
+filled, reversed and then used in a stack-like manner keeping track of
+the unassigned channels. Assigned and finished channels are then moved
+to the finished stack.
 <<Request simple: simple: TBP>>=
   procedure :: update => request_simple_update
+<<Request simple: sub interfaces>>=
+    module subroutine request_simple_update (req, parallel_grid)
+      class(request_simple_t), intent(inout) :: req
+      logical, dimension(:), intent(in) :: parallel_grid
+    end subroutine request_simple_update
 <<Request simple: procedures>>=
-  !> Update number of channels and parallel grids.
-  !!
-  !! The simple request object does not utilize the request balancer, as the complexity of the request balancer is not required for the simple approach.
-  !! The simple approach assigns each worker several channel by a modular mapping from the set of workers {0, …, N} to the set of channels {1, …, N_c}.
-  !! Vetoing on those channel which have a parallel grid (check the definition in vegas.f90), where all workers are assigned.
-  !!
-  !! w = \phi(c) = (c - 1) mod N, if not P(c), else \forall w to c.
-  !!
-  !! The information is stored in a dynamic-sized array list, which is filled, reversed and then used in a stack-like manner keeping track of the unassigned channels.
-  !! Assigned and finished channels are then moved to the finished stack.
-  subroutine request_simple_update (req, parallel_grid)
+  module subroutine request_simple_update (req, parallel_grid)
     class(request_simple_t), intent(inout) :: req
     logical, dimension(:), intent(in) :: parallel_grid
     integer :: me, worker
     call req%reset ()
     call MPI_COMM_RANK (req%comm, me)
     worker = SHIFT_RANK_TO_WORKER(me)
     select type (balancer => req%balancer)
     type is (balancer_simple_t)
        call balancer%update_state (worker, parallel_grid)
     end select
     req%parallel_grid = parallel_grid
   end subroutine request_simple_update
 
 @ %def request_simple_update
 @
 <<Request simple: simple: TBP>>=
   procedure :: write => request_simple_write
+<<Request simple: sub interfaces>>=
+    module subroutine request_simple_write (req, unit)
+      class(request_simple_t), intent(in) :: req
+      integer, intent(in), optional :: unit
+    end subroutine request_simple_write
 <<Request simple: procedures>>=
-  subroutine request_simple_write (req, unit)
+  module subroutine request_simple_write (req, unit)
     class(request_simple_t), intent(in) :: req
     integer, intent(in), optional :: unit
     integer :: u, n_size
     u = given_output_unit (unit)
     write (u, "(A)") "[REQUEST_SIMPLE]"
     write (u, "(A,1X,I0)") "N_CHANNELS", req%n_channels
     write (u, "(A,1X,I0)") "N_WORKERS", req%n_workers
     n_size = min (25, req%n_channels)
     write (u, "(A,25(1X,L1))") "PARALLEL_GRID", req%parallel_grid(:n_size)
     call req%base_write (u)
   end subroutine request_simple_write
 
 @ %def request_simple_write
 @
 <<Request simple: simple: TBP>>=
   procedure :: has_workers => request_simple_has_workers
+<<Request simple: sub interfaces>>=
+    module function request_simple_has_workers (req) result (flag)
+      class(request_simple_t), intent(in) :: req
+      logical :: flag
+    end function request_simple_has_workers
 <<Request simple: procedures>>=
-  logical function request_simple_has_workers (req) result (flag)
+  module function request_simple_has_workers (req) result (flag)
     class(request_simple_t), intent(in) :: req
+    logical :: flag
     flag = (req%n_workers > 1)
   end function request_simple_has_workers
 
 @ %def request_simple_has_workers
 @
 <<Request simple: simple: TBP>>=
   procedure :: get_request_master => request_simple_get_request_master
+<<Request simple: sub interfaces>>=
+    module function request_simple_get_request_master &
+         (req, channel) result (rank)
+      class(request_simple_t), intent(in) :: req
+      integer, intent(in) :: channel
+      integer :: rank
+    end function request_simple_get_request_master
 <<Request simple: procedures>>=
-  integer function request_simple_get_request_master (req, channel) &
-       result (rank)
+  module function request_simple_get_request_master &
+       (req, channel) result (rank)
     class(request_simple_t), intent(in) :: req
     integer, intent(in) :: channel
+    integer :: rank
     if (.not. allocated (req%balancer)) then
        call msg_bug ("Error: Balancer is not allocated.")
     end if
     rank = shift_worker_to_rank (req%balancer%get_resource_master (channel))
     !! "Caveat emptor" hits here:
     !! The balancer returns either a valid worker id or (-1) depending on the associated resource (it must be active...)
     !! We have to check whether returned worker index is plausible.
   end function request_simple_get_request_master
 
 @ %def request_simple_get_request_master
 @
 <<Request simple: simple: TBP>>=
   !! deferred.
   procedure :: request_workload => request_simple_request_workload
+<<Request simple: sub interfaces>>=
+    module subroutine request_simple_request_workload (req, request)
+      class(request_simple_t), intent(inout) :: req
+      type(request_t), intent(out) :: request
+    end subroutine request_simple_request_workload
 <<Request simple: procedures>>=
   !> Request workload.
   !!
   !! Depending on parallel_grid, we fill the request object differently.
   !! First, we do not set commnuicator for .not. parallel_grid (group and group master are set to .false., also).
   !! And the callback needs to be executed.
   !! Second, for parallel_grid, we set req%comm to the associated communicator and set group to .true..
   !! However, the overall master has the grid's result, therefore, only the master needs to the callback.
   !! Remark: We can actually intercept the callback for the master to himself; the results are already in the current position.
-  subroutine request_simple_request_workload (req, request)
+  module subroutine request_simple_request_workload (req, request)
     class(request_simple_t), intent(inout) :: req
     type(request_t), intent(out) :: request
     integer :: rank, worker_id
     call MPI_COMM_RANK (req%comm, rank)
     worker_id = shift_rank_to_worker (rank)
     if (.not. req%balancer%is_pending () &
          .or. .not. req%balancer%is_assignable (worker_id)) then
        request%terminate = .true.
        return
     end if
     call req%balancer%assign_worker (worker_id, request%handler_id)
     associate (channel => request%handler_id)
       if (req%parallel_grid (channel)) then
          request%comm = req%external_comm
          request%group = .true.
          !! The object communicator is master.
          request%group_master = &
               (req%get_request_master (channel) == rank)
          request%callback = req%is_master ()
       else
          request%comm = req%external_comm
          request%group = .false.
          request%group_master = .true.
          request%callback = .true.
       end if
     end associate
   end subroutine request_simple_request_workload
 
 @ %def request_simple_request_workload
 @
 <<Request simple: simple: TBP>>=
   procedure :: release_workload => request_simple_release_workload
+<<Request simple: sub interfaces>>=
+    module subroutine request_simple_release_workload (req, request)
+      class(request_simple_t), intent(inout) :: req
+      type(request_t), intent(in) :: request
+    end subroutine request_simple_release_workload
 <<Request simple: procedures>>=
-  subroutine request_simple_release_workload (req, request)
+  module subroutine request_simple_release_workload (req, request)
     class(request_simple_t), intent(inout) :: req
     type(request_t), intent(in) :: request
     integer :: rank, worker_id
     call MPI_COMM_RANK (req%comm, rank)
     worker_id = shift_rank_to_worker (rank)
     call req%balancer%free_worker (worker_id, request%handler_id)
   end subroutine request_simple_release_workload
 
 @ %def request_simple_release_workload
 @
 <<Request simple: simple: TBP>>=
-  procedure :: handle_and_release_workload => request_simple_handle_and_release_workload
+  procedure :: handle_and_release_workload => &
+       request_simple_handle_and_release_workload
+<<Request simple: sub interfaces>>=
+    module subroutine request_simple_handle_and_release_workload (req, request)
+      class(request_simple_t), intent(inout) :: req
+      type(request_t), intent(in) :: request
+    end subroutine request_simple_handle_and_release_workload
 <<Request simple: procedures>>=
-  subroutine request_simple_handle_and_release_workload (req, request)
+  module subroutine request_simple_handle_and_release_workload (req, request)
     class(request_simple_t), intent(inout) :: req
     type(request_t), intent(in) :: request
     call req%call_client_handler (request%handler_id)
     call req%release_workload (request)
   end subroutine request_simple_handle_and_release_workload
 
 @ %def request_simple_handle_and_release_workload
 @
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Request Caller}
 
 We implement an global queue approach, requiring permanent
 communication between a governor and the worker.
 <<[[request_caller.f90]]>>=
 <<File header>>
 module request_caller
-  use kinds, only: default
-  use io_units
+
+<<Use kinds>>
   use diagnostics
 
   use request_base
   use balancer_base
   use balancer_channel
   use request_state
   use request_callback
 
   use mpi_f08 !NODEP!
 
 <<Standard module head>>
 
 <<Request caller: public>>
 
 <<Request caller: types>>
 
+  interface
+<<Request caller: sub interfaces>>
+  end interface
+
 contains
 
-<<Request caller: procedures>>
+<<Request caller: main procedures>>
 
 end module request_caller
 @ %def request_caller
 @
+<<[[request_caller_sub.f90]]>>=
+<<File header>>
+
+submodule (request_caller) request_caller_s
+
+  use io_units
+
+  implicit none
+
+contains
+
+<<Request caller: procedures>>
+
+end submodule request_caller_s
+
+@ %def request_caller_s
+@
 <<Request caller: public>>=
   public :: request_caller_t
 <<Request caller: types>>=
   type, extends (request_base_t):: request_caller_t
      private
      integer :: n_channels = 0
      integer :: n_workers = 0
      type(request_state_t) :: state
    contains
    <<Request caller: caller: TBP>>
   end type request_caller_t
 
 @ %def request_caller_t
 @
+Gfortran 7/8/9 bug, has to remain in the main module:
 <<Request caller: caller: TBP>>=
   procedure :: init => request_caller_init
-<<Request caller: procedures>>=
+<<Request caller: main procedures>>=
   subroutine request_caller_init (req, comm, n_channels)
     class(request_caller_t), intent(out) :: req
     type(MPI_COMM), intent(in) :: comm
     integer, intent(in) :: n_channels
     call req%base_init (comm)
     call MPI_COMM_SIZE (req%comm, req%n_workers)
     !! Exclude master rank (0) from set of workers.
     req%n_workers = req%n_workers - 1
     if (.not. req%has_workers ()) then
        call msg_warning ("Must not handle less than 3 ranks in a master/slave global queue.")
        call MPI_ABORT (req%comm, 1)
     end if
     req%n_channels = n_channels
     call req%state%init (comm, req%n_workers)
     if (req%is_master ()) then
        call allocate_balancer ()
     end if
   contains
     subroutine allocate_balancer ()
       class(balancer_base_t), allocatable :: balancer
       allocate (balancer_channel_t :: balancer)
       select type (balancer)
       type is (balancer_channel_t)
          call balancer%init (n_workers = req%n_workers, n_resources = req%n_channels)
       end select
       call req%add_balancer (balancer)
     end subroutine allocate_balancer
   end subroutine request_caller_init
 
 @ %def request_caller_init
 @
 <<Request caller: caller: TBP>>=
   procedure :: write => request_caller_write
+<<Request caller: sub interfaces>>=
+    module subroutine request_caller_write (req, unit)
+      class(request_caller_t), intent(in) :: req
+      integer, intent(in), optional :: unit
+    end subroutine request_caller_write
 <<Request caller: procedures>>=
-  subroutine request_caller_write (req, unit)
+  module subroutine request_caller_write (req, unit)
     class(request_caller_t), intent(in) :: req
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     write (u, "(A)") "[REQUEST_CALLER]"
     call req%base_write (u)
     if (req%is_master ()) &
        call req%state%write (u)
   end subroutine request_caller_write
 
 @ %def request_caller_write
 @
 <<Request caller: caller: TBP>>=
   procedure :: has_workers => request_caller_has_workers
+<<Request caller: sub interfaces>>=
+    module function request_caller_has_workers (req) result (flag)
+      class(request_caller_t), intent(in) :: req
+      logical :: flag
+    end function request_caller_has_workers
 <<Request caller: procedures>>=
-  logical function request_caller_has_workers (req) result (flag)
+  module function request_caller_has_workers (req) result (flag)
     class(request_caller_t), intent(in) :: req
+    logical :: flag
     !! n_workers excludes the master rank.
     flag = (req%n_workers > 1)
   end function request_caller_has_workers
 
 @ %def request_caller_has_workers
 @
 <<Request caller: caller: TBP>>=
   procedure :: update_balancer => request_caller_update_balancer
+<<Request caller: sub interfaces>>=
+    module subroutine request_caller_update_balancer (req, weight, parallel_grid)
+      class(request_caller_t), intent(inout) :: req
+      real(default), dimension(:), intent(in) :: weight
+      logical, dimension(:), intent(in) :: parallel_grid
+    end subroutine request_caller_update_balancer
 <<Request caller: procedures>>=
-  subroutine request_caller_update_balancer (req, weight, parallel_grid)
+  module subroutine request_caller_update_balancer (req, weight, parallel_grid)
     class(request_caller_t), intent(inout) :: req
     real(default), dimension(:), intent(in) :: weight
     logical, dimension(:), intent(in) :: parallel_grid
     !! \note bug if not allocated?
     if (.not. allocated (req%balancer)) return
     call req%state%reset ()
     call req%reset ()
     select type (balancer => req%balancer)
     type is (balancer_channel_t)
        call balancer%update_state(weight, parallel_grid)
     end select
   end subroutine request_caller_update_balancer
 
 @ %def request_caller_update_balancer
 @
 <<Request caller: caller: TBP>>=
   procedure :: handle_workload => request_caller_handle_workload
+<<Request caller: sub interfaces>>=
+    module subroutine request_caller_handle_workload (req)
+      class(request_caller_t), intent(inout) :: req
+    end subroutine request_caller_handle_workload
 <<Request caller: procedures>>=
-  subroutine request_caller_handle_workload (req)
+  module subroutine request_caller_handle_workload (req)
     class(request_caller_t), intent(inout) :: req
     integer :: handler, tag, source, worker_id
     if (.not. allocated (req%balancer)) then
        call msg_warning ("Request: Error occured, load balancer not allocated.&
           & Terminate all workers.")
        !! We postpone to stop the program here so we can terminate all workers gracefully.
        !! First, we receive their requests, then we overwrite their "original" tag to MPI_TAG_TERMINATE.
        !! Second, we iterate this, until all workers are terminated and return without doing any besides.
     end if
     call req%state%init_request ()
     call req%state%receive_request ()
     do while (.not. req%state%is_terminated ())
        call req%state%await_request ()
        do while (req%state%has_request ())
           call req%state%get_request (source, tag, handler)
           !! Formally differentiate between worker_id and source.
           worker_id = source
           if (.not. allocated (req%balancer)) tag = MPI_TAG_TERMINATE
           select case (tag)
           case (MPI_TAG_REQUEST)
              if (req%balancer%is_assignable (worker_id)) then
                 call req%balancer%assign_worker (worker_id, handler)
                 if (.not. req%balancer%has_resource_group (handler)) then
                    call req%state%update_request (source, MPI_TAG_ASSIGN_SINGLE, handler)
                 else
                    call req%state%update_request (source, MPI_TAG_ASSIGN_GROUP, handler)
                    call provide_request_group (handler, source)
                 end if
              else
                 call req%state%terminate (source)
              end if
           case (MPI_TAG_HANDLER_AND_RELEASE)
              call req%call_handler (handler, source_rank = source)
              call req%balancer%free_worker (worker_id, handler)
           case (MPI_TAG_RELEASE)
              call req%balancer%free_worker (worker_id, handler)
           case (MPI_TAG_TERMINATE)
              call req%state%terminate (source)
           case (MPI_TAG_CLIENT_TERMINATE)
              !! Allow workers to request their own termination.
              call req%state%terminate (source)
           case default
              call msg_warning ()
           end select
        end do
        call req%state%receive_request ()
     end do
     !! If we are here, there should be no leftover communnication.
     !! Hence, we must check whether there is no left-over communication call (from server-side).
     call req%state%free_request ()
   contains
     subroutine provide_request_group (handler_id, dest_rank)
       integer, intent(in) :: handler_id
       integer, intent(in) :: dest_rank
       integer, dimension(:), allocatable :: rank
       !! Rank indices and worker indices are identical, as we skip the master worker deliberately,
       !! we can reuse the worker indices as rank indices.
       call req%balancer%get_resource_group (handler_id, rank)
       call req%state%provide_request_group (dest_rank, rank)
     end subroutine provide_request_group
   end subroutine request_caller_handle_workload
 
 @ %def request_caller_handle_workload
 @
 <<Request caller: caller: TBP>>=
   procedure :: request_workload => request_caller_request_workload
+<<Request caller: sub interfaces>>=
+    module subroutine request_caller_request_workload (req, request)
+      class(request_caller_t), intent(inout) :: req
+      type(request_t), intent(out) :: request
+    end subroutine request_caller_request_workload
 <<Request caller: procedures>>=
-  subroutine request_caller_request_workload (req, request)
+  module subroutine request_caller_request_workload (req, request)
     class(request_caller_t), intent(inout) :: req
     type(request_t), intent(out) :: request
     type(MPI_STATUS) :: status
     call req%state%client_serve (request%handler_id, status)
     request%terminate = .false.
     request%group = .false.
     request%callback = .false.
     request%comm = MPI_COMM_NULL
     select case (status%MPI_TAG)
     case (MPI_TAG_ASSIGN_SINGLE)
        !! Default to req's communicator.
        request%comm = req%external_comm
        request%group_master = .true.
        request%callback = .true.
     case (MPI_TAG_ASSIGN_GROUP)
        request%group = .true.
        call retrieve_request_group (request%handler_id)
        call req%cache%get_comm (request%comm)
        request%group_master = req%cache%is_master ()
        request%callback = req%cache%is_master ()
     case (MPI_TAG_TERMINATE)
        request%terminate = status%MPI_TAG == MPI_TAG_TERMINATE
     end select
   contains
     subroutine retrieve_request_group (handler_id)
       integer, intent(in) :: handler_id
       integer, dimension(:), allocatable :: rank
       !! Here, worker and rank indices are interchangeable.
       call req%state%retrieve_request_group (rank)
       call req%cache%update (handler_id, rank)
     end subroutine retrieve_request_group
   end subroutine request_caller_request_workload
 
 @ %def request_caller_request_workload
 @
 <<Request caller: caller: TBP>>=
   procedure :: release_workload => request_caller_release_workload
+<<Request caller: sub interfaces>>=
+    module subroutine request_caller_release_workload (req, request)
+      class(request_caller_t), intent(inout) :: req
+      type(request_t), intent(in) :: request
+    end subroutine request_caller_release_workload
 <<Request caller: procedures>>=
-  subroutine request_caller_release_workload (req, request)
+  module subroutine request_caller_release_workload (req, request)
     class(request_caller_t), intent(inout) :: req
     type(request_t), intent(in) :: request
-    call req%state%client_free (request%handler_id, has_callback = request%group_master)
+    call req%state%client_free (request%handler_id, &
+         has_callback = request%group_master)
   end subroutine request_caller_release_workload
 
 @ %def request_caller_release_workload
 @
 <<Request caller: caller: TBP>>=
-  procedure :: handle_and_release_workload => request_caller_handle_and_release_workload
+  procedure :: handle_and_release_workload => &
+       request_caller_handle_and_release_workload
+<<Request caller: sub interfaces>>=
+    module subroutine request_caller_handle_and_release_workload (req, request)
+      class(request_caller_t), intent(inout) :: req
+      type(request_t), intent(in) :: request
+    end subroutine request_caller_handle_and_release_workload
 <<Request caller: procedures>>=
-  subroutine request_caller_handle_and_release_workload (req, request)
+  module subroutine request_caller_handle_and_release_workload (req, request)
     class(request_caller_t), intent(inout) :: req
     type(request_t), intent(in) :: request
     if (.not. req%handler%has_handler (request%handler_id)) then
        call msg_bug ("Request: Handler is not registered for this worker.")
     end if
     call req%release_workload (request)
     call req%call_client_handler (request%handler_id)
   end subroutine request_caller_handle_and_release_workload
 
 @ %def request_caller_handle_and_release_workload
 @
 <<Request caller: caller: TBP>>=
   procedure :: request_terminate => request_caller_request_terminate
+<<Request caller: sub interfaces>>=
+    module subroutine request_caller_request_terminate (req)
+      class(request_caller_t), intent(inout) :: req
+    end subroutine request_caller_request_terminate
 <<Request caller: procedures>>=
-  subroutine request_caller_request_terminate (req)
+  module subroutine request_caller_request_terminate (req)
     class(request_caller_t), intent(inout) :: req
     if (req%is_master ()) return
     call req%state%client_terminate ()
   end subroutine request_caller_request_terminate
 
 @ %def request_caller_request_terminate
 @
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsubsection{Request Caller State}
 
 We implement the complete communication for [[request_caller]] into a
 state (or in WK's nomenclature, instance) object.
 <<[[request_state.f90]]>>=
 <<File header>>
 module request_state
+
   use, intrinsic :: iso_fortran_env, only: ERROR_UNIT
   use iterator
-  use diagnostics
 
   use mpi_f08 !NODEP!
 
 <<Standard module head>>
 
 <<Request state: public>>
 
 <<Request state: parameters>>
 
 <<Request state: types>>
 
+  interface
+<<Request state: sub interfaces>>
+  end interface
+
+end module request_state
+@ %def request_state
+@
+<<[[request_state_sub.f90]]>>=
+<<File header>>
+
+submodule (request_state) request_state_s
+
+  use diagnostics
+  
+  implicit none
+
 contains
 
 <<Request state: procedures>>
 
-end module request_state
-@ %def request_state
+end submodule request_state_s
+
+@ %def request_state_s
 @
 <<Request state: parameters>>=
   integer, parameter, public :: MPI_EMPTY_HANDLER = 0
 
   integer, parameter, public :: MPI_TAG_NULL = 0, &
        MPI_TAG_REQUEST = 1, &
        MPI_TAG_RELEASE = 2, &
        MPI_TAG_HANDLER_AND_RELEASE = 4, &
        MPI_TAG_TERMINATE = 8, &
        MPI_TAG_CLIENT_TERMINATE = 16, &
        MPI_TAG_ASSIGN_SINGLE = 32, &
        MPI_TAG_ASSIGN_GROUP = 64, &
        MPI_TAG_COMMUNICATOR_GROUP = 128
 
   integer, parameter :: MPI_STATE_ERR = 1
 
 @
 <<Request state: public>>=
   public :: request_state_t
 <<Request state: types>>=
   type :: request_state_t
      private
      type(MPI_COMM) :: comm
      integer :: n_workers = 0
      integer :: n_workers_done = 0
      !! From MPI-3.1 book
      !! i \in {1, N_workes_done}, max size = N_workers
      type(MPI_Request), dimension(:), allocatable :: request
      type(MPI_Status), dimension(:), allocatable :: status
      integer, dimension(:), allocatable :: indices
      !! i \in {1, N_workers}
      integer, dimension(:), allocatable :: handler
      logical, dimension(:), allocatable :: terminated
      type(iterator_t) :: request_iterator
    contains
    <<Request state: state: TBP>>
   end type request_state_t
 
 @ %def request_state_t
 @
 <<Request state: state: TBP>>=
   procedure :: init => request_state_init
+<<Request state: sub interfaces>>=
+    module subroutine request_state_init (state, comm, n_workers)
+      class(request_state_t), intent(out) :: state
+      type(MPI_COMM), intent(in) :: comm
+      integer, intent(in) :: n_workers
+    end subroutine request_state_init
 <<Request state: procedures>>=
-  subroutine request_state_init (state, comm, n_workers)
+  module subroutine request_state_init (state, comm, n_workers)
     class(request_state_t), intent(out) :: state
     type(MPI_COMM), intent(in) :: comm
     integer, intent(in) :: n_workers
     integer :: rank
     call MPI_COMM_DUP (comm, state%comm)
     state%n_workers = n_workers
     state%n_workers_done = n_workers
     call state%request_iterator%init (1, n_workers)
     allocate (state%request(state%n_workers), source = MPI_REQUEST_NULL)
     allocate (state%status(state%n_workers), source = MPI_STATUS_IGNORE)
     allocate (state%handler(state%n_workers), source = MPI_EMPTY_HANDLER)
     allocate (state%indices(state%n_workers), source = 0)
     allocate (state%terminated(state%n_workers), source = .false.)
     state%indices = [(rank, rank = 1, n_workers)]
   end subroutine request_state_init
 
 @ %def request_state_init
 @
 <<Request state: state: TBP>>=
   procedure :: write => request_state_write
+<<Request state: sub interfaces>>=
+    module subroutine request_state_write (state, unit)
+      class(request_state_t), intent(in) :: state
+      integer, intent(in), optional :: unit
+    end subroutine request_state_write
 <<Request state: procedures>>=
-  subroutine request_state_write (state, unit)
+  module subroutine request_state_write (state, unit)
     class(request_state_t), intent(in) :: state
     integer, intent(in), optional :: unit
     integer :: u, i
     u = ERROR_UNIT; if (present (unit)) u = unit
     write (u, "(A)") "[REQUEST_STATE]"
     write (u, "(A,1X,I0)") "N_WORKERS", state%n_workers
     write (u, "(A,1X,I0)") "N_WORKERS_DONE", state%n_workers_done
     write (u, "(A)") "RANK | SOURCE | TAG | ERROR | REQUEST_NULL"
     do i = 1, state%n_workers_done
        write (u, "(A,4(1X,I0),1X,L1)") "REQUEST", state%indices(i), &
             state%status(i)%MPI_SOURCE, &
             state%status(i)%MPI_TAG, &
             state%status(i)%MPI_ERROR, &
             (state%request(i) == MPI_REQUEST_NULL)
     end do
     write (u, "(A,999(1X,I0))") "HANDLER", state%handler
     write (u, "(A,999(1X,L1))") "TERMINATED", state%terminated
   end subroutine request_state_write
 
 @ %def request_state_write
 @
 <<Request state: state: TBP>>=
   procedure :: reset => request_state_reset
+<<Request state: sub interfaces>>=
+    module subroutine request_state_reset (state)
+      class(request_state_t), intent(inout) :: state
+    end subroutine request_state_reset
 <<Request state: procedures>>=
-  subroutine request_state_reset (state)
+  module subroutine request_state_reset (state)
     class(request_state_t), intent(inout) :: state
     integer :: rank
     state%n_workers_done = state%n_workers
     call state%request_iterator%init (1, state%n_workers)
     state%handler = MPI_EMPTY_HANDLER
     state%indices = [(rank, rank = 1, state%n_workers)]
     state%terminated = .false.
   end subroutine request_state_reset
 
 @ %def request_state_reset
 @
 <<Request state: state: TBP>>=
   procedure :: is_terminated => request_state_is_terminated
+<<Request state: sub interfaces>>=
+    module function request_state_is_terminated (state) result (flag)
+      class(request_state_t), intent(in) :: state
+      logical :: flag
+    end function request_state_is_terminated
 <<Request state: procedures>>=
-  ! pure function request_state_is_terminated (state) result (flag)
-  function request_state_is_terminated (state) result (flag)
+  ! pure module function request_state_is_terminated (state) result (flag)
+  module function request_state_is_terminated (state) result (flag)
     class(request_state_t), intent(in) :: state
     logical :: flag
     flag = all (state%terminated)
   end function request_state_is_terminated
 
 @ %def request_state_is_terminated
-@
+@ Set rank to be terminated (however, do not communicate it). This is
+an EVIL procedure, as it operates only locally on the master and does
+not communicate its purpose. However, in order to allow termination
+requests from client-side we need to manipulate the specific rank
+states.
 <<Request state: state: TBP>>=
   procedure, private :: set_terminated => request_state_set_terminated
+<<Request state: sub interfaces>>=
+    module subroutine request_state_set_terminated (state, rank)
+      class(request_state_t), intent(inout) :: state
+      integer, intent(in) :: rank
+    end subroutine request_state_set_terminated
 <<Request state: procedures>>=
-  !> Set rank to be terminated (however, do not communicate it).
-  !!
-  !! This is an EVIL procedure, as it operates only locally on the master
-  !! and does not communicate its purpose.
-  !! However, in order to allow termination requests from client-side
-  !! we need to manipulate the specific rank states.
-  subroutine request_state_set_terminated (state, rank)
+  module subroutine request_state_set_terminated (state, rank)
     class(request_state_t), intent(inout) :: state
     integer, intent(in) :: rank
     state%terminated(rank) = .true.
   end subroutine request_state_set_terminated
 
 @ %def request_state_set_terminated
 @
 <<Request state: state: TBP>>=
   procedure :: terminate => request_state_terminate
+<<Request state: sub interfaces>>=
+    module subroutine request_state_terminate (state, rank)
+      class(request_state_t), intent(inout) :: state
+      integer, intent(in) :: rank
+    end subroutine request_state_terminate
 <<Request state: procedures>>=
-  subroutine request_state_terminate (state, rank)
+  module subroutine request_state_terminate (state, rank)
     class(request_state_t), intent(inout) :: state
     integer, intent(in) :: rank
     integer :: error
     call MPI_SEND (MPI_EMPTY_HANDLER, 1, MPI_INTEGER, &
          rank, MPI_TAG_TERMINATE, state%comm, error)
     if (error /= 0) then
-       write (msg_buffer, "(A,1X,I3)") "Request: Error occured during terminate, RANK", rank
+       write (msg_buffer, "(A,1X,I3)") "Request: Error occured " // &
+            "during terminate, RANK", rank
        call msg_bug ()
     end if
     call state%set_terminated (rank)
   end subroutine request_state_terminate
 
 @ %def request_state_terminate
 @
 <<Request state: state: TBP>>=
   procedure :: client_terminate => request_state_client_terminate
+<<Request state: sub interfaces>>=
+    module subroutine request_state_client_terminate (state)
+      class(request_state_t), intent(in) :: state
+    end subroutine request_state_client_terminate
 <<Request state: procedures>>=
-  subroutine request_state_client_terminate (state)
+  module subroutine request_state_client_terminate (state)
     class(request_state_t), intent(in) :: state
     integer :: error
     call MPI_SEND (MPI_EMPTY_HANDLER, 1, MPI_INTEGER, &
          0, MPI_TAG_CLIENT_TERMINATE, state%comm, error)
     if (error /= 0) then
-       write (msg_buffer, "(A,1X,I3)") "Request: Error occured during client-sided terminate"
+       write (msg_buffer, "(A,1X,I3)") "Request: Error occured " // &
+            "during client-sided terminate"
        call msg_bug ()
     end if
   end subroutine request_state_client_terminate
 
 @ %def request_state_client_terminate
-@
+@ Init persistent requests. Must be called before first
+[[receive_request]]. [[Free_request]] must be called after 
+[[is_terminated]] returns [[true]].
 <<Request state: state: TBP>>=
   procedure :: init_request => request_state_init_request
+<<Request state: sub interfaces>>=
+    module subroutine request_state_init_request (state)
+      class(request_state_t), intent(inout) :: state
+    end subroutine request_state_init_request
 <<Request state: procedures>>=
-  !> Init persistent requests.
-  !!
-  !! Must be called before first receive_request.
-  !! Free_request must be called after is_terminated returns true.
-  subroutine request_state_init_request (state)
+  module subroutine request_state_init_request (state)
     class(request_state_t), intent(inout) :: state
     integer :: i, rank, error
     do i = 1, state%n_workers_done
        rank = state%indices(i)
        call MPI_RECV_INIT (state%handler(rank), 1, MPI_INTEGER, &
             rank, MPI_ANY_TAG, state%comm, state%request(rank), error)
        if (error /= 0) then
           write (msg_buffer, "(A,2(A,1X,I0))") "Request: Error occured during receive init, &
                & RANK", rank, "HANDLER", state%handler(rank)
           call msg_message ()
           call MPI_ABORT (state%comm, MPI_STATE_ERR)
        end if
     end do
   end subroutine request_state_init_request
 
 @ %def request_state_init_request
-@
+@ Receive requests from non-terminated workers. Before receiving new
+requests, santize arrays of received ranks from terminated ones.
 <<Request state: state: TBP>>=
   procedure :: receive_request => request_state_receive_request
+<<Request state: sub interfaces>>=
+    module subroutine request_state_receive_request (state)
+      class(request_state_t), intent(inout) :: state
+    end subroutine request_state_receive_request
 <<Request state: procedures>>=
-  !> Receive requests from non-terminated workers.
-  !!
-  !! Before receiving new requests, santize arrays of received ranks from terminated ones.
-  subroutine request_state_receive_request (state)
+  module subroutine request_state_receive_request (state)
     class(request_state_t), intent(inout) :: state
     integer :: i, rank
     integer :: error
     if (state%is_terminated ()) return
     call sanitize_from_terminated_ranks ()
     !! Receive new requests from (still active) workers.
     do i = 1, state%n_workers_done
        rank = state%indices(i)
        call MPI_START (state%request(rank), error)
        if (error /= 0) then
           write (msg_buffer, "(A,2(A,1X,I6))") "Request: Error occured during receive request, &
              & RANK", rank, "HANDLER", state%handler(rank)
           call msg_message ()
           call MPI_ABORT (state%comm, MPI_STATE_ERR)
        end if
     end do
   contains
     subroutine sanitize_from_terminated_ranks ()
       integer :: n_workers_done
       integer, dimension(:), allocatable :: indices
       !! Remove terminated ranks from done workers.
       indices = pack(state%indices(:state%n_workers_done), &
            .not. state%terminated(state%indices(:state%n_workers_done)))
       state%n_workers_done = size (indices)
       state%indices(:state%n_workers_done) = indices
     end subroutine sanitize_from_terminated_ranks
   end subroutine request_state_receive_request
 
 @ %def request_state_receive_request
 @
 <<Request state: state: TBP>>=
   procedure :: await_request => request_state_await_request
+<<Request state: sub interfaces>>=
+    module subroutine request_state_await_request (state)
+      class(request_state_t), intent(inout) :: state
+    end subroutine request_state_await_request
 <<Request state: procedures>>=
-  subroutine request_state_await_request (state)
+  module subroutine request_state_await_request (state)
     class(request_state_t), intent(inout) :: state
     integer :: error
     if (state%is_terminated ()) return
     !! We verify that we have active handles associated with request state.
     call MPI_TESTSOME (state%n_workers, state%request, state%n_workers_done, &
          state%indices, state%status, error)
     if (error /= 0) then
        write (ERROR_UNIT, "(A)") "Error occured during await (testing) request..."
        call state%write (ERROR_UNIT)
        call MPI_ABORT (state%comm, MPI_STATE_ERR)
     else if (state%n_workers_done == MPI_UNDEFINED) then
        write (ERROR_UNIT, "(A)") "TEST_WAITSOME returned with MPI_UNDEFINED."
        call state%write (ERROR_UNIT)
        call MPI_ABORT (state%comm, MPI_STATE_ERR)
     end if
     !! Wait a little bit...
     if (state%n_workers_done == 0) then
        !! Proof: REQUEST(i), i \in {1, N_workers}, i is equivalent to rank.
        !! Proof: INDICES(j), STATUS(j), j \in {1, N_workers_done}
        !! Proof: INDICES(j) -> i, injectiv.
        call MPI_WAITSOME (state%n_workers, state%request, state%n_workers_done, &
             state%indices, state%status, error)
        if (error /= 0) then
           write (ERROR_UNIT, "(A)") "Error occured during await request..."
           call state%write (ERROR_UNIT)
           call MPI_ABORT (state%comm, MPI_STATE_ERR)
        end if
     endif
     call state%request_iterator%init (1, state%n_workers_done)
   end subroutine request_state_await_request
 
 @ %def request_state_await_request
 @
 <<Request state: state: TBP>>=
   procedure :: has_request => request_state_has_request
+<<Request state: sub interfaces>>=
+    pure module function request_state_has_request (state) result (flag)
+      class(request_state_t), intent(in) :: state
+      logical :: flag
+    end function request_state_has_request
 <<Request state: procedures>>=
-  pure function request_state_has_request (state) result (flag)
+  pure module function request_state_has_request (state) result (flag)
     class(request_state_t), intent(in) :: state
     logical :: flag
     flag = state%request_iterator%is_iterable ()
   end function request_state_has_request
 
 @ %def request_state_has_request
 @
 <<Request state: state: TBP>>=
   procedure :: get_request => request_state_get_request
+<<Request state: sub interfaces>>=
+    module subroutine request_state_get_request (state, rank, tag, handler)
+      class(request_state_t), intent(inout) :: state
+      integer, intent(out) :: rank
+      integer, intent(out) :: tag
+      integer, intent(out) :: handler
+    end subroutine request_state_get_request
 <<Request state: procedures>>=
-  subroutine request_state_get_request (state, rank, tag, handler)
+  module subroutine request_state_get_request (state, rank, tag, handler)
     class(request_state_t), intent(inout) :: state
     integer, intent(out) :: rank
     integer, intent(out) :: tag
     integer, intent(out) :: handler
     integer :: ndx
     if (.not. state%has_request ()) then
        call msg_bug ("Request: Cannot access missing request.")
     end if
     ndx = state%request_iterator%next ()
     rank = state%indices(ndx)
     if (rank /= state%status(ndx)%MPI_SOURCE) then
        write (msg_buffer, "(A,2(1X,I3))") &
-          "Request: RANK and SOURCE mismatch", rank, state%status(ndx)%MPI_SOURCE
+          "Request: RANK and SOURCE mismatch", rank, &
+          state%status(ndx)%MPI_SOURCE
        call msg_bug ()
     end if
     tag = state%status(ndx)%MPI_TAG
     handler = state%handler(rank)
   end subroutine request_state_get_request
 
 @ %def request_state_get_request
 @
 <<Request state: state: TBP>>=
   procedure :: update_request => request_state_update_request
+<<Request state: sub interfaces>>=
+    module subroutine request_state_update_request (state, rank, tag, handler)
+      class(request_state_t), intent(inout) :: state
+      integer, intent(in) :: rank
+      integer, intent(in) :: tag
+      integer, intent(in) :: handler
+    end subroutine request_state_update_request
 <<Request state: procedures>>=
-  subroutine request_state_update_request (state, rank, tag, handler)
+  module subroutine request_state_update_request (state, rank, tag, handler)
     class(request_state_t), intent(inout) :: state
     integer, intent(in) :: rank
     integer, intent(in) :: tag
     integer, intent(in) :: handler
     integer :: error
     state%handler(rank) = handler
     call MPI_SEND (handler, 1, MPI_INTEGER, &
          rank, tag, state%comm, error)
     if (error /= 0) then
        write (msg_buffer, "(A,3(A,1X,I3))") "Request: Error occured during update, &
           &RANK", rank, "TAG", tag, "HANDLER", handler
        call msg_bug ()
     end if
   end subroutine request_state_update_request
 
 @ %def request_state_update_request
 @
 <<Request state: state: TBP>>=
   procedure :: free_request => request_state_free_request
+<<Request state: sub interfaces>>=
+    module subroutine request_state_free_request (state)
+      class(request_state_t), intent(inout) :: state
+    end subroutine request_state_free_request
 <<Request state: procedures>>=
-  subroutine request_state_free_request (state)
+  module subroutine request_state_free_request (state)
     class(request_state_t), intent(inout) :: state
     integer :: rank
     do rank = 1, state%n_workers
        if (state%request(rank) == MPI_REQUEST_NULL) cycle
        call MPI_REQUEST_FREE (state%request(rank))
     end do
   end subroutine request_state_free_request
 
 @ %def request_state_free_request
 @
 <<Request state: state: TBP>>=
-  procedure :: provide_request_group => request_state_provide_request_group
+  procedure :: provide_request_group => &
+       request_state_provide_request_group
+<<Request state: sub interfaces>>=
+    module subroutine request_state_provide_request_group &
+         (state, dest_rank, worker)
+      class(request_state_t), intent(in) :: state
+      integer, intent(in) :: dest_rank
+      integer, dimension(:), intent(in) :: worker
+    end subroutine request_state_provide_request_group
 <<Request state: procedures>>=
-  subroutine request_state_provide_request_group (state, dest_rank, worker)
+  module subroutine request_state_provide_request_group &
+       (state, dest_rank, worker)
     class(request_state_t), intent(in) :: state
     integer, intent(in) :: dest_rank
     integer, dimension(:), intent(in) :: worker
     call MPI_SEND (worker, size (worker), MPI_INTEGER, &
          dest_rank, MPI_TAG_COMMUNICATOR_GROUP, state%comm)
   end subroutine request_state_provide_request_group
 
 @ %def request_state_provide_request_group
 @
 <<Request state: state: TBP>>=
-  procedure :: retrieve_request_group => request_state_retrieve_request_group
+  procedure :: retrieve_request_group => &
+       request_state_retrieve_request_group
+<<Request state: sub interfaces>>=
+    module subroutine request_state_retrieve_request_group (state, worker)
+      class(request_state_t), intent(inout) :: state
+      integer, dimension(:), allocatable, intent(out) :: worker
+    end subroutine request_state_retrieve_request_group
 <<Request state: procedures>>=
-  subroutine request_state_retrieve_request_group (state, worker)
+  module subroutine request_state_retrieve_request_group (state, worker)
     class(request_state_t), intent(inout) :: state
     integer, dimension(:), allocatable, intent(out) :: worker
     type(MPI_STATUS) :: status
     integer :: n_workers
     call MPI_PROBE (0, MPI_TAG_COMMUNICATOR_GROUP, state%comm, status)
     call MPI_GET_COUNT(status, MPI_INTEGER, n_workers)
     allocate (worker (n_workers), source = 0)
     call MPI_RECV (worker, n_workers, MPI_INTEGER, &
          0, MPI_TAG_COMMUNICATOR_GROUP, state%comm, status)
   end subroutine request_state_retrieve_request_group
 
 @ %def request_state_retrieve_request_group
-@
+@ Query for a request (send an request tag, then receive a handler).
 <<Request state: state: TBP>>=
   procedure :: client_serve => request_state_client_serve
+<<Request state: sub interfaces>>=
+    module subroutine request_state_client_serve (state, handler_id, status)
+      class(request_state_t), intent(in) :: state
+      integer, intent(out) :: handler_id
+      type(MPI_STATUS), intent(out) :: status
+    end subroutine request_state_client_serve
 <<Request state: procedures>>=
-  !> Query for a request (send an request tag, then receive a handler).
-  subroutine request_state_client_serve (state, handler_id, status)
+  module subroutine request_state_client_serve (state, handler_id, status)
     class(request_state_t), intent(in) :: state
     integer, intent(out) :: handler_id
     type(MPI_STATUS), intent(out) :: status
     call MPI_SEND (MPI_EMPTY_HANDLER, 1, MPI_INTEGER, &
          0, MPI_TAG_REQUEST, state%comm)
     call MPI_RECV (handler_id, 1, MPI_INTEGER, &
          0, MPI_ANY_TAG, state%comm, status)
   end subroutine request_state_client_serve
 
 @ %def request_state_client_rate
-@
+@ Free handler from worker.
 <<Request state: state: TBP>>=
   procedure :: client_free => request_state_client_free
+<<Request state: sub interfaces>>=
+    module subroutine request_state_client_free (state, handler_id, has_callback)
+      class(request_state_t), intent(in) :: state
+      integer, intent(in) :: handler_id
+      logical, intent(in) :: has_callback
+    end subroutine request_state_client_free
 <<Request state: procedures>>=
-  !> Free handler from worker.
-  subroutine request_state_client_free (state, handler_id, has_callback)
+  module subroutine request_state_client_free (state, handler_id, has_callback)
     class(request_state_t), intent(in) :: state
     integer, intent(in) :: handler_id
     logical, intent(in) :: has_callback
     integer :: tag
     tag = merge (MPI_TAG_HANDLER_AND_RELEASE, MPI_TAG_RELEASE, has_callback)
     call MPI_SEND (handler_id, 1, MPI_INTEGER, &
          0, tag, state%comm)
   end subroutine request_state_client_free
 
 @ %def request_state_client_free
 @
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Balancer Base}
 \begin{description}
 \item[Balancer Base] Base type for usage for extensions of [[request_base]].
 \end{description}
 <<[[balancer_base.f90]]>>=
 module balancer_base
-  use io_units
-  use diagnostics
+
   use array_list
 
 <<Standard module head>>
 
 <<Balancer base: public>>
 
 <<Balancer base: parameters>>
 
 <<Balancer base: types>>
 
 <<Balancer base: interfaces>>
 
+  interface
+<<Balancer base: sub interfaces>>
+  end interface
+
+end module balancer_base
+@ %def balancer_base
+@
+<<[[balancer_base_sub.f90]]>>=
+<<File header>>
+
+submodule (balancer_base) balancer_base_s
+
+  use io_units
+  use diagnostics
+
+  implicit none
+
 contains
 
 <<Balancer base: procedures>>
 
-end module balancer_base
-@ %def balancer_base
+end submodule balancer_base_s
+
+@ %def balancer_base_s
 @
 <<Balancer base: parameters>>=
   integer, parameter, public :: STATE_SINGLE = 1, &
        STATE_ALL = 2
 
 @
 <<Balancer base: types>>=
   type :: worker_t
      private
      integer :: resource = 0
      integer :: state = 0
      integer :: n_resources = 0
      logical :: assigned = .false.
    contains
    <<Balancer base: worker: TBP>>
   end type worker_t
 
 @ %def worker_t
 @
 <<Balancer base: types>>=
   type :: resource_t
      private
      integer :: resource_id = 0
      logical :: active = .false.
      integer :: n_assigned_workers = 0
    contains
    <<Balancer base: resource: TBP>>
   end type resource_t
 
 @ %def resource_t
 @
 <<Balancer base: public>>=
   public :: resource_state_t
 <<Balancer base: types>>=
   type :: resource_state_t
      integer :: n_workers = 0
      integer :: mode = 0
      type(array_list_t) :: resource_stack
      type(array_list_t) :: finished_stack
    contains
    <<Balancer base: resource state: TBP>>
   end type resource_state_t
 
 @ %def resource_state_t
 @
 Dynamic load balancer. We organize resources and workers in a
 transparent way using indices. These indices replace pointer magic.
 The balancer aggregates a dynamic state, however, we allow the state
 by the use of a pointer, to access the static fields of the balancer.
 <<Balancer base: public>>=
   public :: balancer_base_t
 <<Balancer base: types>>=
   type, abstract :: balancer_base_t
      integer :: n_workers = 0
      integer :: n_resources = 0
      integer :: n_states = 0
      type(worker_t), dimension(:), allocatable :: worker
      type(resource_t), dimension(:), allocatable :: resource
      type(resource_state_t), dimension(:), allocatable :: state
    contains
    <<Balancer base: base: TBP>>
   end type balancer_base_t
 
 @ %def balancer_base_t
 @
 <<Balancer base: interfaces>>=
   abstract interface
      subroutine balancer_base_deferred_write (balancer, unit)
        import :: balancer_base_t
        class(balancer_base_t), intent(in) :: balancer
        integer, intent(in), optional :: unit
      end subroutine balancer_base_deferred_write
    end interface
 
 @ %def balancer_base_deferred_write
 @
 <<Balancer base: interfaces>>=
      !> Has resource an associated resource group.
      !!
      !! \note .true. only on an active resource, else .false.
   abstract interface
      pure logical function balancer_base_has_resource_group (balancer, resource_id) &
           result (flag)
        import :: balancer_base_t
        class(balancer_base_t), intent(in) :: balancer
        integer, intent(in) :: resource_id
      end function balancer_base_has_resource_group
    end interface
 
 @ %def balancer_base_has_resource_group
 @
 <<Balancer base: interfaces>>=
      !> Get resource group.
      !!
      !! \note Implementation must check against group existence.
      !! \return group (allocated|NOT allocated for (inactive|non-group) resource)
   abstract interface
      pure subroutine balancer_base_get_resource_group (balancer, resource_id, group)
        import :: balancer_base_t
        class(balancer_base_t), intent(in) :: balancer
        integer, intent(in) :: resource_id
        integer, dimension(:), allocatable, intent(out) :: group
      end subroutine balancer_base_get_resource_group
    end interface
 
 @ %def balancer_base_get_resource_group
 @
 <<Balancer base: interfaces>>=
      !> Get resource master (worker).
      !!
      !! Return worker as given, however, if extended type is used in a non-local
      !! or in combination with a commnuicative request type, then check on activation status of associated resource.
      !!
      !! \return worker Valid worker index (\in {1, …, N}) only on active resource*, else -1.
   abstract interface
      pure integer function balancer_base_get_resource_master (balancer, resource_id) &
           result (worker)
        import :: balancer_base_t
        class(balancer_base_t), intent(in) :: balancer
        integer, intent(in) :: resource_id
      end function balancer_base_get_resource_master
   end interface
 
 @ %def balancer_base_get_resource_master
 @
 <<Balancer base: interfaces>>=
      !> Assign resource to a given worker or retrieve current assigned resource.
      !!
      !! If worker has already a resource assigned, return resource.
      !! If worker has not been assigned a resource, retrieve new resource from state.
      !!
      !! \note Each call must check if a worker is assignable, if not, the procedure must return resource_id = -1.
   abstract interface
      subroutine balancer_base_assign_worker (balancer, worker_id, resource_id)
        import :: balancer_base_t
        class(balancer_base_t), intent(inout) :: balancer
        integer, intent(in) :: worker_id
        integer, intent(out) :: resource_id
      end subroutine balancer_base_assign_worker
    end interface
 
 @ %def balancer_base_assign_worker
 @
 Free assignment of worker. If the worker is not assigned, this
 procedure is idempotent. If the worker is assigned, alter state
 correspondingly. In order to correctly free a worker from a resource,
 we have to explicitly keep track of the association status of a worker
 and a resource. This feature is mostly relevant for resources with a
 worker group. The resource may be disassociated from their worker by
 earlier calls or the former worker may be already assigned to a new
 resource. In the latter case, we are not allowed to free them (as the
 new resource is still active). Therefore, each call must check if a
 worker and resource are still associated and the resource is still
 active. Only, in this case, disassociating workers and resource is
 allowed.
 <<Balancer base: interfaces>>=
   abstract interface
      subroutine balancer_base_free_worker (balancer, worker_id, resource_id)
        import :: balancer_base_t
        class(balancer_base_t), intent(inout) :: balancer
        integer, intent(in) :: worker_id
        integer, intent(in) :: resource_id
      end subroutine balancer_base_free_worker
   end interface
 
 @ %def balancer_base_free_worker
 @
 <<Balancer base: public>>=
-  public :: shift_rank_to_worker
-@ 
+  public :: shift_rank_to_worker 
+<<Balancer base: sub interfaces>>=
+    elemental module function shift_rank_to_worker (rank) result (worker)
+      integer, intent(in) :: rank
+      integer :: worker
+    end function shift_rank_to_worker
 <<Balancer base: procedures>>=
   !> Shift rank index to worker index.
   !! Proof: rank \in {0, …, N - 1}, worker \in {1, …, N}
-  elemental integer function shift_rank_to_worker (rank) result (worker)
+  elemental module function shift_rank_to_worker (rank) result (worker)
     integer, intent(in) :: rank
+    integer :: worker
     worker = rank + 1
   end function shift_rank_to_worker
 
 @ %def shift_rank_to_worker
 @
 <<Balancer base: public>>=
   public :: shift_worker_to_rank
+<<Balancer base: sub interfaces>>=
+    elemental module function shift_worker_to_rank (worker) result (rank)
+      integer, intent(in) :: worker
+      integer :: rank
+    end function shift_worker_to_rank
 <<Balancer base: procedures>>=
   !> Shift worker index to rank index.
   !! Proof: rank \in {0, …, N - 1}, worker \in {1, …, N}
-  elemental integer function shift_worker_to_rank (worker) result (rank)
+  elemental module function shift_worker_to_rank (worker) result (rank)
     integer, intent(in) :: worker
+    integer :: rank
     rank = worker - 1
   end function shift_worker_to_rank
 
 @ %def shift_worker_to_rank
 @
 <<Balancer base: worker: TBP>>=
   procedure :: write => worker_write
+<<Balancer base: sub interfaces>>=
+    module subroutine worker_write (worker, unit)
+      class(worker_t), intent(in) :: worker
+      integer, intent(in), optional :: unit
+    end subroutine worker_write
 <<Balancer base: procedures>>=
-  subroutine worker_write (worker, unit)
+  module subroutine worker_write (worker, unit)
     class(worker_t), intent(in) :: worker
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     write (u, "(3(A,1X,I3,1X),A,1X,L1)") "RESOURCE", worker%resource, &
          "STATE", worker%state, &
          "N_RESOURCES", worker%n_resources, &
          "ASSIGNED", worker%assigned
   end subroutine worker_write
 
 @ %def worker_write
 @
 <<Balancer base: worker: TBP>>=
   procedure :: is_assigned => worker_is_assigned
+<<Balancer base: sub interfaces>>=
+    elemental module function worker_is_assigned (worker) result (flag)
+      class(worker_t), intent(in) :: worker
+      logical :: flag
+    end function worker_is_assigned
 <<Balancer base: procedures>>=
-  elemental logical function worker_is_assigned (worker) result (flag)
+  elemental module function worker_is_assigned (worker) result (flag)
     class(worker_t), intent(in) :: worker
+    logical :: flag
     flag = worker%assigned
   end function worker_is_assigned
 
 @ %def worker_is_assigned
 @
 <<Balancer base: worker: TBP>>=
   procedure :: get_resource => worker_get_resource
+<<Balancer base: sub interfaces>>=
+    elemental module function worker_get_resource (worker) result (resource_id)
+      class(worker_t), intent(in) :: worker
+      integer :: resource_id
+    end function worker_get_resource
 <<Balancer base: procedures>>=
-  elemental integer function worker_get_resource (worker) result (resource_id)
+  elemental module function worker_get_resource (worker) result (resource_id)
     class(worker_t), intent(in) :: worker
+    integer :: resource_id
     resource_id = worker%resource
   end function worker_get_resource
 
 @ %def worker_get_resource
 @
 <<Balancer base: worker: TBP>>=
   procedure :: get_state => worker_get_state
+<<Balancer base: sub interfaces>>=
+    elemental module function worker_get_state (worker) result (i_state)
+      class(worker_t), intent(in) :: worker
+      integer :: i_state
+    end function worker_get_state
 <<Balancer base: procedures>>=
-  elemental integer function worker_get_state (worker) result (i_state)
+  elemental module function worker_get_state (worker) result (i_state)
     class(worker_t), intent(in) :: worker
+    integer :: i_state
     i_state = worker%state
   end function worker_get_state
 
 @ %def worker_get_state
 @
 <<Balancer base: worker: TBP>>=
   procedure :: add_resource => worker_add_resource
+<<Balancer base: sub interfaces>>=
+    elemental module subroutine worker_add_resource (worker, resource_id)
+      class(worker_t), intent(inout) :: worker
+      integer, intent(in) :: resource_id
+    end subroutine worker_add_resource
 <<Balancer base: procedures>>=
-  elemental subroutine worker_add_resource (worker, resource_id)
+  elemental module subroutine worker_add_resource (worker, resource_id)
     class(worker_t), intent(inout) :: worker
     integer, intent(in) :: resource_id
     worker%n_resources = worker%n_resources + 1
     worker%assigned = .true.
     worker%resource = resource_id
   end subroutine worker_add_resource
 
 @ %def worker_add_resource
 @
 <<Balancer base: worker: TBP>>=
   procedure :: free => worker_free
+<<Balancer base: sub interfaces>>=
+    elemental module subroutine worker_free (worker)
+      class(worker_t), intent(inout) :: worker
+    end subroutine worker_free
 <<Balancer base: procedures>>=
-  elemental subroutine worker_free (worker)
+  elemental module subroutine worker_free (worker)
     class(worker_t), intent(inout) :: worker
     worker%assigned = .false.
     worker%resource = 0
   end subroutine worker_free
 
 @ %def worker_free
 @
 <<Balancer base: resource: TBP>>=
   procedure :: write => resource_write
+<<Balancer base: sub interfaces>>=
+    module subroutine resource_write (resource, unit)
+      class(resource_t), intent(in) :: resource
+      integer, intent(in), optional :: unit
+    end subroutine resource_write
 <<Balancer base: procedures>>=
-  subroutine resource_write (resource, unit)
+  module subroutine resource_write (resource, unit)
     class(resource_t), intent(in) :: resource
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     write (u, "(A,1X,I3,1X,A,1X,L1,1X,A,1X,I3)") &
          "RESOURCE_ID", resource%resource_id, &
          "ACTIVE", resource%active, &
          "N_ASSIGNED_WORKERS", resource%n_assigned_workers
   end subroutine resource_write
 
 @ %def resource_write
 @
 <<Balancer base: resource: TBP>>=
   procedure :: is_active => resource_is_active
+<<Balancer base: sub interfaces>>=
+    elemental module function resource_is_active (resource) result (flag)
+     class(resource_t), intent(in) :: resource
+      logical :: flag
+    end function resource_is_active
 <<Balancer base: procedures>>=
-  elemental logical function resource_is_active (resource) result (flag)
+  elemental module function resource_is_active (resource) result (flag)
     class(resource_t), intent(in) :: resource
+    logical :: flag
     flag = resource%active
   end function resource_is_active
 
 @ %def resource_is_active
 @
 <<Balancer base: resource: TBP>>=
   procedure :: set_active => resource_set_active
+<<Balancer base: sub interfaces>>=
+    module subroutine resource_set_active (resource, n_workers)
+      class(resource_t), intent(inout) :: resource
+      integer, intent(in) :: n_workers
+    end subroutine resource_set_active
 <<Balancer base: procedures>>=
-  subroutine resource_set_active (resource, n_workers)
+  module subroutine resource_set_active (resource, n_workers)
     class(resource_t), intent(inout) :: resource
     integer, intent(in) :: n_workers
     resource%active = .true.
     resource%n_assigned_workers = n_workers
   end subroutine resource_set_active
 
 @ %def resource_set_active
 @
 <<Balancer base: resource: TBP>>=
   procedure :: set_inactive => resource_set_inactive
+<<Balancer base: sub interfaces>>=
+    module subroutine resource_set_inactive (resource)
+      class(resource_t), intent(inout) :: resource
+    end subroutine resource_set_inactive
 <<Balancer base: procedures>>=
-  subroutine resource_set_inactive (resource)
+  module subroutine resource_set_inactive (resource)
     class(resource_t), intent(inout) :: resource
     resource%active = .false.
   end subroutine resource_set_inactive
 
 @ %def resource_set_inactive
 @
 <<Balancer base: resource state: TBP>>=
   procedure :: write => resource_state_write
+<<Balancer base: sub interfaces>>=
+    module subroutine resource_state_write (state, unit)
+      class(resource_state_t), intent(in) :: state
+      integer, intent(in), optional :: unit
+    end subroutine resource_state_write
 <<Balancer base: procedures>>=
-  subroutine resource_state_write (state, unit)
+  module subroutine resource_state_write (state, unit)
     class(resource_state_t), intent(in) :: state
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     write (u, "(A,1X,I0)") "N_STATE_WORKERS", state%n_workers
     select case (state%mode)
     case (STATE_SINGLE)
        write (u, "(A)") "MODE ONE-TO-ONE"
     case (STATE_ALL)
        write (u, "(A)") "MODE ALL-TO-ONE"
     case default
        write (u, "(A)") "UNSUPPORTED MODE"
     end select
     write (u, "(A)") "RESOURCE"
     call state%resource_stack%write (u)
     write (u, "(A)") "FINISHED"
     call state%finished_stack%write (u)
   end subroutine resource_state_write
 
 @ %def resource_state_write
 @
 <<Balancer base: resource state: TBP>>=
   procedure :: init => resource_state_init
+<<Balancer base: sub interfaces>>=
+    module subroutine resource_state_init (state, mode, n_workers)
+      class(resource_state_t), intent(out) :: state
+      integer, intent(in) :: mode
+      integer, intent(in) :: n_workers
+    end subroutine resource_state_init
 <<Balancer base: procedures>>=
-  subroutine resource_state_init (state, mode, n_workers)
+  module subroutine resource_state_init (state, mode, n_workers)
     class(resource_state_t), intent(out) :: state
     integer, intent(in) :: mode
     integer, intent(in) :: n_workers
     state%mode = mode
     state%n_workers = n_workers
     call state%resource_stack%init ()
     call state%finished_stack%init ()
   end subroutine resource_state_init
 
 @ %def resource_state_init
 @
 <<Balancer base: resource state: TBP>>=
   procedure :: add_resource => resource_state_add_resource
+<<Balancer base: sub interfaces>>=
+    module subroutine resource_state_add_resource (state, i_resource)
+      class(resource_state_t), intent(inout) :: state
+      integer, intent(in) :: i_resource
+    end subroutine resource_state_add_resource
 <<Balancer base: procedures>>=
-  subroutine resource_state_add_resource (state, i_resource)
+  module subroutine resource_state_add_resource (state, i_resource)
     class(resource_state_t), intent(inout) :: state
     integer, intent(in) :: i_resource
     call state%resource_stack%add (i_resource)
   end subroutine resource_state_add_resource
 
 @ %def resource_state_add_resource
 @
 <<Balancer base: resource state: TBP>>=
   procedure :: freeze => resource_state_freeze
+<<Balancer base: sub interfaces>>=
+    module subroutine resource_state_freeze (state)
+      class(resource_state_t), intent(inout) :: state
+    end subroutine resource_state_freeze  
 <<Balancer base: procedures>>=
-  subroutine resource_state_freeze (state)
+  module subroutine resource_state_freeze (state)
     class(resource_state_t), intent(inout) :: state
     call state%resource_stack%sort ()
     call state%resource_stack %reverse_order ()
   end subroutine resource_state_freeze
 
 @ %def resource_state_freeze
 @
 <<Balancer base: resource state: TBP>>=
   procedure :: clear => resource_state_clear
+<<Balancer base: sub interfaces>>=
+    module subroutine resource_state_clear (state)
+      class(resource_state_t), intent(inout) :: state
+    end subroutine resource_state_clear
 <<Balancer base: procedures>>=
-  subroutine resource_state_clear (state)
+  module subroutine resource_state_clear (state)
     class(resource_state_t), intent(inout) :: state
     call state%resource_stack%clear ()
     call state%finished_stack%clear ()
   end subroutine resource_state_clear
 
 @ %def resource_state_clear
 @
 <<Balancer base: resource state: TBP>>=
   procedure :: has_resource => resource_state_has_resource
+<<Balancer base: sub interfaces>>=
+    elemental module function resource_state_has_resource (state) result (flag)
+      class(resource_state_t), intent(in) :: state
+      logical :: flag
+    end function resource_state_has_resource
 <<Balancer base: procedures>>=
-  elemental function resource_state_has_resource (state) result (flag)
+  elemental module function resource_state_has_resource (state) result (flag)
     class(resource_state_t), intent(in) :: state
     logical :: flag
     flag = .not. state%resource_stack%is_empty ()
   end function resource_state_has_resource
 
 @ %def resource_state_has_resoruce
 @
 <<Balancer base: resource state: TBP>>=
   procedure :: assign_resource => resource_state_assign_resource
+<<Balancer base: sub interfaces>>=
+    module function resource_state_assign_resource (state) result (i_resource)
+      class(resource_state_t), intent(inout) :: state
+      integer :: i_resource
+    end function resource_state_assign_resource
 <<Balancer base: procedures>>=
-  function resource_state_assign_resource (state) result (i_resource)
+  module function resource_state_assign_resource (state) result (i_resource)
     class(resource_state_t), intent(inout) :: state
     integer :: i_resource
     if (state%resource_stack%is_empty ()) then
        i_resource = 0
        call msg_bug ("Error: No leftover resource on stack.")
        return
     end if
     i_resource = state%resource_stack%remove () !! Pop last element from stack.
   end function resource_state_assign_resource
 
 @ %def resource_state_assign_ressource
 @
 <<Balancer base: resource state: TBP>>=
   procedure :: free_resource => resource_state_free_resource
+<<Balancer base: sub interfaces>>=
+    module subroutine resource_state_free_resource (state, i_resource)
+      class(resource_state_t), intent(inout) :: state
+      integer, intent(in) :: i_resource
+    end subroutine resource_state_free_resource
 <<Balancer base: procedures>>=
-  subroutine resource_state_free_resource (state, i_resource)
+  module subroutine resource_state_free_resource (state, i_resource)
     class(resource_state_t), intent(inout) :: state
     integer, intent(in) :: i_resource
     if (state%resource_stack%is_element (i_resource)) then
-       call msg_bug ("Error: Cannot free resource, still on resource stack.")
+       call msg_bug &
+            ("Error: Cannot free resource, still on resource stack.")
     end if
     call state%finished_stack%add (i_resource)
   end subroutine resource_state_free_resource
 
 @ %def resource_state_free_resource
 @
 <<Balancer base: base: TBP>>=
   procedure :: base_write => balancer_base_write
   procedure(balancer_base_deferred_write), deferred :: write
+<<Balancer base: sub interfaces>>=
+    module subroutine balancer_base_write (balancer, unit)
+      class(balancer_base_t), intent(in) :: balancer
+      integer, intent(in), optional :: unit
+    end subroutine balancer_base_write
 <<Balancer base: procedures>>=
-  subroutine balancer_base_write (balancer, unit)
+  module subroutine balancer_base_write (balancer, unit)
     class(balancer_base_t), intent(in) :: balancer
     integer, intent(in), optional :: unit
     integer :: u, i
     u = given_output_unit (unit)
     write (u, "(A)") "[REQUEST BALANCER]"
     write (u, "(3(A,1X,I3,1X))") "N_WORKERS", balancer%n_workers, &
          "N_RESOURCES", balancer%n_resources, &
          "N_STATES", balancer%n_states
     write (u, "(A)") "[WORKER]"
     do i = 1, balancer%n_workers
        call balancer%worker(i)%write (u)
     end do
     write (u, "(A)") "[RESOURCE]"
     do i = 1, balancer%n_resources
        call balancer%resource(i)%write (u)
     end do
     write (u, "(A)") "[STATES]"
     do i = 1, balancer%n_states
        call balancer%state(i)%write (u)
     end do
   end subroutine balancer_base_write
 
 @ %def balancer_base_write
 @
 <<Balancer base: base: TBP>>=
   procedure :: base_init => balancer_base_base_init
+<<Balancer base: sub interfaces>>=
+    module subroutine balancer_base_base_init (balancer, n_workers, n_resources)
+      class(balancer_base_t), intent(out) :: balancer
+      integer, intent(in) :: n_workers
+      integer, intent(in) :: n_resources
+    end subroutine balancer_base_base_init
 <<Balancer base: procedures>>=
-  subroutine balancer_base_base_init (balancer, n_workers, n_resources)
+  module subroutine balancer_base_base_init (balancer, n_workers, n_resources)
     class(balancer_base_t), intent(out) :: balancer
     integer, intent(in) :: n_workers
     integer, intent(in) :: n_resources
     balancer%n_workers = n_workers
     balancer%n_resources = n_resources
     allocate (balancer%worker (n_workers))
     allocate (balancer%resource (n_resources))
     call init_resource ()
   contains
     subroutine init_resource ()
       integer :: i
       do i = 1, balancer%n_resources
          balancer%resource(i)%resource_id = i
       end do
     end subroutine init_resource
   end subroutine balancer_base_base_init
 
 @ %def balancer_base_base_init
-@
+@ Add partition of workers and link with workers.We move the allocated
+partition object into the balancer. We then assign each partition its
+respective number of workers in a incrementing linear
+fashion. However, we postpone the linking of the resources to the 
+partition, which can be either done dynamically with the balancer
+state or directly with the appropriate type-bound procedure. 
 <<Balancer base: base: TBP>>=
   procedure :: add_state => balancer_base_add_state
+<<Balancer base: sub interfaces>>=
+    module subroutine balancer_base_add_state (balancer, state)
+      class(balancer_base_t), intent(inout) :: balancer
+      type(resource_state_t), dimension(:), allocatable, intent(inout) :: state
+    end subroutine balancer_base_add_state
 <<Balancer base: procedures>>=
-  !> Add partition of workers and link with workers.
-  !! We move the allocated partition object into the balancer.
-  !! We then assign each partition its respective number of workers in a incrementing linear fashion.
-  !! However, we postpone the linking of the resources to the partition, which can be either done dynamically with the balancer state or directly with the appropriate type-bound procedure.
-  subroutine balancer_base_add_state (balancer, state)
+  module subroutine balancer_base_add_state (balancer, state)
     class(balancer_base_t), intent(inout) :: balancer
     type(resource_state_t), dimension(:), allocatable, intent(inout) :: state
     balancer%n_states = size (state)
     call move_alloc (state, balancer%state)
     call balancer%link_worker_and_state ()
   end subroutine balancer_base_add_state
 
 @ %def balancer_base_add_state
 @
 <<Balancer base: base: TBP>>=
-  procedure, private :: link_worker_and_state => balancer_base_link_worker_and_state
+  procedure, private :: link_worker_and_state => &
+       balancer_base_link_worker_and_state
+<<Balancer base: sub interfaces>>=
+    module subroutine balancer_base_link_worker_and_state (balancer)
+      class(balancer_base_t), intent(inout) :: balancer
+    end subroutine balancer_base_link_worker_and_state
 <<Balancer base: procedures>>=
-  subroutine balancer_base_link_worker_and_state (balancer)
+  module subroutine balancer_base_link_worker_and_state (balancer)
     class(balancer_base_t), intent(inout) :: balancer
     integer :: i, j, i_worker
     if (.not. allocated (balancer%state)) &
          call msg_bug ("Error: resource state not allocated.")
     !! Link worker to a state.
     i_worker = 1
     do i = 1, balancer%n_states
        do j = 1, balancer%state(i)%n_workers
           if (i_worker > balancer%n_workers) then
              call msg_bug ("Balancer: Number of state workers&
                   & exceeding global number of workers")
           end if
           associate (worker => balancer%worker(i_worker))
             worker%state = i
             !! Reset worker attributes.
             worker%resource = 0
             worker%n_resources = 0
             worker%assigned = .false.
           end associate
           i_worker = i_worker + 1
        end do
     end do
   end subroutine balancer_base_link_worker_and_state
 
 @ %def balancer_base_link_worker_and_state
-@
+@ Is a worker unassigned, or is a worker assigned, but already
+assigned to an active resource? This is a fence for the
+[[assign_worker]] TBP. The [[assign_worker]] TBP must call
+[[is_assignable]] in order to retrieve the worker status. The input is
+the worker ID, the return flag tells: if the worker is NOT assigned,
+return [[.true.]] if state has resources. If worker is assigned,
+return [[.true.]] if associated resource is active.
 <<Balancer base: base: TBP>>=
   procedure :: is_assignable => balancer_base_is_assignable
+<<Balancer base: sub interfaces>>=
+    pure module function balancer_base_is_assignable &
+         (balancer, worker_id) result (flag)
+      class(balancer_base_t), intent(in) :: balancer
+      integer, intent(in) :: worker_id
+      integer :: i_state, resource_id
+      logical :: flag
+    end function balancer_base_is_assignable
 <<Balancer base: procedures>>=
-  !> Is a worker unassigned, or is a worker assigned, but already assigned to an active resource?
-  !!
-  !! \note Fence for assign_worker TBP.
-  !! The assign_worker TPB must call is_assignable in order to retrieve the worker status.
-  !! \param[in] worker_id
-  !! \return flag If worker is NOT assigned, return .true. if state has resources.
-  !!              If worker is assigned, return .true. if associated resource is active.
-  pure logical function balancer_base_is_assignable (balancer, worker_id) result (flag)
+  pure module function balancer_base_is_assignable &
+       (balancer, worker_id) result (flag)
     class(balancer_base_t), intent(in) :: balancer
     integer, intent(in) :: worker_id
     integer :: i_state, resource_id
+    logical :: flag
     flag = .false.
     if (balancer%worker(worker_id)%assigned) then
        resource_id = balancer%worker(worker_id)%resource
        flag = balancer%resource(resource_id)%is_active ()
     else
        i_state = balancer%worker(worker_id)%get_state ()
        flag = balancer%state(i_state)%has_resource ()
     end if
   end function balancer_base_is_assignable
 
 @ %def balancer_base_is_assignable
-@
+@ Is a worker still pending. Test worker assignment, and if there is a
+(valid) resource and if it is still active.
 <<Balancer base: base: TBP>>=
   procedure :: is_worker_pending => balancer_base_is_worker_pending
+<<Balancer base: sub interfaces>>=
+    pure module function balancer_base_is_worker_pending &
+         (balancer, worker_id) result (flag)
+      class(balancer_base_t), intent(in) :: balancer
+      integer, intent(in) :: worker_id
+      integer :: resource_id
+      logical :: flag
+    end function balancer_base_is_worker_pending
 <<Balancer base: procedures>>=
-  !> Is a worker still pending.
-  !!
-  !! Test worker assignment, and if there is a (valid) resource and if it is still active.
-  pure logical function balancer_base_is_worker_pending (balancer, worker_id) result (flag)
+  pure module function balancer_base_is_worker_pending &
+       (balancer, worker_id) result (flag)
     class(balancer_base_t), intent(in) :: balancer
     integer, intent(in) :: worker_id
     integer :: resource_id
+    logical :: flag
     flag = balancer%worker(worker_id)%assigned
     if (flag) then
        resource_id = balancer%worker(worker_id)%get_resource ()
        flag = balancer%resource(resource_id)%is_active ()
     end if
   end function balancer_base_is_worker_pending
 
 @ %def balancer_base_is_worker_pending
 @
 <<Balancer base: base: TBP>>=
   procedure :: is_pending => balancer_base_is_pending
   procedure(balancer_base_has_resource_group), deferred :: has_resource_group
   procedure(balancer_base_get_resource_group), deferred :: get_resource_group
   procedure(balancer_base_get_resource_master), deferred :: get_resource_master
   procedure(balancer_base_assign_worker), deferred :: assign_worker
   procedure(balancer_base_free_worker), deferred :: free_worker
+<<Balancer base: sub interfaces>>=
+    module function balancer_base_is_pending (balancer) result (flag)
+      class(balancer_base_t), intent(in) :: balancer
+      logical :: flag
+    end function balancer_base_is_pending
 <<Balancer base: procedures>>=
-  logical function balancer_base_is_pending (balancer) result (flag)
+  module function balancer_base_is_pending (balancer) result (flag)
     class(balancer_base_t), intent(in) :: balancer
+    logical :: flag
     flag = all (balancer%state%has_resource ())
   end function balancer_base_is_pending
 
 @ %def balancer_base_is_pending
 @
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Balancer Simple}
+
+This module is the simple balancer. The simple balancer distribute the
+channel among the N workers using a modulo prescription. However, it
+does assign all workers to a channel capable of grid with
+parallelizable structure. The balancer use local, non-communicative
+approach; each worker allocates an own instance of the balancer and
+fills it with the respecting resources. We defer possible checks (or
+sentinels) to the request module, e.g. such as checking whether all
+channels are computed globally.
 <<[[balancer_simple.f90]]>>=
 <<File header>>
 module balancer_simple
-  use io_units
-  use diagnostics
 
   use balancer_base
 
 <<Standard module head>>
 
 <<Balancer simple: parameters>>
 
-  !> Simple balancer.
-  !!
-  !! The simple balancer distribute the channel among the N workers using a modulo prescription.
-  !! However, it does assign all workers to a channel capable of grid with parallelizable structure.
-  !!
-  !! The balancer use local, non-communicative approach; each worker allocates an own instance of the balancer
-  !! and fills it with the respecting resources.
-  !!
-  !! We defer possible checks (or sentinels) to the request module, e.g. such as checking whether all channels are computed globally.
-
 <<Balancer simple: public>>
 
 <<Balancer simple: types>>
 
+  interface
+<<Balancer simple: sub interfaces>>
+  end interface
+
+end module balancer_simple
+@ %def balancer_simple
+@
+<<[[balancer_simple_sub.f90]]>>=
+<<File header>>
+
+submodule (balancer_simple) balancer_simple_s
+
+  use io_units
+  use diagnostics
+
+  implicit none
+
 contains
 
 <<Balancer simple: procedures>>
 
-end module balancer_simple
-@ %def balancer_simple
+end submodule balancer_simple_s
+
+@ %def balancer_simple_s
 @
 <<Balancer simple: parameters>>=
   integer, parameter :: N_BALANCER_SIMPLE_STATES = 1, &
        BALANCER_SIMPLE_CHANNEL = 1
 @ 
 <<Balancer simple: public>>=
   public :: balancer_simple_t
 <<Balancer simple: types>>=
   type, extends (balancer_base_t) :: balancer_simple_t
      logical, dimension(:), allocatable :: parallel_grid
    contains
    <<Balancer simple: simple: TBP>>
   end type balancer_simple_t
 
 @ %def balancer_simple_t
 @
 <<Balancer simple: simple: TBP>>=
   procedure :: init => balancer_simple_init
+<<Balancer simple: sub interfaces>>=
+    module subroutine balancer_simple_init (balancer, n_workers, n_resources)
+      class(balancer_simple_t), intent(out) :: balancer
+      integer, intent(in) :: n_workers
+      integer, intent(in) :: n_resources
+    end subroutine balancer_simple_init
 <<Balancer simple: procedures>>=
-  subroutine balancer_simple_init (balancer, n_workers, n_resources)
+  module subroutine balancer_simple_init (balancer, n_workers, n_resources)
     class(balancer_simple_t), intent(out) :: balancer
     integer, intent(in) :: n_workers
     integer, intent(in) :: n_resources
     type(resource_state_t), dimension(:), allocatable :: state
     call balancer%base_init (n_workers, n_resources)
     allocate (balancer%parallel_grid(n_resources), source = .false.)
     allocate (state (N_BALANCER_SIMPLE_STATES))
     call state(BALANCER_SIMPLE_CHANNEL)%init ( &
          mode = STATE_SINGLE, &
          n_workers = balancer%n_workers)
     call balancer%add_state (state)
   end subroutine balancer_simple_init
 
 @ %def balancer_simple_init
 @
 <<Balancer simple: simple: TBP>>=
   procedure :: write => balancer_simple_write
+<<Balancer simple: sub interfaces>>=
+    module subroutine balancer_simple_write (balancer, unit)
+      class(balancer_simple_t), intent(in) :: balancer
+      integer, intent(in), optional :: unit
+    end subroutine balancer_simple_write
 <<Balancer simple: procedures>>=
-  subroutine balancer_simple_write (balancer, unit)
+  module subroutine balancer_simple_write (balancer, unit)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in), optional :: unit
     integer :: u, n_size
     u = given_output_unit (unit)
     call balancer%base_write (u)
     n_size = min (25, size (balancer%parallel_grid))
     write (u, "(A,25(1X,L1))") "Parallel Grids:", balancer%parallel_grid(:n_size)
   end subroutine balancer_simple_write
 
 @ %def balancer_simple_write
-@
+@ Update balancer state.  Each worker update its own balancer state
+requiring information about the [[worker_id]].
 <<Balancer simple: simple: TBP>>=
   procedure :: update_state => balancer_simple_update_state
+<<Balancer simple: sub interfaces>>=
+    module subroutine balancer_simple_update_state &
+         (balancer, worker_id, parallel_grid)
+      class(balancer_simple_t), intent(inout) :: balancer
+      integer, intent(in) :: worker_id
+      logical, dimension(:), intent(in) :: parallel_grid
+    end subroutine balancer_simple_update_state
 <<Balancer simple: procedures>>=
-  !> Update balancer state.
-  !!
-  !! Each worker update its own balancer state requiring information about the worker_id.
-  subroutine balancer_simple_update_state (balancer, worker_id, parallel_grid)
+  module subroutine balancer_simple_update_state &
+       (balancer, worker_id, parallel_grid)
     class(balancer_simple_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     logical, dimension(:), intent(in) :: parallel_grid
     integer :: ch, worker
     balancer%parallel_grid = parallel_grid
     if (.not. allocated (balancer%state)) then
        call msg_bug ("Error: balancer state not allocated.")
     end if
     associate (state => balancer%state(BALANCER_SIMPLE_CHANNEL))
       call state%clear ()
       do ch = 1, balancer%n_resources
          if (parallel_grid(ch)) then
             call state%add_resource (ch)
          else
             worker = balancer%map_channel_to_worker (ch)
             if (worker == worker_id) then
                call state%add_resource (ch)
             end if
          end if
       end do
       call state%freeze ()
     end associate
   end subroutine balancer_simple_update_state
 
 @ %def balancer_simple_update_state
 @
 <<Balancer simple: simple: TBP>>=
   procedure :: has_resource_group => balancer_simple_has_resource_group
+<<Balancer simple: sub interfaces>>=
+    pure module function balancer_simple_has_resource_group &
+         (balancer, resource_id) result (flag)
+      class(balancer_simple_t), intent(in) :: balancer
+      integer, intent(in) :: resource_id
+      logical :: flag
+    end function balancer_simple_has_resource_group
 <<Balancer simple: procedures>>=
-  pure logical function balancer_simple_has_resource_group (balancer, resource_id) &
-       result (flag)
+  pure module function balancer_simple_has_resource_group &
+       (balancer, resource_id) result (flag)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
+    logical :: flag
     if (.not. balancer%resource(resource_id)%is_active ()) then
        flag = .false.
        return
     end if
     flag = balancer%parallel_grid (resource_id)
   end function balancer_simple_has_resource_group
 
 @ %def balancer_simple_has_resource_group
 @
 <<Balancer simple: simple: TBP>>=
   procedure :: get_resource_group => balancer_simple_get_resource_group
+<<Balancer simple: sub interfaces>>=
+    pure module subroutine balancer_simple_get_resource_group &
+         (balancer, resource_id, group)
+      class(balancer_simple_t), intent(in) :: balancer
+      integer, intent(in) :: resource_id
+      integer, dimension(:), allocatable, intent(out) :: group
+    end subroutine balancer_simple_get_resource_group
 <<Balancer simple: procedures>>=
-  pure subroutine balancer_simple_get_resource_group (balancer, resource_id, group)
+  pure module subroutine balancer_simple_get_resource_group &
+       (balancer, resource_id, group)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
     integer, dimension(:), allocatable, intent(out) :: group
     integer :: i
     if (.not. balancer%has_resource_group (resource_id)) return
     group = pack ([(i, i=1,balancer%n_workers)], &
          mask = balancer%worker%get_resource () == resource_id)
   end subroutine balancer_simple_get_resource_group
 
 @ %def balancer_simple_get_resource_group
-@
+@ Retrieve resource master holding the results to be communicated. As
+the simple balancer operates locally on each worker, we do not need to
+check whether a resource is currently active. All the resources (and
+their respective order) is fixed at each update of the balancer.
 <<Balancer simple: simple: TBP>>=
   procedure :: get_resource_master => balancer_simple_get_resource_master
+<<Balancer simple: sub interfaces>>=
+    pure module function balancer_simple_get_resource_master &
+         (balancer, resource_id) result (worker_id)
+      class(balancer_simple_t), intent(in) :: balancer
+      integer, intent(in) :: resource_id
+      integer :: worker_id
+    end function balancer_simple_get_resource_master
 <<Balancer simple: procedures>>=
-  !> Retrieve resource master holding the results to be communicated.
-  !!
-  !! As the simple balancer operates locally on each worker, we do not need to check whether a resource is currently active.
-  !! All the resources (and their respective order) is fixed at each update of the balancer.
-  pure integer function balancer_simple_get_resource_master (balancer, resource_id) &
-       result (worker_id)
+  pure module function balancer_simple_get_resource_master &
+       (balancer, resource_id) result (worker_id)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
+    integer :: worker_id
     !! \note Do NOT check on resource activation (see interface prescription).
     if (balancer%parallel_grid(resource_id)) then
        worker_id = 1
     else
        worker_id = balancer%map_channel_to_worker (resource_id)
     end if
   end function balancer_simple_get_resource_master
 
 @ %def balancer_simple_get_resource_master
 @
 <<Balancer simple: simple: TBP>>=
-  procedure, private :: map_channel_to_worker => balancer_simple_map_channel_to_worker
+  procedure, private :: map_channel_to_worker => &
+       balancer_simple_map_channel_to_worker
+<<Balancer simple: sub interfaces>>=
+    pure module function balancer_simple_map_channel_to_worker &
+         (balancer, channel) result (worker)
+      class(balancer_simple_t), intent(in) :: balancer
+      integer, intent(in) :: channel
+      integer :: worker
+    end function balancer_simple_map_channel_to_worker
 <<Balancer simple: procedures>>=
-  pure integer function balancer_simple_map_channel_to_worker (balancer, channel) &
-       result (worker)
+  pure module function balancer_simple_map_channel_to_worker &
+       (balancer, channel) result (worker)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in) :: channel
+    integer :: worker
     !! Proof: channel \in {1, N_c}, number of workers N, rank \in {0, …, N - 1}
     !! Proof: worker \in {1, …, N}
     !! a = b mod c, then 0 <= a < c
     worker = mod (channel - 1, balancer%n_workers) + 1
   end function balancer_simple_map_channel_to_worker
 
 @ %def balancer_simple_map_channel_to_worker
 @
 <<Balancer simple: simple: TBP>>=
   procedure :: assign_worker => balancer_simple_assign_worker
+<<Balancer simple: sub interfaces>>=
+    module subroutine balancer_simple_assign_worker (balancer, worker_id, resource_id)
+      class(balancer_simple_t), intent(inout) :: balancer
+      integer, intent(in) :: worker_id
+      integer, intent(out) :: resource_id
+    end subroutine balancer_simple_assign_worker
 <<Balancer simple: procedures>>=
-  subroutine balancer_simple_assign_worker (balancer, worker_id, resource_id)
+  module subroutine balancer_simple_assign_worker (balancer, worker_id, resource_id)
     class(balancer_simple_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     integer, intent(out) :: resource_id
     integer :: i
     if (.not. balancer%is_assignable (worker_id)) then
        resource_id = -1
        RETURN
     end if
     if (balancer%worker(worker_id)%is_assigned ()) then
        resource_id = balancer%worker(worker_id)%get_resource ()
        RETURN
     end if
     associate (state => balancer%state(BALANCER_SIMPLE_CHANNEL))
       if (.not. state%has_resource ()) then
          resource_id = 0
          return
       end if
       resource_id = state%assign_resource ()
       if (balancer%parallel_grid(resource_id)) then
          do i = 1, balancer%n_workers
             if (balancer%is_worker_pending (i)) then
-               write (msg_buffer, "(A,1X,I0,1X,A,1X,I0,1X,A)") "WORKER", i, "ASSIGNED"
+               write (msg_buffer, "(A,1X,I0,1X,A,1X,I0,1X,A)") &
+                    "WORKER", i, "ASSIGNED"
                call msg_bug ()
             end if
             call balancer%worker(i)%add_resource (resource_id)
          end do
-         call balancer%resource(resource_id)%set_active (n_workers = balancer%n_workers)
+         call balancer%resource(resource_id)%set_active &
+              (n_workers = balancer%n_workers)
       else
          call balancer%worker(worker_id)%add_resource (resource_id)
          call balancer%resource(resource_id)%set_active (n_workers = 1)
       end if
     end associate
   end subroutine balancer_simple_assign_worker
 
 @ %def balancer_simple_assign_worker
 @
 <<Balancer simple: simple: TBP>>=
   procedure :: free_worker => balancer_simple_free_worker
+<<Balancer simple: sub interfaces>>=
+    module subroutine balancer_simple_free_worker &
+         (balancer, worker_id, resource_id)
+      class(balancer_simple_t), intent(inout) :: balancer
+      integer, intent(in) :: worker_id
+      integer, intent(in) :: resource_id
+    end subroutine balancer_simple_free_worker
 <<Balancer simple: procedures>>=
-  subroutine balancer_simple_free_worker (balancer, worker_id, resource_id)
+  module subroutine balancer_simple_free_worker &
+       (balancer, worker_id, resource_id)
     class(balancer_simple_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     integer, intent(in) :: resource_id
     integer :: i
     if (.not. balancer%worker(worker_id)%is_assigned ()) return
-    if (.not. resource_id == balancer%worker(worker_id)%get_resource ()) then
-       call msg_bug ("Balancer simple: resource and associated resource do not match.")
+    if (.not. resource_id == &
+         balancer%worker(worker_id)%get_resource ()) then
+       call msg_bug ("Balancer simple: resource and " // &
+            "associated resource do not match.")
     end if
     associate (state => balancer%state(BALANCER_SIMPLE_CHANNEL))
       call balancer%resource(resource_id)%set_inactive ()
       call state%free_resource (resource_id)
       if (balancer%parallel_grid(resource_id)) then
          do i = 1, balancer%n_workers
             call balancer%worker(i)%free ()
          end do
       else
          call balancer%worker(worker_id)%free ()
       end if
     end associate
   end subroutine balancer_simple_free_worker
 
 @ %def balancer_simple_free_worker
 @
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Balancer Channel}
 <<[[balancer_channel.f90]]>>=
 <<File header>>
 module balancer_channel
-  use kinds, only: default
-  use io_units
-  use diagnostics
 
+<<Use kinds>>
   use balancer_base
 
 <<Standard module head>>
 
 <<Balancer channel: public>>
 
 <<Balancer channel: parameters>>
 
 <<Balancer channel: types>>
 
+  interface
+<<Balancer channel: sub interfaces>>
+  end interface
+
+end module balancer_channel
+@ %def balancer_channel
+@
+<<[[balancer_channel_sub.f90]]>>=
+<<File header>>
+
+submodule (balancer_channel) balancer_channel_s
+
+  use io_units
+  use diagnostics
+
+  implicit none
+
 contains
 
 <<Balancer channel: procedures>>
 
-end module balancer_channel
-@ %def balancer_channel
+end submodule balancer_channel_s
+
+@ %def balancer_channel_s
 @
 <<Balancer channel: parameters>>=
   real(default), parameter :: BETA = 1.5_default
 
   integer, parameter :: N_BALANCER_CHANNEL_STATE = 2, &
        CHANNEL_STATE = 1, &
        GRID_STATE = 2
 
 @
 <<Balancer channel: public>>=
   public :: balancer_channel_t
 <<Balancer channel: types>>=
   type, extends(balancer_base_t) :: balancer_channel_t
      private
      integer :: n_parallel_grids = 0
      integer :: n_parallel_channels = 0
      integer :: n_grid_workers = 0
      integer :: n_channel_workers = 0
      logical, dimension(:), allocatable :: parallel_grid
    contains
    <<Balancer channel: base: TBP>>
   end type balancer_channel_t
 
 @ %def balancer_channel_t
 @
 <<Balancer channel: base: TBP>>=
   procedure :: init => balancer_channel_init
+<<Balancer channel: sub interfaces>>=
+    module subroutine balancer_channel_init (balancer, n_workers, n_resources)
+      class(balancer_channel_t), intent(out), target :: balancer
+      integer, intent(in) :: n_workers
+      integer, intent(in) :: n_resources
+    end subroutine balancer_channel_init
 <<Balancer channel: procedures>>=
-  subroutine balancer_channel_init (balancer, n_workers, n_resources)
+  module subroutine balancer_channel_init (balancer, n_workers, n_resources)
     class(balancer_channel_t), intent(out), target :: balancer
     integer, intent(in) :: n_workers
     integer, intent(in) :: n_resources
     call balancer%base_init (n_workers, n_resources)
     allocate (balancer%parallel_grid(n_resources), source = .false.)
   end subroutine balancer_channel_init
 
 @ %def balancer_channel_init
 @
 <<Balancer channel: base: TBP>>=
   procedure :: write => balancer_channel_write
+<<Balancer channel: sub interfaces>>=
+    module subroutine balancer_channel_write (balancer, unit)
+      class(balancer_channel_t), intent(in) :: balancer
+      integer, intent(in), optional :: unit
+    end subroutine balancer_channel_write
 <<Balancer channel: procedures>>=
-  subroutine balancer_channel_write (balancer, unit)
+  module subroutine balancer_channel_write (balancer, unit)
     class(balancer_channel_t), intent(in) :: balancer
     integer, intent(in), optional :: unit
     integer :: u, n_size
     u = given_output_unit (unit)
     write (u, "(A)") "Channel Balancer."
     write (u, "(A,1X,I3)") "Parallel grids: ", balancer%n_parallel_grids
     write (u, "(A,1X,I3)") "Parallel channels: ", balancer%n_parallel_channels
     write (u, "(A,1X,I3)") "Grid workers: ", balancer%n_grid_workers
     write (u, "(A,1X,I3)") "Channel workers: ", balancer%n_channel_workers
     n_size = min (25, size (balancer%parallel_grid))
     write (u, "(A,25(1X,L1))") "Parallel Grids:", balancer%parallel_grid(:n_size)
     call balancer%base_write (u)
   end subroutine balancer_channel_write
 
 @ %def balancer_channel_write
 @
 <<Balancer channel: base: TBP>>=
   procedure :: update_state => balancer_channel_update_state
+<<Balancer channel: sub interfaces>>=
+    module subroutine balancer_channel_update_state &
+         (balancer, weight, parallel_grid)
+      class(balancer_channel_t), intent(inout) :: balancer
+      real(default), dimension(:), intent(in) :: weight
+      logical, dimension(:), intent(in) :: parallel_grid
+    end subroutine balancer_channel_update_state
 <<Balancer channel: procedures>>=
-  subroutine balancer_channel_update_state (balancer, weight, parallel_grid)
+  module subroutine balancer_channel_update_state &
+       (balancer, weight, parallel_grid)
     class(balancer_channel_t), intent(inout) :: balancer
     real(default), dimension(:), intent(in) :: weight
     logical, dimension(:), intent(in) :: parallel_grid
     real(default) :: min_parallel_weight
     balancer%parallel_grid = parallel_grid
-    min_parallel_weight = balancer%n_resources**(1._default - 1_default / BETA) &
+    min_parallel_weight = &
+         balancer%n_resources**(1._default - 1_default / BETA) &
          / balancer%n_workers**BETA
     balancer%parallel_grid = &
          balancer%parallel_grid .and. (weight >= min_parallel_weight)
     if (balancer%n_resources >= balancer%n_workers) then
        !! Apply full multi-channel parallelization.
        balancer%n_parallel_grids = 0
        balancer%n_parallel_channels = balancer%n_resources
        balancer%parallel_grid = .false.
        balancer%n_grid_workers = 0
        balancer%n_channel_workers = balancer%n_workers
     else
        if (count (balancer%parallel_grid) == balancer%n_resources) then
           !! Apply full VEGAS parallelization.
           balancer%n_parallel_grids = balancer%n_resources
           balancer%n_parallel_channels = 0
           balancer%n_grid_workers = balancer%n_workers
           balancer%n_channel_workers = 0
        else
           !! Apply mixed mode.
           balancer%n_parallel_grids = count (balancer%parallel_grid)
-          balancer%n_parallel_channels = balancer%n_resources - balancer%n_parallel_grids
+          balancer%n_parallel_channels = balancer%n_resources - &
+               balancer%n_parallel_grids
           call compute_mixed_mode (weight)
        end if
     end if
     if(allocated (balancer%state)) then
        deallocate (balancer%state)
     end if
     call allocate_state ()
   contains
     subroutine compute_mixed_mode (weight)
       real(default), dimension(:), intent(in) :: weight
       real(default) :: weight_parallel_grids, &
            ratio_weight, &
            ratio_n_channels, &
            ratio
       !! Apply mixed mode.
       weight_parallel_grids = sum (weight, balancer%parallel_grid)
       !! Overall normalization of weight, \f$\alpha_{\text{grids}} +
       !! \alpha_{\text{channels}} = 1\f$.
       !! \f$\alpha_{\text{channels}} = 1 - \alpha_{\text{grids}}\f$
       ratio_weight = weight_parallel_grids / (1 - weight_parallel_grids)
       ratio_n_channels = real (balancer%n_parallel_grids, default) &
            / (balancer%n_resources - balancer%n_parallel_grids)
       !! The average computation of channel is proportional to its weight.
       !! Reweight number of channels (per mode) by their summed weights.
       !! R = w * N / (w * N + w' * N'); primed refer to parallel grid entities.
       !!   = 1 / (1 + w' / w * N' / N)
       ratio = 1 / (1  + ratio_weight * ratio_n_channels)
       ratio = min (max (ratio, 0.0_default), 1.0_default) !! Safe-guard ratio computation.
       !! In the case of small numbers of workers and a very small ratio,
       !! nint can assign no worker to channel/grid parallelization,
       !! which is still requested by n_parallel_channels/grids.
       !! In that case, we have to enforce: n_worker = n_channel_worker + n_grid_worker
       balancer%n_channel_workers = nint (ratio * balancer%n_workers)
       balancer%n_grid_workers = nint ((1 - ratio) * balancer%n_workers)
       !! In the case of small numbers of workers and a very small ratio,
       !! nint can assign no worker to channel/grid parallelization,
       !! which is still requested by n_parallel_channels/grids.
       !! In that case, we have to enforce: n_worker = n_channel_worker + n_grid_worker
       if (balancer%n_workers >= 2 &
            .AND. (balancer%n_parallel_channels > 0 .and. balancer%n_channel_workers < 1)) then
          balancer%n_channel_workers = 1
          balancer%n_grid_workers = balancer%n_grid_workers - 1
       end if
       !! The grid resources will only be increased to N = 2
       !! if more than 3 workers are present.
       if (balancer%n_workers >= 3 &
            .AND. (balancer%n_parallel_grids > 0 .and. balancer%n_grid_workers < 2)) then
          balancer%n_grid_workers = 2
          balancer%n_channel_workers = balancer%n_channel_workers - 2
       end if
     end subroutine compute_mixed_mode
 
     subroutine allocate_state ()
       type(resource_state_t), dimension(:), allocatable :: state
       integer :: ch
       allocate (state(N_BALANCER_CHANNEL_STATE))
       call state(CHANNEL_STATE)%init ( &
            mode = STATE_SINGLE, &
            n_workers = balancer%n_channel_workers)
       call state(GRID_STATE)%init ( &
            mode = STATE_ALL, &
            n_workers = balancer%n_grid_workers)
       do ch = 1, balancer%n_resources
          if (balancer%parallel_grid(ch)) then
             call state(GRID_STATE)%add_resource (ch)
          else
             call state(CHANNEL_STATE)%add_resource (ch)
          end if
       end do
       call state(CHANNEL_STATE)%freeze ()
       call state(GRID_STATE)%freeze ()
       call balancer%add_state (state)
     end subroutine allocate_state
   end subroutine balancer_channel_update_state
 
 @ %def balancer_channel_update_state
 @
 <<Balancer channel: base: TBP>>=
   procedure :: has_resource_group => balancer_channel_has_resource_group
+<<Balancer channel: sub interfaces>>=
+    pure module function balancer_channel_has_resource_group &
+         (balancer, resource_id) result (flag)
+      class(balancer_channel_t), intent(in) :: balancer
+      integer, intent(in) :: resource_id
+      logical :: flag
+    end function balancer_channel_has_resource_group
 <<Balancer channel: procedures>>=
-  pure function balancer_channel_has_resource_group (balancer, resource_id) &
-       result (flag)
+  pure module function balancer_channel_has_resource_group &
+       (balancer, resource_id) result (flag)
     class(balancer_channel_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
     logical :: flag
     if (.not. balancer%resource(resource_id)%is_active ()) then
        flag = .false.
        return
     end if
     flag = balancer%parallel_grid(resource_id)
   end function balancer_channel_has_resource_group
 
 @ %def balancer_channel_has_resource_group
 @
 <<Balancer channel: base: TBP>>=
   procedure :: get_resource_group => balancer_channel_get_resource_group
+<<Balancer channel: sub interfaces>>=
+    pure module subroutine balancer_channel_get_resource_group &
+         (balancer, resource_id, group)
+      class(balancer_channel_t), intent(in) :: balancer
+      integer, intent(in) :: resource_id
+      integer, dimension(:), allocatable, intent(out) :: group
+    end subroutine balancer_channel_get_resource_group
 <<Balancer channel: procedures>>=
-  pure subroutine balancer_channel_get_resource_group (balancer, resource_id, group)
+  pure module subroutine balancer_channel_get_resource_group &
+       (balancer, resource_id, group)
     class(balancer_channel_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
     integer, dimension(:), allocatable, intent(out) :: group
     integer :: i
     if (.not. balancer%has_resource_group (resource_id)) return
     group = pack ([(i, i = 1, balancer%n_workers)], &
          mask = balancer%worker%get_resource () == resource_id)
   end subroutine balancer_channel_get_resource_group
 
 @ %def balancer_channel_get_resource_group
 @
 <<Balancer channel: base: TBP>>=
   procedure :: get_resource_master => balancer_channel_get_resource_master
+<<Balancer channel: sub interfaces>>=
+    pure module function balancer_channel_get_resource_master &
+         (balancer, resource_id) result (worker_id)
+      class(balancer_channel_t), intent(in) :: balancer
+      integer, intent(in) :: resource_id
+      integer :: worker_id
+    end function balancer_channel_get_resource_master
 <<Balancer channel: procedures>>=
-  pure integer function balancer_channel_get_resource_master (balancer, resource_id) &
-       result (worker_id)
+  pure module function balancer_channel_get_resource_master &
+       (balancer, resource_id) result (worker_id)
     class(balancer_channel_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
+    integer :: worker_id
     integer :: i
     if (.not. balancer%resource(resource_id)%is_active ()) then
        worker_id = -1
        return
     end if
     !! Linear search.
     !! First element in worker group is defined as master.
     associate (worker => balancer%worker)
       do i = 1, balancer%n_workers
          if (worker(i)%get_resource () == resource_id) then
             worker_id = i
             exit
          end if
       end do
     end associate
   end function balancer_channel_get_resource_master
 
 @ %def balancer_channel_get_resource_master
 @
 <<Balancer channel: base: TBP>>=
   procedure :: assign_worker => balancer_channel_assign_worker
+<<Balancer channel: sub interfaces>>=
+    module subroutine balancer_channel_assign_worker &
+         (balancer, worker_id, resource_id)
+      class(balancer_channel_t), intent(inout) :: balancer
+      integer, intent(in) :: worker_id
+      integer, intent(out) :: resource_id
+    end subroutine balancer_channel_assign_worker
 <<Balancer channel: procedures>>=
-  subroutine balancer_channel_assign_worker (balancer, worker_id, resource_id)
+  module subroutine balancer_channel_assign_worker &
+       (balancer, worker_id, resource_id)
     class(balancer_channel_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     integer, intent(out) :: resource_id
     integer :: i_state
     if (.not. balancer%is_assignable (worker_id)) then
        resource_id = -1
        return
     end if
     if (balancer%worker(worker_id)%is_assigned ()) then
        resource_id = balancer%worker(worker_id)%get_resource ()
        return
     end if
     associate (state => balancer%state)
       i_state = balancer%worker(worker_id)%get_state ()
       if (.not. state(i_state)%has_resource ()) then
          resource_id = 0
          return
       end if
       resource_id = state(i_state)%assign_resource ()
       select case (state(i_state)%mode)
       case (STATE_SINGLE)
          call balancer%worker(worker_id)%add_resource (resource_id)
          call balancer%resource(resource_id)%set_active (n_workers = 1)
       case (STATE_ALL)
          call fill_resource_group (i_state, resource_id)
       end select
     end associate
   contains
     subroutine fill_resource_group (i_state, resource_id)
       integer, intent(in) :: i_state
       integer, intent(in) :: resource_id
       integer :: i, n_workers
       n_workers = 0
       do i = 1, balancer%n_workers
          if (.not. balancer%worker(i)%get_state () == i_state) cycle
          if (balancer%is_worker_pending (i)) then
-            write (msg_buffer, "(A,1X,I0,1X,A,1X,I0,1X,A)") "WORKER", i, "STATE", i_state, "ASSIGNED"
+            write (msg_buffer, "(A,1X,I0,1X,A,1X,I0,1X,A)") &
+                 "WORKER", i, "STATE", i_state, "ASSIGNED"
             call msg_bug ()
          end if
          call balancer%worker(i)%add_resource (resource_id)
          n_workers = n_workers + 1
       end do
       if (n_workers /= balancer%state(i_state)%n_workers) then
-         call msg_bug ("Number of assigned workers unequal to number of state workers.")
+         call msg_bug ("Number of assigned workers unequal to " // &
+              "number of state workers.")
       end if
       call balancer%resource(resource_id)%set_active (n_workers = n_workers)
     end subroutine fill_resource_group
   end subroutine balancer_channel_assign_worker
 
 @ %def balancer_channel_assign_worker
 @  
 Free worker from associated resource. Idempotent. Depending on state
 association, given resource must equal worker's resource (check) for
 single state. For all state, the *current* resource of the worker may
 differ (grouping behavior!), only in case, that the older resource is
 inactive, return as idempotent. Else, free all worker from resource
 group.
 <<Balancer channel: base: TBP>>=
   procedure :: free_worker => balancer_channel_free_worker
+<<Balancer channel: sub interfaces>>=
+    module subroutine balancer_channel_free_worker &
+         (balancer, worker_id, resource_id)
+      class(balancer_channel_t), intent(inout) :: balancer
+      integer, intent(in) :: worker_id
+      integer, intent(in) :: resource_id
+    end subroutine balancer_channel_free_worker
 <<Balancer channel: procedures>>=
-  subroutine balancer_channel_free_worker (balancer, worker_id, resource_id)
+  module subroutine balancer_channel_free_worker &
+       (balancer, worker_id, resource_id)
     class(balancer_channel_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     integer, intent(in) :: resource_id
     integer :: i, i_state
     if (.not. balancer%worker(worker_id)%is_assigned ()) return
     associate (state => balancer%state)
       i_state = balancer%worker(worker_id)%get_state ()
       select case (state(i_state)%mode)
       case (STATE_SINGLE)
-         if (.not. resource_id == balancer%worker(worker_id)%get_resource ()) then
-            call msg_bug ("Channel balancer: resource and associated resource do not match.")
+         if (.not. resource_id == &
+              balancer%worker(worker_id)%get_resource ()) then
+            call msg_bug ("Channel balancer: resource and associated " // &
+                 "resource do not match.")
          end if
          call balancer%resource(resource_id)%set_inactive ()
          call state(i_state)%free_resource (resource_id)
          call balancer%worker(worker_id)%free ()
       case (STATE_ALL)
          if (resource_id /= balancer%worker(worker_id)%get_resource ()) then
             if (balancer%resource(resource_id)%is_active ()) then
                msg_buffer = "Channel balancer: resource is still active,&
                     & but worker is assigned to another resource."
                call msg_bug ()
             else
-               !! Special case: Worker was already freed from (now inactive) resource_id (by another call to free_worker),
-               !! and in the mean time assigned to a new resource. So, nothing to do.
+               !! Special case: Worker was already freed from (now inactive)
+               !! resource_id (by another call to free_worker),
+               !! and in the mean time assigned to a new resource.
+               !! So, nothing to do.
                return
             end if
          end if
          call balancer%resource(resource_id)%set_inactive ()
          call state(i_state)%free_resource (resource_id)
          do i = 1, balancer%n_workers
             if (.not. balancer%worker(i)%get_state () == i_state) cycle
             call balancer%worker(i)%free ()
          end do
       end select
     end associate
   end subroutine balancer_channel_free_worker
 
 @ %def balancer_channel_free_worker
 @
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Request Callback}
 
 \begin{description}
 \item[request handler] Base type and interface for providing two-sided
   communication in a callback fashion. 
 \item[request handler manager] Manages handler with a binary tree.
 \end{description}
+
+This module constitutes the request handler. A request handler allows
+to dispatch an object for communication a priori for a slave and a
+master. Typically, each slave register its own request handles,
+whereas the master requests all possible requests handles matching
+those of the slaves. The requests can then be used later on,
+e.g. during a computation, a slave may add a request to the master
+worker and starts sending an object. The master worker looks up the
+appropriate request handler, which then closes the communication,
+i.e. by a receive call. Most important: Only a pointer to the buffer
+object is stored, therefore, the calling function has to ensure, that
+the communication object (buffer) will not be changed during a request
+handle (receive or send).
+
+Remark: The handler allows for a tag offset which allows to uniqify
+the the communication. The problem occurs when multiple callback need
+to handled simultanously and MPI needs to connect the communication
+calls accordingly. Each message has a tuple of (source, tag, comm)
+associated, we can uniquify this tuple by a unique tag. [[tag]] $=$
+[[tag_offset]] + $\Bigl\{ 1, …,$ [[N_requests]] $\Bigr\}$ where
+[[tag_offsets]] are multiple of [[N_requests]]. The latter condition
+should checked by a modulo. What happens if the communication is out
+of order (is this a problem? check with standard)?
 <<[[request_callback.f90]]>>=
 module request_callback
+
   use, intrinsic :: iso_fortran_env, only: ERROR_UNIT
   use binary_tree
-  use diagnostics
 
   use mpi_f08 !NODEP!
 
 <<Standard module head>>
 
-  !> Request handler.
-  !!
-  !! A request handler allows to dispatch an object for communication a priori
-  !! for a slave and a master.
-  !! Typically, each slave register its own request handles, whereas the master
-  !! requests all possible requests handles matching those of the slaves.
-  !! The requests can then be used later on, e.g. during a computation, a slave
-  !! may add a request to the master worker and starts sending an object.
-  !! The master worker looks up the appropriate request handler, which then
-  !! closes the communication, i.e. by a receive call.
-  !! Most important: Only a pointer to the buffer object is stored, therefore, the calling function
-  !! has to ensure, that the communication object (buffer) will not be changed
-  !! during a request handle (receive or send).
-  !!
-  !! Remark: The handler allows for a tag offset which allows to uniqify the the communication.
-  !!         The problem occurs when multiple callback need to handled simultanously and MPI needs to connect the communication calls accordingly.
-  !!         Each message has a tuple of (source, tag, comm) associated, we can uniquify this tuple by a unique tag.
-  !!         tag = tag_offset + {1, …, N_requests} where tag_offsets are multiple of N_requests.
-  !!         The latter condition should checked by a modulo.
-  !! What happens if the communication is out of order (is this a problem? check with standard)?
-
 <<Request callback: public>>
 
 <<Request callback: types>>
 
 <<Request callback: interfaces>>
 
+  interface
+<<Request callback: sub interfaces>>
+  end interface
+
+end module request_callback
+@ %def request_callback
+@
+<<[[request_callback_sub.f90]]>>=
+<<File header>>
+
+submodule (request_callback) request_callback_s
+
+  use diagnostics
+
+  implicit none
+
 contains
 
 <<Request callback: procedures>>
 
-end module request_callback
-@ %def request_callback
+end submodule request_callback_s
+
+@ %def request_callback_s
 @
 <<Request callback: public>>=
   public :: request_handler_t
 <<Request callback: types>>=
   type, abstract :: request_handler_t
      integer :: n_requests = 0
      integer :: tag_offset = 0
      type(MPI_REQUEST), dimension(:), allocatable :: request
      type(MPI_STATUS), dimension(:), allocatable :: status
      logical :: activated = .false.
      logical :: finished = .false.
    contains
      procedure :: base_write => request_handler_base_write
      procedure(request_handler_write), deferred :: write
      !! \todo{sbrass} implement initialization procedure.
      procedure(request_handler_handle), deferred :: handle
      procedure(request_handler_client_handle), deferred :: client_handle
      procedure :: allocate => request_handler_allocate
      procedure :: get_status => request_handler_get_status
      procedure :: testall => request_handler_testall
      procedure :: waitall => request_handler_waitall
      procedure :: free => request_handler_free
   end type request_handler_t
 
 @ %def request_handler_t
 @
 <<Request callback: public>>=
   public :: request_handler_manager_t
 <<Request callback: types>>=
   type :: request_handler_manager_t
      private
      type(MPI_COMM) :: comm
      type(binary_tree_t) :: tree
    contains
-   <<Request handler: handler manager: TBP>>
+   <<Request callback: handler manager: TBP>>
   end type request_handler_manager_t
 
 @ %def request_handler_manager_t
 @
 <<Request callback: interfaces>>=
   abstract interface
      subroutine request_handler_write (handler, unit)
        import :: request_handler_t
        class(request_handler_t), intent(in) :: handler
        integer, intent(in), optional :: unit
      end subroutine request_handler_write
   end interface
 
 @ %def request_handler_write
 @
 <<Request callback: interfaces>>=
   abstract interface
      !> Handle a request from server side.
      !!
      !! The message tag can be used in order to uniquify the respective messages between master and slave.
      !! E.g. by explicitly setting it, or by using it in a computation i * N_R + j, i \in {1, …, N} and j \in {1, …, N_R}.
      !!
      !! Must set *activated* to .true. when called.
      !! \param[in] source Integer rank of the source in comm.
      !! \param[in] tag Specify the message tag.
      !! \param[in] comm MPI communicator.
      subroutine request_handler_handle (handler, source_rank, comm)
        import :: request_handler_t, MPI_COMM
        class(request_handler_t), intent(inout) :: handler
        integer, intent(in) :: source_rank
        type(MPI_COMM), intent(in) :: comm
      end subroutine request_handler_handle
   end interface
 
 @ %def request_handler_handle
 @
 <<Request callback: interfaces>>=
   abstract interface
      !> Handle a request from client side.
      !!
      !! Must set *activated* to .true. when called.
      !! \param[in] rank Integer of the receiver in comm.
      !! \param[in] tag Specify the message tag.
      !! \param[in] comm MPI communicator.
      subroutine request_handler_client_handle (handler, dest_rank, comm)
        import :: request_handler_t, MPI_COMM
        class(request_handler_t), intent(inout) :: handler
        integer, intent(in) :: dest_rank
        type(MPI_COMM), intent(in) :: comm
      end subroutine request_handler_client_handle
   end interface
 
 @ %def request_handler_client_handle
-@
+@ Request handler. Write routine.
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_base_write (handler, unit)
+      class(request_handler_t), intent(in) :: handler
+      integer, intent(in), optional :: unit
+    end subroutine request_handler_base_write
 <<Request callback: procedures>>=
-  !!
-  !! Request handler.
-  !!
-  subroutine request_handler_base_write (handler, unit)
+  module subroutine request_handler_base_write (handler, unit)
     class(request_handler_t), intent(in) :: handler
     integer, intent(in), optional :: unit
     integer :: u, i
     u = ERROR_UNIT; if (present (unit)) u = unit
     write (u, "(A,1X,I0)") "N_REQUESTS", handler%n_requests
     write (u, "(A,1X,I0)") "TAG_OFFSET", handler%tag_offset
     write (u, "(A,1X,L1)") "FINISHED", handler%finished
     write (u, "(A,1X,L1)") "ACTIVATED", handler%activated
     write (u, "(A)") "I | SOURCE | TAG | ERROR | REQUEST_NULL"
     do i = 1, handler%n_requests
        write (u, "(A,4(1X,I0),1X,L1)") "REQUEST", i, &
             handler%status(i)%MPI_SOURCE, &
             handler%status(i)%MPI_TAG, &
             handler%status(i)%MPI_ERROR, &
             (handler%request(i) == MPI_REQUEST_NULL)
     end do
   end subroutine request_handler_base_write
 
 @ %def request_handler_base_write
-@
+@ Allocate MPI request and status object. Must be called during or
+after object-initialization.
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_allocate (handler, n_requests, tag_offset)
+      class(request_handler_t), intent(inout) :: handler
+      integer, intent(in) :: n_requests
+      integer, intent(in) :: tag_offset
+    end subroutine request_handler_allocate
 <<Request callback: procedures>>=
-  !> Allocate MPI request and status object.
-  !!
-  !! Must be called during or after object-initialization.
-  !!
   !! \param[inout] handler Handler must be intent inout, as the calling function may already manipulated the extended object.
   !! \param[in] n_requests Number of MPI requests the objects needs to be able to handle.
   !! \param[in] tag_offset First tag to be used, all other must follow in an increasing manner until tag_offset + (N_r + 1).
   !! Proof: tag \in {tag_offset, tag_offset + n_requests}.
-  subroutine request_handler_allocate (handler, n_requests, tag_offset)
+  module subroutine request_handler_allocate (handler, n_requests, tag_offset)
     class(request_handler_t), intent(inout) :: handler
     integer, intent(in) :: n_requests
     integer, intent(in) :: tag_offset
     allocate (handler%request(n_requests), source = MPI_REQUEST_NULL)
     allocate (handler%status(n_requests))
     handler%n_requests = n_requests
     if (mod (tag_offset, n_requests) /= 0) &
-         call msg_bug ("Error during handler allocate, tag_offset is not a multiple of n_requests.")
+         call msg_bug ("Error during handler allocate, tag_offset " // &
+         "is not a multiple of n_requests.")
     !! What is the max.-allowed MPI_TAG?
     handler%tag_offset = tag_offset
     handler%activated = .false.
     handler%finished = .false.
   end subroutine request_handler_allocate
 
 @ %def request_handler_allocate
-@
+@ Get status from request objects in a non-destructive way.
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_get_status (handler)
+      class(request_handler_t), intent(inout) :: handler
+    end subroutine request_handler_get_status
 <<Request callback: procedures>>=
-  !> Get status from request objects in a non-destructive way.
-  subroutine request_handler_get_status (handler)
+  module subroutine request_handler_get_status (handler)
     class(request_handler_t), intent(inout) :: handler
     integer :: i
     logical :: flag
     if (.not. handler%activated) return
     handler%finished = .true.
     do i = 1, handler%n_requests
        call MPI_REQUEST_GET_STATUS (handler%request(i), flag, &
             handler%status(i))
        handler%finished = handler%finished .and. flag
     end do
   end subroutine request_handler_get_status
 
 @ %def request_handler_get_status
-@
+@ Call [[MPI_WATIALL]] and raise finished flag.
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_waitall (handler)
+      class(request_handler_t), intent(inout) :: handler
+    end subroutine request_handler_waitall
 <<Request callback: procedures>>=
-  !> Call MPI_WATIALL and raise finished flag.
-  subroutine request_handler_waitall (handler)
+  module subroutine request_handler_waitall (handler)
     class(request_handler_t), intent(inout) :: handler
     integer :: error
     if (.not. handler%activated .or. handler%finished) return
-    call MPI_WAITALL (handler%n_requests, handler%request, handler%status, error)
+    call MPI_WAITALL (handler%n_requests, handler%request, &
+         handler%status, error)
     if (error /= 0) then
        call msg_bug ("Request: Error occured during waitall on handler.")
     end if
     handler%finished = .true.
   end subroutine request_handler_waitall
 
 @ %def request_handler_waitall
 @
+<<Request callback: sub interfaces>>=
+    module function request_handler_testall (handler) result (flag)
+      class(request_handler_t), intent(inout) :: handler
+      logical :: flag
+    end function request_handler_testall
 <<Request callback: procedures>>=
-  logical function request_handler_testall (handler) result (flag)
+  module function request_handler_testall (handler) result (flag)
     class(request_handler_t), intent(inout) :: handler
+    logical :: flag
     integer :: error
     if (.not. handler%activated .or. .not. handler%finished) then
-       call MPI_TESTALL (handler%n_requests, handler%request, handler%finished, &
-            handler%status, error)
+       call MPI_TESTALL (handler%n_requests, handler%request, &
+            handler%finished, handler%status, error)
        ! call print_status ()
        if (error /= 0) then
           call msg_bug ("Request: Error occured during testall on handler.")
        end if
     end if
     flag = handler%finished
   contains
     subroutine print_status ()
       integer :: i
       do i = 1, handler%n_requests
          associate (status => handler%status(i))
-           write (ERROR_UNIT, *) status%MPI_SOURCE, status%MPI_TAG, status%MPI_ERROR
+           write (ERROR_UNIT, *) status%MPI_SOURCE, status%MPI_TAG, &
+                status%MPI_ERROR
          end associate
       end do
     end subroutine print_status
   end function request_handler_testall
 
 @ %def request_handler_testall
 @
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_free (handler)
+      class(request_handler_t), intent(inout) :: handler
+    end subroutine request_handler_free
 <<Request callback: procedures>>=
-  subroutine request_handler_free (handler)
+  module subroutine request_handler_free (handler)
     class(request_handler_t), intent(inout) :: handler
     integer :: i, error
     do i = 1, handler%n_requests
        if (handler%request(i) == MPI_REQUEST_NULL) cycle
        call MPI_REQUEST_FREE (handler%request(i), error)
        if (error /= 0) then
-          call msg_bug ("Request: Error occured during free request on handler.")
+          call msg_bug ("Request: Error occured during free " // &
+               "request on handler.")
        end if
     end do
   end subroutine request_handler_free
 
 @ %def request_handler_free
-@
-<<Request handler: handler manager: TBP>>=
+@ Request handler manager.
+<<Request callback: handler manager: TBP>>=
   procedure :: init => request_handler_manager_init
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_init (rhm, comm)
+      class(request_handler_manager_t), intent(out) :: rhm
+      type(MPI_COMM), intent(in) :: comm
+    end subroutine request_handler_manager_init
 <<Request callback: procedures>>=
-  !!
-  !! Request handler manager.
-  !!
-  subroutine request_handler_manager_init (rhm, comm)
+  module subroutine request_handler_manager_init (rhm, comm)
     class(request_handler_manager_t), intent(out) :: rhm
     type(MPI_COMM), intent(in) :: comm
     call MPI_COMM_DUP (comm, rhm%comm)
   end subroutine request_handler_manager_init
 
 @ %def request_handler_manager_init 
 @
-<<Request handler: handler manager: TBP>>=
+<<Request callback: handler manager: TBP>>=
   procedure :: write => request_handler_manager_write
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_write (rhm, unit)
+      class(request_handler_manager_t), intent(in) :: rhm
+      integer, intent(in), optional :: unit
+    end subroutine request_handler_manager_write
 <<Request callback: procedures>>=
-  subroutine request_handler_manager_write (rhm, unit)
+  module subroutine request_handler_manager_write (rhm, unit)
     class(request_handler_manager_t), intent(in) :: rhm
     integer, intent(in), optional :: unit
     integer :: u
     u = ERROR_UNIT; if (present (unit)) u = unit
     write (u, "(A)") "[REQUEST_CALLBACK_MANAGER]"
     call rhm%tree%write (u)
   end subroutine request_handler_manager_write
 
 @ %def request_handler_manager_write
 @
-<<Request handler: handler manager: TBP>>=
+<<Request callback: handler manager: TBP>>=
   procedure :: add => request_handler_manager_add
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_add (rhm, handler_id, handler)
+      class(request_handler_manager_t), intent(inout) :: rhm
+      integer, intent(in) :: handler_id
+      class(request_handler_t), pointer, intent(in) :: handler
+    end subroutine request_handler_manager_add
 <<Request callback: procedures>>=
-  subroutine request_handler_manager_add (rhm, handler_id, handler)
+  module subroutine request_handler_manager_add (rhm, handler_id, handler)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer, intent(in) :: handler
     class(*), pointer :: obj
     obj => handler
     call rhm%tree%insert (handler_id, obj)
   end subroutine request_handler_manager_add
 
 @ %def request_handler_manager_add
 @
-<<Request handler: handler manager: TBP>>=
+<<Request callback: handler manager: TBP>>=
   procedure :: clear => request_handler_manager_clear
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_clear (rhm)
+      class(request_handler_manager_t), intent(inout) :: rhm
+    end subroutine request_handler_manager_clear
 <<Request callback: procedures>>=
-  subroutine request_handler_manager_clear (rhm)
+  module subroutine request_handler_manager_clear (rhm)
     class(request_handler_manager_t), intent(inout) :: rhm
     call rhm%tree%clear ()
   end subroutine request_handler_manager_clear
 
 @ %def request_handler_manager_clear
-@
-<<Request handler: handler manager: TBP>>=
+@ Get status (in a non-destructive way) for all associated handler.
+<<Request callback: handler manager: TBP>>=
   procedure, private :: fill_status => request_handler_manager_fill_status
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_fill_status (rhm)
+      class(request_handler_manager_t), intent(inout) :: rhm
+    end subroutine request_handler_manager_fill_status
 <<Request callback: procedures>>=
-  !> Get status (in a non-destructive way) for all associated handler.
-  subroutine request_handler_manager_fill_status (rhm)
+  module subroutine request_handler_manager_fill_status (rhm)
     class(request_handler_manager_t), intent(inout) :: rhm
     type(binary_tree_iterator_t) :: iterator
     integer :: handler_id
     class(request_handler_t), pointer :: handler
     call iterator%init (rhm%tree)
     do while (iterator%is_iterable ())
        call iterator%next (handler_id)
        call rhm%handler_at (handler_id, handler)
        call handler%get_status ()
     end do
   end subroutine request_handler_manager_fill_status
 
 @ %def request_handler_manager_fill_status
 @
-<<Request handler: handler manager: TBP>>=
+<<Request callback: handler manager: TBP>>=
   procedure :: test => request_handler_manager_test
+<<Request callback: sub interfaces>>=
+    module function request_handler_manager_test &
+         (rhm, handler_id) result (flag)
+      class(request_handler_manager_t), intent(inout) :: rhm
+      integer, intent(in) :: handler_id
+      logical :: flag
+    end function request_handler_manager_test
 <<Request callback: procedures>>=
-  logical function request_handler_manager_test (rhm, handler_id) result (flag)
+  module function request_handler_manager_test &
+       (rhm, handler_id) result (flag)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
+    logical :: flag
     class(request_handler_t), pointer :: handler
     call rhm%handler_at (handler_id, handler)
     flag = handler%testall ()
   end function request_handler_manager_test
 
 @ %def request_handler_manager_test
 @
-<<Request handler: handler manager: TBP>>=
+<<Request callback: handler manager: TBP>>=
   procedure :: wait => request_handler_manager_wait
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_wait (rhm, handler_id)
+      class(request_handler_manager_t), intent(inout) :: rhm
+      integer, intent(in) :: handler_id
+    end subroutine request_handler_manager_wait
 <<Request callback: procedures>>=
-  subroutine request_handler_manager_wait (rhm, handler_id)
+  module subroutine request_handler_manager_wait (rhm, handler_id)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer :: handler
     call rhm%handler_at (handler_id, handler)
     call handler%waitall ()
   end subroutine request_handler_manager_wait
 
 @ %def request_handler_manager_wait
 @
-<<Request handler: handler manager: TBP>>=
+<<Request callback: handler manager: TBP>>=
   procedure :: waitall => request_handler_manager_waitall
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_waitall (rhm)
+      class(request_handler_manager_t), intent(inout) :: rhm
+    end subroutine request_handler_manager_waitall
 <<Request callback: procedures>>=
-  subroutine request_handler_manager_waitall (rhm)
+  module subroutine request_handler_manager_waitall (rhm)
     class(request_handler_manager_t), intent(inout) :: rhm
     type(binary_tree_iterator_t) :: iterator
     integer :: handler_id
     call iterator%init (rhm%tree)
     do while (iterator%is_iterable ())
        call iterator%next (handler_id)
        !! Test handler (destructive test on request handler).
        if (.not. rhm%test (handler_id)) &
             call rhm%wait (handler_id)
     end do
   end subroutine request_handler_manager_waitall
 
 @ %def request_handler_manager_waitall
 @
-<<Request handler: handler manager: TBP>>=
+<<Request callback: handler manager: TBP>>=
   procedure, private :: handler_at => request_handler_manager_handler_at
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_handler_at &
+         (rhm, handler_id, handler)
+      class(request_handler_manager_t), intent(in) :: rhm
+      integer, intent(in) :: handler_id
+      class(request_handler_t), pointer, intent(out) :: handler
+    end subroutine request_handler_manager_handler_at
 <<Request callback: procedures>>=
-  subroutine request_handler_manager_handler_at (rhm, handler_id, handler)
+  module subroutine request_handler_manager_handler_at &
+       (rhm, handler_id, handler)
     class(request_handler_manager_t), intent(in) :: rhm
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer, intent(out) :: handler
     class(*), pointer :: obj
     call rhm%tree%search (handler_id, obj)
     select type (obj)
     class is (request_handler_t)
        handler => obj
     class default
        call msg_bug ("Object is not derived from request_handler_t.")
     end select
   end subroutine request_handler_manager_handler_at
 
 @ %def request_handler_manager_handler_at
 @
-<<Request handler: handler manager: TBP>>=
+<<Request callback: handler manager: TBP>>=
   procedure :: has_handler => request_handler_manager_has_handler
+<<Request callback: sub interfaces>>=
+    module function request_handler_manager_has_handler &
+         (rhm, handler_id) result (flag)
+      class(request_handler_manager_t), intent(inout) :: rhm
+      integer, intent(in) :: handler_id
+      logical :: flag
+    end function request_handler_manager_has_handler
 <<Request callback: procedures>>=
-  function request_handler_manager_has_handler (rhm, handler_id) result (flag)
+  module function request_handler_manager_has_handler &
+       (rhm, handler_id) result (flag)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     logical :: flag
     flag = rhm%tree%has_key (handler_id)
   end function request_handler_manager_has_handler
 
 @ %def request_handler_manager_has_handler
-@
-<<Request handler: handler manager: TBP>>=
+@ Call server-sided procedure of callback with [[handler_id]].
+Ingoing variables are [[handler_id]] and [[source]].
+<<Request callback: handler manager: TBP>>=
   procedure :: callback => request_handler_manager_callback
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_callback &
+         (rhm, handler_id, source_rank)
+      class(request_handler_manager_t), intent(inout) :: rhm
+      integer, intent(in) :: handler_id
+      integer, intent(in) :: source_rank
+    end subroutine request_handler_manager_callback
 <<Request callback: procedures>>=
-  !> Call server-sided procedure of callback with handler_id.
-  !!
-  !! \param[in] handler_id
-  !! \param[in] source
-  subroutine request_handler_manager_callback (rhm, handler_id, source_rank)
+  module subroutine request_handler_manager_callback &
+       (rhm, handler_id, source_rank)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     integer, intent(in) :: source_rank
     class(request_handler_t), pointer :: handler
     if (.not. rhm%tree%has_key (handler_id)) return
     call rhm%handler_at (handler_id, handler)
     call handler%handle (source_rank = source_rank, comm = rhm%comm)
   end subroutine request_handler_manager_callback
 
 @ %def request_handler_manager_callback
-@
-<<Request handler: handler manager: TBP>>=
+@ Call client-sided procedure of callback with [[handler_id]], which
+is the input, as well as the destination rank.
+<<Request callback: handler manager: TBP>>=
   procedure :: client_callback => request_handler_manager_client_callback
+<<Request callback: sub interfaces>>=
+    module subroutine request_handler_manager_client_callback &
+         (rhm, handler_id, dest_rank)
+      class(request_handler_manager_t), intent(inout) :: rhm
+      integer, intent(in) :: handler_id
+      integer, intent(in) :: dest_rank
+    end subroutine request_handler_manager_client_callback
 <<Request callback: procedures>>=
-  !> Call client-sided procedure of callback with handler_id.
-  !!
-  !! \param[in] handler_id
-  !! \param[in] source Destination rank.
-  subroutine request_handler_manager_client_callback (rhm, handler_id, dest_rank)
+  module subroutine request_handler_manager_client_callback &
+       (rhm, handler_id, dest_rank)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     integer, intent(in) :: dest_rank
     class(request_handler_t), pointer :: handler
     if (.not. rhm%tree%has_key (handler_id)) return
     call rhm%handler_at (handler_id, handler)
     call handler%client_handle (dest_rank = dest_rank, comm = rhm%comm)
   end subroutine request_handler_manager_client_callback
 
 @ %def request_handlder_manager_client_callback
 @
Index: trunk/src/vegas/Makefile.am
===================================================================
--- trunk/src/vegas/Makefile.am	(revision 8798)
+++ trunk/src/vegas/Makefile.am	(revision 8799)
@@ -1,264 +1,316 @@
 ## Makefile.am -- Makefile for WHIZARD
 ##
 ## Process this file with automake to produce Makefile.in
 #
 # Copyright (C) 1999-2022 by
 #     Wolfgang Kilian <kilian@physik.uni-siegen.de>
 #     Thorsten Ohl <ohl@physik.uni-wuerzburg.de>
 #     Juergen Reuter <juergen.reuter@desy.de>
 #     with contributions from
 #     cf. main AUTHORS file
 #
 # WHIZARD is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2, or (at your option)
 # any later version.
 #
 # WHIZARD is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 #
 ########################################################################
 
 ## The files in this directory implement the VEGAS algorithm and the
 ## multi-channel implementation with VEGAS as backbone integrator
 
 ## We create a library which is still to be combined with auxiliary libs.
 noinst_LTLIBRARIES = libvegas.la
 check_LTLIBRARIES = libvegas_ut.la
 
 COMMON_F90 =
 
 MPI_F90 = \
+  $(MPI_MODULES) \
+  $(MPI_SUBMODULES)
+
+MPI_MODULES = \
   vegas.f90_mpi \
   vamp2.f90_mpi
+MPI_SUBMODULES = \
+  vegas_sub.f90_mpi \
+  vamp2_sub.f90_mpi
 
 LOAD_BALANCER_MODULES = \
   request_base.f90 \
   request_simple.f90 \
   request_caller.f90 \
   request_state.f90 \
   balancer_base.f90 \
   balancer_simple.f90 \
   balancer_channel.f90 \
   request_callback.f90
 
+LOAD_BALANCER_SUBMODULES = \
+  request_base_sub.f90 \
+  request_simple_sub.f90 \
+  request_caller_sub.f90 \
+  request_state_sub.f90 \
+  balancer_base_sub.f90 \
+  balancer_simple_sub.f90 \
+  balancer_channel_sub.f90 \
+  request_callback_sub.f90
+
 SERIAL_F90 = \
+  $(SERIAL_MODULES) \
+  $(SERIAL_SUBMODULES)
+
+SERIAL_MODULES = \
   vegas.f90_serial \
   vamp2.f90_serial
+SERIAL_SUBMODULES = \
+  vegas_sub.f90_serial \
+  vamp2_sub.f90_serial
 
 EXTRA_DIST = \
   $(COMMON_F90) \
   $(SERIAL_F90) \
   $(MPI_F90) \
-  $(LOAD_BALANCER_MODULES)
+  $(LOAD_BALANCER_MODULES) \
+  $(LOAD_BALANCER_SUBMODULES)
 
 nodist_libvegas_la_SOURCES = \
   $(COMMON_F90) \
+  $(VEGAS_MODULES) \
+  $(VEGAS_SUBMODULES)
+
+VEGAS_MODULES = \
   vegas.f90 \
   vamp2.f90
 
+VEGAS_SUBMODULES = \
+  vegas_sub.f90 \
+  vamp2_sub.f90
+
 if FC_USE_MPI
-nodist_libvegas_la_SOURCES += \
-  request_base.f90 \
-  request_simple.f90 \
-  request_caller.f90 \
-  request_state.f90 \
-  balancer_base.f90 \
-  balancer_simple.f90 \
-  balancer_channel.f90 \
-  request_callback.f90
+VEGAS_MODULES += \
+  $(LOAD_BALANCER_MODULES)
+VEGAS_SUBMODULES += \
+  $(LOAD_BALANCER_SUBMODULES)
 endif
 
 DISTCLEANFILES = \
   vegas.f90 \
-  vamp2.f90
+  vegas_sub.f90 \
+  vamp2.f90 \
+  vamp2_sub.f90
 
 if FC_USE_MPI
 vegas.f90: vegas.f90_mpi
 	-cp -f $< $@
+vegas_sub.f90: vegas_sub.f90_mpi
+	-cp -f $< $@
 vamp2.f90: vamp2.f90_mpi
 	-cp -f $< $@
+vamp2_sub.f90: vamp2_sub.f90_mpi
+	-cp -f $< $@
 else
 vegas.f90: vegas.f90_serial
 	-cp -f $< $@
+vegas_sub.f90: vegas_sub.f90_serial
+	-cp -f $< $@
 vamp2.f90: vamp2.f90_serial
 	-cp -f $< $@
+vamp2_sub.f90: vamp2_sub.f90_serial
+	-cp -f $< $@
 endif
 
 libvegas_ut_la_SOURCES = \
   vegas_uti.f90 vegas_ut.f90 \
   vamp2_uti.f90 vamp2_ut.f90
 
 ## Omitting this would exclude it from the distribution
 dist_noinst_DATA = vegas.nw
 
 # Modules and installation
 # Dump module names into file Modules
 execmoddir = $(fmoddir)/whizard
 nodist_execmod_HEADERS = \
-  ${nodist_libvegas_la_SOURCES:.f90=.$(FCMOD)}
+  ${VEGAS_MODULES:.f90=.$(FCMOD)}
 
-libvegas_Modules = $(nodist_libvegas_la_SOURCES:.f90=) $(libvegas_ut_la_SOURCES:.f90=)
+libvegas_Modules = $(VEGAS_MODULES:.f90=) $(libvegas_ut_la_SOURCES:.f90=)
 Modules: Makefile
 	@for module in $(libvegas_Modules); do \
 		echo $$module >> $@.new; \
 	done
 	@if diff $@ $@.new -q >/dev/null; then \
 		rm $@.new; \
 	else \
 		mv $@.new $@; echo "Modules updated"; \
 	fi
 BUILT_SOURCES = Modules
 
 ## Fortran module dependencies
 # Get module lists from other directories
 module_lists = \
   ../basics/Modules \
   ../utilities/Modules \
   ../testing/Modules \
   ../system/Modules \
   ../rng/Modules \
   ../phase_space/Modules
 
 $(module_lists):
 	$(MAKE) -C `dirname $@` Modules
 
 Module_dependencies.sed: $(nodist_libvegas_la_SOURCES) $(libvegas_ut_la_SOURCES)
 Module_dependencies.sed: $(module_lists)
 	@rm -f $@
 	echo 's/, *only:.*//' >> $@
 	echo 's/, *&//' >> $@
 	echo 's/, *.*=>.*//' >> $@
 	echo 's/$$/.lo/' >> $@
 	for list in $(module_lists); do \
 		dir="`dirname $$list`"; \
 		for mod in `cat $$list`; do \
 			echo 's!: '$$mod'.lo$$!': $$dir/$$mod'.lo!' >> $@; \
 		done ; \
 	done
 
 DISTCLEANFILES += Module_dependencies.sed
 
 
 # The following line just says
 #    include Makefile.depend
 # but in a portable fashion (depending on automake's AM_MAKE_INCLUDE
 @am__include@ @am__quote@Makefile.depend@am__quote@
 
 Makefile.depend: Module_dependencies.sed
 Makefile.depend: $(nodist_libvegas_la_SOURCES) $(libvegas_ut_la_SOURCES)
 	@rm -f $@
 	for src in $^; do \
 		module="`basename $$src | sed 's/\.f[90][0358]//'`"; \
 		grep '^ *use ' $$src \
 		| grep -v '!NODEP!' \
 		| sed -e 's/^ *use */'$$module'.lo: /' \
 				-f Module_dependencies.sed; \
 	done > $@
 
 DISTCLEANFILES += Makefile.depend
 
 ## Dependencies across directories and packages, if not automatically generated
 rng_tao.lo: ../../vamp/src/tao_random_numbers.$(FCMOD)
 
 # Fortran90 module files are generated at the same time as object files
 .lo.$(FCMOD):
 	@:
 #	touch $@
 
 AM_FCFLAGS = -I../basics -I../utilities -I../testing -I../system -I../combinatorics -I../rng -I../physics -I../fastjet -I../qft -I../matrix_elements -I../types -I../particles -I../beams -I../phase_space -I../expr_base -I../variables -I../../vamp/src -I../mci
 
 ########################################################################
+vegas_sub.lo: vegas.lo
+vamp2_sub.lo: vamp2.lo
+if FC_USE_MPI
+request_base_sub.lo: request_base.lo
+request_simple_sub.lo: request_simple.lo
+request_caller_sub.lo: request_caller.lo
+request_state_sub.lo: request_state.lo
+balancer_base_sub.lo: balancer_base.lo
+balancer_simple_sub.lo: balancer_simple.lo
+balancer_channel_sub.lo: balancer_channel.lo
+request_callback_sub.lo: request_callback.lo
+endif
+
+########################################################################
 ## Default Fortran compiler options
 
 ## Profiling
 if FC_USE_PROFILING
 AM_FCFLAGS += $(FCFLAGS_PROFILING)
 endif
 
 ## OpenMP
 if FC_USE_OPENMP
 AM_FCFLAGS += $(FCFLAGS_OPENMP)
 endif
 
 # MPI
 if FC_USE_MPI
 AM_FCFLAGS += $(FCFLAGS_MPI)
 endif
 
 ########################################################################
 ## Non-standard targets and dependencies
 
 ## (Re)create F90 sources from NOWEB source.
 if NOWEB_AVAILABLE
 
 NOMPI_FILTER = -filter "sed 's/defn NOMPI:/defn/'"
 MPI_FILTER = -filter "sed 's/defn MPI:/defn/'"
 
 PRELUDE = $(top_srcdir)/src/noweb-frame/whizard-prelude.nw
 POSTLUDE = $(top_srcdir)/src/noweb-frame/whizard-postlude.nw
 
 vegas.stamp: $(PRELUDE) $(srcdir)/vegas.nw $(POSTLUDE)
 	@rm -f vegas.tmp
 	@touch vegas.tmp
 	for src in $(COMMON_F90) $(libvegas_ut_la_SOURCES); do \
 		$(NOTANGLE) -R[[$$src]] $^ | $(CPIF) $$src; \
 		done
 	for src in $(SERIAL_F90:.f90_serial=.f90); do \
 		$(NOTANGLE) -R[[$$src]] $(NOMPI_FILTER) $^ | $(CPIF) $$src'_serial'; \
 		done
 	for src in $(MPI_F90:.f90_mpi=.f90); do \
 		$(NOTANGLE) -R[[$$src]] $(MPI_FILTER) $^ | $(CPIF) $$src'_mpi'; \
 		done
-	for src in $(LOAD_BALANCER_MODULES); do \
+	for src in $(LOAD_BALANCER_MODULES) $(LOAD_BALANCER_SUBMODULES); do \
 		$(NOTANGLE) -R[[$$src]] $(MPI_FILTER) $^ | $(CPIF) $$src; \
 		done
 	@mv -f vegas.tmp vegas.stamp
 
-$(COMMON_F90) $(SERIAL_F90) $(MPI_F90) $(LOAD_BALANCER_MODULES) $(libvegas_ut_la_SOURCES): vegas.stamp
+$(COMMON_F90) $(SERIAL_F90) $(MPI_F90) $(LOAD_BALANCER_MODULES) $(LOAD_BALANCER_SUBMODULES) $(libvegas_ut_la_SOURCES): vegas.stamp
 ## Recover from the removal of $@
 	@if test -f $@; then :; else \
 		rm -f vegas.stamp; \
 		$(MAKE) $(AM_MAKEFLAGS) vegas.stamp; \
 	fi
 
 endif
 
 
 ########################################################################
 ## Non-standard cleanup tasks
 ## Remove sources that can be recreated using NOWEB
 if NOWEB_AVAILABLE
 maintainer-clean-noweb:
 	-rm -f *.f90 *.f90_serial *.f90_mpi *.c
 endif
 .PHONY: maintainer-clean-noweb
 
 ## Remove those sources also if builddir and srcdir are different
 if NOWEB_AVAILABLE
 clean-noweb:
 	test "$(srcdir)" != "." && rm -f *.f90 *.f90_serial *.f90_mpi *.c || true
 endif
 .PHONY: clean-noweb
 
 ## Remove F90 module files
 clean-local: clean-noweb
 	-rm -f vegas.stamp vegas.tmp
 	-rm -f *.$(FCMOD)
 if FC_SUBMODULES
 	-rm -f *.smod *.sub
 endif
 
 ## Remove backup files
 maintainer-clean-backup:
 	-rm -f *~
 .PHONY: maintainer-clean-backup
 
 ## Register additional clean targets
 maintainer-clean-local: maintainer-clean-noweb maintainer-clean-backup
Index: trunk/share/debug/Makefile_full
===================================================================
--- trunk/share/debug/Makefile_full	(revision 8798)
+++ trunk/share/debug/Makefile_full	(revision 8799)
@@ -1,668 +1,670 @@
 FC=pgfortran_2019
 FCFLAGS=-Mbackslash
 CC=gcc
 CCFLAGS=
 
 MODELS = \
 	SM.mdl \
 	SM_hadrons.mdl \
 	Test.mdl
 
 CC_SRC = \
 	sprintf_interface.c \
 	signal_interface.c
 
 F77_SRC = \
 	pythia.F \
 	pythia_pdf.f \
 	pythia6_up.f \
 	toppik.f \
 	toppik_axial.f
 
 FC0_SRC = 
 
 FC_SRC = \
 	format_defs.f90 \
 	io_units.f90 \
 	kinds.f90 \
 	constants.f90 \
 	iso_varying_string.f90 \
 	unit_tests.f90 \
 	unit_tests_sub.f90 \
 	numeric_utils.f90 \
 	numeric_utils_sub.f90 \
 	system_dependencies.f90 \
 	string_utils.f90 \
 	string_utils_sub.f90 \
 	system_defs.f90 \
 	system_defs_sub.f90 \	
 	debug_master.f90 \
 	diagnostics.f90 \
 	diagnostics_sub.f90 \
 	sorting.f90 \
 	physics_defs.f90 \
 	physics_defs_sub.f90 \
 	pdg_arrays.f90 \
 	bytes.f90 \
 	hashes.f90 \
 	md5.f90 \
 	model_data.f90 \
 	model_data_sub.f90 \
 	auto_components.f90 \
 	auto_components_sub.f90 \
 	var_base.f90 \
 	model_testbed.f90 \
 	auto_components_uti.f90 \
 	auto_components_ut.f90 \
 	os_interface.f90 \
 	os_interface_sub.f90 \
 	c_particles.f90 \
 	c_particles_sub.f90 \
 	format_utils.f90 \
 	lorentz.f90 \
 	lorentz_sub.f90 \
 	phs_points.f90 \
 	phs_points_sub.f90 \
 	colors.f90 \
 	colors_sub.f90 \
 	flavors.f90 \
 	flavors_sub.f90 \
 	helicities.f90 \
 	helicities_sub.f90 \
 	quantum_numbers.f90 \
 	quantum_numbers_sub.f90 \
 	state_matrices.f90 \
 	state_matrices_sub.f90 \
 	interactions.f90 \
 	interactions_sub.f90 \
 	CppStringsWrap_dummy.f90 \
 	FastjetWrap_dummy.f90 \
 	cpp_strings.f90 \
 	cpp_strings_sub.f90 \
 	fastjet.f90 \
 	fastjet_sub.f90 \
 	jets.f90 \
 	subevents.f90 \
 	su_algebra.f90 \
 	su_algebra_sub.f90 \
 	bloch_vectors.f90 \
 	bloch_vectors_sub.f90 \
 	polarizations.f90 \
 	polarizations_sub.f90 \
 	particles.f90 \
 	particles_sub.f90 \
 	event_base.f90 \
 	event_base_sub.f90 \
 	eio_data.f90 \
 	eio_data_sub.f90 \
 	event_handles.f90 \
 	eio_base.f90 \
 	eio_base_sub.f90 \
 	eio_base_uti.f90 \
 	eio_base_ut.f90 \
 	variables.f90 \
 	variables_sub.f90 \
 	rng_base.f90 \
 	rng_base_sub.f90 \
 	tao_random_numbers.f90 \
 	rng_tao.f90 \
 	rng_tao_sub.f90 \
 	rng_stream.f90 \
 	rng_stream_sub.f90 \
 	rng_base_uti.f90 \
 	rng_base_ut.f90 \
 	dispatch_rng.f90 \
 	dispatch_rng_sub.f90 \
 	dispatch_rng_uti.f90 \
 	dispatch_rng_ut.f90 \
 	beam_structures.f90 \
 	beam_structures_sub.f90 \
 	evaluators.f90 \
 	evaluators_sub.f90 \
 	beams.f90 \
 	beams_sub.f90 \
 	sm_physics.f90 \
 	sm_physics_sub.f90 \
 	file_registries.f90 \
 	file_registries_sub.f90 \
 	sf_aux.f90 \
 	sf_aux_sub.f90 \
 	sf_mappings.f90 \
 	sf_mappings_sub.f90 \
 	sf_base.f90 \
 	sf_base_sub.f90 \
 	electron_pdfs.f90 \
 	electron_pdfs_sub.f90 \
 	sf_isr.f90 \
 	sf_isr_sub.f90 \
 	sf_epa.f90 \
 	sf_epa_sub.f90 \
 	sf_ewa.f90 \
 	sf_ewa_sub.f90 \
 	sf_escan.f90 \
 	sf_escan_sub.f90 \
 	sf_gaussian.f90 \
 	sf_gaussian_sub.f90 \
 	sf_beam_events.f90 \
 	sf_beam_events_sub.f90 \
 	circe1.f90 \
 	sf_circe1.f90 \
 	sf_circe1_sub.f90 \
 	circe2.f90 \
 	selectors.f90 \
 	selectors_sub.f90 \
 	sf_circe2.f90 \
 	sf_circe2_sub.f90 \
 	sm_qcd.f90 \
 	sm_qcd_sub.f90 \
 	sm_qed.f90 \
 	sm_qed_sub.f90 \
 	mrst2004qed.f90 \
 	cteq6pdf.f90 \
 	mstwpdf.f90 \
 	ct10pdf.f90 \
 	CJpdf.f90 \
 	ct14pdf.f90 \
 	pdf_builtin.f90 \
 	pdf_builtin_sub.f90 \
 	LHAPDFWrap_dummy.f90 \
 	lhapdf5_full_dummy.f90 \
 	lhapdf5_has_photon_dummy.f90 \
 	lhapdf.f90 \
 	hoppet_dummy.f90 \
 	hoppet_interface.f90 \
 	sf_pdf_builtin.f90 \
 	sf_pdf_builtin_sub.f90 \
 	sf_lhapdf.f90 \
 	sf_lhapdf_sub.f90 \
 	dispatch_beams.f90 \
 	dispatch_beams_sub.f90 \
 	process_constants.f90 \
 	process_constants_sub.f90 \
 	prclib_interfaces.f90 \
 	prc_core_def.f90 \
 	prc_core_def_sub.f90 \
 	particle_specifiers.f90 \
 	particle_specifiers_sub.f90 \
 	process_libraries.f90 \
 	process_libraries_sub.f90 \
 	prc_test.f90 \
 	prc_test_sub.f90 \
 	prc_core.f90 \
 	prc_core_sub.f90 \
 	prc_test_core.f90 \
 	prc_test_core_sub.f90 \
 	sm_qed.f90 \
 	sm_qed_sub.f90 \
 	prc_omega.f90 \
 	prc_omega_sub.f90 \
 	phs_base.f90 \
 	phs_base_sub.f90 \
 	ifiles.f90 \
 	lexers.f90 \
 	syntax_rules.f90 \
 	parser.f90 \
 	expr_base.f90 \
 	formats.f90 \
 	formats_sub.f90 \
 	analysis.f90 \
 	user_code_interface.f90 \
 	observables.f90 \
 	observables_sub.f90 \
 	eval_trees.f90 \
 	eval_trees_sub.f90 \
 	interpolation.f90 \
 	interpolation_sub.f90 \
 	nr_tools.f90 \
 	ttv_formfactors.f90 \
 	ttv_formfactors_use.f90 \
 	ttv_formfactors_uti.f90 \
 	ttv_formfactors_ut.f90 \
 	models.f90 \
 	prclib_stacks.f90 \
 	prclib_stacks_sub.f90 \
 	user_files.f90 \
 	cputime.f90 \
 	cputime_sub.f90 \
 	mci_base.f90 \
 	integration_results.f90 \
 	integration_results_uti.f90 \
 	integration_results_ut.f90 \
 	mappings.f90 \
 	mappings_sub.f90 \
 	permutations.f90 \
 	permutations_sub.f90 \
 	resonances.f90 \
 	resonances_sub.f90 \
 	phs_trees.f90 \
 	phs_trees_sub.f90 \
 	phs_forests.f90 \
 	phs_forests_sub.f90 \
 	prc_external.f90 \
 	blha_config.f90 \
 	blha_config_sub.f90 \
 	blha_olp_interfaces.f90 \
 	blha_olp_interfaces_sub.f90 \
 	prc_openloops.f90 \
 	prc_openloops_sub.f90 \
 	prc_threshold.f90 \
 	prc_threshold_sub.f90 \
 	process_config.f90 \
 	process_counter.f90 \
 	process_mci.f90 \
 	pcm_base.f90 \
 	nlo_data.f90 \
 	nlo_data_sub.f90 \
 	cascades.f90 \
 	cascades_sub.f90 \
 	cascades2_lexer.f90 \
 	cascades2_lexer_sub.f90 \
 	cascades2_lexer_uti.f90 \
 	cascades2_lexer_ut.f90 \
 	cascades2.f90 \
 	cascades2_sub.f90 \
 	cascades2_uti.f90 \
 	cascades2_ut.f90 \
 	phs_none.f90 \
 	phs_none_sub.f90 \
 	phs_rambo.f90 \
 	phs_rambo_sub.f90 \
 	phs_wood.f90 \
 	phs_wood_sub.f90 \
 	phs_fks.f90 \
 	phs_fks_sub.f90 \
 	phs_single.f90 \
 	phs_single_sub.f90 \
 	fks_regions.f90 \
 	fks_regions_sub.f90 \
 	virtual.f90 \
 	virtual_sub.f90 \
 	pdf.f90 \
 	pdf_sub.f90 \
 	real_subtraction.f90 \
 	real_subtraction_sub.f90 \
 	dglap_remnant.f90 \
 	dispatch_fks.f90 \
 	dispatch_fks_sub.f90 \
 	dispatch_phase_space.f90 \
 	dispatch_phase_space_sub.f90 \
 	pcm.f90 \
 	recola_wrapper_dummy.f90 \
 	prc_recola.f90 \
 	subevt_expr.f90 \
 	parton_states.f90 \
 	prc_template_me.f90 \
 	prc_template_me_sub.f90 \
 	process.f90 \
 	process_stacks.f90 \
 	iterations.f90 \
 	rt_data.f90 \
 	file_utils.f90 \
 	file_utils_sub.f90 \
 	prc_gosam.f90 \
 	prc_gosam_sub.f90 \
 	dispatch_me_methods.f90 \
 	sf_base_uti.f90 \
 	sf_base_ut.f90 \
 	dispatch_uti.f90 \
 	dispatch_ut.f90 \
 	formats_uti.f90 \
 	formats_ut.f90 \
 	md5_uti.f90 \
 	md5_ut.f90 \
 	os_interface_uti.f90 \
 	os_interface_ut.f90 \
 	sorting_uti.f90 \
 	sorting_ut.f90 \
 	grids.f90 \
 	grids_uti.f90 \
 	grids_ut.f90 \
 	solver.f90 \
 	solver_uti.f90 \
 	solver_ut.f90 \
 	cputime_uti.f90 \
 	cputime_ut.f90 \
 	sm_qcd_uti.f90 \
 	sm_qcd_ut.f90 \
 	sm_physics_uti.f90 \
 	sm_physics_ut.f90 \
 	lexers_uti.f90 \
 	lexers_ut.f90 \
 	parser_uti.f90 \
 	parser_ut.f90 \
 	xml.f90 \
 	xml_uti.f90 \
 	xml_ut.f90 \
 	colors_uti.f90 \
 	colors_ut.f90 \
 	state_matrices_uti.f90 \
 	state_matrices_ut.f90 \
 	analysis_uti.f90 \
 	analysis_ut.f90 \
 	particles_uti.f90 \
 	particles_ut.f90 \
 	radiation_generator.f90 \
 	radiation_generator_sub.f90 \
 	radiation_generator_uti.f90 \
 	radiation_generator_ut.f90 \
 	blha_uti.f90 \
 	blha_ut.f90 \
 	evaluators_uti.f90 \
 	evaluators_ut.f90 \
 	models_uti.f90 \
 	models_ut.f90 \
 	eval_trees_uti.f90 \
 	eval_trees_ut.f90 \
 	resonances_uti.f90 \
 	resonances_ut.f90 \
 	phs_trees_uti.f90 \
 	phs_trees_ut.f90 \
 	phs_forests_uti.f90 \
 	phs_forests_ut.f90 \
 	beams_uti.f90 \
 	beams_ut.f90 \
 	su_algebra_uti.f90 \
 	su_algebra_ut.f90 \
 	bloch_vectors_uti.f90 \
 	bloch_vectors_ut.f90 \
 	polarizations_uti.f90 \
 	polarizations_ut.f90 \
 	sf_aux_uti.f90 \
 	sf_aux_ut.f90 \
 	sf_mappings_uti.f90 \
 	sf_mappings_ut.f90 \
 	sf_pdf_builtin_uti.f90 \
 	sf_pdf_builtin_ut.f90 \
 	sf_lhapdf_uti.f90 \
 	sf_lhapdf_ut.f90 \
 	sf_isr_uti.f90 \
 	sf_isr_ut.f90 \
 	sf_epa_uti.f90 \
 	sf_epa_ut.f90 \
 	sf_ewa_uti.f90 \
 	sf_ewa_ut.f90 \
 	sf_circe1_uti.f90 \
 	sf_circe1_ut.f90 \
 	sf_circe2_uti.f90 \
 	sf_circe2_ut.f90 \
 	sf_gaussian_uti.f90 \
 	sf_gaussian_ut.f90 \
 	sf_beam_events_uti.f90 \
 	sf_beam_events_ut.f90 \
 	sf_escan_uti.f90 \
 	sf_escan_ut.f90 \
 	phs_base_uti.f90 \
 	phs_base_ut.f90 \
 	phs_none_uti.f90 \
 	phs_none_ut.f90 \
 	phs_single_uti.f90 \
 	phs_single_ut.f90 \
 	phs_rambo_uti.f90 \
 	phs_rambo_ut.f90 \
 	phs_wood_uti.f90 \
 	phs_wood_ut.f90 \
 	phs_fks_uti.f90 \
 	phs_fks_ut.f90 \
 	fks_regions_uti.f90 \
 	fks_regions_ut.f90 \
 	mci_midpoint.f90 \
 	mci_base_uti.f90 \
 	mci_base_ut.f90 \
 	mci_midpoint_uti.f90 \
 	mci_midpoint_ut.f90 \
 	kinematics.f90 \
 	instances.f90 \
 	mci_none.f90 \
 	mci_none_uti.f90 \
 	mci_none_ut.f90 \
 	processes_uti.f90 \
 	processes_ut.f90 \
 	process_stacks_uti.f90 \
 	process_stacks_ut.f90 \
 	prc_recola_uti.f90 \
 	prc_recola_ut.f90 \
 	rng_tao_uti.f90 \
 	rng_tao_ut.f90 \
 	rng_stream_uti.f90 \
 	rng_stream_ut.f90 \
 	selectors_uti.f90 \
 	selectors_ut.f90 \
 	vegas.f90 \
+	vegas_sub.f90 \
 	vegas_uti.f90 \
 	vegas_ut.f90 \
 	vamp2.f90 \
+	vamp2_sub.f90 \
 	vamp2_uti.f90 \
 	vamp2_ut.f90 \
 	exceptions.f90 \
 	vamp_stat.f90 \
 	utils.f90 \
 	divisions.f90 \
 	linalg.f90 \
 	vamp.f90 \
 	mci_vamp.f90 \
 	mci_vamp_uti.f90 \
 	mci_vamp_ut.f90 \
 	mci_vamp2.f90 \
 	mci_vamp2_uti.f90 \
 	mci_vamp2_ut.f90 \
 	prclib_interfaces_uti.f90 \
 	prclib_interfaces_ut.f90 \
 	particle_specifiers_uti.f90 \
 	particle_specifiers_ut.f90 \
 	process_libraries_uti.f90 \
 	process_libraries_ut.f90 \
 	prclib_stacks_uti.f90 \
 	prclib_stacks_ut.f90 \
 	slha_interface.f90 \
 	slha_interface_sub.f90 \
 	slha_interface_uti.f90 \
 	slha_interface_ut.f90 \
 	cascades_uti.f90 \
 	cascades_ut.f90 \
 	prc_test_uti.f90 \
 	prc_test_ut.f90 \
 	prc_template_me_uti.f90 \
 	prc_template_me_ut.f90 \
 	prc_omega_uti.f90 \
 	prc_omega_ut.f90 \
 	event_transforms.f90 \
 	event_transforms_uti.f90 \
 	event_transforms_ut.f90 \
 	hep_common.f90 \
 	hep_common_sub.f90 \
 	hepev4_aux.f90 \
 	tauola_dummy.f90 \
 	tauola_interface.f90 \
 	tauola_interface_sub.f90 \
 	shower_base.f90 \
 	shower_base_sub.f90 \
 	shower_partons.f90 \
 	shower_partons_sub.f90 \
 	muli.f90 \
 	matching_base.f90 \
 	powheg_matching.f90 \
 	shower_core.f90 \
 	shower_core_sub.f90 \
 	shower_base_uti.f90 \
 	shower_base_ut.f90 \
 	shower.f90 \
 	shower_uti.f90 \
 	shower_ut.f90 \
 	shower_pythia6.f90 \
 	shower_pythia6_sub.f90 \
 	whizard_lha.f90 \
 	whizard_lha_uti.f90 \
 	whizard_lha_ut.f90 \
 	LHAWhizard_dummy.f90 \
 	Pythia8Wrap_dummy.f90 \
 	pythia8.f90 \
 	pythia8_uti.f90 \
 	pythia8_ut.f90 \
 	shower_pythia8.f90 \
 	shower_pythia8_sub.f90 \
 	hadrons.f90 \
 	ktclus.f90 \
 	mlm_matching.f90 \
 	ckkw_matching.f90 \
 	jets_uti.f90 \
 	jets_ut.f90 \
 	pdg_arrays_uti.f90 \
 	pdg_arrays_ut.f90 \
 	interactions_uti.f90 \
 	interactions_ut.f90 \
 	decays.f90 \
 	decays_uti.f90 \
 	decays_ut.f90 \
 	evt_nlo.f90 \
 	events.f90 \
 	events_uti.f90 \
 	events_ut.f90 \
 	HepMCWrap_dummy.f90 \
 	hepmc_interface.f90 \
 	hepmc_interface_sub.f90 \
 	hepmc_interface_uti.f90 \
 	hepmc_interface_ut.f90 \
 	LCIOWrap_dummy.f90 \
 	lcio_interface.f90 \
 	lcio_interface_sub.f90 \
 	lcio_interface_uti.f90 \
 	lcio_interface_ut.f90 \
 	hep_events.f90 \
 	hep_events_sub.f90 \
 	hep_events_uti.f90 \
 	hep_events_ut.f90 \
 	expr_tests_uti.f90 \
 	expr_tests_ut.f90 \
 	parton_states_uti.f90 \
 	parton_states_ut.f90 \
 	eio_data_uti.f90 \
 	eio_data_ut.f90 \
 	eio_raw.f90 \
 	eio_raw_uti.f90 \
 	eio_raw_ut.f90 \
 	eio_checkpoints.f90 \
 	eio_checkpoints_sub.f90 \
 	eio_checkpoints_uti.f90 \
 	eio_checkpoints_ut.f90 \
 	eio_lhef.f90 \
 	eio_lhef_sub.f90 \
 	eio_lhef_uti.f90 \
 	eio_lhef_ut.f90 \
 	eio_hepmc.f90 \
 	eio_hepmc_sub.f90 \
 	eio_hepmc_uti.f90 \
 	eio_hepmc_ut.f90 \
 	eio_lcio.f90 \
 	eio_lcio_sub.f90 \
 	eio_lcio_uti.f90 \
 	eio_lcio_ut.f90 \
 	stdhep_dummy.f90 \
 	xdr_wo_stdhep.f90 \
 	eio_stdhep.f90 \
 	eio_stdhep_sub.f90 \
 	eio_stdhep_uti.f90 \
 	eio_stdhep_ut.f90 \
 	eio_ascii.f90 \
 	eio_ascii_sub.f90 \
 	eio_ascii_uti.f90 \
 	eio_ascii_ut.f90 \
 	eio_weights.f90 \
 	eio_weights_sub.f90 \
 	eio_weights_uti.f90 \
 	eio_weights_ut.f90 \
 	eio_dump.f90 \
 	eio_dump_sub.f90 \
 	eio_dump_uti.f90 \
 	eio_dump_ut.f90 \
 	eio_callback.f90 \
 	eio_callback_sub.f90 \
 	real_subtraction_uti.f90 \
 	real_subtraction_ut.f90 \
 	iterations_uti.f90 \
 	iterations_ut.f90 \
 	rt_data_uti.f90 \
 	rt_data_ut.f90 \
 	dispatch_mci.f90 \
 	dispatch_mci_uti.f90 \
 	dispatch_mci_ut.f90 \
 	dispatch_phs_uti.f90 \
 	dispatch_phs_ut.f90 \
 	resonance_insertion.f90 \
 	resonance_insertion_uti.f90 \
 	resonance_insertion_ut.f90 \
 	recoil_kinematics.f90 \
 	recoil_kinematics_uti.f90 \
 	recoil_kinematics_ut.f90 \
 	isr_epa_handler.f90 \
 	isr_epa_handler_uti.f90 \
 	isr_epa_handler_ut.f90 \
 	dispatch_transforms.f90 \
 	dispatch_transforms_uti.f90 \
 	dispatch_transforms_ut.f90 \
 	beam_structures_uti.f90 \
 	beam_structures_ut.f90 \
 	process_configurations.f90 \
 	process_configurations_uti.f90 \
 	process_configurations_ut.f90 \
 	compilations.f90 \
 	compilations_uti.f90 \
 	compilations_ut.f90 \
 	integrations.f90 \
 	integrations_uti.f90 \
 	integrations_ut.f90 \
 	event_streams.f90 \
 	event_streams_uti.f90 \
 	event_streams_ut.f90 \
 	restricted_subprocesses.f90 \
 	eio_direct.f90 \
 	eio_direct_sub.f90 \
 	eio_direct_uti.f90 \
 	eio_direct_ut.f90 \
 	simulations.f90 \
 	restricted_subprocesses_uti.f90 \
 	restricted_subprocesses_ut.f90 \
 	simulations_uti.f90 \
 	simulations_ut.f90 \
 	commands.f90 \
 	commands_uti.f90 \
 	commands_ut.f90 \
 	cmdline_options.f90 \
 	libmanager.f90 \
 	features.f90 \
 	whizard.f90 \
 	api.f90 \
 	api_hepmc_uti.f90 \
 	api_hepmc_ut.f90 \
 	api_lcio_uti.f90 \
 	api_lcio_ut.f90 \
 	api_uti.f90 \
 	api_ut.f90
 
 FC_OBJ = $(FC0_SRC:.f90=.o) $(F77_SRC:.f=.o) $(FC_SRC:.f90=.o) 
 CC_OBJ = $(CC_SRC:.c=.o)
 
 all: whizard_test 
 check: whizard_test
 	./whizard_test --check resonances
 
 whizard_test: $(FC_OBJ) $(CC_OBJ) main_ut.f90
 	$(FC) $(FC_OBJ) $(CC_OBJ) -ldl -o $@ main_ut.f90
 
 whizard: $(FC_OBJ) $(CC_OBJ) main.f90
 	$(FC) $(FC_OBJ) $(CC_OBJ) -ldl -o $@ main.f90
 
 %.o: %.f90
 	$(FC) $(FCFLAGS) -c $<
 
 %.o: %.f
 	$(FC) $(FCFLAGS) -c $<
 
 %.o: %.c
 	$(CC) $(CCFLAGS) -c $<
 
 tar: $(FC_SRC) $(F77_SRC) $(FC0_SRC) $(CC_SRC) $(MODELS)
 	tar cvvzf whizard-`date +%y%m%d`-`date +%H%M`.tar.gz $(FC_SRC) $(FC0_SRC) \
 	$(F77_SRC) $(CC_SRC) main_ut.f90 Makefile $(MODELS)
 
 clean:
 	rm -f *.mod *.o whizard_test