Index: trunk/src/vegas/vegas.nw
===================================================================
--- trunk/src/vegas/vegas.nw	(revision 8796)
+++ trunk/src/vegas/vegas.nw	(revision 8797)
@@ -1,7750 +1,8428 @@
 %% -*- ess-noweb-default-code-mode: f90-mode; noweb-default-code-mode: f90-mode; noweb-code-mode: f90-mode -*-
 % WHIZARD code as NOWEB source: VEGAS algorithm
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \chapter{VEGAS Integration}
 \label{cha:vegas-integration}
 
 @ The backbone integrator of WHIZARD is a object-oriented implemetation of the VEGAS algorithm.
 
 <<[[vegas.f90]]>>=
 <<File header>>
 
 module vegas
 <<Use kinds>>
-
-<<vegas: modules>>
+<<Vegas: modules>>
 
 <<Standard module head>>
 
-<<vegas: public>>
+<<Vegas: public>>
 
-<<vegas: parameters>>
+<<Vegas: parameters>>
 
-<<vegas: types>>
+<<Vegas: types>>
 
-<<vegas: interfaces>>
+<<Vegas: interfaces>>
 
 contains
 
-<<vegas: procedures>>
+<<Vegas: procedures>>
 
 end module vegas
 @ %def vegas
-<<vegas: modules>>=
+<<Vegas: modules>>=
   use diagnostics
   use io_units
   use format_utils, only: write_indent
   use format_defs, only: FMT_17
   use rng_base, only: rng_t
   use rng_stream, only: rng_stream_t
 @
 @ MPI Module.
-<<MPI: vegas: modules>>=
+<<MPI: Vegas: modules>>=
   use request_callback, only: request_handler_t
   use mpi_f08 !NODEP!
 @
 
 \section{Integration modes}
 \label{sec:integration-modes}
 
 @ VEGAS operates in three different modes: [[vegas_mode_importance_only]], [[vegas_mode_importance]] or [[vegas_mode_stratified]].
 The default mode is [[vegas_mode_importance]], where the algorithm decides whether if it is possible to use importance sampling or stratified sampling.
 In low dimensions VEGAS uses strict stratified sampling.
 
-<<vegas: parameters>>=
+<<Vegas: parameters>>=
   integer, parameter, public :: VEGAS_MODE_IMPORTANCE = 0, &
        & VEGAS_MODE_STRATIFIED = 1, VEGAS_MODE_IMPORTANCE_ONLY = 2
 @ %def vegas_mode_importance vegas_mode_stratified vegas_mode_importance_only
 @
 \section{Type: vegas\_func\_t}
 \label{sec:type:vegas_func_t}
 
 We define a abstract [[func]] type which only gives an interface to an [[evaluate]] procedure.
 The inside of implementation and also the optimization of are not a concern of the [[vegas]] implementation.
-<<vegas: public>>=
+<<Vegas: public>>=
   public :: vegas_func_t
-<<vegas: types>>=
+<<Vegas: types>>=
   type, abstract :: vegas_func_t
      !
    contains
      procedure(vegas_func_evaluate), deferred, pass, public :: evaluate
   end type vegas_func_t
 
 @ %def vegas_func_t
 @ The only procedure called in [[vegas]] is [[vegas_func_evaluate]].
 It takes a real value [[x]] and returns a real value [[f]].
-<<vegas: interfaces>>=
+<<Vegas: interfaces>>=
   abstract interface
      real(default) function vegas_func_evaluate (self, x) result (f)
        import :: default, vegas_func_t
        class(vegas_func_t), intent(inout) :: self
        real(default), dimension(:), intent(in) :: x
      end function vegas_func_evaluate
   end interface
 
 @ %def vegas_func_evaluate
 @
 \section{Type: vegas\_config\_t}
 \label{sec:type:vegas_config_t}
 
 We store the complete configuration in a transparent container.
 The [[vegas_config_t]] object inside VEGAS must not be directly accesible.
 We provide a get method which returns a copy of the [[vegas_config_t]] object.
 
 Apart from the options which can be set by the constructor of [[vegas_t]] object, we store the run-time configuration [[n_calls]], [[calls_per_box]], [[n_bins]] and [[n_boxes]].
 Those are calculated and set accordingly by VEGAS.
-<<vegas: public>>=
+<<Vegas: public>>=
   public :: vegas_config_t
-<<vegas: types>>=
+<<Vegas: types>>=
   type :: vegas_config_t
      integer :: n_dim = 0
      real(default) :: alpha = 1.5
      integer :: n_bins_max = 50
      integer :: iterations = 5
      integer :: mode = VEGAS_MODE_STRATIFIED
      integer :: calls_per_box = 0
      integer :: n_calls = 0
      integer :: n_calls_min = 20
      integer :: n_boxes = 1
      integer :: n_bins = 1
    contains
-   <<vegas: vegas config: TBP>>
+   <<Vegas: vegas config: TBP>>
   end type vegas_config_t
 
 @ %def vegas_config_t, n_calls, calls_per_box, n_bins, n_boxes
 @ Write out the configuration of the grid.
-<<vegas: vegas config: TBP>>=
+<<Vegas: vegas config: TBP>>=
   procedure, public :: write => vegas_config_write
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_config_write (self, unit, indent)
     class(vegas_config_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of dimensions            = ", self%n_dim
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Adaption power (alpha)          = ", self%alpha
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Max. number of bins (per dim.)  = ", self%n_bins_max
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of iterations            = ", self%iterations
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Mode (stratified or importance) = ", self%mode
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Calls per box                   = ", self%calls_per_box
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of calls                 = ", self%n_calls
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Min. number of calls            = ", self%n_calls_min
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of bins                  = ", self%n_bins
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of boxes                 = ", self%n_boxes
   end subroutine vegas_config_write
 
 @ %def vegas_config_write
 @
 \section{Type: vegas\_grid\_t}
 \label{sec:type:-vegas_g}
 
 We provide a simple and transparent grid container. The container can then later
 be used, to export the actual grid.
-<<vegas: public>>=
+<<Vegas: public>>=
   public :: vegas_grid_t
-<<vegas: types>>=
+<<Vegas: types>>=
   type :: vegas_grid_t
      integer :: n_dim = 1
      integer :: n_bins = 1
      real(default), dimension(:), allocatable :: x_lower
      real(default), dimension(:), allocatable :: x_upper
      real(default), dimension(:), allocatable :: delta_x
      real(default), dimension(:,:), allocatable :: xi
    contains
-   <<vegas: vegas grid: TBP>>
+   <<Vegas: vegas grid: TBP>>
   end type vegas_grid_t
 
 @ %def vegas_grid_t
 @ Initialise grid.
-<<vegas: interfaces>>=
+<<Vegas: interfaces>>=
   interface vegas_grid_t
      module procedure vegas_grid_init
   end interface vegas_grid_t
 
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   type(vegas_grid_t) function vegas_grid_init (n_dim, n_bins_max) result (self)
     integer, intent(in) :: n_dim
     integer, intent(in) :: n_bins_max
     self%n_dim = n_dim
     self%n_bins = 1
     allocate (self%x_upper(n_dim), source=1.0_default)
     allocate (self%x_lower(n_dim), source=0.0_default)
     allocate (self%delta_x(n_dim), source=1.0_default)
     allocate (self%xi((n_bins_max + 1), n_dim), source=0.0_default)
   end function vegas_grid_init
 
 @ %def vegas_grid_init
 @ Output.
-<<vegas: vegas grid: TBP>>=
+<<Vegas: vegas grid: TBP>>=
   procedure, public :: write => vegas_grid_write
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_grid_write (self, unit, pacify)
     class(vegas_grid_t), intent(in) :: self
     integer, intent(in), optional :: unit
     logical, intent(in), optional :: pacify
     logical :: pac
     integer :: u, i, j
     pac = .false.; if (present (pacify))  pac = pacify
     u = given_output_unit (unit)
     write (u, descr_fmt) "begin vegas_grid_t"
     write (u, integer_fmt) "n_dim = ", self%n_dim
     write (u, integer_fmt) "n_bins = ", self%n_bins
     write (u, descr_fmt) "begin x_lower"
     do j = 1, self%n_dim
        if (pac) then
           write (u, double_array_pac_fmt)  j, self%x_lower(j)
        else
           write (u, double_array_fmt)  j, self%x_lower(j)
        end if
     end do
     write (u, descr_fmt) "end x_lower"
     write (u, descr_fmt) "begin x_upper"
     do j = 1, self%n_dim
        if (pac) then
           write (u, double_array_pac_fmt) j, self%x_upper(j)
        else
           write (u, double_array_fmt) j, self%x_upper(j)
        end if
     end do
     write (u, descr_fmt) "end x_upper"
     write (u, descr_fmt) "begin delta_x"
     do j = 1, self%n_dim
        if (pac) then
           write (u, double_array_pac_fmt)  j, self%delta_x(j)
        else
           write (u, double_array_fmt)  j, self%delta_x(j)
        end if
     end do
     write (u, descr_fmt) "end delta_x"
     write (u, descr_fmt) "begin xi"
     do j = 1, self%n_dim
        do i = 1, self%n_bins + 1
           if (pac) then
              write (u, double_array2_pac_fmt) i, j, self%xi(i, j)
           else
              write (u, double_array2_fmt) i, j, self%xi(i, j)
           end if
        end do
     end do
     write (u, descr_fmt) "end xi"
     write (u, descr_fmt) "end vegas_grid_t"
   end subroutine vegas_grid_write
 
 @ %def vegas_grid_write
 @ Compare two grids, if they match up to an given precision.
-<<vegas: public>>=
+<<Vegas: public>>=
   public :: operator (==)
-<<vegas: interfaces>>=
+<<Vegas: interfaces>>=
   interface operator (==)
      module procedure vegas_grid_equal
   end interface operator (==)
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   logical function vegas_grid_equal (grid_a, grid_b) result (yorn)
     type(vegas_grid_t), intent(in) :: grid_a, grid_b
     yorn = .true.
     yorn = yorn .and. (grid_a%n_dim == grid_b%n_dim)
     yorn = yorn .and. (grid_a%n_bins == grid_b%n_bins)
     yorn = yorn .and. all (grid_a%x_lower == grid_b%x_lower)
     yorn = yorn .and. all (grid_a%x_upper == grid_b%x_upper)
     yorn = yorn .and. all (grid_a%delta_x == grid_b%delta_x)
     yorn = yorn .and. all (grid_a%xi == grid_b%xi)
   end function vegas_grid_equal
 
 @ %def vegas_grid_equal
 @ Resize each bin accordingly to its corresponding weight [[w]]. Can be used to
 resize the grid to a new size of bins or refinement.
 
 The procedure expects two arguments, firstly, [[n_bins]] and, secondly, the
 refinement weights [[w]]. If [[n_bins]] differs from the internally stored one,
 we resize the grid under consideration of [[w]]. If each element of [[w]] equals
 one, then the bins are resized preserving their original bin density.
 
 Anytime else, we refine the grid accordingly to [[w]].
-<<vegas: vegas grid: TBP>>=
+<<Vegas: vegas grid: TBP>>=
   procedure, private :: resize => vegas_grid_resize
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_grid_resize (self, n_bins, w)
     class(vegas_grid_t), intent(inout) :: self
     integer, intent(in) :: n_bins
     real(default), dimension(:, :), intent(in) :: w
     real(default), dimension(size(self%xi)) :: xi_new
     integer :: i, j, k
     real(default) :: pts_per_bin
     real(default) :: d_width
     do j = 1, self%n_dim
        if (self%n_bins /= n_bins) then
           pts_per_bin = real(self%n_bins, default) / real(n_bins, default)
           self%n_bins = n_bins
        else
           if (all (w(:, j) == 0.)) then
              call msg_bug ("[VEGAS] grid_resize: resize weights are zero.")
           end if
           pts_per_bin = sum(w(:, j)) / self%n_bins
        end if
        d_width = 0.
        k = 0
        do i = 2, self%n_bins
           do while (pts_per_bin > d_width)
              k = k + 1
              d_width = d_width + w(k, j)
           end do
           d_width = d_width - pts_per_bin
           associate (x_upper => self%xi(k + 1, j), x_lower => self%xi(k, j))
             xi_new(i) = x_upper - (x_upper - x_lower) * d_width / w(k, j)
           end associate
        end do
        self%xi(:, j) = 0. ! Reset grid explicitly
        self%xi(2:n_bins, j) = xi_new(2:n_bins)
        self%xi(n_bins + 1, j) = 1.
     end do
   end subroutine vegas_grid_resize
 
 @ %def vegas_grid_resize
 @ Find the probability for a given [[x]] in the unit hypercube.
 
 For the case [[n_bins < N_BINARY_SEARCH]], we utilize linear search which is faster for short arrays.
 Else we make use of a binary search. Furthermore, we calculate the inverse of
 the probability and invert the result only at the end (saving some time on division).
-<<vegas: vegas grid: TBP>>=
+<<Vegas: vegas grid: TBP>>=
   procedure, public :: get_probability => vegas_grid_get_probability
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   function vegas_grid_get_probability (self, x) result (g)
     class(vegas_grid_t), intent(in) :: self
     real(default), dimension(:), intent(in) :: x
     real(default) :: g
     integer :: j, i_lower, i_higher, i_mid
     real(default), dimension(size(x)) :: y
     g = 1
     y = (x - self%x_lower) / self%delta_x
     if (any (y < 0 .or. y > 1)) then
        g = 0; return
     end if
     ndim: do j = 1, self%n_dim
        i_lower = 1
        i_higher = self%n_bins + 1
        !! Left-most search
        search: do while (i_lower < i_higher - 1)
           i_mid = floor ((i_higher + i_lower) / 2.)
           if (y(j) > self%xi(i_mid, j)) then
              i_lower = i_mid
           else
              i_higher = i_mid
           end if
        end do search
        g = g * (self%delta_x(j) * &
             & self%n_bins * (self%xi(i_lower + 1, j) - self%xi(i_lower, j)))
     end do ndim
     ! Move division to the end, which is more efficient.
     if (g /= 0) g = 1 / g
   end function vegas_grid_get_probability
 
 @ %def vegas_grid_get_probability
 @ Broadcast the grid information. As safety measure, we get the actual grid
 object from VEGAS (correclty allocated, but for non-root unfilled) and broadcast
 the root object. On success, we set grid into VEGAS.
 
 We use the non-blocking broadcast routine, because we have to send quite a bunch
 of integers and reals.
 
 We have to be very careful with [[n_bins]], the number of bins can actually
 change during different iterations. If we reuse a grid, we have to take that,
 every grid uses the same [[n_bins]]. We expect, that the number of dimension
 does not change, which is in principle possible, but will be checked onto in
 [[vegas_set_grid]].
-<<MPI: vegas: vegas grid: TBP>>=
+<<MPI: Vegas: vegas grid: TBP>>=
   procedure, public :: broadcast => vegas_grid_broadcast
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   subroutine vegas_grid_broadcast (self, comm)
     class(vegas_grid_t), intent(inout) :: self
     type(MPI_COMM), intent(in) :: comm
     integer :: j, ierror
     type(MPI_Request), dimension(self%n_dim + 4) :: status
     ! Blocking
     call MPI_Bcast (self%n_bins, 1, MPI_INTEGER, 0, comm)
     ! Non blocking
     call MPI_Ibcast (self%n_dim, 1, MPI_INTEGER, 0, comm, status(1))
     call MPI_Ibcast (self%x_lower, self%n_dim, &
          & MPI_DOUBLE_PRECISION, 0, comm, status(2))
     call MPI_Ibcast (self%x_upper, self%n_dim, &
          & MPI_DOUBLE_PRECISION, 0, comm, status(3))
     call MPI_Ibcast (self%delta_x, self%n_dim, &
          & MPI_DOUBLE_PRECISION, 0, comm, status(4))
     ndim: do j = 1, self%n_dim
        call MPI_Ibcast (self%xi(1:self%n_bins + 1, j), self%n_bins + 1,&
             & MPI_DOUBLE_PRECISION, 0, comm, status(4 + j))
     end do ndim
     call MPI_Waitall (self%n_dim + 4, status, MPI_STATUSES_IGNORE)
   end subroutine vegas_grid_broadcast
 
 @ %def vegas_grid_broadcast
 @
 \section{Type: vegas\_result\_t}
 \label{sec:type:-vegas_r}
 
 We store the result of the latest iteration(s) in a transparent container.
 The [[vegas_result_t]] object inside VEGAS must not be directly accessible.
 We export the a copy of the result via a get-method of the [[vegas_t]] object.
 
 We store latest event weight in [[evt_weight]] and a (possible) evebt weight
 excess in [[evt_weight_excess]], if the event weight is larger than
 [[max_abs_f]].
-<<vegas: public>>=
+<<Vegas: public>>=
   public :: vegas_result_t
-<<vegas: types>>=
+<<Vegas: types>>=
   type :: vegas_result_t
      integer :: it_start = 0
      integer :: it_num = 0
      integer :: samples = 0
      real(default) :: sum_int_wgtd = 0.
      real(default) :: sum_wgts
      real(default) :: sum_chi = 0.
      real(default) :: chi2 = 0.
      real(default) :: efficiency = 0.
      real(default) :: efficiency_pos = 0.
      real(default) :: efficiency_neg = 0.
      real(default) :: max_abs_f = 0.
      real(default) :: max_abs_f_pos = 0.
      real(default) :: max_abs_f_neg = 0.
      real(default) :: result = 0.
      real(default) :: std = 0.
      real(default) :: evt_weight = 0.
      real(default) :: evt_weight_excess = 0.
    contains
-   <<vegas: vegas result: TBP>>
+   <<Vegas: vegas result: TBP>>
   end type vegas_result_t
 
 @ %def vegas_results_t
 @ Write out the current status of the integration result.
-<<vegas: vegas result: TBP>>=
+<<Vegas: vegas result: TBP>>=
   procedure, public :: write => vegas_result_write
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_result_write (self, unit, indent)
     class(vegas_result_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Start iteration                 = ", self%it_start
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Iteration number                = ", self%it_num
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Sample number                   = ", self%samples
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 //")") &
          & "Sum of weighted integrals       = ", self%sum_int_wgtd
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")")  &
          & "Sum of weights                  = ", self%sum_wgts
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Sum of chi                      = ", self%sum_chi
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "chi2                            = ", self%chi2
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Overall efficiency              = ", self%efficiency
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "f-positive efficiency           = ", self%efficiency_pos
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "f-negative efficiency           = ", self%efficiency_neg
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Maximum absolute overall value  = ", self%max_abs_f
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Maximum absolute positive value = ", self%max_abs_f_pos
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Maximum absolute negative value = ", self%max_abs_f_neg
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Integral (of latest iteration)  = ", self%result
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Standard deviation              = ", self%std
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Event weight                    = ", self%evt_weight
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Event weight excess             = ", self%evt_weight_excess
   end subroutine vegas_result_write
 
 @  %def vegas_results_write
 @ Update results and efficiency.
 
 We define
 \begin{equation}
 \operatorname*{max}_{x} w(x) = \frac{f(x)}{p(x)} \Delta_{\text{jac}}.
 \end{equation}
 In the implementation we have to factor out [[n_calls]] in the jacobian.
 Also, during event generation.
-<<vegas: vegas result: TBP>>=
+<<Vegas: vegas result: TBP>>=
   procedure, public :: update => vegas_result_update
   procedure, public :: update_efficiency => vegas_result_update_efficiency
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_result_update (result, integral, variance)
     class(vegas_result_t), intent(inout) :: result
     real(default), intent(in) :: integral
     real(default), intent(in) :: variance
     real(default) :: guarded_variance, sq_integral
     real(default) :: wgt, chi
     !! Guard against zero (or negative) variance.
     !! \Delta = I * \epsilon -> I = \Delta.
     if (variance < &
          max ((epsilon (integral) * integral)**2, tiny(integral))) then
        guarded_variance = &
             max ((epsilon (integral) * integral)**2, &
             tiny (integral))
     else
       guarded_variance = variance
     end if
     wgt = 1._default / guarded_variance
     sq_integral = integral**2
     result%result = integral
     result%std = sqrt (guarded_variance)
     result%samples = result%samples + 1
     if (result%samples == 1) then
        result%chi2 = 0._default
     else
        chi = integral
        if (result%sum_wgts > 0) then
           chi = chi - result%sum_int_wgtd / result%sum_wgts
        end if
        result%chi2 = result%chi2 * (result%samples - 2.0_default)
        result%chi2 = (wgt / (1._default + (wgt / result%sum_wgts))) &
             & * chi**2
        result%chi2 = result%chi2 / (result%samples - 1._default)
     end if
     result%sum_wgts = result%sum_wgts + wgt
     result%sum_int_wgtd = result%sum_int_wgtd + (integral * wgt)
     result%sum_chi = result%sum_chi + (sq_integral * wgt)
   end subroutine vegas_result_update
 
   subroutine vegas_result_update_efficiency (result, &
        n_calls, max_pos, max_neg, sum_pos, sum_neg)
     class(vegas_result_t), intent(inout) :: result
     integer, intent(in) :: n_calls
     real(default), intent(in) :: max_pos
     real(default), intent(in) :: max_neg
     real(default), intent(in) :: sum_pos
     real(default), intent(in) :: sum_neg
     result%max_abs_f_pos = n_calls * max_pos
     result%max_abs_f_neg = n_calls * max_neg
     result%max_abs_f = &
          & max (result%max_abs_f_pos, result%max_abs_f_neg)
     result%efficiency_pos = 0
     if (max_pos > 0) then
        result%efficiency_pos = &
             & sum_pos / max_pos
     end if
     result%efficiency_neg = 0
     if (max_neg > 0) then
        result%efficiency_neg = &
             & sum_neg / max_neg
     end if
     result%efficiency = 0
     if (result%max_abs_f > 0.) then
        result%efficiency = (sum_pos + sum_neg) &
             & / result%max_abs_f
     end if
   end subroutine vegas_result_update_efficiency
 
 @ %def vegas_result_update, vegas_result_update_efficiency
 @ Reset results.
-<<vegas: vegas result: TBP>>=
+<<Vegas: vegas result: TBP>>=
   procedure, public :: reset => vegas_result_reset
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_result_reset (result)
     class(vegas_result_t), intent(inout) :: result
     result%sum_int_wgtd = 0
     result%sum_wgts = 0
     result%sum_chi = 0
     result%it_num = 0
     result%samples = 0
     result%chi2 = 0
     result%efficiency = 0
     result%efficiency_pos = 0
     result%efficiency_neg = 0
     result%max_abs_f = 0
     result%max_abs_f_pos = 0
     result%max_abs_f_neg = 0
   end subroutine vegas_result_reset
 
 @ %def vegas_result_reset
 @ Send the result object to specified rank, internally in a non-blocking way.
 
 We do not need to handle the event results, because each event result is atomic.
-<<MPI: vegas: vegas result: TBP>>=
+<<MPI: Vegas: vegas result: TBP>>=
   procedure, public :: send => vegas_result_send
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   subroutine vegas_result_send (self, receiver, tag, comm, request)
     class(vegas_result_t), intent(in) :: self
     integer, intent(in) :: receiver
     integer, intent(in) :: tag
     type(MPI_COMM), intent(in) :: comm
     type(MPI_Request), dimension(:), intent(inout) :: request
     if (size (request) /= self%get_n_requests ()) &
          call msg_bug ("VEGAS: number of requests does not match.")
     call MPI_Isend (self%it_start, 1, MPI_INTEGER, receiver, tag + 1,&
          & comm, request(1))
     call MPI_Isend (self%it_num, 1, MPI_INTEGER, receiver , tag + 2,&
          & comm, request(2))
     call MPI_Isend (self%samples, 1, MPI_INTEGER, receiver, tag + 3,&
          & comm, request(3))
     call MPI_Isend (self%sum_int_wgtd, 1, MPI_DOUBLE_PRECISION, receiver, tag + 4,&
          & comm, request(4))
     call MPI_Isend (self%sum_wgts, 1, MPI_DOUBLE_PRECISION, receiver, tag + 5,&
          & comm, request(5))
     call MPI_Isend (self%sum_chi, 1, MPI_DOUBLE_PRECISION, receiver, tag + 6,&
          & comm, request(6))
     call MPI_Isend (self%chi2, 1, MPI_DOUBLE_PRECISION, receiver, tag + 7,&
          & comm, request(7))
     call MPI_Isend (self%efficiency, 1, MPI_DOUBLE_PRECISION, receiver, tag + 8,&
          & comm, request(8))
     call MPI_Isend (self%efficiency_pos, 1, MPI_DOUBLE_PRECISION, receiver, tag + 9,&
          & comm, request(9))
     call MPI_Isend (self%efficiency_neg, 1, MPI_DOUBLE_PRECISION, receiver, tag + 10,&
          & comm, request(10))
     call MPI_Isend (self%max_abs_f, 1, MPI_DOUBLE_PRECISION, receiver, tag + 11,&
          & comm, request(11))
     call MPI_Isend (self%max_abs_f_pos, 1, MPI_DOUBLE_PRECISION, receiver, tag + 12,&
          & comm, request(12))
     call MPI_Isend (self%max_abs_f_neg, 1, MPI_DOUBLE_PRECISION, receiver, tag + 13,&
          & comm, request(13))
     call MPI_Isend (self%result, 1, MPI_DOUBLE_PRECISION, receiver, tag + 14,&
          & comm, request(14))
     call MPI_Isend (self%std, 1, MPI_DOUBLE_PRECISION, receiver, tag + 15,&
          & comm, request(15))
   end subroutine vegas_result_send
 
 @ %def vegas_result_communicate
 @ Receive the result object from a specified rank, internally in a non-blocking way.
-<<MPI: vegas: vegas result: TBP>>=
+<<MPI: Vegas: vegas result: TBP>>=
   procedure, public :: receive => vegas_result_receive
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   subroutine vegas_result_receive (self, sender, tag, comm, request)
     class(vegas_result_t), intent(inout) :: self
     integer, intent(in) :: sender
     integer, intent(in) :: tag
     type(MPI_COMM), intent(in) :: comm
     type(MPI_REQUEST), dimension(:), intent(inout) :: request
     if (size (request) /= self%get_n_requests ()) &
          call msg_bug ("VEGAS: number of requests does not match.")
     call MPI_Irecv (self%it_start, 1, MPI_INTEGER, sender, tag + 1,&
          & comm, request(1))
     call MPI_Irecv (self%it_num, 1, MPI_INTEGER, sender , tag + 2,&
          & comm, request(2))
     call MPI_Irecv (self%samples, 1, MPI_INTEGER, sender, tag + 3,&
          & comm, request(3))
     call MPI_Irecv (self%sum_int_wgtd, 1, MPI_DOUBLE_PRECISION, sender, tag + 4,&
          & comm, request(4))
     call MPI_Irecv (self%sum_wgts, 1, MPI_DOUBLE_PRECISION, sender, tag + 5,&
          & comm, request(5))
     call MPI_Irecv (self%sum_chi, 1, MPI_DOUBLE_PRECISION, sender, tag + 6,&
          & comm, request(6))
     call MPI_Irecv (self%chi2, 1, MPI_DOUBLE_PRECISION, sender, tag + 7,&
          & comm, request(7))
     call MPI_Irecv (self%efficiency, 1, MPI_DOUBLE_PRECISION, sender, tag + 8,&
          & comm, request(8))
     call MPI_Irecv (self%efficiency_pos, 1, MPI_DOUBLE_PRECISION, sender, tag + 9,&
          & comm, request(9))
     call MPI_Irecv (self%efficiency_neg, 1, MPI_DOUBLE_PRECISION, sender, tag + 10,&
          & comm, request(10))
     call MPI_Irecv (self%max_abs_f, 1, MPI_DOUBLE_PRECISION, sender, tag + 11,&
          & comm, request(11))
     call MPI_Irecv (self%max_abs_f_pos, 1, MPI_DOUBLE_PRECISION, sender, tag + 12,&
          & comm, request(12))
     call MPI_Irecv (self%max_abs_f_neg, 1, MPI_DOUBLE_PRECISION, sender, tag + 13,&
          & comm, request(13))
     call MPI_Irecv (self%result, 1, MPI_DOUBLE_PRECISION, sender, tag + 14,&
          & comm, request(14))
     call MPI_Irecv (self%std, 1, MPI_DOUBLE_PRECISION, sender, tag + 15,&
          & comm, request(15))
   end subroutine vegas_result_receive
 
 @ %def vegas_result_receive
 @
-<<MPI: vegas: vegas result: TBP>>=
+<<MPI: Vegas: vegas result: TBP>>=
   procedure, private :: get_n_requests => vegas_result_get_n_requests
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   pure integer function vegas_result_get_n_requests (result) result (n_requests)
     class(vegas_result_t), intent(in) :: result
     n_requests = 15
   end function vegas_result_get_n_requests
 @ %def vegas_result_get_n_requests
 @
 \section{Type: vegas\_handler\_t}
 \label{sec:vegas_handler_t}
 
 Callback handler for VEGAS result and grid.
 
-<<MPI: vegas: public>>=
+<<MPI: Vegas: public>>=
   public :: vegas_handler_t
-<<MPI: vegas: types>>=
+<<MPI: Vegas: types>>=
   type, extends(request_handler_t) :: vegas_handler_t
      type(vegas_result_t), pointer :: result => null ()
      real(default), dimension(:, :), pointer :: d => null ()
    contains
-   <<vegas: vegas handler: TBP>>
+   <<Vegas: vegas handler: TBP>>
   end type vegas_handler_t
 @ %def vegas_handler_t
 @ Provide the actual communication between master and client.
 
 The communication for [[vegas_result_t]] is done in a separate procedure, where we only have to pass the MPI requests.
 Collecting of the distribution array is done directly.
-<<MPI: vegas: vegas handler: TBP>>=
+<<MPI: Vegas: vegas handler: TBP>>=
   procedure :: init => vegas_handler_init
   procedure :: write => vegas_handler_write
   procedure :: handle => vegas_handler_handle
   procedure :: client_handle => vegas_handler_client_handle
   final :: vegas_handler_final
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   subroutine vegas_handler_init (handler, handler_id, result, d)
     class(vegas_handler_t), intent(inout) :: handler
     integer, intent(in) :: handler_id
     type(vegas_result_t), intent(in), target :: result
     real(default), dimension(:, :), intent(in), target :: d
     integer :: n_requests, tag_offset
     handler%result => result
     handler%d => d
     handler%finished = .false.
     !! Add one request for handling of the distribution d.
     n_requests = result%get_n_requests () + 1
     tag_offset = max(handler_id - 1, 0) * n_requests
     call handler%allocate (n_requests, tag_offset)
   end subroutine vegas_handler_init
 
   subroutine vegas_handler_write (handler, unit)
     class(vegas_handler_t), intent(in) :: handler
     integer, intent(in), optional :: unit
     integer :: u, j
     u = given_output_unit (unit)
     write (u, "(A)") "[VEGAS_HANDLER]"
     call handler%base_write (unit)
     call handler%result%write (u)
     write (u, "(A)") "BEGIN D"
     do j = 1, size (handler%d, dim=2)
        write (u, "(1X,I3,999(1X," // FMT_17 // "))") j, handler%d(:, j)
     end do
     write (u, "(A)") "END D"
   end subroutine vegas_handler_write
 
   subroutine vegas_handler_handle (handler, source_rank, comm)
     class(vegas_handler_t), intent(inout) :: handler
     integer, intent(in) :: source_rank
     type(MPI_COMM), intent(in) :: comm
     !! Take the complete contiguous array memory.
     call MPI_Irecv (handler%d, size (handler%d),&
             & MPI_DOUBLE_PRECISION, source_rank, handler%tag_offset, comm,&
             & handler%request(1))
     call handler%result%receive (source_rank, handler%tag_offset, comm, &
          handler%request(2:))
     handler%activated = .true.
     handler%finished = .false.
   end subroutine vegas_handler_handle
 
   subroutine vegas_handler_client_handle (handler, dest_rank, comm)
     class(vegas_handler_t), intent(inout) :: handler
     integer, intent(in) :: dest_rank
     type(MPI_COMM), intent(in) :: comm
     !! Take the complete contiguous array memory.
     call MPI_Isend (handler%d, size (handler%d),&
          & MPI_DOUBLE_PRECISION, dest_rank, handler%tag_offset, comm,&
          & handler%request(1))
     call handler%result%send (dest_rank, handler%tag_offset, comm, &
          handler%request(2:))
     handler%activated = .true.
     handler%finished = .false.
   end subroutine vegas_handler_client_handle
 
   !> Finalize vegas_handler_t.
   !!
   !! Nullify pointer to object.
   subroutine vegas_handler_final (handler)
     type(vegas_handler_t), intent(inout) :: handler
     nullify (handler%result)
     nullify (handler%d)
   end subroutine vegas_handler_final
 
 @ %def vegas_handler_init, vegas_handler_write, vegas_handler_handle,
 @ %def vegas_handler_client_handle, vegas_handler_final
 @
 \section{Type: vegas\_t}
 \label{sec:type:-vegas_t}
 
 The VEGAS object contains the methods for integration and grid resize- and
 refinement. We store the grid configuration and the (current) result in
 transparent containers alongside with the actual grid and the distribution.
 
 The values of the distribution depend on the chosen mode whether the function
 value or the variance is stored. The distribution is used after each iteration
 to refine the grid.
-<<vegas: public>>=
+<<Vegas: public>>=
   public :: vegas_t
-<<vegas: types>>=
+<<Vegas: types>>=
   type :: vegas_t
      private
      type(vegas_config_t) :: config
      real(default) :: hypercube_volume = 0.
      real(default) :: jacobian = 0.
      real(default), dimension(:, :), allocatable :: d
      type(vegas_grid_t) :: grid
      integer, dimension(:), allocatable :: bin
      integer, dimension(:), allocatable :: box
      type(vegas_result_t) :: result
-   <<vegas: vegas: type>>
+   <<Vegas: vegas: type>>
    contains
-   <<vegas: vegas: TBP>>
+   <<Vegas: vegas: TBP>>
   end type vegas_t
 
 @ %def vegas_t
 @ Add members for MPI communication and parallel mode.
 Must be set before calling to [[vegas_integrate]], likewise [[vegas_set_calls]].
-<<MPI: vegas: vegas: type>>=
+<<MPI: Vegas: vegas: type>>=
   type(MPI_COMM) :: comm
   logical :: parallel_mode = .false.
 @ We overload the type constructor of [[vegas_t]] which initialises the
 mandatory argument [[n_dim]] and allocate memory for the grid.
-<<vegas: interfaces>>=
+<<Vegas: interfaces>>=
   interface vegas_t
      module procedure vegas_init
   end interface vegas_t
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   type(vegas_t) function vegas_init (n_dim, alpha, n_bins_max, iterations, mode) result (self)
     integer, intent(in) :: n_dim
     integer, intent(in), optional :: n_bins_max
     real(default), intent(in), optional :: alpha
     integer, intent(in), optional :: iterations
     integer, intent(in), optional :: mode
     self%config%n_dim = n_dim
     if (present (alpha)) self%config%alpha = alpha
     if (present (n_bins_max)) self%config%n_bins_max = n_bins_max
     if (present (iterations)) self%config%iterations = iterations
     if (present (mode)) self%config%mode = mode
     self%grid = vegas_grid_t (n_dim, self%config%n_bins_max)
     allocate (self%d(self%config%n_bins_max, n_dim), source=0.0_default)
     allocate (self%box(n_dim), source=1)
     allocate (self%bin(n_dim), source=1)
     self%config%n_bins = 1
     self%config%n_boxes = 1
     call self%set_limits (self%grid%x_lower, self%grid%x_upper)
     call self%reset_grid ()
     call self%reset_result ()
-  <<vegas: vegas init>>
+  <<Vegas: vegas init>>
   end function vegas_init
 
 @ %def vegas_init
-<<vegas: vegas init>>=
+<<Vegas: vegas init>>=
 @
 @ Prepare VEGAS to be run in parallel modues (when compiled with MPI).
-<<MPI: vegas: vegas init>>=
+<<MPI: Vegas: vegas init>>=
   call self%prepare_parallel_integrate (MPI_COMM_WORLD, &
        duplicate_comm = .false., &
        parallel_mode = .true.)
 @
 @ Finalize the grid. Deallocate grid memory.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: final => vegas_final
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_final (self)
     class(vegas_t), intent(inout) :: self
     deallocate (self%grid%x_upper)
     deallocate (self%grid%x_lower)
     deallocate (self%grid%delta_x)
     deallocate (self%d)
     deallocate (self%grid%xi)
     deallocate (self%box)
     deallocate (self%bin)
   end subroutine vegas_final
 @ %def vegas_final
 
 \section{Get-/Set-methods}
 \label{sec:set-get-methods}
 
 @ The VEGAS object prohibits direct access from outside. Communication is handle
 via get- or set-methods. Set the limits of integration. The defaults limits
 correspong the $n$-dimensionl unit hypercube.
 
 \textit{Remark:} After setting the limits, the grid is initialised, again.
 Previous results are lost due to recalculation of the overall jacobian.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: set_limits => vegas_set_limits
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_set_limits (self, x_lower, x_upper)
     class(vegas_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x_lower
     real(default), dimension(:), intent(in) :: x_upper
     if (size (x_lower) /= self%config%n_dim &
          & .or. size (x_upper) /= self%config%n_dim) then
        write (msg_buffer, "(A, I5, A, I5, A, I5)") &
             "VEGAS: [set_limits] n_dim of new lower/upper integration limit&
             & does not match previously set n_dim. ", self%config%n_dim, " =/=&
             & ", size (x_lower), " =/= ", size (x_upper)
        call msg_fatal ()
     end if
     if (any(x_upper < x_lower)) then
        call msg_fatal ("VEGAS: [set_limits] upper limits are smaller than lower limits.")
     end if
     if (any((x_upper - x_lower) > huge(0._default))) then
        call msg_fatal ("VEGAS: [set_limits] upper and lower limits exceed rendering.")
     end if
     self%grid%x_upper = x_upper
     self%grid%x_lower = x_lower
     self%grid%delta_x = self%grid%x_upper - self%grid%x_lower
     self%hypercube_volume = product (self%grid%delta_x)
     call self%reset_result ()
   end subroutine vegas_set_limits
 
 @ %def vegas_set_limits
 @ Set the number of calls. If the number of calls changed during different
 passes, we resize the grid preserving the probability density. We should reset
 the results after changing the number of calls which change the size of the grid
 and the running mode of VEGAS. But, this is a set method only for the number of
 calls.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: set_calls => vegas_set_n_calls
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_set_n_calls (self, n_calls)
     class(vegas_t), intent(inout) :: self
     integer, intent(in) :: n_calls
     if (.not. (n_calls > 0)) then
        write (msg_buffer, "(A, I5)") &
             "VEGAS: [set_calls] number of calls must be a positive number. Keep&
             & number of calls = ", self%config%n_calls
        call msg_warning ()
     else
        self%config%n_calls = max (n_calls, self%config%n_calls_min)
        if (self%config%n_calls /= n_calls) then
           write (msg_buffer, "(A,I5)") &
                "VEGAS: [set calls] number of calls is too few, reset to ", self%config%n_calls
           call msg_warning ()
        end if
        call self%init_grid ()
     end if
   end subroutine vegas_set_n_calls
 
 @ %def vegas_set_n_calls
 @ Get the grid object and set [[n_bins]], [[n_dim]] inside grid container.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_grid => vegas_get_grid
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   type(vegas_grid_t) function vegas_get_grid (self) result (grid)
     class(vegas_t), intent(in) :: self
     grid = self%grid
     grid%n_dim = self%config%n_dim
     grid%n_bins = self%config%n_bins
   end function vegas_get_grid
 
 @ %def vegas_get_grid
 @ Set grid. We need a set method for the parallelisation. We do some additional
 checks before copying the object. Be careful, we do not check on [[n_bins]],
 because the number of bins can change after setting [[n_calls]]. We remind you,
 that you will loose all your current progress, if you use set the grid. Hence,
 it will only be used when compiled with [[MPI]].
-<<MPI: vegas: vegas: TBP>>=
+<<MPI: Vegas: vegas: TBP>>=
   procedure, public :: set_grid => vegas_set_grid
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   subroutine vegas_set_grid (self, grid)
     class(vegas_t), intent(inout) :: self
     type(vegas_grid_t), intent(in) :: grid
     integer :: j
     logical :: success
     success = .true.
     success = (success .and. (grid%n_dim .eq. self%config%n_dim))
     success = (success .and. all (grid%x_lower .eq. self%grid%x_lower))
     success = (success .and. all (grid%x_upper .eq. self%grid%x_upper))
     success = (success .and. all (grid%delta_x .eq. self%grid%delta_x))
     if (success) then
        self%config%n_bins = grid%n_bins
        do j = 1, self%config%n_dim
           self%grid%xi(1, j) = 0._default
           self%grid%xi(2:self%config%n_bins, j) = grid%xi(2:grid%n_bins, j)
           self%grid%xi(self%config%n_bins + 1, j) = 1._default
        end do
     else
        call msg_bug ("VEGAS: set grid: boundary conditions do not match.")
     end if
   end subroutine vegas_set_grid
 
 @ %def vegas_set_grid
 @ Prepare a parallel integration.
 
 A parallel integration requires a communicator, which may be duplicated in order to provide a safe communication context for the current VEGAS instance.
 Furthermore, given an optional parameter, the behavior with regards to the parallel evaluation can be changed (e.g. embbed integration).
-<<MPI: vegas: vegas: TBP>>=
+<<MPI: Vegas: vegas: TBP>>=
   procedure, public :: prepare_parallel_integrate => vegas_prepare_parallel_integrate
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   subroutine vegas_prepare_parallel_integrate (self, comm, duplicate_comm, parallel_mode)
     class(vegas_t), intent(inout) :: self
     type(MPI_COMM), intent(in) :: comm
     logical, intent(in), optional :: duplicate_comm
     logical, intent(in), optional :: parallel_mode
     logical :: flag
     flag = .true.; if (present (duplicate_comm)) flag = duplicate_comm
     if (duplicate_comm) then
        call MPI_COMM_DUP (comm, self%comm)
     else
        self%comm = comm
     end if
     self%parallel_mode = .true.; if (present (parallel_mode)) &
          self%parallel_mode = parallel_mode
   end subroutine vegas_prepare_parallel_integrate
 
 @ %def vegas_prepare_parallel_integrate
 @ We check if it is senseful to parallelize the actual grid. In simple, this
 means that [[n_boxes]] has to be larger than 2. With the result that we could
 have an actual superimposed stratified grid. In advance, we can give the size of
 communicator [[n_size]] and check whether we have enough boxes to distribute.
-<<MPI: vegas: vegas: TBP>>=
+<<MPI: Vegas: vegas: TBP>>=
   procedure, public :: is_parallelizable => vegas_is_parallelizable
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   elemental logical function vegas_is_parallelizable (self, opt_n_size) result (flag)
     class(vegas_t), intent(in) :: self
     integer, intent(in), optional :: opt_n_size
     integer :: n_size
     n_size = 2; if (present (opt_n_size)) n_size = opt_n_size
     flag = (self%config%n_boxes**floor (self%config%n_dim / 2.) >= n_size)
   end function vegas_is_parallelizable
 
 @ %def vegas_is_parallelizable
 @ Get the config object.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_config => vegas_get_config
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_get_config (self, config)
     class(vegas_t), intent(in) :: self
     type(vegas_config_t), intent(out) :: config
     config = self%config
   end subroutine vegas_get_config
 
 @ %def vegas_get_config
 @ Set non-runtime dependent configuration. It will no be possible to change
 [[n_bins_max]].
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: set_config => vegas_set_config
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_set_config (self, config)
     class(vegas_t), intent(inout) :: self
     class(vegas_config_t), intent(in) :: config
     self%config%alpha = config%alpha
     self%config%iterations = config%iterations
     self%config%mode = config%mode
     self%config%n_calls_min = config%n_calls_min
    end subroutine vegas_set_config
 
 @ %def vegas_set_config
 @ Get the result object.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_result => vegas_get_result
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   type(vegas_result_t) function vegas_get_result (self) result (result)
     class(vegas_t), intent(in) :: self
     result = self%result
   end function vegas_get_result
 
 @ %def vegas_get_result
 @ Set the result object. Be reminded, that you will loose your current results,
 if you are not careful! Hence, it will only be avaible during usage with
 [[MPI]].
-<<MPI: vegas: vegas: TBP>>=
+<<MPI: Vegas: vegas: TBP>>=
   procedure, public :: set_result => vegas_set_result
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   subroutine vegas_set_result (self, result)
     class(vegas_t), intent(inout) :: self
     type(vegas_result_t), intent(in) :: result
     self%result = result
   end subroutine vegas_set_result
 
 @ %def vegas_set_result
 @ Get (actual) number of calls.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_calls => vegas_get_n_calls
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   elemental real(default) function vegas_get_n_calls (self) result (n_calls)
     class(vegas_t), intent(in) :: self
     n_calls = self%config%n_calls
   end function vegas_get_n_calls
 
 @ %def vegas_get_n_calls
 @ Get the cumulative result of the integration. Recalculate weighted average of the integration.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_integral => vegas_get_integral
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   elemental real(default) function vegas_get_integral (self) result (integral)
     class(vegas_t), intent(in) :: self
     integral = 0.
     if (self%result%sum_wgts > 0.) then
        integral = self%result%sum_int_wgtd / self%result%sum_wgts
     end if
   end function vegas_get_integral
 
 @ %def vegas_get_integral
 @ Get the cumulative variance of the integration. Recalculate the variance.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_variance => vegas_get_variance
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   elemental real(default) function vegas_get_variance (self) result (variance)
     class(vegas_t), intent(in) :: self
     variance = 0.
     if (self%result%sum_wgts > 0.) then
        variance = 1.0 / self%result%sum_wgts
     end if
   end function vegas_get_variance
 
 @ %def vegas_get_variance
 @ Get efficiency.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_efficiency => vegas_get_efficiency
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   elemental real(default) function vegas_get_efficiency (self) result (efficiency)
     class(vegas_t), intent(in) :: self
     efficiency = 0.
     if (self%result%efficiency > 0. ) then
        efficiency = self%result%efficiency
     end if
   end function vegas_get_efficiency
 
 @ %def vegas_get_efficiency
 @ Get [[f_max]].
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_max_abs_f => vegas_get_max_abs_f
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   elemental real(default) function vegas_get_max_abs_f (self) result (max_abs_f)
     class(vegas_t), intent(in) :: self
     max_abs_f = 0.
     if (self%result%max_abs_f > 0.) then
        max_abs_f = self%result%max_abs_f
     end if
   end function vegas_get_max_abs_f
 
 @ %def vegas_get_max_abs_f
 @ Get sum of absolute (positive and negative) values.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_sum_abs_f_pos => vegas_get_sum_abs_f_pos
   procedure, public :: get_sum_abs_f_neg => vegas_get_sum_abs_f_neg
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   elemental real(default) function vegas_get_sum_abs_f_pos (self) result (sum_abs_f)
     class(vegas_t), intent(in) :: self
     sum_abs_f = self%result%efficiency_pos * self%result%max_abs_f_pos
   end function vegas_get_sum_abs_f_pos
 
   elemental real(default) function vegas_get_sum_abs_f_neg (self) result (sum_abs_f)
     class(vegas_t), intent(in) :: self
     sum_abs_f = self%result%efficiency_neg * self%result%max_abs_f_neg
   end function vegas_get_sum_abs_f_neg
 
 @ %def vegas_get_sum_abs_f_pos, vegas_get_sum_abs_f_neg
 @ Get [[f_max_pos]].
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
 procedure, public :: get_max_abs_f_pos => vegas_get_max_abs_f_pos
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
 elemental real(default) function vegas_get_max_abs_f_pos (self) result (max_abs_f)
   class(vegas_t), intent(in) :: self
   max_abs_f = 0.
   if (self%result%max_abs_f_pos > 0.) then
      max_abs_f = self%result%max_abs_f_pos
   end if
 end function vegas_get_max_abs_f_pos
 
 @ %def vegas_get_max_abs_f_pos
 @ Get [[f_max_neg]].
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_max_abs_f_neg => vegas_get_max_abs_f_neg
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   elemental real(default) function vegas_get_max_abs_f_neg (self) result (max_abs_f)
     class(vegas_t), intent(in) :: self
     max_abs_f = 0.
     if (self%result%max_abs_f_neg > 0.) then
        max_abs_f = self%result%max_abs_f_neg
     end if
   end function vegas_get_max_abs_f_neg
 
 @ %def vegas_get_max_abs_f_neg
 @ Get event weight and excess.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_evt_weight => vegas_get_evt_weight
   procedure, public :: get_evt_weight_excess => vegas_get_evt_weight_excess
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   real(default) function vegas_get_evt_weight (self) result (evt_weight)
     class(vegas_t), intent(in) :: self
     evt_weight = self%result%evt_weight
   end function vegas_get_evt_weight
 
   real(default) function vegas_get_evt_weight_excess (self) result (evt_weight_excess)
     class(vegas_t), intent(in) :: self
     evt_weight_excess = self%result%evt_weight_excess
   end function vegas_get_evt_weight_excess
 
 @ %def vegas_get_evt_weight, vegas_get_evt_weight_excess
 @ Get and set distribution.
 
 We allow the distribution to be manipulated by an external call.
 The integration result cannot be changed by this, however, the error behavior may worsen and the efficiency may fall pretty low.
 But, also, the opposite is possible, see [[vamp2_equivalences]].
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: get_distribution => vegas_get_distribution
   procedure, public :: set_distribution => vegas_set_distribution
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   function vegas_get_distribution (self) result (d)
     class(vegas_t), intent(in) :: self
     real(default), dimension(:, :), allocatable :: d
     d = self%d
   end function vegas_get_distribution
 
   subroutine vegas_set_distribution (self, d)
     class(vegas_t), intent(inout) :: self
     real(default), dimension(:, :), intent(in) :: d
     if (size (d, dim = 2) /= self%config%n_dim) then
        call msg_bug ("[VEGAS] set_distribution: new distribution has wrong size of dimension")
     end if
     if (size (d, dim = 1) /= self%config%n_bins_max) then
        call msg_bug ("[VEGAS] set_distribution: new distribution has wrong number of bins")
     end if
     self%d = d
   end subroutine vegas_set_distribution
 
 @ %def vegas_get_distribution, vegas_set_distribution
 @ Allocate result handler from VEGAS integrator object.
-<<MPI: vegas: vegas: TBP>>=
+<<MPI: Vegas: vegas: TBP>>=
   procedure, public :: allocate_handler => vegas_allocate_handler
-<<MPI: vegas: procedures>>=
+<<MPI: Vegas: procedures>>=
   subroutine vegas_allocate_handler (self, handler_id, handler)
     class(vegas_t), intent(in), target :: self
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer, intent(out) :: handler
     allocate (vegas_handler_t :: handler)
     select type (handler)
     type is (vegas_handler_t)
        call handler%init (handler_id, result = self%result, d = self%d)
     end select
   end subroutine vegas_allocate_handler
 
 @ %def vegas_allocate_handler
 @ \section{Grid resize- and refinement}
 \label{sec:grid-resize-refin}
 
 Before integration the grid itself must be initialised. Given the number of
 [[n_calls]] and [[n_dim]] we prepare the grid for the integration.
 
 The grid is binned according to the VEGAS mode and [[n_calls]]. If the mode is
 not set to [[vegas_importance_only]], the grid is divided in to equal boxes. We
 try for 2 calls per box
 \begin{equation}
   boxes = \sqrt[n_{dim}]{\frac{calls}{2}}.
 \end{equation}
 If the numbers of boxes exceeds the number of bins, which is the case for low
 dimensions, the algorithm switches to stratified sampling. Otherwise, we are
 still using importance sampling, but keep the boxes for book keeping. If the
 number of bins changes from the previous invocation, bins are expanded or
 contracted accordingly, while preserving bin density.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, private :: init_grid => vegas_init_grid
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_init_grid (self)
     class(vegas_t), intent(inout) :: self
     integer :: n_bins, n_boxes, box_per_bin, n_total_boxes
     real(default), dimension(:, :), allocatable :: w
     n_bins = self%config%n_bins_max
     n_boxes = 1
     if (self%config%mode /= VEGAS_MODE_IMPORTANCE_ONLY) then
        ! We try for 2 calls per box
        n_boxes = max (floor ((self%config%n_calls / 2.)**(1. / self%config%n_dim)), 1)
        self%config%mode = VEGAS_MODE_IMPORTANCE
        if (2 * n_boxes >= self%config%n_bins_max) then
           ! if n_bins/box < 2
           box_per_bin = max (n_boxes / self%config%n_bins_max, 1)
           n_bins =  min (n_boxes / box_per_bin, self%config%n_bins_max)
           n_boxes = box_per_bin * n_bins
           self%config%mode = VEGAS_MODE_STRATIFIED
        end if
     end if
     n_total_boxes = n_boxes**self%config%n_dim
     self%config%calls_per_box = max (floor (real (self%config%n_calls) / n_total_boxes), 2)
     self%config%n_calls = self%config%calls_per_box * n_total_boxes
     !  Total volume of x-space/(average n_calls per bin)
     self%jacobian = self%hypercube_volume * real(n_bins, default)&
          &**self%config%n_dim / real(self%config%n_calls, default)
     self%config%n_boxes = n_boxes
     if (n_bins /= self%config%n_bins) then
        allocate (w(self%config%n_bins, self%config%n_dim), source=1.0_default)
        call self%grid%resize (n_bins, w)
        self%config%n_bins = n_bins
     end if
   end subroutine vegas_init_grid
 
 @ %def vegas_init_grid
 @ Reset the cumulative result, and efficiency and max. grid values.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: reset_result => vegas_reset_result
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_reset_result (self)
     class(vegas_t), intent(inout) :: self
     call self%result%reset ()
   end subroutine vegas_reset_result
 
 @ %def vegas_reset_results
 @ Reset the grid. Purge the adapted grid and the distribution.
 Furthermore, reset the results. The maximal size of the grid remains.
 
 Note: Handle [[vegas_reset_grid]] with great care! Instead of reusing an old
 object, create a new one.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: reset_grid => vegas_reset_grid
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_reset_grid (self)
     class(vegas_t), intent(inout) :: self
     self%config%n_bins = 1
     self%d = 0._default
     self%grid%xi = 0._default
     self%grid%xi(1, :) = 0._default
     self%grid%xi(2, :) = 1._default
     call self%reset_result ()
   end subroutine vegas_reset_grid
 
 @ %def vegas_reset_grid
 @ Refine the grid to match the distribution [[d]].
 
 Average the distribution over neighbouring bins, then contract or expand the bins.
 The averaging dampens high fluctuations amog the integrand or the variance.
 
 We make the type-bound procedure public accessible because the multi-channel
 integration refines each grid after integration over all grids.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: refine => vegas_refine_grid
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_refine_grid (self, average)
     class(vegas_t), intent(inout) :: self
     logical, intent(in), optional :: average
     logical :: opt_average
     opt_average = .true.; if (present (average)) opt_average = average
     if (opt_average) call self%average_distribution ()
     call self%grid%resize (self%config%n_bins, self%d(:self%config%n_bins, :))
   end subroutine vegas_refine_grid
 
 @ %def vegas_refine_grid
 @ We average the collected values [[d]] of the (sq.) weighted [[f]] over
 neighbouring bins. The averaged [[d]] are then agian damped by a logarithm to
 enhance numerical stability. The results are then the refinement weights [[w]].
 
 We have to take care of the special case where we have a very low sampling rate.
 In those cases we can not be sure that the distribution is satisfying filled,
 although we have already averaged over neighbouring bins. This will lead to a
 squashing of the unfilled bins and such the boundaries of those will be pushed
 together. We circumvent this problem by setting those unfilled bins to the
 smallest representable value of a default real.
 
 The problem becomes very annoying in the multi-channel formualae where have to
 look up via binary search the corresponding probability of [[x]] and if the
 width is zero, the point will be neglected.
 
 Another issue arises when [[NaN]]s appear during sampling.
 A single [[NaN]] may spoil the complete adaption as the refinement depends on all weights for a single axis, c.f. [[vegas_grid_resize]] for the details of the mechanism.
 Therefore, if we find a single (or more) [[NaN]]s in the distribution of an axis, then we won't adapt that specific axis by setting the weights to one.
 Hence, we skip the adaption for parts of the grid, where [[NaN]] appeared during evaluation.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, private :: average_distribution => vegas_average_distribution
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_average_distribution (self)
     !! use, intrinsic :: ieee_arithmetic, only: ieee_is_nan
     class(vegas_t), intent(inout) :: self
     integer :: j
     ndim: do j = 1, self%config%n_dim
        associate (d => self%d(:self%config%n_bins, j), &
             n_bins => self%config%n_bins)
          !! Non-portable and compiler-flag sensistive test, may replace with ieee_is_nan
          if (any (d /= d)) then
             d = 1.0_default
             cycle ndim
          end if
          if (n_bins > 2) then
             d(1) = (d(1) + d(2)) / 2.0_default
             d(2:n_bins - 1) = (d(1:n_bins - 2) + d(2:n_bins - 1) + d(3:n_bins)) /&
                  & 3.0_default
             d(n_bins) = d(n_bins - 1) + d(n_bins) / 2.0_default
          end if
          if (all (d < tiny (d))) then
             d = 1.0_default; cycle ndim
          end if
          d = d / sum (d)
          where (d < tiny (1.0_default))
             d = tiny (1.0_default)
          end where
          where (d /= 1.0_default)
             d = ((d - 1.0_default) / log(d))**self%config%alpha
          elsewhere
             ! Analytic limes for d -> 1
             d = 1.0_default
          end where
        end associate
     end do ndim
   end subroutine vegas_average_distribution
 
 @ %def vegas_average_distribution
 @
 \section{Integration}
 \label{sec:integration}
 
 Integrate [[func]], in the previous set bounds [[x_lower]] to [[x_upper]], with [[n_calls]].
 Use results from previous invocations of [[integrate]] with [[opt_reset_result =
 .false.]] and better with subsequent calls.
 
 Before we walk through the hybercube, we initialise the grid (at a central position).
 
 We step through the (equidistant) boxes which ensure we do not miss any place in the n-dim. hypercube.
 In each box we sample [[calls_per_box]] random points and transform them to bin coordinates.
 The total integral and the total (sample) variance over each box $i$ is then calculated by
 \begin{align*}
   E(I)_{i} = \sum_{j}^{\text{calls per box}} I_{i, j}, \\
   V(I)_{i} = \text{calls per box} \frac{\sum_{j}^{\text{calls per box}}} I_{i, j}^{2} - (\sum_{j}^{\text{calls per box}} I_{i, j})**2 \frac{\text{calls per box}}{\text{calls per box} - 1}.
 \end{align*}
 The stratification of the $n$-dimensional hybercube allows a simple
 parallelisation of the algorithm (R. Kreckel, "Parallelization of adaptive MC integrators", Computer Physics Communications, vol. 106, no. 3, pp. 258–266, Nov. 1997.).
 We have to ensure that all boxes are sampled, but the number of boxes to
 distribute is too large. We allow each thread to sample a fraction $r$ of all
 boxes $k$ such that $r \ll k$. Furthermore, we constrain that the number of
 process $p$ is much smaller than $r$.
 
 The overall constraint is
 \begin{equation}
 p \ll r \ll k.
 \end{equation}
 We divide the intgeration into a parallel and
 a perpendicular subspace. The number of parallel dimensions is $D_{\parallel} =
 \lfloor \frac{D}{2} \rfloor$.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: integrate => vegas_integrate
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_integrate (self, func, rng, iterations, reset_result,&
        & refine_grid, verbose, result, abserr)
     class(vegas_t), intent(inout) :: self
     class(vegas_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     integer, intent(in), optional :: iterations
     logical, intent(in), optional :: reset_result
     logical, intent(in), optional :: refine_grid
     logical, intent(in), optional :: verbose
     real(default), optional, intent(out) :: result, abserr
     integer :: it, k
     real(default), dimension(self%config%n_dim) :: x
     real(default) :: fval, fval_sq, bin_volume
     real(default) :: fval_box, fval_sq_box
     real(default) :: total_integral, total_sq_integral
     real(default) :: cumulative_int, cumulative_std
     real(default) :: sum_abs_f_pos, max_abs_f_pos
     real(default) :: sum_abs_f_neg, max_abs_f_neg
     logical :: opt_reset_result
     logical :: opt_refine_grid
     logical :: opt_verbose
     integer :: n_size
     integer :: n_dim_par
     logical :: box_success
-  <<vegas: vegas integrate: variables>>
+  <<Vegas: vegas integrate: variables>>
     call set_options ()
     call self%init_grid ()
     if (opt_reset_result) call self%result%reset ()
     self%result%it_start = self%result%it_num
     cumulative_int = 0.
     cumulative_std = 0.
     n_dim_par = floor (self%config%n_dim / 2.)
     n_size = 1
-  <<vegas: vegas integrate: init>>
+  <<Vegas: vegas integrate: init>>
     if (opt_verbose) then
        call msg_message ("Results: [it, calls, integral, error, chi^2, eff.]")
     end if
     iteration: do it = 1, self%config%iterations
        self%result%it_num = self%result%it_start + it
        self%d = 0.
        self%box = 1
        self%bin = 1
        total_integral = 0.
        total_sq_integral = 0.
        sum_abs_f_pos = 0.
        max_abs_f_pos = 0.
        sum_abs_f_neg = 0.
        max_abs_f_neg = 0.
        box_success = .true.
        select type (rng)
        type is (rng_stream_t)
           call rng%next_substream ()
        end select
-     <<vegas: vegas integrate: presampling>>
+     <<Vegas: vegas integrate: presampling>>
        loop_over_par_boxes: do while (box_success)
           loop_over_perp_boxes: do while (box_success)
              fval_box = 0._default
              fval_sq_box = 0._default
              do k = 1, self%config%calls_per_box
                 call self%random_point (rng, x, bin_volume)
                 ! Call the function, yeah, call it...
                 fval = self%jacobian * bin_volume * func%evaluate (x)
                 fval_sq = fval**2
                 fval_box = fval_box + fval
                 fval_sq_box = fval_sq_box + fval_sq
                 if (fval > 0.) then
                    max_abs_f_pos = max(abs (fval), max_abs_f_pos)
                    sum_abs_f_pos = sum_abs_f_pos + abs(fval)
                 else
                    max_abs_f_neg = max(abs (fval), max_abs_f_neg)
                    sum_abs_f_neg = sum_abs_f_neg + abs(fval)
                 end if
                 if (self%config%mode /= VEGAS_MODE_STRATIFIED) then
                    call self%accumulate_distribution (fval_sq)
                 end if
              end do
              fval_sq_box = sqrt (fval_sq_box * self%config%calls_per_box)
              ! (a - b) * (a + b) = a**2 - b**2
              fval_sq_box = (fval_sq_box - fval_box) * (fval_sq_box + fval_box)
              if (fval_sq_box <= 0.0) fval_sq_box = fval_box**2 * epsilon (1.0_default)
              total_integral = total_integral + fval_box
              total_sq_integral = total_sq_integral + fval_sq_box
              if (self%config%mode == VEGAS_MODE_STRATIFIED) then
                 call self%accumulate_distribution (fval_sq_box)
              end if
              call increment_box_coord (self%box(n_dim_par + 1:self%config&
                   &%n_dim), box_success)
           end do loop_over_perp_boxes
           shift: do k = 1, n_size
              call increment_box_coord (self%box(1:n_dim_par), box_success)
              if (.not. box_success) exit shift
           end do shift
-        <<vegas: vegas integrate: loop>>
+        <<Vegas: vegas integrate: loop>>
        end do loop_over_par_boxes
-     <<vegas: vegas integrate: postsampling>>
+     <<Vegas: vegas integrate: postsampling>>
        associate (result => self%result)
          ! Compute final results for this iterations
          call result%update (total_integral, variance = &
                total_sq_integral / (self%config%calls_per_box - 1._default))
          call result%update_efficiency (n_calls = self%config%n_calls, &
               max_pos = max_abs_f_pos, max_neg = max_abs_f_neg, &
               sum_pos = sum_abs_f_pos, sum_neg = sum_abs_f_neg)
          cumulative_int = result%sum_int_wgtd / result%sum_wgts
          cumulative_std = sqrt (1 / result%sum_wgts)
        end associate
        if (opt_verbose) then
           write (msg_buffer, "(I0,1x,I0,1x, 4(E24.16E4,1x))") &
                & it, self%config%n_calls, cumulative_int, cumulative_std, &
                & self%result%chi2, self%result%efficiency
           call msg_message ()
        end if
        if (opt_refine_grid) then
           call self%refine (average = .true.)
        else
           !! Skip grid refinement, but average the (grid) distribution.
           !! \note Now, we always average and dampen the distribution,
           !!       even when not adapting (e.g. final pass).
           call self%average_distribution ()
        end if
     end do iteration
     if (present(result)) result = cumulative_int
     if (present(abserr)) abserr = abs(cumulative_std)
   contains
-  <<vegas: vegas integrate: procedures>>
+  <<Vegas: vegas integrate: procedures>>
   end subroutine vegas_integrate
 
 @ %def vegas_integrate
 @ Set optional arguments of [[vegas_integrate]].
-<<vegas: vegas integrate: procedures>>=
+<<Vegas: vegas integrate: procedures>>=
   subroutine set_options ()
     if (present (iterations)) self%config%iterations = iterations
     opt_reset_result = .true.
     if (present (reset_result)) opt_reset_result = reset_result
     opt_refine_grid = .true.
     if (present (refine_grid)) opt_refine_grid = refine_grid
     opt_verbose = .false.
     if (present (verbose)) opt_verbose = verbose
   end subroutine set_options
 
 @ We define additional chunk, which will be used later on for inserting
 MPI/general MPI code. The code is then removed by additional noweb filter if
 not compiled with the correspondig compiler flag.
 
 Overall variables, some additionally introduced due to the MPI parallelization
 and needed in sequentiell verison.
-<<vegas: vegas integrate: variables>>=
+<<Vegas: vegas integrate: variables>>=
 @
 @ Overall initialization.
-<<vegas: vegas integrate: init>>=
+<<Vegas: vegas integrate: init>>=
 @
 @ Reset all last-iteration results before sampling.
-<<vegas: vegas integrate: presampling>>=
+<<Vegas: vegas integrate: presampling>>=
 @
 @ Adjust rng between parallel and perpendicular loops.
-<<vegas: vegas integrate: loop>>=
+<<Vegas: vegas integrate: loop>>=
 @
-<<vegas: vegas integrate: postsampling>>=
+<<Vegas: vegas integrate: postsampling>>=
 @ Increment the box coordinates by 1. If we reach the largest value for the
 current axis (starting with the largest dimension number), we reset the counter
 to 1 and increment the next axis counter by 1. And so on, until we reach the
 maximum counter value of the axis with the lowest dimension, then we set
 [[success]] to false and the box coord is set to 1.
-<<vegas: vegas integrate: procedures>>=
+<<Vegas: vegas integrate: procedures>>=
   subroutine increment_box_coord (box, success)
     integer, dimension(:), intent(inout) :: box
     logical, intent(out) :: success
     integer :: j
     success = .true.
     do j = size (box), 1, -1
        box(j) = box(j) + 1
        if (box(j) <= self%config%n_boxes) return
        box(j) = 1
     end do
     success = .false.
   end subroutine increment_box_coord
 
 @ %def increment_box_coord
 @ We parallelize [[VEGAS]] in simple forward manner. The hyper-cube is dissambled
 in to equidistant boxes in which we sample the integrand [[calls_per_box]]
 times. The workload of calculating those boxes is distributed along the worker.
 
 The number of dimensions which will be parallelised are $\lfloor \frac{D}{2}
 \rfloor$, such MPI Variables for [[vegas_integrate]]. We have to duplicate all
 buffers for [[MPI_Ireduce]], because we cannot use the same send or receive
 buffer.
 
 We temporarily store a (empty) grid, before communicating.
-<<MPI: vegas: vegas integrate: variables>>=
+<<MPI: Vegas: vegas integrate: variables>>=
   integer :: rank
   logical :: parallel_mode
   type(vegas_grid_t) :: grid
 @ MPI procedure-specific initialization.
 
 Allow for (external) veto on parallel mode.
-<<MPI: vegas: vegas integrate: init>>=
+<<MPI: Vegas: vegas integrate: init>>=
   parallel_mode = self%parallel_mode .and. self%is_parallelizable ()
   if (parallel_mode) then
      call MPI_Comm_size (self%comm, n_size)
      call MPI_Comm_rank (self%comm, rank)
   else
      n_size = 1
      rank = 0
   end if
 @ Pre-sampling communication.
 
 We make a copy of the (actual) grid, which is unfilled when non-root.
 The actual grid is then broadcasted among the workers and inserted into the
 [[VEGAS]] object.
-<<MPI: vegas: vegas integrate: presampling>>=
+<<MPI: Vegas: vegas integrate: presampling>>=
   if (parallel_mode) then
      grid = self%get_grid ()
      call grid%broadcast (self%comm)
      call self%set_grid (grid)
 @ Start index of the boxes for different ranks. If the random number generator
 is RngStream, we can advance the current stream in such a way, that we will
 getting matching numbers. Iff [[n_boxes]] is larger than 2, otherwise
 parallelization is useless.
-<<MPI: vegas: vegas integrate: presampling>>=
+<<MPI: Vegas: vegas integrate: presampling>>=
      do k = 1, rank
         call increment_box_coord (self%box(1:n_dim_par), box_success)
         if (.not. box_success) exit
      end do
      select type (rng)
      type is (rng_stream_t)
         call rng%advance_state (self%config%n_dim * self%config%calls_per_box&
              & * self%config%n_boxes**(self%config%n_dim - n_dim_par) * rank)
      end select
   end if
 @ Increment [[n_size]] times the box coordinates.
-<<MPI: vegas: vegas integrate: loop>>=
+<<MPI: Vegas: vegas integrate: loop>>=
   if (parallel_mode) then
      select type (rng)
      type is (rng_stream_t)
         call rng%advance_state (self%config%n_dim * self%config%calls_per_box&
              & * self%config%n_boxes**(self%config%n_dim - n_dim_par) * (n_size - 1))
      end select
   end if
 @ Call to [[vegas_integrate_collect]].
-<<MPI: vegas: vegas integrate: postsampling>>=
+<<MPI: Vegas: vegas integrate: postsampling>>=
   if (parallel_mode) then
      call vegas_integrate_collect ()
      if (rank /= 0) cycle iteration
   end if
 @ Reduce (in an non-blocking fashion) all sampled information via [[MPI_SUM]] or [[MPI_MAX]].
-<<MPI: vegas: vegas integrate: procedures>>=
+<<MPI: Vegas: vegas integrate: procedures>>=
   subroutine vegas_integrate_collect ()
     integer :: j
     real(default) :: root_total_integral, root_total_sq_integral
     real(default) :: root_sum_abs_f_pos, root_max_abs_f_pos
     real(default) :: root_sum_abs_f_neg, root_max_abs_f_neg
     real(default), dimension(self%config%n_bins_max, self%config%n_dim) :: root_d
     type(MPI_Request), dimension(self%config%n_dim + 6) :: status
     root_d = 0._default
     root_sum_abs_f_pos = 0._default
     root_sum_abs_f_neg = 0._default
     root_max_abs_f_pos = 0._default
     root_sum_abs_f_neg = 0._default
     root_total_integral = 0._default
     root_total_sq_integral = 0._default
     call MPI_Ireduce (sum_abs_f_pos, root_sum_abs_f_pos, 1, MPI_DOUBLE_PRECISION,&
          & MPI_SUM, 0, self%comm, status(1))
     call MPI_Ireduce (sum_abs_f_neg, root_sum_abs_f_neg, 1, MPI_DOUBLE_PRECISION,&
          & MPI_SUM, 0, self%comm, status(2))
     call MPI_Ireduce (max_abs_f_pos, root_max_abs_f_pos, 1, MPI_DOUBLE_PRECISION,&
          & MPI_MAX, 0, self%comm, status(3))
     call MPI_Ireduce (max_abs_f_neg, root_max_abs_f_neg, 1, MPI_DOUBLE_PRECISION,&
          & MPI_MAX, 0, self%comm, status(4))
     call MPI_Ireduce (total_integral, root_total_integral, 1, MPI_DOUBLE_PRECISION,&
          & MPI_SUM, 0, self%comm, status(5))
     call MPI_Ireduce (total_sq_integral, root_total_sq_integral, 1,&
          & MPI_DOUBLE_PRECISION, MPI_SUM, 0, self%comm, status(6))
     do j = 1, self%config%n_dim
        call MPI_Ireduce (self%d(1:self%config%n_bins, j), root_d(1:self%config&
             &%n_bins, j), self%config%n_bins, MPI_DOUBLE_PRECISION, MPI_SUM, 0,&
             & self%comm, status(6 + j))
     end do
     call MPI_Waitall (self%config%n_dim + 6, status, MPI_STATUSES_IGNORE)
     if (rank == 0) sum_abs_f_pos = root_sum_abs_f_pos
     if (rank == 0) sum_abs_f_neg = root_sum_abs_f_neg
     if (rank == 0) max_abs_f_pos = root_max_abs_f_pos
     if (rank == 0) max_abs_f_neg = root_max_abs_f_neg
     if (rank == 0) total_integral = root_total_integral
     if (rank == 0) total_sq_integral = root_total_sq_integral
     if (rank == 0) self%d = root_d
   end subroutine vegas_integrate_collect
 
 @ %def vegas_integrate_collect
 @ Obtain a random point inside the $n$-dimensional hypercube, transform onto the correct interval and calculate the bin volume. The additional factor [[n_bins]] is already applied to the [[jacobian]] (per dimension).
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, private :: random_point => vegas_random_point
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_random_point (self, rng, x, bin_volume)
     class(vegas_t), intent(inout) :: self
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out) :: x
     real(default), intent(out) :: bin_volume
     integer :: j
     real(default) :: r, y, z, bin_width
     bin_volume = 1.
     ndim: do j = 1, self%config%n_dim
        call rng%generate (r)
        z = ((self%box(j) - 1 + r) / self%config%n_boxes) * self%config%n_bins + 1
        self%bin(j) = max (min (int (z), self%config%n_bins), 1)
        if (self%bin(j) == 1) then
           bin_width = self%grid%xi(2, j)
           y = (z - self%bin(j)) * bin_width
        else
           bin_width = self%grid%xi(self%bin(j) + 1, j) - self%grid%xi(self%bin(j), j)
           y = self%grid%xi(self%bin(j), j) + (z - self%bin(j)) * bin_width
        end if
        x(j) = self%grid%x_lower(j) + y * self%grid%delta_x(j)
        bin_volume = bin_volume * bin_width
     end do ndim
   end subroutine vegas_random_point
 
 @ %def vegas_random_point
 @ Obtain a random point inside the $n$-dimensional hyper-cube. We neglect
 stratification and generate the random point in the most simple way. Hence, we
 do not need to know in which box we are actually sampling. This is useful for
 only for event generation.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, private :: simple_random_point => vegas_simple_random_point
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_simple_random_point (self, rng, x, bin_volume)
     class(vegas_t), intent(inout) :: self
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out) :: x
     real(default), intent(out) :: bin_volume
     integer :: j, k
     real(default) :: r, y, z, bin_width
     bin_volume = 1.
     ndim: do j = 1, self%config%n_dim
        call rng%generate (r)
        z = r * self%config%n_bins + 1
        k = max (min (int (z), self%config%n_bins), 1)
        if (k == 1) then
           bin_width = self%grid%xi(2, j)
           y = (z - 1) * bin_width
        else
           bin_width = self%grid%xi(k + 1, j) - self%grid%xi(k, j)
           y = self%grid%xi(k, j) + (z - k) * bin_width
        end if
        x(j) = self%grid%x_lower(j) + y * self%grid%delta_x(j)
        bin_volume = bin_volume * bin_width
     end do ndim
   end subroutine vegas_simple_random_point
 
 @ %def vegas_simple_random_point
 @
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, private :: accumulate_distribution => vegas_accumulate_distribution
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_accumulate_distribution (self, y)
     class(vegas_t), intent(inout) :: self
     real(default), intent(in) :: y
     integer :: j
     do j = 1, self%config%n_dim
        self%d(self%bin(j), j) = self%d(self%bin(j), j) + y
     end do
   end subroutine vegas_accumulate_distribution
 
 @ %def vegas_accumulate_distribution
 @ Generate weighted random event.
 
 The weight given by the overall jacobian
 \begin{equation}
  \Delta_{\text{jac}} = \prod_{j=1}^{d} \left( x_j^+ - x_j^- \right) \frac{N_{\text{bins}}^d}{N_{\text{calls}}}
 \end{equation}
 includes the overall non-changing factors $\frac{1}{N_{\text{calls}}}$-factor (divisions are
 expensive) and $N_{\text{bins}}^{d}$, the latter combined with [[bin_volume]]
 gives rise to the probability, see [[vegas_init_grid]] for details.
 We have to factor out $N_{\text{calls}}$ to retrieve the correct weight.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure :: generate_weighted => vegas_generate_weighted_event
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_generate_weighted_event (self, func, rng, x)
     class(vegas_t), intent(inout) :: self
     class(vegas_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(inout) :: x
     real(default) :: bin_volume
     call self%simple_random_point (rng, x, bin_volume)
     ! Cancel n_calls from jacobian with n_calls
     self%result%evt_weight = self%config%n_calls * self%jacobian * bin_volume &
          & * func%evaluate (x)
   end subroutine vegas_generate_weighted_event
 
 @ %def vegas_generate_weighted_event
 @ Generate random event. We accept on the rate
 \begin{equation}
   \frac{|f(x)|}{\underset{x}{\max} |f(x)|}.
 \end{equation}
 We keep separate maximum weights for positive and negative weights, and use
 them, accordingly.
 In the case of unweighted event generation, if the current weight exceeds the
 the maximum weight, we update the maximum, accordingly.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: generate_unweighted=> vegas_generate_unweighted_event
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_generate_unweighted_event (self, func, rng, x)
     class(vegas_t), intent(inout) :: self
     class(vegas_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out) :: x
     real(default) :: bin_volume
     real(default) :: max_abs_f
     real(default) :: r
     associate (result => self%result)
       generate: do
          call self%generate_weighted (func, rng, x)
          max_abs_f = merge (result%max_abs_f_pos, result%max_abs_f_neg, &
               & result%evt_weight > 0.)
          if (result%evt_weight > max_abs_f) then
             result%evt_weight_excess = &
                  & result%evt_weight / max_abs_f - 1._default
             exit generate
          end if
          call rng%generate (r)
          ! Do not use division, because max_abs_f could be zero.
          if (max_abs_f * r <= abs(result%evt_weight)) then
           exit generate
          end if
       end do generate
     end associate
   end subroutine vegas_generate_unweighted_event
 
 @ %def vegas_random_event
 \section{I/0 operation}
 \label{sec:i0-operation}
 @ Write grid to file. We use the original VAMP formater.
-<<vegas: parameters>>=
+<<Vegas: parameters>>=
   character(len=*), parameter, private :: &
      descr_fmt =         "(1X,A)", &
      integer_fmt =       "(1X,A18,1X,I15)", &
      integer_array_fmt = "(1X,I18,1X,I15)", &
      logical_fmt =       "(1X,A18,1X,L1)", &
      double_fmt =        "(1X,A18,1X,E24.16E4)", &
      double_array_fmt =  "(1X,I18,1X,E24.16E4)", &
      double_array_pac_fmt = "(1X,I18,1X,E16.8E4)", &
      double_array2_fmt =  "(1X,2(1X,I8),1X,E24.16E4)", &
      double_array2_pac_fmt = "(1X,2(1X,I8),1X,E16.8E4)"
 
 @ %def descr_fmt integer_fmt integer_array_fmt logical_fmt
 @ %def double_fmt double_array_fmt double_array2_fmt
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: write_grid => vegas_write_grid
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_write_grid (self, unit)
     class(vegas_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer :: u
     integer :: i, j
     u = given_output_unit (unit)
     write (u, descr_fmt) "begin type(vegas_t)"
     write (u, integer_fmt) "n_dim =", self%config%n_dim
     write (u, integer_fmt) "n_bins_max =", self%config%n_bins_max
     write (u, double_fmt) "alpha =", self%config%alpha
     write (u, integer_fmt) "iterations =", self%config%iterations
     write (u, integer_fmt) "mode =", self%config%mode
     write (u, integer_fmt) "calls_per_box =", self%config%calls_per_box
     write (u, integer_fmt) "n_calls =", self%config%n_calls
     write (u, integer_fmt) "n_calls_min =", self%config%n_calls_min
     write (u, integer_fmt) "n_boxes =", self%config%n_boxes
     write (u, integer_fmt) "n_bins =", self%config%n_bins
     write (u, integer_fmt) "it_start =", self%result%it_start
     write (u, integer_fmt) "it_num =", self%result%it_num
     write (u, integer_fmt) "samples =", self%result%samples
     write (u, double_fmt) "sum_int_wgtd =", self%result%sum_int_wgtd
     write (u, double_fmt) "sum_wgts =", self%result%sum_wgts
     write (u, double_fmt) "sum_chi =", self%result%sum_chi
     write (u, double_fmt) "chi2 =", self%result%chi2
     write (u, double_fmt) "efficiency =", self%result%efficiency
     write (u, double_fmt) "efficiency =", self%result%efficiency_pos
     write (u, double_fmt) "efficiency =", self%result%efficiency_neg
     write (u, double_fmt) "max_abs_f =", self%result%max_abs_f
     write (u, double_fmt) "max_abs_f_pos =", self%result%max_abs_f_pos
     write (u, double_fmt) "max_abs_f_neg =", self%result%max_abs_f_neg
     write (u, double_fmt) "result =", self%result%result
     write (u, double_fmt) "std =", self%result%std
     write (u, double_fmt) "hypercube_volume =", self%hypercube_volume
     write (u, double_fmt) "jacobian =", self%jacobian
     write (u, descr_fmt) "begin x_lower"
     do j = 1, self%config%n_dim
        write (u, double_array_fmt) j, self%grid%x_lower(j)
     end do
     write (u, descr_fmt) "end x_lower"
     write (u, descr_fmt) "begin x_upper"
     do j = 1, self%config%n_dim
        write (u, double_array_fmt) j, self%grid%x_upper(j)
     end do
     write (u, descr_fmt) "end x_upper"
     write (u, descr_fmt) "begin delta_x"
     do j = 1, self%config%n_dim
        write (u, double_array_fmt) j, self%grid%delta_x(j)
     end do
     write (u, descr_fmt) "end delta_x"
     write (u, integer_fmt) "n_bins =", self%config%n_bins
     write (u, descr_fmt) "begin bin"
     do j = 1, self%config%n_dim
        write (u, integer_array_fmt) j, self%bin(j)
     end do
     write (u, descr_fmt) "end n_bin"
     write (u, integer_fmt) "n_boxes =", self%config%n_boxes
     write (u, descr_fmt) "begin box"
     do j = 1, self%config%n_dim
        write (u, integer_array_fmt) j, self%box(j)
     end do
     write (u, descr_fmt) "end box"
     write (u, descr_fmt) "begin d"
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max
           write (u, double_array2_fmt) i, j, self%d(i, j)
        end do
     end do
     write (u, descr_fmt) "end d"
     write (u, descr_fmt) "begin xi"
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max + 1
           write (u,  double_array2_fmt) i, j, self%grid%xi(i, j)
        end do
     end do
     write (u, descr_fmt) "end xi"
     write (u, descr_fmt) "end type(vegas_t)"
   end subroutine vegas_write_grid
 
 @ %def vegas_write_grid
 @ Read grid configuration from file.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure, public :: read_grid => vegas_read_grid
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_read_grid (self, unit)
     class(vegas_t), intent(out) :: self
     integer, intent(in) :: unit
     integer :: i, j
     character(len=80) :: buffer
     integer :: ibuffer, jbuffer
     read (unit, descr_fmt) buffer
     read (unit, integer_fmt) buffer, ibuffer
     read (unit, integer_fmt) buffer, jbuffer
     select type(self)
     type is (vegas_t)
        self = vegas_t (n_dim = ibuffer, n_bins_max = jbuffer)
     end select
     read (unit, double_fmt) buffer, self%config%alpha
     read (unit, integer_fmt) buffer, self%config%iterations
     read (unit, integer_fmt) buffer, self%config%mode
     read (unit, integer_fmt) buffer, self%config%calls_per_box
     read (unit, integer_fmt) buffer, self%config%n_calls
     read (unit, integer_fmt) buffer, self%config%n_calls_min
     read (unit, integer_fmt) buffer, self%config%n_boxes
     read (unit, integer_fmt) buffer, self%config%n_bins
     self%grid%n_bins = self%config%n_bins
     read (unit, integer_fmt) buffer, self%result%it_start
     read (unit, integer_fmt) buffer, self%result%it_num
     read (unit, integer_fmt) buffer, self%result%samples
     read (unit, double_fmt) buffer, self%result%sum_int_wgtd
     read (unit, double_fmt) buffer, self%result%sum_wgts
     read (unit, double_fmt) buffer, self%result%sum_chi
     read (unit, double_fmt) buffer, self%result%chi2
     read (unit, double_fmt) buffer, self%result%efficiency
     read (unit, double_fmt) buffer, self%result%efficiency_pos
     read (unit, double_fmt) buffer, self%result%efficiency_neg
     read (unit, double_fmt) buffer, self%result%max_abs_f
     read (unit, double_fmt) buffer, self%result%max_abs_f_pos
     read (unit, double_fmt) buffer, self%result%max_abs_f_neg
     read (unit, double_fmt) buffer, self%result%result
     read (unit, double_fmt) buffer, self%result%std
     read (unit, double_fmt) buffer, self%hypercube_volume
     read (unit, double_fmt) buffer, self%jacobian
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, double_array_fmt) jbuffer, self%grid%x_lower(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, double_array_fmt) jbuffer, self%grid%x_upper(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, double_array_fmt) jbuffer, self%grid%delta_x(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, integer_fmt) buffer, self%config%n_bins
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, integer_array_fmt) jbuffer, self%bin(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, integer_fmt) buffer, self%config%n_boxes
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        read (unit, integer_array_fmt) jbuffer, self%box(j)
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max
           read (unit, double_array2_fmt) ibuffer, jbuffer, self%d(i, j)
        end do
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max + 1
           read (unit,  double_array2_fmt) ibuffer, jbuffer, self%grid%xi(i, j)
        end do
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
   end subroutine vegas_read_grid
 
 @ %def vegas_read_grid
 Read and write a grid from an unformatted file.
-<<vegas: vegas: TBP>>=
+<<Vegas: vegas: TBP>>=
   procedure :: write_binary_grid => vegas_write_binary_grid
   procedure :: read_binary_grid => vegas_read_binary_grid
-<<vegas: procedures>>=
+<<Vegas: procedures>>=
   subroutine vegas_write_binary_grid (self, unit)
     class(vegas_t), intent(in) :: self
     integer, intent(in) :: unit
     integer :: i, j
     write (unit) self%config%n_dim
     write (unit) self%config%n_bins_max
     write (unit) self%config%alpha
     write (unit) self%config%iterations
     write (unit) self%config%mode
     write (unit) self%config%calls_per_box
     write (unit) self%config%n_calls
     write (unit) self%config%n_calls_min
     write (unit) self%config%n_boxes
     write (unit) self%config%n_bins
     write (unit) self%result%it_start
     write (unit) self%result%it_num
     write (unit) self%result%samples
     write (unit) self%result%sum_int_wgtd
     write (unit) self%result%sum_wgts
     write (unit) self%result%sum_chi
     write (unit) self%result%chi2
     write (unit) self%result%efficiency
     write (unit) self%result%efficiency_pos
     write (unit) self%result%efficiency_neg
     write (unit) self%result%max_abs_f
     write (unit) self%result%max_abs_f_pos
     write (unit) self%result%max_abs_f_neg
     write (unit) self%result%result
     write (unit) self%result%std
     write (unit) self%hypercube_volume
     write (unit) self%jacobian
     do j = 1, self%config%n_dim
        write (unit) j, self%grid%x_lower(j)
     end do
     do j = 1, self%config%n_dim
        write (unit) j, self%grid%x_upper(j)
     end do
     do j = 1, self%config%n_dim
        write (unit) j, self%grid%delta_x(j)
     end do
     write (unit) self%config%n_bins
     do j = 1, self%config%n_dim
        write (unit) j, self%bin(j)
     end do
     write (unit) self%config%n_boxes
     do j = 1, self%config%n_dim
        write (unit) j, self%box(j)
     end do
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max
           write (unit) i, j, self%d(i, j)
        end do
     end do
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max + 1
           write (unit) i, j, self%grid%xi(i, j)
        end do
     end do
   end subroutine vegas_write_binary_grid
 
   subroutine vegas_read_binary_grid (self, unit)
     class(vegas_t), intent(out) :: self
     integer, intent(in) :: unit
     integer :: i, j
     integer :: ibuffer, jbuffer
     read (unit) ibuffer
     read (unit) jbuffer
     select type(self)
     type is (vegas_t)
        self = vegas_t (n_dim = ibuffer, n_bins_max = jbuffer)
     end select
     read (unit) self%config%alpha
     read (unit) self%config%iterations
     read (unit) self%config%mode
     read (unit) self%config%calls_per_box
     read (unit) self%config%n_calls
     read (unit) self%config%n_calls_min
     read (unit) self%config%n_boxes
     read (unit) self%config%n_bins
     self%grid%n_bins = self%config%n_bins
     read (unit) self%result%it_start
     read (unit) self%result%it_num
     read (unit) self%result%samples
     read (unit) self%result%sum_int_wgtd
     read (unit) self%result%sum_wgts
     read (unit) self%result%sum_chi
     read (unit) self%result%chi2
     read (unit) self%result%efficiency
     read (unit) self%result%efficiency_pos
     read (unit) self%result%efficiency_neg
     read (unit) self%result%max_abs_f
     read (unit) self%result%max_abs_f_pos
     read (unit) self%result%max_abs_f_neg
     read (unit) self%result%result
     read (unit) self%result%std
     read (unit) self%hypercube_volume
     read (unit) self%jacobian
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%grid%x_lower(j)
     end do
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%grid%x_upper(j)
     end do
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%grid%delta_x(j)
     end do
     read (unit) self%config%n_bins
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%bin(j)
     end do
     read (unit) self%config%n_boxes
     do j = 1, self%config%n_dim
        read (unit) jbuffer, self%box(j)
     end do
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max
           read (unit) ibuffer, jbuffer, self%d(i, j)
        end do
     end do
     do j = 1, self%config%n_dim
        do i = 1, self%config%n_bins_max + 1
           read (unit) ibuffer, jbuffer, self%grid%xi(i, j)
        end do
     end do
   end subroutine vegas_read_binary_grid
 
 @ %def vegas_write_binary_grid, vegas_read_binary_grid
 
 \section{Unit tests}
 \label{sec:unit-tests}
 
 Test module, followed by the corresponding implementation module.
 
 <<[[vegas_ut.f90]]>>=
 <<File header>>
 
 module vegas_ut
   use unit_tests
   use vegas_uti
 
 <<Standard module head>>
 
-<<vegas: public test>>
+<<Vegas: public test>>
 
 contains
-<<vegas: test driver>>
+<<Vegas: test driver>>
 end module vegas_ut
 @ %def vegas_ut
 @
 <<[[vegas_uti.f90]]>>=
 <<File header>>
 
 module vegas_uti
 <<Use kinds>>
   use io_units
   use constants, only: pi
   use format_defs, only: FMT_10, FMT_12
   use rng_base
   use rng_stream
   use vegas
 
 <<Standard module head>>
 
-<<vegas: test declaration>>
+<<Vegas: test declaration>>
 
-<<vegas: test types>>
+<<Vegas: test types>>
 
 contains
-<<vegas: tests>>
+<<Vegas: tests>>
 end module vegas_uti
 @ %def vegas_uti
 @ API: driver for the unit tests below.
-<<vegas: public test>>=
+<<Vegas: public test>>=
   public :: vegas_test
-<<vegas: test driver>>=
+<<Vegas: test driver>>=
   subroutine vegas_test (u, results)
     integer, intent(in) :: u
     type(test_results_t), intent(inout) :: results
-    <<vegas: execute tests>>
+    <<Vegas: execute tests>>
   end subroutine vegas_test
 
 @ %def vegas_test
 @
 \subsubsection{Test function}
 \label{sec:test-function}
 
 We use the example from the Monte Carlo Examples of the GSL library
 \begin{equation}
   I = \int_{-pi}^{+pi} {dk_x/(2 pi)}  \int_{-pi}^{+pi} {dk_y/(2 pi)}  \int_{-pi}^{+pi} {dk_z/(2 pi)}  1 / (1 - cos(k_x)cos(k_y)cos(k_z)).
 \end{equation}
 The integral is reduced to region (0,0,0) $\rightarrow$ ($\pi$, $\pi$, $\pi$) and multiplied by 8.
-<<vegas: test types>>=
+<<Vegas: test types>>=
   type, extends (vegas_func_t) :: vegas_test_func_t
      !
    contains
-   <<vegas: vegas test func: TBP>>
+   <<Vegas: vegas test func: TBP>>
   end type vegas_test_func_t
 
 @ %def vegas_test_func_t
 @ Evaluate the integrand.
-<<vegas: vegas test func: TBP>>=
+<<Vegas: vegas test func: TBP>>=
   procedure, public :: evaluate => vegas_test_func_evaluate
-<<vegas: tests>>=
+<<Vegas: tests>>=
   real(default) function vegas_test_func_evaluate (self, x) result (f)
     class(vegas_test_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     f = 1.0 / (pi**3)
     f = f / ( 1.0 - cos (x(1)) * cos (x(2)) * cos (x(3)))
   end function vegas_test_func_evaluate
 
 @ %def vegas_test_func_evaluate
 @ The second test function is the normalised n-dim.\@ gaussian distribution.
-<<vegas: test types>>=
+<<Vegas: test types>>=
   type, extends (vegas_func_t) :: vegas_gaussian_test_func_t
      !
    contains
-   <<vegas: vegas gaussian test func: TBP>>
+   <<Vegas: vegas gaussian test func: TBP>>
   end type vegas_gaussian_test_func_t
 
 @ %def vegas_gaussian_test_func_t
 @ Evaluate the integrand.
-<<vegas: vegas gaussian test func: TBP>>=
+<<Vegas: vegas gaussian test func: TBP>>=
   procedure, public :: evaluate => vegas_gaussian_evaluate
-<<vegas: tests>>=
+<<Vegas: tests>>=
   real(default) function vegas_gaussian_evaluate (self, x) result (f)
     class(vegas_gaussian_test_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     real(default), parameter :: inv_sqrt_pi = 1._default / sqrt(pi)
     f = inv_sqrt_pi**size (x)
     f = f * exp (- dot_product(x, x))
   end function vegas_gaussian_evaluate
 
 @ %def vegas_gaussian_evaluate
 @ The third test function is a three-dimensional polynomial function which
 factories. The function is defined in such a way that the integral in the unit
 range is normalised to zero.
 \begin{equation}
   f(x) = - \frac{8}{3} (x + 1)*(y-1)*z
 \end{equation}
-<<vegas: test types>>=
+<<Vegas: test types>>=
   type, extends (vegas_func_t) :: vegas_polynomial_func_t
      !
    contains
-   <<vegas: vegas polynomial func: TBP>>
+   <<Vegas: vegas polynomial func: TBP>>
    end type vegas_polynomial_func_t
 
 @ %def vegas_polynomial_func_t
-<<vegas: vegas polynomial func: TBP>>=
+<<Vegas: vegas polynomial func: TBP>>=
   procedure, public :: evaluate => vegas_polynomial_evaluate
-<<vegas: tests>>=
+<<Vegas: tests>>=
   real(default) function vegas_polynomial_evaluate (self, x) result (f)
     class(vegas_polynomial_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     f = - 8. / 3. * (x(1) + 1.) * (x(2) - 1.) * x(3)
   end function vegas_polynomial_evaluate
 
 @ %def vegas_polynomial_evaluate
 @
 \subsubsection{MC Integrator check}
 \label{sec:mc-integrator-check}
 
 Initialise the VEGAS MC integrator and call to [[vegas_init_grid]] for the
 initialisation of the grid.
-<<vegas: execute tests>>=
+<<Vegas: execute tests>>=
   call test (vegas_1, "vegas_1", "VEGAS initialisation and&
        & grid preparation", u, results)
-<<vegas: test declaration>>=
+<<Vegas: test declaration>>=
   public :: vegas_1
-<<vegas: tests>>=
+<<Vegas: tests>>=
   subroutine vegas_1 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower = 0., &
          x_upper = pi
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_1"
     write (u, "(A)") "*   Purpose: initialise the VEGAS MC integrator and the grid"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_test_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vegas_1
 @ %def vegas_1
 @
 \subsubsection{Configuration and result check}
 \label{sec:conf-result-check}
 
 Initialise the MC integrator. Get and write the config object, also the (empty) result object.
-<<vegas: execute tests>>=
+<<Vegas: execute tests>>=
   call test (vegas_2, "vegas_2", "VEGAS configuration and result object", u, results)
-<<vegas: test declaration>>=
+<<Vegas: test declaration>>=
   public :: vegas_2
-<<vegas: tests>>=
+<<Vegas: tests>>=
   subroutine vegas_2 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_result_t) :: mc_integrator_result
 
     write (u, "(A)") "* Test output: vegas_2"
     write (u, "(A)") "*   Purpose: use transparent containers for&
          & configuration and result."
     write (u, "(A)")
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 10"
     write (u, "(A)")
 
     mc_integrator = vegas_t (10)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000 (Importance Sampling)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS config object and write out"
     write (u, "(A)")
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS empty result object and write out"
     write (u, "(A)")
 
     mc_integrator_result = mc_integrator%get_result ()
     call mc_integrator_result%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_2
 @ %def vegas_2
 @
 \subsubsection{Grid check}
 \label{sec:conf-result-check}
 
 Initialise the MC integrator. Get and write the config object. Integrate the gaussian distribution.
 Get and write the result object. Before and after integration get the grid
 object and output both.
 Repeat with different number of dimensions.
-<<vegas: execute tests>>=
+<<Vegas: execute tests>>=
   call test (vegas_3, "vegas_3", "VEGAS integration of multi-dimensional gaussian", u, results)
-<<vegas: test declaration>>=
+<<Vegas: test declaration>>=
   public :: vegas_3
-<<vegas: tests>>=
+<<Vegas: tests>>=
   subroutine vegas_3 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower_3 = -10._default, &
          x_upper_3 = 10._default
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_grid_t) :: mc_integrator_grid
     type(vegas_result_t) :: mc_integrator_result
 
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_3"
     write (u, "(A)") "*   Purpose: Integrate gaussian distribution."
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_gaussian_test_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower_3, x_upper_3)
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS config object and write out"
     write (u, "(A)")
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS grid object and write out"
     write (u, "(A)")
 
     mc_integrator_grid = mc_integrator%get_grid ()
     call mc_integrator_grid%write (u, pacify = .true.)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 20000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS result object and write out"
     write (u, "(A)")
 
     mc_integrator_result = mc_integrator%get_result ()
     call mc_integrator_result%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Get VEGAS grid object and write out"
     write (u, "(A)")
 
     mc_integrator_grid = mc_integrator%get_grid ()
     call mc_integrator_grid%write (u, pacify = .true.)
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_3
 @ %def vegas_3
 \subsubsection{Three-dimensional integration with polynomial function}
 \label{sec:conf-result-check}
 
 Initialise the MC integrator. Get and write the config object. Integrate the
 factorisable polynomial function. Get and write the result object. Repeat with
 different number of dimensions.
-<<vegas: execute tests>>=
+<<Vegas: execute tests>>=
   call test (vegas_4, "vegas_4", "VEGAS integration of three&
        &-dimensional factorisable polynomial function", u, results)
-<<vegas: test declaration>>=
+<<Vegas: test declaration>>=
   public :: vegas_4
-<<vegas: tests>>=
+<<Vegas: tests>>=
   subroutine vegas_4 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower_3 = 0._default, &
          x_upper_3 = 1._default
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_result_t) :: mc_integrator_result
 
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_4"
     write (u, "(A)") "*   Purpose: Integrate gaussian distribution."
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_polynomial_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 2000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower_3, x_upper_3)
     call mc_integrator%set_calls (2000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
 
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 20000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (20000)
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_4
 
 @ %def vegas_4
 @
 \subsubsection{Event generation}
 
 Initialise the MC integrator. Integrate the gaussian distribution.
 Get and write the result object. Finally, generate events in accordance to the
 adapted grid and print them out.
-<<vegas: execute tests>>=
+<<Vegas: execute tests>>=
   call test (vegas_5, "vegas_5", "VEGAS integration and event&
        & generation of multi-dimensional gaussian", u, results)
-<<vegas: test declaration>>=
+<<Vegas: test declaration>>=
   public :: vegas_5
-<<vegas: tests>>=
+<<Vegas: tests>>=
   subroutine vegas_5 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(1), parameter :: x_lower_1 = -10._default, &
          x_upper_1 = 10._default
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_result_t) :: mc_integrator_result
 
     integer :: i, u_event
     real(default), dimension(1) :: event, mean, delta, M2
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_5"
     write (u, "(A)") "*   Purpose: Integrate gaussian distribution."
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 1"
     write (u, "(A)")
 
     allocate (vegas_gaussian_test_func_t :: func)
     mc_integrator = vegas_t (1)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 20000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower_1, x_upper_1)
     call mc_integrator%set_calls (20000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, verbose=.true., result=result, abserr=abserr)
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") &
          & "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%integrate (func, rng, 3, verbose=.true., result=result, abserr=abserr)
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") &
          & "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Generate 10000 events based on the adaptation and&
          & calculate mean and variance"
     write (u, "(A)")
 
     mean = 0._default
     M2 = 0._default
     do i = 1, 10000
        call mc_integrator%generate_unweighted (func, rng, event)
        delta = event - mean
        mean = mean + delta / i
        M2 = M2 + delta * (event - mean)
     end do
 
     write (u, "(2X,A)") "Result:"
     write (u, "(4X,A," // FMT_12 //")") &
          & "mean               = ", mean
     write (u, "(4X,A," // FMT_12 //")") &
          & "(sample) std. dev. = ", sqrt (M2 / (9999))
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_5
 
 @ %def vegas_5
 @
 \subsubsection{Grid I/O}
 \label{sec:grid-io}
 
 Initialise the MC integrator. Get and write the config object. Integrate the
 factorisable polynomial function. Get and write the result object. Write grid to
 file and start with fresh grid.
-<<vegas: execute tests>>=
+<<Vegas: execute tests>>=
   call test (vegas_6, "vegas_6", "VEGAS integrate and write grid, &
        & read grid and continue", u, results)
-<<vegas: test declaration>>=
+<<Vegas: test declaration>>=
   public :: vegas_6
-<<vegas: tests>>=
+<<Vegas: tests>>=
   subroutine vegas_6 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower_3 = 0._default, &
          x_upper_3 = 1._default
     type(vegas_config_t) :: mc_integrator_config
     type(vegas_result_t) :: mc_integrator_result
 
     real(default) :: result, abserr
     integer :: unit
 
     write (u, "(A)") "* Test output: vegas_6"
     write (u, "(A)") "*   Purpose: Write and read grid, and continue."
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_polynomial_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 2000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower_3, x_upper_3)
     call mc_integrator%set_calls (2000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
 
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Write grid to file vegas_io.grid"
     write (u, "(A)")
 
     unit = free_unit ()
     open (unit, file = "vegas_io.grid", &
          action = "write", status = "replace")
     call mc_integrator%write_grid (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Read grid from file vegas_io.grid"
     write (u, "(A)")
 
     call mc_integrator%final ()
     open (unit, file = "vegas_io.grid", &
          action = "read", status = "old")
     call mc_integrator%read_grid (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 20000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (20000)
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
 
     call mc_integrator%get_config (mc_integrator_config)
     call mc_integrator_config%write (u)
 
     write (u, "(A)")
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
   end subroutine vegas_6
 
 @ %def vegas_6
 @
 \subsubsection{Numeric stability}
 \label{sec:numeric-stability}
 
 We wrap a previous testing function to produce a single [[NaN]].
-<<vegas: test types>>=
+<<Vegas: test types>>=
   type, extends (vegas_test_func_t) :: vegas_nan_test_func_t
     private
     logical :: evaluate_to_nan = .true.
    contains
-   <<vegas: vegas nan test func: TBP>>
+   <<Vegas: vegas nan test func: TBP>>
   end type vegas_nan_test_func_t
 
 @ %def vegas_nan_test_func_t
 @ Evaluate the integrand.
-<<vegas: vegas nan test func: TBP>>=
+<<Vegas: vegas nan test func: TBP>>=
   procedure, public :: evaluate => vegas_nan_test_func_evaluate
-<<vegas: tests>>=
+<<Vegas: tests>>=
   real(default) function vegas_nan_test_func_evaluate (self, x) result (f)
     use, intrinsic :: ieee_arithmetic, only: ieee_value, ieee_quiet_nan
     class(vegas_nan_test_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     if (self%evaluate_to_nan) then
         f = ieee_value(1.0_default, ieee_quiet_nan)
         self%evaluate_to_nan = .false.
     else
         f = self%vegas_test_func_t%evaluate (x)
     end if
   end function vegas_nan_test_func_evaluate
 
 @ %def vegas_test_func_evaluate
 
 Initialise the VEGAS MC integrator. Run a integration pass and insert a single
 [[NaN]] that leads to a [[NaN]] result. However, the integration grid will be
 fine.
 
 Proceed with a second pass, and get the correct result.
-<<vegas: execute tests>>=
+<<Vegas: execute tests>>=
   !!! Disabled for the moment as NAGFOR stops execution on NaNs as intended
   ! call test (vegas_7, "vegas_7", "VEGAS NaN stability test", u, results)
-<<vegas: test declaration>>=
+<<Vegas: test declaration>>=
   public :: vegas_7
-<<vegas: tests>>=
+<<Vegas: tests>>=
   subroutine vegas_7 (u)
     integer, intent(in) :: u
     type(vegas_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vegas_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower = 0., &
          x_upper = pi
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vegas_7"
     write (u, "(A)") "*   Purpose: initialise the VEGAS MC integrator and the grid"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_dim = 3"
     write (u, "(A)")
 
     allocate (vegas_nan_test_func_t :: func)
     mc_integrator = vegas_t (3)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vegas_7
 
 @ %def vegas_7
 \clearpage
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{VAMP2}
 \label{sec:vamp2}
 
 We concentrate all configuration and run-time data in a derived-type, such that,
 [[mci_t]] can spwan each time a distinctive MCI VEGAS integrator object.
 
 <<[[vamp2.f90]]>>=
 <<File header>>
 
 module vamp2
 
 <<Use kinds>>
   use io_units
   use format_utils, only: pac_fmt
   use format_utils, only: write_separator, write_indent
   use format_defs, only: FMT_17
   use diagnostics
   use iterator
   use rng_base
   use rng_stream, only: rng_stream_t
 
   use vegas
 
-<<vamp2: modules>>
+<<VAMP2: modules>>
 
 <<Standard module head>>
 
-<<vamp2: public>>
+<<VAMP2: public>>
 
-<<vamp2: parameters>>
+<<VAMP2: parameters>>
 
-<<vamp2: types>>
+<<VAMP2: types>>
 
-<<vamp2: interfaces>>
+<<VAMP2: interfaces>>
 
 contains
 
-<<vamp2: procedures>>
+<<VAMP2: procedures>>
 
 end module vamp2
 @ %def vamp2
-<<vamp2: modules>>=
+<<VAMP2: modules>>=
 @
-<<MPI: vamp2: modules>>=
+<<MPI: VAMP2: modules>>=
   use request_base
   use request_simple
   use request_caller
   use balancer_base
   use request_callback
   use mpi_f08 !NODEP!
 @
 \subsection{Type: vamp2\_func\_t}
 \label{sec:vamp2-func}
 
 We extend [[vegas_func_t]] with the multi-channel weights and the
 [[vegas_grid_t]], such that, the overall multi-channel weight can be calculated
 by the function itself.
 
 We add an additional logicial [[valid_x]], if it is set to [[.false.]], we do
 not compute weighted function and just set the weighted integrand to zero.
 This behavior is in particular very useful, if a mapping is prohibited or fails.
 Or in the case of WHIZARD, a phase cut is applied.
-<<vamp2: public>>=
+<<VAMP2: public>>=
   public :: vamp2_func_t
-<<vamp2: types>>=
+<<VAMP2: types>>=
   type, abstract, extends(vegas_func_t) :: vamp2_func_t
      integer :: current_channel = 0
      integer :: n_dim = 0
      integer :: n_channel = 0
      integer :: n_calls = 0
      logical :: valid_x = .false.
      real(default), dimension(:, :), allocatable :: xi
      real(default), dimension(:), allocatable :: det
      real(default), dimension(:), allocatable :: wi
      real(default), dimension(:), allocatable :: gi
      type(vegas_grid_t), dimension(:), allocatable :: grids
      real(default) :: g = 0.
    contains
-   <<vamp2: vamp2 func: TBP>>
+   <<VAMP2: vamp2 func: TBP>>
   end type vamp2_func_t
 
 @ %def vamp2_func_t
 @ Init.
-<<vamp2: vamp2 func: TBP>>=
+<<VAMP2: vamp2 func: TBP>>=
   procedure, public :: init => vamp2_func_init
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_func_init (self, n_dim, n_channel)
     class(vamp2_func_t), intent(out) :: self
     integer, intent(in) :: n_dim
     integer, intent(in) :: n_channel
     self%n_dim = n_dim
     self%n_channel = n_channel
     allocate (self%xi(n_dim, n_channel), source=0._default)
     allocate (self%det(n_channel), source=1._default)
     allocate (self%wi(n_channel), source=0._default)
     allocate (self%gi(n_channel), source=0._default)
     allocate (self%grids(n_channel))
   end subroutine vamp2_func_init
 
 @ %def vamp2_func_init
 @ Set current channel.
-<<vamp2: vamp2 func: TBP>>=
+<<VAMP2: vamp2 func: TBP>>=
   procedure, public :: set_channel => vamp2_func_set_channel
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_func_set_channel (self, channel)
     class(vamp2_func_t), intent(inout) :: self
     integer, intent(in) :: channel
     self%current_channel = channel
   end subroutine vamp2_func_set_channel
 
 @ %def vamp2_func_set_channel
 @ Get number of function calls for which $f \neq 0$.
-<<vamp2: vamp2 func: TBP>>=
+<<VAMP2: vamp2 func: TBP>>=
   procedure, public :: get_n_calls => vamp2_func_get_n_calls
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   integer function vamp2_func_get_n_calls (self) result (n_calls)
     class(vamp2_func_t), intent(in) :: self
     n_calls = self%n_calls
   end function vamp2_func_get_n_calls
 
 @ %def vamp2_func_get_func_calls
 @ Reset number of calls.
-<<vamp2: vamp2 func: TBP>>=
+<<VAMP2: vamp2 func: TBP>>=
   procedure, public :: reset_n_calls => vamp2_func_reset_n_calls
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_func_reset_n_calls (self)
     class(vamp2_func_t), intent(inout) :: self
     self%n_calls = 0
   end subroutine vamp2_func_reset_n_calls
 
 @ %def vamp2_func_reset_n_calls
 @ Evaluate mappings. We defer this method to be implemented by the user.
 The result must be written to [[xi]] and [[det]].
 
 The mapping is defined by $\phi : U \rightarrow M$. We map $x \in M$ to the
 different mappings of the hypercube $U_{i}$, such that $x_{i} \in U_{i}$.
 
 The mapping should determine, whether [[x]] is a valid point, e.g. can be
 mapped, or is restricted otherwise.
-<<vamp2: vamp2 func: TBP>>=
+<<VAMP2: vamp2 func: TBP>>=
   procedure(vamp2_func_evaluate_maps), deferred :: evaluate_maps
-<<vamp2: interfaces>>=
+<<VAMP2: interfaces>>=
  abstract interface
     subroutine vamp2_func_evaluate_maps (self, x)
       import :: vamp2_func_t, default
       class(vamp2_func_t), intent(inout) :: self
       real(default), dimension(:), intent(in) :: x
     end subroutine vamp2_func_evaluate_maps
  end interface
 
 @ %def vamp2_evaluate_func
 @ Evaluate channel weights.
 
 The calling procedure must handle the case of a vanishing overall probability
 density where either a channel weight or a channel probability vanishes.
-<<vamp2: vamp2 func: TBP>>=
+<<VAMP2: vamp2 func: TBP>>=
   procedure, private :: evaluate_weight => vamp2_func_evaluate_weight
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_func_evaluate_weight (self)
     class(vamp2_func_t), intent(inout) :: self
     integer :: ch
     self%g = 0.
     self%gi = 0.
     !$OMP PARALLEL DO PRIVATE(ch) SHARED(self)
     do ch = 1, self%n_channel
        if (self%wi(ch) /= 0) then
           self%gi(ch) = self%grids(ch)%get_probability (self%xi(:, ch))
        end if
     end do
     !$OMP END PARALLEL DO
     if (self%gi(self%current_channel) /= 0) then
        do ch = 1, self%n_channel
           if (self%wi(ch) /= 0 .and. self%det(ch) /= 0) then
              self%g = self%g + self%wi(ch) * self%gi(ch) / self%det(ch)
           end if
        end do
        self%g = self%g / self%gi(self%current_channel)
     end if
   end subroutine vamp2_func_evaluate_weight
 
 @ %def vamp2_func_evaluate_weight
 @ Evaluate function at [[x]]. We call this procedure in [[vamp2_func_evaluate]].
-<<vamp2: vamp2 func: TBP>>=
+<<VAMP2: vamp2 func: TBP>>=
   procedure(vamp2_func_evaluate_func), deferred :: evaluate_func
-<<vamp2: interfaces>>=
+<<VAMP2: interfaces>>=
   abstract interface
      real(default) function vamp2_func_evaluate_func (self, x) result (f)
        import :: vamp2_func_t, default
        class(vamp2_func_t), intent(in) :: self
        real(default), dimension(:), intent(in) :: x
      end function vamp2_func_evaluate_func
   end interface
 
 @ %def vamp2_func_evaluate_func
-<<vamp2: vamp2 func: TBP>>=
+<<VAMP2: vamp2 func: TBP>>=
   procedure, public :: evaluate => vamp2_func_evaluate
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   real(default) function vamp2_func_evaluate (self, x) result (f)
     class(vamp2_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     call self%evaluate_maps (x)
     f = 0.
     self%gi = 0.
     self%g = 1
     if (self%valid_x) then
        call self%evaluate_weight ()
        if (self%g /= 0) then
           f = self%evaluate_func (x) / self%g
           self%n_calls = self%n_calls + 1
        end if
     end if
   end function vamp2_func_evaluate
 
 @ %def vamp2_func_evaluate
 \subsection{Type: vamp2\_config\_t}
 \label{sec:vamp2-config}
 
 This is a transparent container which incorporates and extends the definitions
 in [[vegas_config]]. The parent object can then be used to parametrise the VEGAS
 grids directly, where the new parameters are exclusively used in the
 multi-channel implementation of VAMP2.
 [[n_calls_min]] is calculated by [[n_calls_min_per_channel]] and [[n_channel]].
 The channels weights (and the result [[n_calls]] for each channel) are
 calculated regarding [[n_calls_threshold]].
-<<vamp2: public>>=
+<<VAMP2: public>>=
   public :: vamp2_config_t
-<<vamp2: types>>=
+<<VAMP2: types>>=
   type, extends(vegas_config_t) :: vamp2_config_t
      integer :: n_channel = 0
      integer :: n_calls_min_per_channel = 20
      integer :: n_calls_threshold = 10
      integer :: n_chains = 0
      logical :: stratified = .true.
      logical :: equivalences = .false.
      real(default) :: beta = 0.5_default
      real(default) :: accuracy_goal = 0._default
      real(default) :: error_goal = 0._default
      real(default) :: rel_error_goal = 0._default
    contains
-   <<vamp2: vamp2 config: TBP>>
+   <<VAMP2: vamp2 config: TBP>>
   end type vamp2_config_t
 
 @ %def vamp2_config_t
 @ Write.
-<<vamp2: vamp2 config: TBP>>=
+<<VAMP2: vamp2 config: TBP>>=
   procedure, public :: write => vamp2_config_write
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_config_write (self, unit, indent)
     class(vamp2_config_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call self%vegas_config_t%write (unit, indent)
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of channels                               = ", self%n_channel
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Min. number of calls per channel (setting calls) = ", &
          & self%n_calls_min_per_channel
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Threshold number of calls (adapting weights)     = ", &
          & self%n_calls_threshold
     call write_indent (u, ind)
     write (u, "(2x,A,I0)") &
          & "Number of chains                                 = ", self%n_chains
     call write_indent (u, ind)
     write (u, "(2x,A,L1)") &
          & "Stratified                                       = ", self%stratified
     call write_indent (u, ind)
     write (u, "(2x,A,L1)") &
          & "Equivalences                                     = ", self%equivalences
     call write_indent (u, ind)
     write (u, "(2x,A," // FMT_17 // ")") &
          & "Adaption power (beta)                            = ", self%beta
     if (self%accuracy_goal > 0) then
        call write_indent (u, ind)
        write (u, "(2x,A," // FMT_17 // ")") &
             & "accuracy_goal                                 = ", self%accuracy_goal
     end if
     if (self%error_goal > 0) then
        call write_indent (u, ind)
        write (u, "(2x,A," // FMT_17 // ")") &
             & "error_goal                                    = ", self%error_goal
     end if
     if (self%rel_error_goal > 0) then
        call write_indent (u, ind)
        write (u, "(2x,A," // FMT_17 // ")") &
             & "rel_error_goal                                = ", self%rel_error_goal
     end if
   end subroutine vamp2_config_write
 
 @ %def vamp2_config_write
 @
 \subsection{Type: vamp2\_result\_t}
 \label{sec:vamp2-result}
 
 This is a transparent container which incorporates and extends the definitions
 of [[vegas_result_t]].
 
-<<vamp2: public>>=
+<<VAMP2: public>>=
   public :: vamp2_result_t
-<<vamp2: types>>=
+<<VAMP2: types>>=
   type, extends(vegas_result_t) :: vamp2_result_t
    contains
-   <<vamp2: vamp2 result: TBP>>
+   <<VAMP2: vamp2 result: TBP>>
   end type vamp2_result_t
 
 @ %def vamp2_result_t
 @ Output.
-<<vamp2: vamp2 result: TBP>>=
+<<VAMP2: vamp2 result: TBP>>=
   procedure, public :: write => vamp2_result_write
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_result_write (self, unit, indent)
     class(vamp2_result_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call self%vegas_result_t%write (unit, indent)
   end subroutine vamp2_result_write
 
 @ %def vamp2_result_write
 @
 \subsection{Type: vamp2\_equivalences\_t}
 \label{sec:vamp2-eqv}
 
-<<vamp2: parameters>>=
+<<VAMP2: parameters>>=
 integer, parameter, public :: &
      VEQ_IDENTITY = 0, VEQ_INVERT = 1, VEQ_SYMMETRIC = 2, VEQ_INVARIANT = 3
 @
 @ Channel equivalences. Store retrieving and sourcing channel.
-<<vamp2: types>>=
+<<VAMP2: types>>=
   type :: vamp2_equi_t
      integer :: ch
      integer :: ch_src
      integer, dimension(:), allocatable :: perm
      integer, dimension(:), allocatable :: mode
    contains
-   <<vamp2: vamp2 equi: TBP>>
+   <<VAMP2: vamp2 equi: TBP>>
   end type vamp2_equi_t
 
 @ %def vamp2_equi_t
 @ Write equivalence.
-<<vamp2: vamp2 equi: TBP>>=
+<<VAMP2: vamp2 equi: TBP>>=
   procedure :: write => vamp2_equi_write
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_equi_write (self, unit, indent)
     class(vamp2_equi_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call write_indent (u, ind)
     write (u, "(2(A,1X,I0))") "src:", self%ch_src, "-> dest:", self%ch
     call write_indent (u, ind)
     write (u, "(A,99(1X,I0))") "Perm: ", self%perm
     call write_indent (u, ind)
     write (u, "(A,99(1X,I0))") "Mode: ", self%mode
   end subroutine vamp2_equi_write
 
 @ %def vamp2_equi_write
 @
-<<vamp2: public>>=
+<<VAMP2: public>>=
   public :: vamp2_equivalences_t
-<<vamp2: types>>=
+<<VAMP2: types>>=
   type :: vamp2_equivalences_t
       private
       integer :: n_eqv = 0
       integer :: n_channel = 0
       integer :: n_dim = 0
       type(vamp2_equi_t), dimension(:), allocatable :: eqv
       integer, dimension(:), allocatable :: map
       integer, dimension(:), allocatable :: multiplicity
       integer, dimension(:), allocatable :: symmetry
       logical, dimension(:), allocatable :: independent
       integer, dimension(:), allocatable :: equivalent_to_ch
       logical, dimension(:, :), allocatable :: dim_is_invariant
    contains
-   <<vamp2: vamp2 equivalences: TBP>>
+   <<VAMP2: vamp2 equivalences: TBP>>
   end type vamp2_equivalences_t
 
 @ %def vamp2_equivalences_t
 @ Constructor.
-<<vamp2: interfaces>>=
+<<VAMP2: interfaces>>=
   interface vamp2_equivalences_t
      module procedure vamp2_equivalences_init
   end interface vamp2_equivalences_t
 
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   type(vamp2_equivalences_t) function vamp2_equivalences_init (&
        n_eqv, n_channel, n_dim) result (eqv)
     integer, intent(in) :: n_eqv, n_channel, n_dim
     eqv%n_eqv = n_eqv
     eqv%n_channel = n_channel
     eqv%n_dim = n_dim
     allocate (eqv%eqv(n_eqv))
     allocate (eqv%map(n_channel), source = 0)
     allocate (eqv%multiplicity(n_channel), source = 0)
     allocate (eqv%symmetry(n_channel), source = 0)
     allocate (eqv%independent(n_channel), source = .true.)
     allocate (eqv%equivalent_to_ch(n_channel), source = 0)
     allocate (eqv%dim_is_invariant(n_dim, n_channel), source = .false.)
   end function vamp2_equivalences_init
 
 @ %def vamp2_equivlences_init
 @ Write equivalences.
-<<vamp2: vamp2 equivalences: TBP>>=
+<<VAMP2: vamp2 equivalences: TBP>>=
   procedure :: write => vamp2_equivalences_write
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_equivalences_write (self, unit, indent)
     class(vamp2_equivalences_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind, i_eqv, ch
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     write (u, "(A)") "Inequivalent channels:"
     if (allocated (self%independent)) then
        do ch = 1, self%n_channel
           if (self%independent(ch)) then
              write (u, "(2X,A,1x,I0,A,4x,A,I0,4x,A,I0,4x,A,999(L1))") &
                   "Channel", ch, ":", &
                   "Mult. = ", self%multiplicity(ch), &
                   "Symm. = ", self%symmetry(ch), &
                   "Invar.: ", self%dim_is_invariant(:, ch)
           end if
        end do
     else
        write (u, "(A)") "[not allocated]"
     end if
     write (u, "(A)") "Equivalence list:"
     if (allocated (self%eqv)) then
        do i_eqv = 1, self%n_eqv
           write (u, "(2X,A,1X,I0)") "i_eqv:", i_eqv
           call self%eqv(i_eqv)%write (unit, indent = ind + 4)
        end do
     else
        write (u, "(A)") "[not allocated]"
     end if
   end subroutine vamp2_equivalences_write
 
 @ %def vamp2_equivalences_write
 @ Is allocated.
-<<vamp2: vamp2 equivalences: TBP>>=
+<<VAMP2: vamp2 equivalences: TBP>>=
   procedure, public :: is_allocated => vamp2_equivalences_is_allocated
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   logical function vamp2_equivalences_is_allocated (self) result (yorn)
     class(vamp2_equivalences_t), intent(in) :: self
     yorn = allocated (self%eqv)
   end function vamp2_equivalences_is_allocated
 
 @ %def vamp2_equivalences_is_allocated
 @ Get source channel and destination channel for given equivalence.
-<<vamp2: vamp2 equivalences: TBP>>=
+<<VAMP2: vamp2 equivalences: TBP>>=
   procedure, public :: get_channels => vamp2_equivalences_get_channels
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_equivalences_get_channels (eqv, i_eqv, dest, src)
    class(vamp2_equivalences_t), intent(in) :: eqv
    integer, intent(in) :: i_eqv
    integer, intent(out) :: dest, src
    dest = eqv%eqv(i_eqv)%ch
    src = eqv%eqv(i_eqv)%ch_src
   end subroutine vamp2_equivalences_get_channels
 
 @ %def vamp2_equivalences_get_channels
 @
-<<vamp2: vamp2 equivalences: TBP>>=
+<<VAMP2: vamp2 equivalences: TBP>>=
   procedure, public :: get_mode => vamp2_equivalences_get_mode
   procedure, public :: get_perm => vamp2_equivalences_get_perm
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   function vamp2_equivalences_get_mode (eqv, i_eqv) result (mode)
     class(vamp2_equivalences_t), intent(in) :: eqv
     integer, intent(in) :: i_eqv
     integer, dimension(:), allocatable :: mode
     mode = eqv%eqv(i_eqv)%mode
   end function vamp2_equivalences_get_mode
 
   function vamp2_equivalences_get_perm (eqv, i_eqv) result (perm)
     class(vamp2_equivalences_t), intent(in) :: eqv
     integer, intent(in) :: i_eqv
     integer, dimension(:), allocatable :: perm
     perm = eqv%eqv(i_eqv)%perm
   end function vamp2_equivalences_get_perm
 
 @ %def vamp2_equivalences_get_perm, vamp2_equivalences_get_mode
 @
-<<vamp2: vamp2 equivalences: TBP>>=
+<<VAMP2: vamp2 equivalences: TBP>>=
   procedure, public :: set_equivalence => vamp2_equivalences_set_equivalence
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_equivalences_set_equivalence &
        (eqv, i_eqv, dest, src, perm, mode)
     class(vamp2_equivalences_t), intent(inout) :: eqv
     integer, intent(in) :: i_eqv
     integer, intent(in) :: dest, src
     integer, dimension(:), intent(in) :: perm, mode
     integer :: i
     if (dest < 1 .or. dest > eqv%n_channel)  call msg_bug &
        ("VAMP2: set_equivalences: destination channel out of range.")
     if (src < 1 .or. src > eqv%n_channel)  call msg_bug &
          ("VAMP2: set_equivalences: source channel out of range.")
     if (size(perm) /= eqv%n_dim)  call msg_bug &
          ("VAMP2: set_equivalences: size(perm) does not match n_dim.")
     if (size(mode) /= eqv%n_dim)  call msg_bug &
          ("VAMP2: set_equivalences: size(mode) does not match n_dim.")
     eqv%eqv(i_eqv)%ch = dest
     eqv%eqv(i_eqv)%ch_src = src
     allocate (eqv%eqv(i_eqv)%perm (size (perm)))
     do i = 1, size (perm)
        eqv%eqv(i_eqv)%perm(i) = perm(i)
     end do
     allocate (eqv%eqv(i_eqv)%mode (size (mode)))
     do i = 1, size (mode)
        eqv%eqv(i_eqv)%mode(i) = mode(i)
     end do
   end subroutine vamp2_equivalences_set_equivalence
 
 @ %def vamp2_equivalences_set_equivalence
 @ Freeze equivalences.
-<<vamp2: vamp2 equivalences: TBP>>=
+<<VAMP2: vamp2 equivalences: TBP>>=
   procedure, public :: freeze => vamp2_equivalences_freeze
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_equivalences_freeze (self)
     class(vamp2_equivalences_t), intent(inout) :: self
     integer :: i_eqv, ch, upper, lower
     ch = 0
     do i_eqv = 1, self%n_eqv
        if (ch /= self%eqv(i_eqv)%ch) then
           ch = self%eqv(i_eqv)%ch
           self%map(ch) = i_eqv
        end if
     end do
     do ch = 1, self%n_channel
        lower = self%map(ch)
        if (ch == self%n_channel) then
           upper = self%n_eqv
        else
           upper = self%map(ch + 1) - 1
        end if
        associate (eqv => self%eqv, n_eqv => size (self%eqv(lower:upper)))
          if (.not. all(eqv(lower:upper)%ch == ch) .or. &
               eqv(lower)%ch_src > ch) then
             do i_eqv = lower, upper
                call self%eqv(i_eqv)%write ()
             end do
             call msg_bug ("VAMP2: vamp2_equivalences_freeze: &
                  &equivalence order is not correct.")
          end if
          self%symmetry(ch) = count (eqv(lower:upper)%ch_src == ch)
          if (mod (n_eqv, self%symmetry(ch)) /= 0) then
             do i_eqv = lower, upper
                call self%eqv(i_eqv)%write ()
             end do
             call msg_bug ("VAMP2: vamp2_equivalences_freeze: &
                  &permutation count is not correct.")
          end if
          self%multiplicity(ch) = n_eqv / self%symmetry(ch)
          self%independent(ch) = all (eqv(lower:upper)%ch_src >= ch)
          self%equivalent_to_ch(ch) = eqv(lower)%ch_src
          self%dim_is_invariant(:, ch) = eqv(lower)%mode == VEQ_INVARIANT
        end associate
     end do
   end subroutine vamp2_equivalences_freeze
 
 @ %def vamp2_equivalences_freeze
 @
 \subsection{Type: vamp2\_t}
 \label{sec:vamp2-t}
 
-<<vamp2: public>>=
+<<VAMP2: public>>=
   public :: vamp2_t
-<<vamp2: types>>=
+<<VAMP2: types>>=
   type :: vamp2_t
      private
      type(vamp2_config_t) :: config
      type(vegas_t), dimension(:), allocatable :: integrator
      integer, dimension(:), allocatable :: chain
      real(default), dimension(:), allocatable :: weight
      real(default), dimension(:), allocatable :: integral
      real(default), dimension(:), allocatable :: variance
      real(default), dimension(:), allocatable :: efficiency
      type(vamp2_result_t) :: result
      type(vamp2_equivalences_t) :: equivalences
      logical :: event_prepared
      real(default), dimension(:), allocatable :: event_weight
-   <<vamp2: vamp2: type>>
+   <<VAMP2: vamp2: type>>
    contains
-   <<vamp2: vamp2: TBP>>
+   <<VAMP2: vamp2: TBP>>
   end type vamp2_t
-<<MPI: vamp2: vamp2: type>>=
+<<MPI: VAMP2: vamp2: type>>=
   class(request_base_t), allocatable :: request
-<<vamp2: interfaces>>=
+<<VAMP2: interfaces>>=
   interface vamp2_t
      module procedure vamp2_init
   end interface vamp2_t
 
 @ %def vamp2_t
 @ Constructor.
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   type(vamp2_t) function vamp2_init (n_channel, n_dim, alpha, beta, n_bins_max,&
        & n_calls_min_per_channel, iterations, mode) result (self)
     integer, intent(in) :: n_channel
     integer, intent(in) :: n_dim
     integer, intent(in), optional :: n_bins_max
     integer, intent(in), optional :: n_calls_min_per_channel
     real(default), intent(in), optional :: alpha
     real(default), intent(in), optional :: beta
     integer, intent(in), optional :: iterations
     integer, intent(in), optional :: mode
     integer :: ch
     self%config%n_dim = n_dim
     self%config%n_channel = n_channel
     call set_options ()
     allocate (self%chain(n_channel), source=0)
     allocate (self%integrator(n_channel))
     allocate (self%weight(n_channel), source=0._default)
     do ch = 1, n_channel
        self%integrator(ch) = vegas_t (n_dim, alpha, n_bins_max, 1, mode)
     end do
     self%weight = 1._default / self%config%n_channel
     call self%reset_result ()
     allocate (self%event_weight(self%config%n_channel), source = 0._default)
     self%event_prepared = .false.
   contains
     subroutine set_options ()
       if (present (n_bins_max)) self%config%n_bins_max = n_bins_max
       if (present (n_calls_min_per_channel)) self%config%n_calls_min_per_channel = n_calls_min_per_channel
       if (present (alpha)) self%config%alpha = alpha
       if (present (beta)) self%config%beta = beta
       if (present (iterations)) self%config%iterations = iterations
       if (present (mode)) self%config%mode = mode
     end subroutine set_options
   end function vamp2_init
 
 @ %def vamp2_init
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: final => vamp2_final
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_final (self)
     class(vamp2_t), intent(inout) :: self
     integer :: ch
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%final ()
     end do
   end subroutine vamp2_final
 
 @ %def vamp2_final
 @ Output.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: write => vamp2_write
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_write (self, unit, indent)
     class(vamp2_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer, intent(in), optional :: indent
     integer :: u, ind, ch
     u = given_output_unit (unit)
     ind = 0; if (present (indent)) ind = indent
     call write_indent (u, ind)
     write (u, "(A)") "VAMP2: VEGAS AMPlified 2"
     call write_indent (u, ind)
     call self%config%write (unit, indent)
     call self%result%write (unit, indent)
   end subroutine vamp2_write
 
 @ %def vamp2_write
 @ Get the config object.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: get_config => vamp2_get_config
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_get_config (self, config)
     class(vamp2_t), intent(in) :: self
     type(vamp2_config_t), intent(out) :: config
     config = self%config
   end subroutine vamp2_get_config
 
 @ %def vamp2_get_config
 @ Set non-runtime dependent configuration. It will no be possible to change
 [[n_bins_max]].
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: set_config => vamp2_set_config
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_set_config (self, config)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_config_t), intent(in) :: config
     integer :: ch
     self%config%equivalences = config%equivalences
     self%config%n_calls_min_per_channel = config%n_calls_min_per_channel
     self%config%n_calls_threshold = config%n_calls_threshold
     self%config%n_calls_min = config%n_calls_min
     self%config%beta = config%beta
     self%config%accuracy_goal = config%accuracy_goal
     self%config%error_goal = config%error_goal
     self%config%rel_error_goal = config%rel_error_goal
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%set_config (config)
     end do
   end subroutine vamp2_set_config
 
 @ %def vamp2_set_config
 @ Set the overall number of calls. The number of calls each channel is scaled by
 the channel weights
 \begin{equation}
   N_i = \alpha_i N.
 \end{equation}
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: set_calls => vamp2_set_n_calls
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_set_n_calls (self, n_calls)
     class(vamp2_t), intent(inout) :: self
     integer, intent(in) :: n_calls
     integer :: ch
     self%config%n_calls_min = self%config%n_calls_min_per_channel &
          & * self%config%n_channel
     self%config%n_calls = max(n_calls, self%config%n_calls_min)
     if (self%config%n_calls > n_calls) then
        write (msg_buffer, "(A,I0)") "VAMP2: [set_calls] number of calls too few,&
             & reset to = ", self%config%n_calls
        call msg_message ()
     end if
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%set_calls (max (nint (self%config%n_calls *&
             & self%weight(ch)), self%config%n_calls_min_per_channel))
     end do
   end subroutine vamp2_set_n_calls
 
 @ %def vamp2_set_n_calls
 @ Set limits. We only support same limits for all channels.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: set_limits => vamp2_set_limits
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_set_limits (self, x_upper, x_lower)
     class(vamp2_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x_upper
     real(default), dimension(:), intent(in) :: x_lower
     integer :: ch
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%set_limits (x_upper, x_lower)
     end do
   end subroutine vamp2_set_limits
 
 @ %def vamp2_set_limits
 @ Set [[n_chains]] and the (actual) chains. [[chain]] must have size
 [[n_channels]] and each elements must store an index to a corresponding chain.
 This means, that channels with equal index correspond to the same chain, and we
 refer to those as chained weights, where we average the contributions of the
 chained weights in [[vamp2_adapt_weights]].
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: set_chain => vamp2_set_chain
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_set_chain (self, n_chains, chain)
     class(vamp2_t), intent(inout) :: self
     integer, intent(in) :: n_chains
     integer, dimension(:), intent(in) :: chain
     if (size (chain) /= self%config%n_channel) then
        call msg_bug ("VAMP2: set chain: size of chain array does not match n_channel.")
     else
        call msg_message ("VAMP2: set chain: use chained weights.")
     end if
     self%config%n_chains = n_chains
     self%chain = chain
   end subroutine vamp2_set_chain
 
 @ %def vamp2_set_chain
 @ Set channel equivalences.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: set_equivalences => vamp2_set_equivalences
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_set_equivalences (self, equivalences)
     class(vamp2_t), intent(inout) :: self
     type(vamp2_equivalences_t), intent(in) :: equivalences
     self%equivalences = equivalences
   end subroutine vamp2_set_equivalences
 
 @ %def vamp2_set_equivalences
 @ Move allocated (and prepared!) request object into VAMP2.
-<<MPI: vamp2: vamp2: TBP>>=
+<<MPI: VAMP2: vamp2: TBP>>=
   generic, public :: allocate_request => allocate_request_by_method, &
      allocate_request_by_object
   procedure, private :: allocate_request_by_method => vamp2_allocate_request_by_method
   procedure, private :: allocate_request_by_object => vamp2_allocate_request_by_object
-<<MPI: vamp2: procedures>>=
+<<MPI: VAMP2: procedures>>=
   subroutine vamp2_allocate_request_by_method (self, method)
     class(vamp2_t), intent(inout) :: self
     character(len=*), intent(in) :: method
     class(request_base_t), allocatable :: request
     select case (trim(method))
     case("simple", "Simple", "SIMPLE")
        allocate (request_simple_t :: request)
     case("load", "Load", "LOAD")
        allocate (request_caller_t :: request)
     case default
        call msg_bug ("VAMP2: Unknown method for MPI request module.")
     end select
     select type (request)
     type is (request_simple_t)
        call request%init (MPI_COMM_WORLD, n_channels = self%config%n_channel)
     type is (request_caller_t)
        call request%init (MPI_COMM_WORLD, n_channels = self%config%n_channel)
     end select
     call self%allocate_request_by_object (request)
   end subroutine vamp2_allocate_request_by_method
 
   subroutine vamp2_allocate_request_by_object (self, request)
     class(vamp2_t), intent(inout) :: self
     class(request_base_t), allocatable, intent(inout) :: request
     !! Only output in case of "parallel" integration.
     if (request%has_workers ()) then
        select type (request)
        type is (request_simple_t)
           call msg_message ("VAMP2: Simple Request Balancing.")
        type is (request_caller_t)
           call msg_message ("VAMP2: Request with load balancing.")
        class default
           call msg_bug ("VAMP2: Unknown extension of request_base.")
        end select
     end if
     call move_alloc (request, self%request)
   end subroutine vamp2_allocate_request_by_object
 
 @ %def vamp2_allocate_request
 @ Get [[n_calls]] calculated by [[VEGAS]].
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: get_n_calls => vamp2_get_n_calls
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   elemental real(default) function vamp2_get_n_calls (self) result (n_calls)
     class(vamp2_t), intent(in) :: self
     n_calls = sum (self%integrator%get_calls ())
   end function vamp2_get_n_calls
 
 @ %def vamp2_get_n_calls
 @ Get the cumulative result of the integration. Recalculate weighted average of the integration.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: get_integral => vamp2_get_integral
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   elemental real(default) function vamp2_get_integral (self) result (integral)
     class(vamp2_t), intent(in) :: self
     integral = 0.
     if (self%result%sum_wgts > 0.) then
        integral = self%result%sum_int_wgtd / self%result%sum_wgts
     end if
   end function vamp2_get_integral
 
 @ %def vamp2_get_integral
 @ Get the cumulative variance of the integration. Recalculate the variance.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: get_variance => vamp2_get_variance
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   elemental real(default) function vamp2_get_variance (self) result (variance)
     class(vamp2_t), intent(in) :: self
     variance = 0.
     if (self%result%sum_wgts > 0.) then
        variance = 1.0 / self%result%sum_wgts
     end if
   end function vamp2_get_variance
 
 @ %def vamp2_get_variance
 @ Get efficiency.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: get_efficiency => vamp2_get_efficiency
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   elemental real(default) function vamp2_get_efficiency (self) result (efficiency)
     class(vamp2_t), intent(in) :: self
     efficiency = 0.
     if (self%result%efficiency > 0.) then
        efficiency = self%result%efficiency
     end if
   end function vamp2_get_efficiency
 
 @ %def vamp2_get_efficiency
 @ Get event weight and event weight excess.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure :: get_evt_weight => vamp2_get_evt_weight
   procedure :: get_evt_weight_excess => vamp2_get_evt_weight_excess
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   real(default) function vamp2_get_evt_weight (self) result (evt_weight)
     class(vamp2_t), intent(in) :: self
     evt_weight = self%result%evt_weight
   end function vamp2_get_evt_weight
 
   real(default) function vamp2_get_evt_weight_excess (self) result (evt_weight_excess)
     class(vamp2_t), intent(in) :: self
     evt_weight_excess = self%result%evt_weight_excess
   end function vamp2_get_evt_weight_excess
 
 @ %def vamp2_get_evt_weight, vamp2_get_evt_weight_excess
 @ Get procedure to retrieve channel-th grid.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure :: get_grid => vamp2_get_grid
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   type(vegas_grid_t) function vamp2_get_grid (self, channel) result (grid)
     class(vamp2_t), intent(in) :: self
     integer, intent(in) :: channel
     if (channel < 1 .or. channel > self%config%n_channel) &
          call msg_bug ("VAMP2: vamp2_get_grid: channel index < 1 or > n_channel.")
     grid = self%integrator(channel)%get_grid ()
   end function vamp2_get_grid
 
 @ %def vamp2_get_grid
 @ Adapt. We adapt the weights due the contribution of variances with $\beta >
 0$.
 \begin{equation}
   \alpha_i = \frac{\alpha_i V_i^\beta}{\sum_i \alpha_i V_i^\beta}
 \end{equation}
 If [[n_calls_threshold]] is set, we rescale the weights in such a way, that the
 [[n_calls]] for each channel are greater than [[n_calls_threshold]].
 We calculate the distance of the weights to the [[weight_min]] and reset those weights
 which are less than [[weight_mins]] to this value. The other values are
 accordingly resized to fit the boundary condition of the partition of unity.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, private :: adapt_weights => vamp2_adapt_weights
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_adapt_weights (self)
     class(vamp2_t), intent(inout) :: self
     integer :: n_weights_underflow
     real(default) :: weight_min, sum_weights_underflow
     self%weight = self%weight * self%integrator%get_variance ()**self%config%beta
     if (sum (self%weight) == 0) self%weight = real(self%config%n_calls, default)
     if (self%config%n_chains > 0) then
        call chain_weights ()
     end if
     self%weight = self%weight / sum(self%weight)
     if (self%config%n_calls_threshold /= 0) then
        weight_min = real(self%config%n_calls_threshold, default) &
             & / self%config%n_calls
        sum_weights_underflow = sum (self%weight, self%weight < weight_min)
        n_weights_underflow = count (self%weight < weight_min)
        where (self%weight < weight_min)
           self%weight = weight_min
        elsewhere
           self%weight = self%weight * (1. - n_weights_underflow * weight_min) &
                & / (1. - sum_weights_underflow)
        end where
     end if
     call self%set_calls (self%config%n_calls)
   contains
-  <<vamp2: vamp2 adapt weights: procedures>>
+  <<VAMP2: vamp2 adapt weights: procedures>>
   end subroutine vamp2_adapt_weights
 
 @ %def vamp2_adapt_weights
 @ We average the weights over their respective chain members.
-<<vamp2: vamp2 adapt weights: procedures>>=
+<<VAMP2: vamp2 adapt weights: procedures>>=
   subroutine chain_weights ()
     integer :: ch
     real(default) :: average
     do ch = 1, self%config%n_chains
        average = max (sum (self%weight, self%chain == ch), 0._default)
        if (average /= 0) then
           average = average / count (self%chain == ch)
           where (self%chain == ch)
              self%weight = average
           end where
        end if
     end do
   end subroutine chain_weights
 
 @ %def chain_weights
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, private :: apply_equivalences => vamp2_apply_equivalences
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_apply_equivalences (self)
     class(vamp2_t), intent(inout) :: self
     integer :: ch, ch_src, j, j_src, i_eqv
     real(default), dimension(:, :, :), allocatable :: d
     real(default), dimension(:, :), allocatable :: d_src
     integer, dimension(:), allocatable :: mode, perm
     if (.not. self%equivalences%is_allocated ()) then
        call msg_bug ("VAMP2: vamp2_apply_equivalences: &
             &cannot apply not-allocated equivalences.")
     end if
     allocate (d(self%config%n_bins_max, self%config%n_dim, &
          self%config%n_channel), source=0._default)
     associate (eqv => self%equivalences, nb => self%config%n_bins_max)
       do i_eqv = 1, self%equivalences%n_eqv
          call eqv%get_channels (i_eqv, ch, ch_src)
          d_src = self%integrator(ch_src)%get_distribution ()
          mode = eqv%get_mode (i_eqv)
          perm = eqv%get_perm (i_eqv)
          do j = 1, self%config%n_dim
             select case (mode (j))
             case (VEQ_IDENTITY)
                d(:, j, ch) = d(:, j, ch) + &
                     d_src(:, perm(j))
             case (VEQ_INVERT)
                d(:, j, ch) = d(:, j, ch) + &
                     d_src(nb:1:-1, perm(j))
             case (VEQ_SYMMETRIC)
                d(:, j, ch) = d(:, j, ch) + &
                     d_src(:, perm(j)) / 2. + &
                     d_src(nb:1:-1, perm(j)) / 2.
             case (VEQ_INVARIANT)
                d(:, j, ch) = 1._default
             end select
          end do
       end do
     end associate
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%set_distribution (d(:, :, ch))
     end do
   end subroutine vamp2_apply_equivalences
 
 @ %def vamp2_apply_equivalences
 @ Reset the cumulative result.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: reset_result => vamp2_reset_result
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_reset_result (self)
     class(vamp2_t), intent(inout) :: self
     call self%result%reset ()
   end subroutine vamp2_reset_result
 
 @ %def vamp2_reset_result
 @ Integrate. We integrate each channel separately and combine the results
 \begin{align}
   I & = \sum_i \alpha_i I_i, \\
   \sigma^2 & = \sum_i \alpha_i^2 \sigma^2_i.
 \end{align}
 Although, the (population) variance is given by
 \begin{equation}
   \begin{split}
     \sigma^2 & = \frac{1}{N} \left( \sum_i \alpha_i I^2_i - I^2 \right) \\
     & = \frac{1}{N - 1} \left( \sum_i \left( N_i \sigma^2_i + I^2_i \right) -I^2
     \right) \\
     & = \frac{1}{N - 1} \left( \sum_i \alpha_i \sigma^2_i + \alpha_i I^2_i - I^2
     \right),
   \end{split}
 \end{equation}
 where we used $\sigma^2_i = \frac{1}{N} \left( \langle I^2_i \rangle - \langle
   I_i \rangle^2 \right)$, we use the approximation for numeric stability.
 The population variance relates to sample variance
 \begin{equation}
   s^2 = \frac{n}{n - 1} \sigma^2,
 \end{equation}
 which gives an unbiased error estimate.
 
 Beside those adaption to multichannel, the overall processing of
 [[total_integral]], [[total_sq_integral]] and [[total_variance]] is the same as
 in [[vegas_integrate]].
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: integrate => vamp2_integrate
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_integrate (self, func, rng, iterations, reset_result,&
        & refine_grids, adapt_weights, verbose, result, abserr)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     integer, intent(in), optional :: iterations
     logical, intent(in), optional :: reset_result
     logical, intent(in), optional :: refine_grids
     logical, intent(in), optional :: adapt_weights
     logical, intent(in), optional :: verbose
     real(default), optional, intent(out) :: result, abserr
     integer :: it, ch
     type(iterator_t) :: channel_iterator
     real(default) :: cumulative_int, cumulative_std
     logical :: opt_reset_result
     logical :: opt_adapt_weights
     logical :: opt_refine_grids
     logical :: opt_verbose
-  <<vamp2: vamp2 integrate: variables>>
+  <<VAMP2: vamp2 integrate: variables>>
     call set_options ()
     if (opt_verbose) then
        call msg_message ("Results: [it, calls, integral, error, chi^2, eff.]")
     end if
     if (opt_reset_result) call self%reset_result ()
-  <<vamp2: vamp2 integrate: init>>
+  <<VAMP2: vamp2 integrate: init>>
     iteration: do it = 1, self%config%iterations
        call channel_iterator%init (1, self%config%n_channel)
        call self%prepare_integrate_iteration (func)
-     <<vamp2: vamp2 integrate: presampling>>
+     <<VAMP2: vamp2 integrate: presampling>>
      channel: do
-        <<vamp2: vamp2 integrate: sampling>>
+        <<VAMP2: vamp2 integrate: sampling>>
           call func%set_channel (ch)
           call self%integrator(ch)%integrate ( &
                & func, rng, iterations, refine_grid = .false., verbose = .false.)
-        <<vamp2: vamp2 integrate: callback>>
+        <<VAMP2: vamp2 integrate: callback>>
           call channel_iterator%next_step ()
        end do channel
-     <<vamp2: vamp2 integrate: postsampling>>
+     <<VAMP2: vamp2 integrate: postsampling>>
        call self%compute_result_and_efficiency ()
        associate (result => self%result)
          cumulative_int = result%sum_int_wgtd / result%sum_wgts
          cumulative_std = sqrt (1 / result%sum_wgts)
          if (opt_verbose) then
             write (msg_buffer, "(I0,1x,I0,1x, 4(E24.16E4,1x))") &
                  & it, self%config%n_calls, cumulative_int, cumulative_std, &
                  & result%chi2, result%efficiency
             call msg_message ()
          end if
        end associate
        if (opt_adapt_weights) then
           call self%adapt_weights ()
        end if
        if (opt_refine_grids) then
           if (self%config%equivalences .and. self%equivalences%is_allocated ()) then
              call self%apply_equivalences ()
           end if
           do ch = 1, self%config%n_channel
              !! When we apply the grid refinement outside of VEGAS, then we do not average over distribution
              !! as VEGAS averaged the distribution internally.
              call self%integrator(ch)%refine (average = .false.)
           end do
        end if
     end do iteration
     if (present (result)) result = cumulative_int
     if (present (abserr)) abserr = abs (cumulative_std)
   contains
-  <<vamp2: vamp2 integrate: procedures>>
+  <<VAMP2: vamp2 integrate: procedures>>
   end subroutine vamp2_integrate
 
 @ %def vamp2_integrate
 @ Set optional parameters.
-<<vamp2: vamp2 integrate: procedures>>=
+<<VAMP2: vamp2 integrate: procedures>>=
   subroutine set_options ()
     if (present (iterations)) self%config%iterations = iterations
     opt_reset_result = .true.
     if (present (reset_result)) opt_reset_result = reset_result
     opt_adapt_weights = .true.
     if (present (adapt_weights)) opt_adapt_weights = adapt_weights
     opt_refine_grids = .true.
     if (present (refine_grids)) opt_refine_grids = refine_grids
     opt_verbose = .false.
     if (present (verbose)) opt_verbose = verbose
   end subroutine set_options
 
 @ %def set_options
 @
 We define additional chunks, which we use to insert parallel/MPI code.
-<<vamp2: vamp2 integrate: variables>>=
+<<VAMP2: vamp2 integrate: variables>>=
 @
-<<vamp2: vamp2 integrate: init>>=
+<<VAMP2: vamp2 integrate: init>>=
 @
-<<vamp2: vamp2 integrate: presampling>>=
+<<VAMP2: vamp2 integrate: presampling>>=
 @ Conditional handling.
 We introduce a different behavior for the MPI-/non-MPI variant.
-<<NOMPI: vamp2: vamp2 integrate: sampling>>=
+<<NOMPI: VAMP2: vamp2 integrate: sampling>>=
   if (.not. channel_iterator%is_iterable ()) exit channel
   ch = channel_iterator%get_current ()
 @
-<<vamp2: vamp2 integrate: callback>>=
+<<VAMP2: vamp2 integrate: callback>>=
 @
-<<vamp2: vamp2 integrate: postsampling>>=
+<<VAMP2: vamp2 integrate: postsampling>>=
 @
-<<MPI: vamp2: vamp2 integrate: variables>>=
+<<MPI: VAMP2: vamp2 integrate: variables>>=
   type(request_t) :: request
 @
 Verify that we have an allocated request object, else fallback to simple method.
 Be aware that we keep the fallback silent (!) in order to keep any possible testing output uninterrupted.
-<<MPI: vamp2: vamp2 integrate: init>>=
+<<MPI: VAMP2: vamp2 integrate: init>>=
   if (.not. allocated (self%request)) then
      call self%allocate_request_by_method ("simple")
   end if
 @
-<<MPI: vamp2: vamp2 integrate: presampling>>=
+<<MPI: VAMP2: vamp2 integrate: presampling>>=
   if (self%request%is_master ()) then
      select type (req => self%request)
      type is (request_caller_t)
         request%terminate = .true.
         call update_iter_and_rng (request, channel_iterator, rng)
         !! channel_iter is already drained for master.
         !! Do not descent into channel integration (later on).
         call req%handle_workload ()
      end select
   end if
 @ Conditional handling for the MPI-version of sampling.
 
 We have a request object and an advanced [[channel_iterator]] and have to consider three cases:
 \begin{enumerate}
 \item [[channel_iterator]] drained,
 \item [[request]] terminated,
 \item [[request]] received.
 \end{enumerate}
 When the [[channel_iterator]] is drained by [[update_iter_and_rng]], we send a terminate request to the master
 and await in the next cycle of channel loop the terminated request.
 When the request is terminated, we can gracefully exit the channel loop as [[channel_iterator]] and [[rng]] are already advanced by [[update_iter_and_rng]].
 When we received a genuine request, then, we proceed as prophisied.
-<<MPI: vamp2: vamp2 integrate: sampling>>=
+<<MPI: VAMP2: vamp2 integrate: sampling>>=
   select type (req => self%request)
   type is (request_caller_t)
      if (self%request%is_master ()) exit channel
   end select
   call self%request%request_workload (request)
   call update_iter_and_rng (request, channel_iterator, rng)
   if (request%terminate) then
      exit channel
   else if (.not. channel_iterator%is_iterable ()) then
      select type (req => self%request)
      type is (request_caller_t)
         call req%request_terminate ()
         cycle channel
      class is (request_base_t)
         exit channel
      end select
   end if
   if (request%group) call MPI_BARRIER (request%comm)
   ch = request%handler_id
   call self%integrator(ch)%prepare_parallel_integrate(request%comm, &
        duplicate_comm = .false., &
        parallel_mode = request%group)
 @
-<<MPI: vamp2: vamp2 integrate: callback>>=
+<<MPI: VAMP2: vamp2 integrate: callback>>=
   if (request%group_master) then
      if (.not. self%request%is_master ())  &
           call allocate_handler (self%request, ch)
      call self%request%handle_and_release_workload (request)
   else
      call self%request%release_workload (request)
   end if
 @
-<<MPI: vamp2: vamp2 integrate: postsampling>>=
+<<MPI: VAMP2: vamp2 integrate: postsampling>>=
   call reduce_func_calls (func)
   call self%request%await_handler ()
   call self%request%barrier ()
   if (.not. self%request%is_master ()) cycle
 @
-<<MPI: vamp2: vamp2 integrate: procedures>>=
+<<MPI: VAMP2: vamp2 integrate: procedures>>=
   subroutine allocate_handler (req, ch)
     class(request_base_t), intent(inout) :: req
     integer, intent(in) :: ch
     class(request_handler_t), pointer :: vegas_result_handler
     call self%integrator(ch)%allocate_handler (&
          handler_id = ch,&
          handler = vegas_result_handler)
     call req%add_handler (ch, vegas_result_handler)
   end subroutine allocate_handler
 
   !! Advance the random number generator for the skipped channels.
   !!
   !! We set current_channel = request%handler_id, hence, we need to advance
   !! the random number generator until th iterator returns the same channel.
   subroutine update_iter_and_rng (request, iter, rng)
     type(request_t), intent(in) :: request
     type(iterator_t), intent(inout) :: iter
     class(rng_t), intent(inout) :: rng
     advance: do while (iter%is_iterable ())
        !! Advance up to iterator%end when in terminate mode,
        !! else advance until we hit the previous channel (i.e. request%handler_id - 1):
        !! Proof: current_channel <= request%handler_id - 1
        if (.not. request%terminate) then
           if (iter%get_current () >= request%handler_id) &
                exit advance
        end if
        select type (rng)
        type is (rng_stream_t)
           call rng%next_substream ()
        end select
        call iter%next_step ()
     end do advance
   end subroutine update_iter_and_rng
 
   subroutine reduce_func_calls (func)
     class(vamp2_func_t), intent(inout) :: func
     type(MPI_COMM) :: comm
     integer :: root_n_calls
     call self%request%get_external_comm (comm)
     call MPI_reduce (func%n_calls, root_n_calls, 1, MPI_INTEGER, &
          MPI_SUM, 0, comm)
     if (self%request%is_master ()) then
        func%n_calls = root_n_calls
     else
        call func%reset_n_calls ()
     end if
   end subroutine reduce_func_calls
 @
 @ Prepeare current integration's iteration.
 
 Prepare iteration, i.e. provide weights and grids to function object.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, private :: prepare_integrate_iteration => &
        vamp2_prepare_integrate_iteration
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_prepare_integrate_iteration (self, func)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_func_t), intent(inout) :: func
-  <<vamp2: vamp2 prepare integrate iteration: body>>
+  <<VAMP2: vamp2 prepare integrate iteration: body>>
     call fill_func_with_weights_and_grids (func)
   contains
     subroutine fill_func_with_weights_and_grids (func)
       class(vamp2_func_t), intent(inout) :: func
       integer :: ch
       do ch = 1, self%config%n_channel
          func%wi(ch) = self%weight(ch)
          !! \todo Use pointers instead of a deep copy.
          func%grids(ch) = self%integrator(ch)%get_grid ()
       end do
     end subroutine fill_func_with_weights_and_grids
-  <<vamp2: vamp2 prepare integrate iteration: procedures>>
+  <<VAMP2: vamp2 prepare integrate iteration: procedures>>
   end subroutine vamp2_prepare_integrate_iteration
 
 @ %def vamp2_prepare_integrate_iteration
-<<vamp2: vamp2 prepare integrate iteration: body>>=
+<<VAMP2: vamp2 prepare integrate iteration: body>>=
 @
-<<vamp2: vamp2 prepare integrate iteration: procedures>>=
+<<VAMP2: vamp2 prepare integrate iteration: procedures>>=
 @
-<<MPI: vamp2: vamp2 prepare integrate iteration: body>>=
+<<MPI: VAMP2: vamp2 prepare integrate iteration: body>>=
   if (.not. allocated (self%request)) then
      call msg_bug ("VAMP2: prepare integration iteration failed: unallocated request.")
   end if
   call broadcast_weights_and_grids ()
   select type (req => self%request)
   type is (request_simple_t)
      call req%update (self%integrator%is_parallelizable ())
      call init_all_handler (req)
      call call_all_handler (req)
      !! Add all handlers, call all handlers.
   type is (request_caller_t)
      if (req%is_master ()) then
         call req%update_balancer (self%weight, self%integrator%is_parallelizable ())
         call init_all_handler (req)
      else
         call self%request%reset ()
      end if
   class default
      call msg_bug ("VAMP2: prepare integration iteration failed: unknown request type.")
   end select
 @
-<<MPI: vamp2: vamp2 prepare integrate iteration: procedures>>=
+<<MPI: VAMP2: vamp2 prepare integrate iteration: procedures>>=
   subroutine broadcast_weights_and_grids ()
     type(vegas_grid_t) :: grid
     type(MPI_COMM) :: comm
     integer :: ch
     call self%request%get_external_comm (comm)
     call MPI_BCAST (self%weight, self%config%n_channel, &
          MPI_DOUBLE_PRECISION, 0, comm)
     do ch = 1, self%config%n_channel
        grid = self%integrator(ch)%get_grid ()
        call grid%broadcast (comm)
        call self%integrator(ch)%set_grid (grid)
     end do
     call self%set_calls (self%config%n_calls)
   end subroutine broadcast_weights_and_grids
 
   subroutine init_all_handler (req)
     class(request_base_t), intent(inout) :: req
     class(request_handler_t), pointer :: vegas_result_handler
     integer :: ch
     !! The master worker needs always all handler (callback objects)
     !! in order to perform the communication to the client handler (callbacks).
     if (.not. req%is_master ()) return
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%allocate_handler (&
             handler_id = ch,&
             handler = vegas_result_handler)
        call req%add_handler (ch, vegas_result_handler)
     end do
   end subroutine init_all_handler
 
   subroutine call_all_handler (req)
     class(request_base_t), intent(inout) :: req
     integer :: ch
     if (.not. req%is_master ()) return
     do ch = 1, self%config%n_channel
        select type (req)
        type is (request_simple_t)
           call req%call_handler (handler_id = ch, &
                source_rank = req%get_request_master (ch))
        end select
     end do
   end subroutine call_all_handler
 @
 @ Compute the result and efficiency of the current status of the integrator.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, private :: compute_result_and_efficiency => &
        vamp2_compute_result_and_efficiency
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_compute_result_and_efficiency (self)
     class(vamp2_t), intent(inout) :: self
     real(default) :: total_integral, total_variance
     real(default) :: max_abs_f_pos, max_abs_f_neg, &
          sum_abs_f_pos, sum_abs_f_neg
     call compute_integral_and_variance (total_integral, total_variance)
     call self%result%update (total_integral, total_variance)
     call compute_efficiency (max_pos = max_abs_f_pos, max_neg = max_abs_f_neg, &
          sum_pos = sum_abs_f_pos, sum_neg = sum_abs_f_neg)
     !! Do not average of number of calls, we have already averaged the efficiencies of all channels.
     call self%result%update_efficiency (n_calls  = 1, &
          max_pos = max_abs_f_pos, max_neg = max_abs_f_neg, &
          sum_pos = sum_abs_f_pos, sum_neg = sum_abs_f_neg)
   contains
     subroutine compute_integral_and_variance (integral, variance)
       real(default), intent(out) :: integral, variance
       real(default) :: sq_integral
       integral = dot_product (self%weight, self%integrator%get_integral ())
       sq_integral = dot_product (self%weight, self%integrator%get_integral ()**2)
       variance = self%config%n_calls * dot_product (self%weight**2, self%integrator%get_variance ())
       variance = sqrt (variance + sq_integral)
       variance = 1._default / self%config%n_calls * &
            & (variance + integral) * (variance - integral)
     end subroutine compute_integral_and_variance
 
     !> We compute the weight-averaged sum and maximum of the channel (integration) weights \f$w_{i,c}\f$.
     !!
     !! The averaged integration weight and maximum are
     !! \f[
     !!  \langle w \rangle = \sum_i \alpha_i \frac{\sum_j w_{i, j}}{N_i},
     !! \f]
     !! \f[
     !!  \langle \max w \rangle = \sum_i \alpha_i |\max_j w_{i, j}|.
     !! \f]
     subroutine compute_efficiency (max_pos, max_neg, &
          sum_pos, sum_neg)
       real(default), intent(out) :: max_pos, max_neg
       real(default), intent(out) :: sum_pos, sum_neg
       max_abs_f_pos = dot_product (self%weight, self%integrator%get_max_abs_f_pos ())
       max_abs_f_neg = dot_product (self%weight, self%integrator%get_max_abs_f_neg ())
       sum_abs_f_pos = dot_product (self%weight, &
            self%integrator%get_sum_abs_f_pos () / self%integrator%get_calls ())
       sum_abs_f_neg = dot_product (self%weight, &
            self%integrator%get_sum_abs_f_neg () / self%integrator%get_calls ())
     end subroutine compute_efficiency
   end subroutine vamp2_compute_result_and_efficiency
 
 @ %def vamp2_compute_result_and_efficiency
 @ Generate event from multi-channel weight $w(x) = f(x) / g(x)$.
 
 We select a channel using the a-priori weights and $f_{i}^{\text{max}}$, to
 flatten possible unbalanced channel weight(s).
 
 An additional rescale factor [[opt_event_rescale]] is applied to [[f_max]], iff set.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: generate_weighted => vamp2_generate_weighted_event
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_generate_weighted_event (&
        self, func, rng, x)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out)  :: x
     integer :: ch, i
     real(default) :: r
     if (.not. self%event_prepared) then
        call prepare_event ()
     end if
     call rng%generate (r)
     nchannel: do ch = 1, self%config%n_channel
        r = r - self%event_weight(ch)
        if (r <= 0._default) exit nchannel
     end do nchannel
     ch = min (ch, self%config%n_channel)
     call func%set_channel (ch)
     call self%integrator(ch)%generate_weighted (func, rng, x)
     ! Norm weight by f_max, hidden in event_weight(ch), else by 1
     self%result%evt_weight = self%integrator(ch)%get_evt_weight () &
          * self%weight(ch) / self%event_weight(ch)
   contains
-  <<vamp2: vamp2 generate event: procedures>>
+  <<VAMP2: vamp2 generate event: procedures>>
   end subroutine vamp2_generate_weighted_event
 
 @ %def vamp2_generate_weighted_event
 @ Generate unweighted events.
 
 After selecting a channel $ch$ by the inversion method using a random number $r \in [0, 1]$
 \begin{align}
   F^{-1}(r) := \operatorname{inf} \{ c \in \{1, \dots, N_c\} | F(c) \geq r \},
   F(c) = \sum_{c^\prime \leq c} \alpha_c^\prime,
 \end{align}
 we try for an event from the previously selected channel.
 If the event is rejected, we also reject the selected channel.
 
 The inversion method is implemented as a loop over the channel weights \(\alpha_i\) until \(\sum_{c}^{c^prime} \alpha_c - r \leq 0\), the last value of the loop index [[ch]] is then \(c^\prime\).
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: generate_unweighted => vamp2_generate_unweighted_event
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_generate_unweighted_event ( &
        & self, func, rng, x, opt_event_rescale)
     class(vamp2_t), intent(inout) :: self
     class(vamp2_func_t), intent(inout) :: func
     class(rng_t), intent(inout) :: rng
     real(default), dimension(self%config%n_dim), intent(out)  :: x
     real(default), intent(in), optional :: opt_event_rescale
     integer :: ch, i
     real(default) :: r, max_abs_f, event_rescale
     event_rescale = 1._default
     if (present (opt_event_rescale)) then
        event_rescale = opt_event_rescale
     end if
     if (.not. self%event_prepared) then
        call prepare_event ()
     end if
     generate: do
        call rng%generate (r)
        nchannel: do ch = 1, self%config%n_channel
           r = r - self%event_weight(ch)
           if (r <= 0._default) exit nchannel
        end do nchannel
        ch = min (ch, self%config%n_channel)
        call func%set_channel (ch)
        call self%integrator(ch)%generate_weighted (func, rng, x)
        self%result%evt_weight = self%integrator(ch)%get_evt_weight ()
        max_abs_f = merge ( &
             self%integrator(ch)%get_max_abs_f_pos (), &
             self%integrator(ch)%get_max_abs_f_neg (), &
             self%result%evt_weight > 0.)
        self%result%evt_weight_excess = 0._default
        if (self%result%evt_weight > max_abs_f) then
           self%result%evt_weight_excess = self%result%evt_weight / max_abs_f - 1._default
           exit generate
        end if
        call rng%generate (r)
        ! Do not use division, because max_abs_f could be zero.
        if (event_rescale * max_abs_f * r <= abs(self%result%evt_weight)) then
           exit generate
        end if
     end do generate
   contains
-  <<vamp2: vamp2 generate event: procedures>>
+  <<VAMP2: vamp2 generate event: procedures>>
   end subroutine vamp2_generate_unweighted_event
 
 @ %def vamp2_generate_event
 Prepare event generation. We have to set the channel weights and the grids for
 the integrand's object.
 
 We use an ansatz proposed by T. Ohl in the original VAMP code where we do not have to accept on
 \begin{equation*}
    \frac{w_i(x)}{\sum_i \alpha_i \operatorname*{max}_{x} w_i(x)},
 \end{equation*}
 after we have selected a channel by the weights $\alpha_i$. But rather, we use a
 more efficient way where we rescale the channel weights $\alpha_i$
 \begin{equation*}
    \alpha_i \rightarrow \alpha_i \frac{\operatorname*{max}_x w_i(x)}{\sum_i \alpha_i \operatorname*{max}_{x} w_i(x)}.
 \end{equation*}
 The overall magic is to insert a "1" and to  move the uneasy part into the channel selection,
 such that we can generate events likewise in the single channel mode.
 We generate an unweighted event by
 \begin{equation*}
    \frac{w_i(x)}{\operatorname*{max}_{x} w_i{x}},
 \end{equation*}
 after we have selected a channel by the rescaled event channel weights.
 The overall normalization $\operatorname*{max}_{i, x}$ is not needed because we normalize the event channel weights
 to one and therefore the overall normalization cancels.
-<<vamp2: vamp2 generate event: procedures>>=
+<<VAMP2: vamp2 generate event: procedures>>=
   subroutine prepare_event ()
     integer :: i
     self%event_prepared = .false.
     do i = 1, self%config%n_channel
        func%wi(i) = self%weight(i)
        func%grids(i) = self%integrator(i)%get_grid ()
     end do
     if (any (self%integrator%get_max_abs_f () > 0)) then
        self%event_weight = self%weight * self%integrator%get_max_abs_f ()
     else
        self%event_weight = self%weight
     end if
     self%event_weight = self%event_weight / sum (self%event_weight)
     self%event_prepared = .true.
   end subroutine prepare_event
 
 @ %def prepare_event
 @ Write grids to unit.
-<<vamp2: parameters>>=
+<<VAMP2: parameters>>=
   character(len=*), parameter, private :: &
      descr_fmt =         "(1X,A)", &
      integer_fmt =       "(1X,A18,1X,I15)", &
      integer_array_fmt = "(1X,I18,1X,I15)", &
      logical_fmt =       "(1X,A18,1X,L1)", &
      double_fmt =        "(1X,A18,1X,E24.16E4)", &
      double_array_fmt =  "(1X,I18,1X,E24.16E4)", &
      double_array_pac_fmt = "(1X,I18,1X,E16.8E4)", &
      double_array2_fmt = "(1X,2(1X,I8),1X,E24.16E4)", &
      double_array2_pac_fmt = "(1X,2(1X,I8),1X,E16.8E4)"
 @ %def descr_fmt integer_fmt integer_array_fmt logical_fmt
 @ %def double_fmt double_array_fmt double_array2_fmt
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: write_grids => vamp2_write_grids
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_write_grids (self, unit)
     class(vamp2_t), intent(in) :: self
     integer, intent(in), optional :: unit
     integer :: u
     integer :: ch
     u = given_output_unit (unit)
     write (u, descr_fmt) "begin type(vamp2_t)"
     write (u, integer_fmt) "n_channel =", self%config%n_channel
     write (u, integer_fmt) "n_dim =", self%config%n_dim
     write (u, integer_fmt) "n_calls_min_ch =", self%config%n_calls_min_per_channel
     write (u, integer_fmt) "n_calls_thres =", self%config%n_calls_threshold
     write (u, integer_fmt) "n_chains =", self%config%n_chains
     write (u, logical_fmt) "stratified =", self%config%stratified
     write (u, double_fmt) "alpha =", self%config%alpha
     write (u, double_fmt) "beta =", self%config%beta
     write (u, integer_fmt) "n_bins_max =", self%config%n_bins_max
     write (u, integer_fmt) "iterations =", self%config%iterations
     write (u, integer_fmt) "n_calls =", self%config%n_calls
     write (u, integer_fmt) "it_start =", self%result%it_start
     write (u, integer_fmt) "it_num =", self%result%it_num
     write (u, integer_fmt) "samples =", self%result%samples
     write (u, double_fmt) "sum_int_wgtd =", self%result%sum_int_wgtd
     write (u, double_fmt) "sum_wgts =", self%result%sum_wgts
     write (u, double_fmt) "sum_chi =", self%result%sum_chi
     write (u, double_fmt) "chi2 =", self%result%chi2
     write (u, double_fmt) "efficiency =", self%result%efficiency
     write (u, double_fmt) "efficiency_pos =", self%result%efficiency_pos
     write (u, double_fmt) "efficiency_neg =", self%result%efficiency_neg
     write (u, double_fmt) "max_abs_f =", self%result%max_abs_f
     write (u, double_fmt) "max_abs_f_pos =", self%result%max_abs_f_pos
     write (u, double_fmt) "max_abs_f_neg =", self%result%max_abs_f_neg
     write (u, double_fmt) "result =", self%result%result
     write (u, double_fmt) "std =", self%result%std
     write (u, descr_fmt) "begin weight"
     do ch = 1, self%config%n_channel
        write (u, double_array_fmt) ch, self%weight(ch)
     end do
     write (u, descr_fmt) "end weight"
     if (self%config%n_chains > 0) then
        write (u, descr_fmt) "begin chain"
        do ch = 1, self%config%n_channel
           write (u, integer_array_fmt) ch, self%chain(ch)
        end do
        write (u, descr_fmt) "end chain"
     end if
     write (u, descr_fmt) "begin integrator"
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%write_grid (unit)
     end do
     write (u, descr_fmt) "end integrator"
     write (u, descr_fmt) "end type(vamp2_t)"
   end subroutine vamp2_write_grids
 
 @ %def vamp2_write_grids
 @ Read grids from unit.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
   procedure, public :: read_grids => vamp2_read_grids
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_read_grids (self, unit)
     class(vamp2_t), intent(out) :: self
     integer, intent(in), optional :: unit
     integer :: u
     integer :: ibuffer, jbuffer, ch
     character(len=80) :: buffer
     read (unit, descr_fmt) buffer
     read (unit, integer_fmt) buffer, ibuffer
     read (unit, integer_fmt) buffer, jbuffer
     select type (self)
     type is (vamp2_t)
        self = vamp2_t (n_channel = ibuffer, n_dim = jbuffer)
     end select
     read (unit, integer_fmt) buffer, self%config%n_calls_min_per_channel
     read (unit, integer_fmt) buffer, self%config%n_calls_threshold
     read (unit, integer_fmt) buffer, self%config%n_chains
     read (unit, logical_fmt) buffer, self%config%stratified
     read (unit, double_fmt) buffer, self%config%alpha
     read (unit, double_fmt) buffer, self%config%beta
     read (unit, integer_fmt) buffer, self%config%n_bins_max
     read (unit, integer_fmt) buffer, self%config%iterations
     read (unit, integer_fmt) buffer, self%config%n_calls
     read (unit, integer_fmt) buffer, self%result%it_start
     read (unit, integer_fmt) buffer, self%result%it_num
     read (unit, integer_fmt) buffer, self%result%samples
     read (unit, double_fmt) buffer, self%result%sum_int_wgtd
     read (unit, double_fmt) buffer, self%result%sum_wgts
     read (unit, double_fmt) buffer, self%result%sum_chi
     read (unit, double_fmt) buffer, self%result%chi2
     read (unit, double_fmt) buffer, self%result%efficiency
     read (unit, double_fmt) buffer, self%result%efficiency_pos
     read (unit, double_fmt) buffer, self%result%efficiency_neg
     read (unit, double_fmt) buffer, self%result%max_abs_f
     read (unit, double_fmt) buffer, self%result%max_abs_f_pos
     read (unit, double_fmt) buffer, self%result%max_abs_f_neg
     read (unit, double_fmt) buffer, self%result%result
     read (unit, double_fmt) buffer, self%result%std
     read (unit, descr_fmt) buffer
     do ch = 1, self%config%n_channel
        read (unit, double_array_fmt) ibuffer, self%weight(ch)
     end do
     read (unit, descr_fmt) buffer
     if (self%config%n_chains > 0) then
        read (unit, descr_fmt) buffer
        do ch = 1, self%config%n_channel
           read (unit, integer_array_fmt) ibuffer, self%chain(ch)
        end do
        read (unit, descr_fmt) buffer
     end if
     read (unit, descr_fmt) buffer
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%read_grid (unit)
     end do
     read (unit, descr_fmt) buffer
     read (unit, descr_fmt) buffer
   end subroutine vamp2_read_grids
 
 @ %def vamp2_read_grids
 @ Read and write grids from an unformatted file.
-<<vamp2: vamp2: TBP>>=
+<<VAMP2: vamp2: TBP>>=
 procedure :: write_binary_grids => vamp2_write_binary_grids
 procedure :: read_binary_grids => vamp2_read_binary_grids
-<<vamp2: procedures>>=
+<<VAMP2: procedures>>=
   subroutine vamp2_write_binary_grids (self, unit)
     class(vamp2_t), intent(in) :: self
     integer, intent(in) :: unit
     integer :: ch
     write (unit)
     write (unit) self%config%n_channel
     write (unit) self%config%n_dim
     write (unit) self%config%n_calls_min_per_channel
     write (unit) self%config%n_calls_threshold
     write (unit) self%config%n_chains
     write (unit) self%config%stratified
     write (unit) self%config%alpha
     write (unit) self%config%beta
     write (unit) self%config%n_bins_max
     write (unit) self%config%iterations
     write (unit) self%config%n_calls
     write (unit) self%result%it_start
     write (unit) self%result%it_num
     write (unit) self%result%samples
     write (unit) self%result%sum_int_wgtd
     write (unit) self%result%sum_wgts
     write (unit) self%result%sum_chi
     write (unit) self%result%chi2
     write (unit) self%result%efficiency
     write (unit) self%result%efficiency_pos
     write (unit) self%result%efficiency_neg
     write (unit) self%result%max_abs_f
     write (unit) self%result%max_abs_f_pos
     write (unit) self%result%max_abs_f_neg
     write (unit) self%result%result
     write (unit) self%result%std
     do ch = 1, self%config%n_channel
        write (unit) ch, self%weight(ch)
     end do
     if (self%config%n_chains > 0) then
        do ch = 1, self%config%n_channel
           write (unit) ch, self%chain(ch)
        end do
     end if
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%write_binary_grid (unit)
     end do
   end subroutine vamp2_write_binary_grids
 
   subroutine vamp2_read_binary_grids (self, unit)
     class(vamp2_t), intent(out) :: self
     integer, intent(in) :: unit
     integer :: ch, ibuffer, jbuffer
     read (unit)
     read (unit) ibuffer
     read (unit) jbuffer
     select type (self)
     type is (vamp2_t)
        self = vamp2_t (n_channel = ibuffer, n_dim = jbuffer)
     end select
     read (unit) self%config%n_calls_min_per_channel
     read (unit) self%config%n_calls_threshold
     read (unit) self%config%n_chains
     read (unit) self%config%stratified
     read (unit) self%config%alpha
     read (unit) self%config%beta
     read (unit) self%config%n_bins_max
     read (unit) self%config%iterations
     read (unit) self%config%n_calls
     read (unit) self%result%it_start
     read (unit) self%result%it_num
     read (unit) self%result%samples
     read (unit) self%result%sum_int_wgtd
     read (unit) self%result%sum_wgts
     read (unit) self%result%sum_chi
     read (unit) self%result%chi2
     read (unit) self%result%efficiency
     read (unit) self%result%efficiency_pos
     read (unit) self%result%efficiency_neg
     read (unit) self%result%max_abs_f
     read (unit) self%result%max_abs_f_pos
     read (unit) self%result%max_abs_f_neg
     read (unit) self%result%result
     read (unit) self%result%std
     do ch = 1, self%config%n_channel
        read (unit) ibuffer, self%weight(ch)
     end do
     if (self%config%n_chains > 0) then
        do ch = 1, self%config%n_channel
           read (unit) ibuffer, self%chain(ch)
        end do
     end if
     do ch = 1, self%config%n_channel
        call self%integrator(ch)%read_binary_grid (unit)
     end do
   end subroutine vamp2_read_binary_grids
 
 @ %def vamp2_write_binary_grids, vamp2_read_binary_grids
 @
 \section{Unit tests}
 \label{sec:unit-tests}
 
 Test module, followed by the corresponding implementation module.
 
 <<[[vamp2_ut.f90]]>>=
 <<File header>>
 
 module vamp2_ut
   use unit_tests
   use vamp2_uti
 
 <<Standard module head>>
 
-<<vamp2: public test>>
+<<VAMP2: public test>>
 
 contains
-<<vamp2: test driver>>
+<<VAMP2: test driver>>
 end module vamp2_ut
 @ %def vamp2_ut
 @
 <<[[vamp2_uti.f90]]>>=
 <<File header>>
 
 module vamp2_uti
 <<Use kinds>>
   use io_units
   use constants, only: pi
   use numeric_utils, only: nearly_equal
   use format_defs, only: FMT_12
   use rng_base
   use rng_stream
   use vegas, only: vegas_func_t, vegas_grid_t, operator(==)
   use vamp2
 
 <<Standard module head>>
 
-<<vamp2: test declaration>>
+<<VAMP2: test declaration>>
 
-<<vamp2: test types>>
+<<VAMP2: test types>>
 
 contains
-<<vamp2: tests>>
+<<VAMP2: tests>>
 end module vamp2_uti
 @ %def vamp2_uti
 @ API: driver for the unit tests below.
-<<vamp2: public test>>=
+<<VAMP2: public test>>=
   public :: vamp2_test
-<<vamp2: test driver>>=
+<<VAMP2: test driver>>=
   subroutine vamp2_test (u, results)
     integer, intent(in) :: u
     type(test_results_t), intent(inout) :: results
-    <<vamp2: execute tests>>
+    <<VAMP2: execute tests>>
   end subroutine vamp2_test
 
 @ %def vamp2_test
 @
 \subsubsection{Test function}
 \label{sec:test-function}
 
 We use the example from the Monte Carlo Examples of the GSL library
 \begin{equation}
   I = \int_{-pi}^{+pi} {dk_x/(2 pi)}  \int_{-pi}^{+pi} {dk_y/(2 pi)}  \int_{-pi}^{+pi} {dk_z/(2 pi)}  1 / (1 - cos(k_x)cos(k_y)cos(k_z)).
 \end{equation}
 The integral is reduced to region (0,0,0) $\rightarrow$ ($\pi$, $\pi$, $\pi$) and multiplied by 8.
-<<vamp2: test types>>=
+<<VAMP2: test types>>=
    type, extends (vamp2_func_t) :: vamp2_test_func_t
      !
    contains
-   <<vamp2: vamp2 test func: TBP>>
+   <<VAMP2: vamp2 test func: TBP>>
 end type vamp2_test_func_t
 
 @ %def vegas_test_func_t
 @
-<<vamp2: vamp2 test func: TBP>>=
+<<VAMP2: vamp2 test func: TBP>>=
   procedure, public :: evaluate_maps => vamp2_test_func_evaluate_maps
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   subroutine vamp2_test_func_evaluate_maps (self, x)
     class(vamp2_test_func_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     self%xi(:, 1) = x
     self%det(1) = 1
     self%valid_x = .true.
   end subroutine vamp2_test_func_evaluate_maps
 
 @ %def vamp2_test_func_evaluate_maps
 @ Evaluate the integrand.
-<<vamp2: vamp2 test func: TBP>>=
+<<VAMP2: vamp2 test func: TBP>>=
   procedure, public :: evaluate_func => vamp2_test_func_evaluate
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   real(default) function vamp2_test_func_evaluate (self, x) result (f)
     class(vamp2_test_func_t), intent(in) :: self
     real(default), dimension(:), intent(in) :: x
     f = 1.0 / (pi**3)
     f = f / ( 1.0 - cos (x(1)) * cos (x(2)) * cos (x(3)))
   end function vamp2_test_func_evaluate
 
 @ %def vamp2_test_func_evaluate
 @ The second test function implements
 \begin{equation}
   f(\vec{x}) = 4 \sin^{2}(\pi x_{1})\sin^{2}(\pi x_{2}) + 2\sin^2(\pi v),
 \end{equation}
 where
 \begin{align}
   x = u^{v} & y = u^{1 - v} \\
   u = xy & v = \frac{1}{2} \left( 1 + \frac{\log(x/y}{\log(xy)} \right).
 \end{align}
 The jacobian is $\frac{\partial (x, y)}{\partial (u, v)}$.
 
-<<vamp2: test types>>=
+<<VAMP2: test types>>=
   type, extends(vamp2_func_t) :: vamp2_test_func_2_t
      !
    contains
-   <<vamp2: vamp2 test func 2: TBP>>
+   <<VAMP2: vamp2 test func 2: TBP>>
   end type vamp2_test_func_2_t
 
 @ %def vamp2_test_func_2_t
 @ Evaluate maps.
-<<vamp2: vamp2 test func 2: TBP>>=
+<<VAMP2: vamp2 test func 2: TBP>>=
   procedure :: evaluate_maps => vamp2_test_func_2_evaluate_maps
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   subroutine vamp2_test_func_2_evaluate_maps (self, x)
     class(vamp2_test_func_2_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     select case (self%current_channel)
     case (1)
        self%xi(:, 1) = x
        self%xi(1, 2) = x(1) * x(2)
        self%xi(2, 2) = 0.5 * ( 1. + log(x(1) / x(2)) / log(x(1) * x(2)))
     case (2)
        self%xi(1, 1) = x(1)**x(2)
        self%xi(2, 1) = x(1)**(1. - x(2))
        self%xi(:, 2) = x
     end select
     self%det(1) = 1.
     self%det(2) = abs (log(self%xi(1, 2)))
     self%valid_x = .true.
   end subroutine vamp2_test_func_2_evaluate_maps
 
 @ %def vamp2_test_func_2_evaluate_maps
 @ Evaluate func.
-<<vamp2: vamp2 test func 2: TBP>>=
+<<VAMP2: vamp2 test func 2: TBP>>=
   procedure :: evaluate_func => vamp2_test_func_2_evaluate_func
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   real(default) function vamp2_test_func_2_evaluate_func (self, x) result (f)
     class(vamp2_test_func_2_t), intent(in) :: self
     real(default), dimension(:), intent(in) :: x
     f = 4. * sin(pi * self%xi(1, 1))**2 * sin(pi * self%xi(2, 1))**2 &
          + 2. * sin(pi * self%xi(2, 2))**2
   end function vamp2_test_func_2_evaluate_func
 
 @ %def vamp2_test_func_2_evaluate_func
 @ The third test function implements
 \begin{equation}
   f(\vec{x}) = 5 x_{1}^4 + 5 (1 - x_{1})^4,
 \end{equation}
 where
 \begin{equation}
   x_1 = u^{1 / 5} \quad \vee \quad  x_1 =  1 - v^{1 / 5}
 \end{equation}
 The jacobians are $\frac{\partial x_1}{\partial u} = \frac{1}{5}
 u^{-\frac{4}{5}}$ and $\frac{\partial x_1}{\partial v} = \frac{1}{5}
 v^{-\frac{4}{5}}$.
-<<vamp2: test types>>=
+<<VAMP2: test types>>=
   type, extends(vamp2_func_t) :: vamp2_test_func_3_t
      !
    contains
-   <<vamp2: vamp2 test func 3: TBP>>
+   <<VAMP2: vamp2 test func 3: TBP>>
   end type vamp2_test_func_3_t
 
 @ %def vamp2_test_func_3_t
 @ Evaluate maps.
-<<vamp2: vamp2 test func 3: TBP>>=
+<<VAMP2: vamp2 test func 3: TBP>>=
   procedure :: evaluate_maps => vamp2_test_func_3_evaluate_maps
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   subroutine vamp2_test_func_3_evaluate_maps (self, x)
     class(vamp2_test_func_3_t), intent(inout) :: self
     real(default), dimension(:), intent(in) :: x
     real(default) :: u, v, xx
     select case (self%current_channel)
     case (1)
        u = x(1)
        xx = u**0.2_default
        v = (1 - xx)**5._default
     case (2)
        v = x(1)
        xx = 1 - v**0.2_default
        u = xx**5._default
     end select
     self%det(1) = 0.2_default * u**(-0.8_default)
     self%det(2) = 0.2_default * v**(-0.8_default)
     self%xi(:, 1) = [u]
     self%xi(:, 2) = [v]
     self%valid_x = .true.
   end subroutine vamp2_test_func_3_evaluate_maps
 
 @ %def vamp2_test_func_3_evaluate_maps
 @ Evaluate func.
-<<vamp2: vamp2 test func 3: TBP>>=
+<<VAMP2: vamp2 test func 3: TBP>>=
   procedure :: evaluate_func => vamp2_test_func_3_evaluate_func
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   real(default) function vamp2_test_func_3_evaluate_func (self, x) result (f)
     class(vamp2_test_func_3_t), intent(in) :: self
     real(default), dimension(:), intent(in) :: x
     real(default) :: xx
     select case (self%current_channel)
     case (1)
        xx = x(1)**0.2_default
     case (2)
        xx = 1 - x(1)**0.2_default
     end select
     f = 5 * xx**4 + 5 * (1 - xx)**4
   end function vamp2_test_func_3_evaluate_func
 
 @ %def vamp2_test_func_3_evaluate_func
 @
 \subsubsection{MC Integrator check}
 \label{sec:mc-integrator-check}
 
 We reproduce the first test case of VEGAS.
 Initialise the VAMP2 MC integrator and call to [[vamp2_init_grid]] for the initialisation of the grid.
-<<vamp2: execute tests>>=
+<<VAMP2: execute tests>>=
   call test (vamp2_1, "vamp2_1", "VAMP2 initialisation and&
        & grid preparation", u, results)
-<<vamp2: test declaration>>=
+<<VAMP2: test declaration>>=
   public :: vamp2_1
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   subroutine vamp2_1 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(3), parameter :: x_lower = 0., &
          x_upper = pi
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vamp2_1"
     write (u, "(A)") "*   Purpose: initialise the VAMP2 MC integrator and the grid"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 1 and n_dim = 3"
     write (u, "(A)")
 
     allocate (vamp2_test_func_t :: func)
     call func%init (n_dim = 3, n_channel = 1)
     mc_integrator = vamp2_t (1, 3)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (10000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u, "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (2000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_1
 
 @ %def vamp2_1
 @ Integrate a function with two-dimensional argument and two channels.
-<<vamp2: execute tests>>=
+<<VAMP2: execute tests>>=
   call test (vamp2_2, "vamp2_2", "VAMP2 intgeration of two-dimensional &
        & function with two channels", u, results)
-<<vamp2: test declaration>>=
+<<VAMP2: test declaration>>=
   public :: vamp2_2
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   subroutine vamp2_2 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(2), parameter :: x_lower = 0., &
          x_upper = 1.
     real(default) :: result, abserr
 
     write (u, "(A)") "* Test output: vamp2_2"
     write (u, "(A)") "*   Purpose:  intgeration of two-dimensional &
        & function with two channels"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 1 and n_dim = 3"
     write (u, "(A)")
 
     allocate (vamp2_test_func_2_t :: func)
     call func%init (n_dim = 2, n_channel = 2)
     mc_integrator = vamp2_t (2, 2)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 10000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (1000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, verbose = .true., result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 2000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (200)
     call mc_integrator%integrate (func, rng, 3, verbose = .true., result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_2
 
 @ %def vamp2_2
 @ Integrate a function with two-dimensional argument and two channels.
-<<vamp2: execute tests>>=
+<<VAMP2: execute tests>>=
   call test (vamp2_3, "vamp2_3", "VAMP2 intgeration of two-dimensional &
        & function with two channels", u, results)
-<<vamp2: test declaration>>=
+<<VAMP2: test declaration>>=
   public :: vamp2_3
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   subroutine vamp2_3 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(2), parameter :: x_lower = 0., &
          x_upper = 1.
     real(default) :: result, abserr
     integer :: unit
 
     write (u, "(A)") "* Test output: vamp2_3"
     write (u, "(A)") "*   Purpose:  intgeration of two-dimensional &
        & function with two channels"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 1 and n_dim = 3"
     write (u, "(A)")
 
     allocate (vamp2_test_func_2_t :: func)
     call func%init (n_dim = 2, n_channel = 2)
     mc_integrator = vamp2_t (2, 2)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 20000"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (20000)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 20000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Write grid to file vamp2_3.grids"
     write (u, "(A)")
 
     unit = free_unit ()
     open (unit, file = "vamp2_3.grids", &
          action = "write", status = "replace")
     call mc_integrator%write_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Read grid from file vamp2_3.grids"
     write (u, "(A)")
 
     call mc_integrator%final ()
 
     unit = free_unit ()
     open (unit, file = "vamp2_3.grids", &
          action = "read", status = "old")
     call mc_integrator%read_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 5000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (5000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_3
 
 @ %def vamp2_3
 @ Integrate a function with two-dimensional argument and two channels.
 Use chained weights, although we average over each weight itself.
-<<vamp2: execute tests>>=
+<<VAMP2: execute tests>>=
   call test (vamp2_4, "vamp2_4", "VAMP2 intgeration of two-dimensional &
        & function with two channels with chains", u, results)
-<<vamp2: test declaration>>=
+<<VAMP2: test declaration>>=
   public :: vamp2_4
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   subroutine vamp2_4 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(2), parameter :: x_lower = 0., &
          x_upper = 1.
     real(default) :: result, abserr
     integer :: unit
 
     write (u, "(A)") "* Test output: vamp2_4"
     write (u, "(A)") "*   Purpose:  intgeration of two-dimensional &
        & function with two channels with chains"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 2 and n_dim = 2"
     write (u, "(A)")
 
     allocate (vamp2_test_func_2_t :: func)
     call func%init (n_dim = 2, n_channel = 2)
     mc_integrator = vamp2_t (2, 2)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 20000 and set chains"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (20000)
     call mc_integrator%set_chain (2, [1, 2])
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 10000 (Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Write grid to file vamp2_4.grids"
     write (u, "(A)")
 
     unit = free_unit ()
     open (unit, file = "vamp2_4.grids", &
          action = "write", status = "replace")
     call mc_integrator%write_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Read grid from file vamp2_4.grids"
     write (u, "(A)")
 
     call mc_integrator%final ()
 
     unit = free_unit ()
     open (unit, file = "vamp2_4.grids", &
          action = "read", status = "old")
     call mc_integrator%read_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 5000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (5000)
     call mc_integrator%integrate (func, rng, 3, result=result, abserr=abserr)
     write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ")") "Result: ", result, " +/- ", abserr
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_4
 
 @ %def vamp2_4
 @
-<<vamp2: execute tests>>=
+<<VAMP2: execute tests>>=
   call test (vamp2_5, "vamp2_5", "VAMP2 intgeration of two-dimensional &
        & function with two channels with equivalences", u, results)
-<<vamp2: test declaration>>=
+<<VAMP2: test declaration>>=
   public :: vamp2_5
-<<vamp2: tests>>=
+<<VAMP2: tests>>=
   subroutine vamp2_5 (u)
     integer, intent(in) :: u
     type(vamp2_t) :: mc_integrator
     class(rng_t), allocatable :: rng
     class(vamp2_func_t), allocatable :: func
     real(default), dimension(1), parameter :: x_lower = 0., &
          x_upper = 1.
     real(default) :: result, abserr
     integer :: unit
     type(vamp2_config_t) :: config
     type(vamp2_equivalences_t) :: eqv
     type(vegas_grid_t), dimension(2) :: grid
 
     write (u, "(A)") "* Test output: vamp2_5"
     write (u, "(A)") "*   Purpose:  intgeration of two-dimensional &
        & function with two channels with equivalences"
     write (u, "(A)")
 
     write (u, "(A)") "* Initialise random number generator (default seed)"
     write (u, "(A)")
 
     allocate (rng_stream_t :: rng)
     call rng%init ()
 
     call rng%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise MC integrator with n_channel = 2 and n_dim = 1"
     write (u, "(A)")
 
     allocate (vamp2_test_func_3_t :: func)
     call func%init (n_dim = 1, n_channel = 2)
     config%equivalences = .true.
     mc_integrator = vamp2_t (n_channel = 2, n_dim = 1)
     call mc_integrator%set_config (config)
     call mc_integrator%write (u)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise grid with n_calls = 20000 and set chains"
     write (u, "(A)")
 
     call mc_integrator%set_limits (x_lower, x_upper)
     call mc_integrator%set_calls (20000)
 
     write (u, "(A)")
     write (u, "(A)") "* Initialise equivalences"
     write (u, "(A)")
 
     eqv = vamp2_equivalences_t (n_eqv = 4, n_channel = 2, n_dim = 1)
     call eqv%set_equivalence &
          (i_eqv = 1, dest = 2, src = 1, perm = [1], mode = [VEQ_IDENTITY])
     call eqv%set_equivalence &
          (i_eqv = 2, dest = 1, src = 2, perm = [1], mode = [VEQ_IDENTITY])
     call eqv%set_equivalence &
          (i_eqv = 3, dest = 1, src = 1, perm = [1], mode = [VEQ_IDENTITY])
     call eqv%set_equivalence &
          (i_eqv = 4, dest = 2, src = 2, perm = [1], mode = [VEQ_IDENTITY])
     call eqv%write (u)
     call mc_integrator%set_equivalences (eqv)
 
     write (u, "(A)")
     write (u, "(A)") &
          "* Integrate with n_it = 3 and n_calls = 10000 (Grid-only Adaptation)"
     write (u, "(A)")
 
     call mc_integrator%integrate (func, rng, 3, &
          adapt_weights = .false., result=result, abserr=abserr)
     if (nearly_equal &
          (result, 2.000_default, rel_smallness = 0.003_default)) then
        write (u,  "(2x,A)") "Result: 2.000 [ok]"
     else
        write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ",A)") &
             "Result: ", result, " +/- ", abserr, " [not ok]"
     end if
 
     write (u, "(A)")
     write (u, "(A)") "* Compare the grids of both channels"
     write (u, "(A)")
 
     grid(1) = mc_integrator%get_grid(channel = 1)
     grid(2) = mc_integrator%get_grid(channel = 2)
 
     write (u, "(2X,A,1X,L1)") "Equal grids =", (grid(1) == grid(2))
 
     write (u, "(A)")
     write (u, "(A)") "* Write grid to file vamp2_5.grids"
     write (u, "(A)")
 
     unit = free_unit ()
     open (unit, file = "vamp2_5.grids", &
          action = "write", status = "replace")
     call mc_integrator%write_grids (unit)
     close (unit)
 
     write (u, "(A)")
     write (u, "(A)") "* Integrate with n_it = 3 and n_calls = 5000 (Precision)"
     write (u, "(A)")
 
     call mc_integrator%set_calls (5000)
     call mc_integrator%integrate (func, rng, 3, adapt_weights = .false., &
          refine_grids = .false., result=result, abserr=abserr)
     if (nearly_equal &
          (result, 2.000_default, rel_smallness = 0.003_default)) then
        write (u,  "(2x,A)") "Result: 2.000 [ok]"
     else
        write (u,  "(2x,A," // FMT_12 // ",A," // FMT_12 // ",A)") &
             "Result: ", result, " +/- ", abserr, " [not ok]"
     end if
 
     write (u, "(A)")
     write (u, "(A)") "* Cleanup"
 
     call mc_integrator%final ()
     call rng%final ()
     deallocate (rng)
   end subroutine vamp2_5
 
 @
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{MPI Request}
 
 \begin{description}
 \item[Request] Direct interface for requests.
-\item[Balancer] Sublidiary interfaces and types for implementation into [[request]].
-\item[Callback] Direct interface for non-blocking communication in callback fashion used in subsidiary in [[request]].
+\item[Balancer] Sublidiary interfaces and types for implementation
+  into [[request]].
+\item[Callback] Direct interface for non-blocking communication in
+  callback fashion used in subsidiary in [[request]].
 \end{description}
 
 \subsection{Request Base}
 
 \begin{description}
-\item[request] Container type for current request, contains handler index and status of request.
-\item[request group cache] Caching for commnunicator groups, tries to reduce the costly calls to [[MPI_CREATE_COMM_GROUP]].
-\item[request base] Base type for request handling, defines procedures and basic request scheme for direct implementation into [[VAMP2]].
+\item[request] Container type for current request, contains handler
+  index and status of request.
+\item[request group cache] Caching for commnunicator groups, tries to
+  reduce the costly calls to [[MPI_CREATE_COMM_GROUP]].
+\item[request base] Base type for request handling, defines procedures
+  and basic request scheme for direct implementation into [[VAMP2]].
 \end{description}
 
-When a data type has an associated communicator field, it always must duplicate the communicator in order to ensure communication encapsulation.
-Only in special case (e.g. outside of object communication is required) the parent communicator should be directly used.
-For example, that is the case for the group cache, as it exports its newly created communicator to the outside of the request library.
+When a data type has an associated communicator field, it always must
+duplicate the communicator in order to ensure communication
+encapsulation. Only in special case (e.g. outside of object
+communication is required) the parent communicator should be directly
+used. For example, that is the case for the group cache, as it exports
+its newly created communicator to the outside of the request library.
 
 <<[[request_base.f90]]>>=
 <<File header>>
 module request_base
   use io_units
 
   use diagnostics
 
   use balancer_base
   use request_callback
   use mpi_f08 !NODEP!
 
-  implicit none
+<<Standard module head>>
+
+<<Request base: public>>
+
+<<Request base: types>>
+
+<<Request base: interfaces>>
 
-  private
+contains
+
+<<Request base: procedures>>
 
+end module request_base
+@ %def request_base
+@
+<<Request base: public>>=
+  public :: request_t
+<<Request base: types>>=
   type :: request_t
      integer :: handler_id = 0
      logical :: terminate = .false.
      logical :: group = .false.
      logical :: group_master = .false.
      logical :: callback = .false.
      type(MPI_COMM) :: comm
   end type request_t
 
+@ %def request_t
+@
+<<Request base: types>>=
   type :: request_group_cache_t
      private
      type(MPI_COMM) :: parent_comm
      type(MPI_GROUP) :: parent_group
      type(MPI_COMM) :: comm
      type(MPI_GROUP) :: group
      integer, dimension(:), allocatable :: rank
    contains
-     procedure :: init => request_group_cache_init
-     procedure :: reset => request_group_cache_reset
-     procedure :: update => request_group_cache_update
-     procedure :: get_comm => request_group_cache_get_comm
-     procedure :: is_master => request_group_cache_is_master
+   <<Request base: group cache: TBP>>
   end type request_group_cache_t
 
-  !> Base type for requesting and back calling.
-  !!
-  !! A short remark on the semantics of point-to-point communication (as its intended use in this implemenation):
-  !! MPI is strict regarding the order of the messages (for blocking communication), as those can not overtake each other, p. 41.
-  !! For non-blocking communication, the standard extends it such that the order depends on "the execution order of the calls that initiate the communication".
-  !! The non-overtaking requirement is then extended to this definition of order.
-  !!
-  !! Color method for tracking communication?
-  !! We restrict each slave to a single callback (at once), which can communicate during the next request computation.
-  !! Before handling the next (current) callback, the former one has to finish (with a wait call on the client-side).
-  !! On the server-side, we only test on the finishing, reporting a communication failure.
-  !!
-  !! Furthermore, the implementation has to take care that the order of the communication calls on the master and slave code always matches!
-  !!
-  !! Therefore, we need to secure the order of communication calls.
-  !! First, we let the master initiate all callback communication *before* polling.
-  !! This fixiates the order.
-  !! Second, we require that the implementation of the polling honors this order.
+@ %def request_group_cache_t
+@
+Base type for requesting and back calling.
+
+A short remark on the semantics of point-to-point communication (as
+its intended use in this implemenation): MPI is strict regarding the
+order of the messages (for blocking communication), as those can not
+overtake each other, p. 41. For non-blocking communication, the
+standard extends it such that the order depends on "the execution
+order of the calls that initiate the communication". The
+non-overtaking requirement is then extended to this definition of
+order. 
+
+Color method for tracking communication? We restrict each slave to a
+single callback (at once), which can communicate during the next
+request computation. Before handling the next (current) callback, the
+former one has to finish (with a wait call on the client-side). On the
+server-side, we only test on the finishing, reporting a communication
+failure. 
+
+Furthermore, the implementation has to take care that the order of
+the communication calls on the master and slave code always matches!
+
+Therefore, we need to secure the order of communication calls. First,
+we let the master initiate all callback communication *before*
+polling. This fixiates the order. Second, we require that the
+implementation of the polling honors this order.
+<<Request base: public>>=
+  public :: request_base_t
+<<Request base: types>>=
   type, abstract :: request_base_t
      type(MPI_COMM) :: comm
      type(MPI_COMM) :: external_comm !! communicator for use outside of request, however, just a duplicate of comm.
      class(balancer_base_t), allocatable :: balancer
      type(request_group_cache_t) :: cache
      type(request_handler_manager_t) :: handler
    contains
-     procedure :: base_init => request_base_init
-     procedure :: base_write => request_base_write
-     procedure(request_base_deferred_write), deferred :: write
-     procedure :: is_master => request_base_is_master
-     procedure(request_base_has_workers), deferred :: has_workers
-     procedure :: get_external_comm => request_base_get_external_comm
-     procedure :: add_balancer => request_base_add_balancer
-     procedure :: add_handler => request_base_add_handler
-     procedure :: reset => request_base_reset
-     procedure :: call_handler => request_base_call_handler
-     procedure :: call_client_handler => request_base_call_client_handler
-     procedure :: await_handler => request_base_await_handler
-     procedure :: barrier => request_base_barrier
-     procedure(request_base_request_workload), deferred :: request_workload
-     procedure(request_base_release_workload), deferred :: release_workload
-     procedure(request_base_handle_and_release_workload), deferred :: handle_and_release_workload
+   <<Request base: base: TBP>>
   end type request_base_t
 
+@ %def request_base_t
+@
+<<Request base: interfaces>>=
   !> The basic idea behind the request mechanism is that each associated worker can request a workload either from a predefined local stack, from local stealing or from a global queue.
   !!
   !! We note that it is not necessary to differentiate between master and worker on this level of abstraction.
   !! Hence, the request interface ignores any notion regarding a possible parallelization concept.
   abstract interface
      subroutine request_base_deferred_write (req, unit)
        import :: request_base_t
        class(request_base_t), intent(in) :: req
        integer, intent(in), optional :: unit
      end subroutine request_base_deferred_write
 
      !> Verify if request object has workers.
      !!
      !! An implementation shall return if there at least two workers, or otherwisely stated,
      !! one master and one slave at least, when both are used as computing ranks.
      logical function request_base_has_workers (req) result (flag)
        import :: request_base_t
        class(request_base_t), intent(in) :: req
      end function request_base_has_workers
+  end interface
 
+@ %def request_base_has_workers
+@
+<<Request base: interfaces>>=
      !> Request workload and returns an request_t object.
      !!
      !! The request_t object has an associated handler_id and provide several ways
      !! to indicate whether the execution is to be terminated, or the request has an associated communictor.
      !! Finally, whether we expect that the handler id will be connected to an callback.
      !!
      !! \param[out] request Request container.
+  abstract interface
      subroutine request_base_request_workload (req, request)
        import :: request_base_t, request_t
        class(request_base_t), intent(inout) :: req
        type(request_t), intent(out) :: request
      end subroutine request_base_request_workload
+  end interface
 
+@ %def request_base_request_workload
+@
+<<Request base: interfaces>>=
      !> Release workload with the information from the request container.
      !!
      !! The release procedure may notify the master about the finishing of the workload associated with the handler_id.
      !! Or, it may just bookkeep whether the workload has finished.
      !! Additionally, if request%callback was true, it could handle the callback (from client side.)
+  abstract interface
      subroutine request_base_release_workload (req, request)
        import :: request_base_t, request_t
        class(request_base_t), intent(inout) :: req
        type(request_t), intent(in) :: request
      end subroutine request_base_release_workload
+  end interface
 
+@ %def request_base_release_workload
+@ 
+<<Request base: interfaces>>=
      !> Handle associated callback and release workload with the information from the request container.
      !!
      !! The procedure must call the associated callback handler using the handler_id.
      !! Remark: The callback manager is quite squishy regarding a missing handler (silent failure).
      !!         The procedure has to take care whether the callback was actually successful.
      !! The further release of the workload can then be deferred to the release_workload procedure.
      !! \param[in] request.
+  abstract interface
      subroutine request_base_handle_and_release_workload (req, request)
        import :: request_base_t, request_t
        class(request_base_t), intent(inout) :: req
        type(request_t), intent(in) :: request
      end subroutine request_base_handle_and_release_workload
   end interface
 
-  public :: request_t, request_base_t
-contains
-
+@ %def request_base_handle_and_release_workload
+@
+<<Request base: group cache: TBP>>=
+  procedure :: init => request_group_cache_init
+<<Request base: procedures>>=
   subroutine request_group_cache_init (cache, comm)
     class(request_group_cache_t), intent(inout) :: cache
     type(MPI_COMM), intent(in) :: comm
     call MPI_COMM_DUP (comm, cache%parent_comm)
     !! Local operation.
     call MPI_COMM_GROUP (cache%parent_comm, cache%parent_group)
     cache%group = MPI_GROUP_EMPTY
     cache%comm = MPI_COMM_NULL
   end subroutine request_group_cache_init
 
+@ %def request_group_cache_init
+@
+<<Request base: group cache: TBP>>=
+  procedure :: reset => request_group_cache_reset
+<<Request base: procedures>>=
   subroutine request_group_cache_reset (cache)
     class(request_group_cache_t), intent(inout) :: cache
     cache%group = MPI_GROUP_EMPTY
     cache%comm = MPI_COMM_NULL
   end subroutine request_group_cache_reset
 
+@ %def request_group_cache_reset
+@
+<<Request base: group cache: TBP>>=
+  procedure :: update => request_group_cache_update
+<<Request base: procedures>>=
   subroutine request_group_cache_update (cache, tag, rank)
     class(request_group_cache_t), intent(inout) :: cache
     integer, intent(in) :: tag
     integer, dimension(:), allocatable, intent(inout) :: rank
     type(MPI_GROUP) :: group
     integer :: result, error
     call move_alloc (rank, cache%rank)
     call MPI_GROUP_INCL (cache%parent_group, size (cache%rank), cache%rank, group)
     call MPI_GROUP_COMPARE (cache%group, group, result)
     if (result /= MPI_IDENT) then
        cache%group = group
        if (cache%comm /= MPI_COMM_NULL) call MPI_COMM_FREE (cache%comm)
        !! Group-local operation. However, time consuming.
        call MPI_COMM_CREATE_GROUP (cache%parent_comm, cache%group, tag, &
             cache%comm, error)
        if (error /= 0) then
           call msg_bug ("Error occured during communicator creation...")
        end if
     ! else
        ! call msg_message ("CACHE UPDATE: GROUPS ARE (NEARLY) IDENTICAL")
     end if
   end subroutine request_group_cache_update
 
+@ %def request_group_cache_update
+@
+<<Request base: group cache: TBP>>=
+  procedure :: get_comm => request_group_cache_get_comm
+<<Request base: procedures>>=
   subroutine request_group_cache_get_comm (cache, comm)
     class(request_group_cache_t), intent(in) :: cache
     type(MPI_COMM), intent(out) :: comm
     comm = cache%comm
   end subroutine request_group_cache_get_comm
 
+@ %def request_group_cache_get_comm
+@
+<<Request base: group cache: TBP>>=
+  procedure :: is_master => request_group_cache_is_master
+<<Request base: procedures>>=
   logical function request_group_cache_is_master (cache) result (flag)
     class(request_group_cache_t), intent(in) :: cache
     integer :: rank, error
     call MPI_COMM_RANK (cache%comm, rank, error)
     if (error /= 0) then
        call msg_bug ("Error: Could not retrieve group rank.")
     end if
     flag = (rank == 0)
   end function request_group_cache_is_master
 
+@ %def request_group_cache_is_master
+@
+<<Request base: base: TBP>>=
+  procedure :: base_init => request_base_init
+<<Request base: procedures>>=
   !! =================================================
   !! request_base_t
   !! =================================================
 
   !> Initialize request base with parent communicator.
   !!
   !! In order to separate the communication between different parts of the request library,
   !! duplicate the parent communicator using MPI_COMM_DUP, also done by cache and handler objects.
   !!
   !! \param[in] comm Parent MPI communicator for overall library.
   subroutine request_base_init (req, comm)
     class(request_base_t), intent(out) :: req
     type(MPI_COMM), intent(in) :: comm
     call MPI_COMM_DUP (comm, req%comm)
     call MPI_COMM_DUP (comm, req%external_comm)
     call req%cache%init (comm)
     call req%handler%init (comm)
   end subroutine request_base_init
 
+@ %def request_base_init
+@
+<<Request base: base: TBP>>=
+  procedure :: base_write => request_base_write
+  procedure(request_base_deferred_write), deferred :: write
+<<Request base: procedures>>=
   subroutine request_base_write (req, unit)
     class(request_base_t), intent(in) :: req
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     if (allocated (req%balancer)) then
        call req%balancer%write (u)
     else
        write (u, "(A)") "[BALANCER]"
        write (u, "(A)") "=> Not allocated"
     end if
     call req%handler%write (u)
   end subroutine request_base_write
 
+@ %def request_base_write
+@
+<<Request base: base: TBP>>=
+  procedure :: is_master => request_base_is_master
+  procedure(request_base_has_workers), deferred :: has_workers
+<<Request base: procedures>>=
   !> Check whether current worker is master rank in object communicator.
   !!
   !! Do not confuse with a group's master !!!
   !! Proof: rank == 0
   logical function request_base_is_master (req) result (flag)
     class(request_base_t), intent(in) :: req
     integer :: rank, ierr
     call MPI_COMM_RANK (req%comm, rank, ierr)
     if (ierr /= 0) then
        write (*, "(A,1X,I0)") "MPI Error: request_base_is_master", ierr
        stop 1
     end if
     flag = (rank == 0)
   end function request_base_is_master
 
+@ %def request_base_is_master
+@
+<<Request base: base: TBP>>=
+  procedure :: get_external_comm => request_base_get_external_comm
+<<Request base: procedures>>=
   !> Provide external communicator.
   !!
   !! The external communicator is just a duplicate of request%comm,
   !! in order to provide the same group of workers to external communication,
   !! however, in a different context, such that communication from outside does not interfere with request.
   subroutine request_base_get_external_comm (req, comm)
     class(request_base_t), intent(in) :: req
     type(MPI_COMM), intent(out) :: comm
     comm = req%external_comm
   end subroutine request_base_get_external_comm
 
+@ %def request_base_get_external_comm
+@
+<<Request base: base: TBP>>=
+  procedure :: add_balancer => request_base_add_balancer
+<<Request base: procedures>>=
   !> Add balancer to request.
   !!
   !! \param[inout] balancer
   subroutine request_base_add_balancer (req, balancer)
     class(request_base_t), intent(inout) :: req
     class(balancer_base_t), allocatable, intent(inout) :: balancer
     if (allocated (req%balancer)) deallocate (req%balancer)
     call move_alloc (balancer, req%balancer)
   end subroutine request_base_add_balancer
 
+@ %def request_base_add_balancer
+@
+<<Request base: base: TBP>>=
+  procedure :: add_handler => request_base_add_handler
+<<Request base: procedures>>=
   !> Add request handler with handler_id.
   !!
   !! \param[in] handler_id
   !! \param[in] handler Pointer to handler object.
   subroutine request_base_add_handler (req, handler_id, handler)
     class(request_base_t), intent(inout) :: req
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer, intent(in) :: handler
     call req%handler%add (handler_id, handler)
   end subroutine request_base_add_handler
 
+@ %def request_base_add_handler
+@
+<<Request base: base: TBP>>=
+  procedure :: reset => request_base_reset
+<<Request base: procedures>>=
   !> Reset request.
   !! Clear handler manager from associated callbacks,
   !! deallocate balancer, iff allocated, and reset communicator cache.
   subroutine request_base_reset (req, deallocate_balancer)
     class(request_base_t), intent(inout) :: req
     logical, intent(in), optional :: deallocate_balancer
     logical :: flag
     flag = .false.; if (present (deallocate_balancer)) &
          flag = deallocate_balancer
     if (flag .and. allocated (req%balancer)) then
        deallocate (req%balancer)
     end if
     call req%handler%clear ()
     call req%cache%reset ()
   end subroutine request_base_reset
 
+@ %def request_base_reset
+@
+<<Request base: base: TBP>>=
+  procedure :: call_handler => request_base_call_handler
+<<Request base: procedures>>=
   !> Call handler for master communication for handler_id.
   !!
   !! \param[in] handler_id The associated key of the callback object.
   !! \param[in] source_rank The rank of the result's source.
   subroutine request_base_call_handler (req, handler_id, source_rank)
     class(request_base_t), intent(inout) :: req
     integer, intent(in) :: handler_id
     integer, intent(in) :: source_rank
     call req%handler%callback (handler_id, source_rank)
   end subroutine request_base_call_handler
 
+@ %def request_base_call_handler
+@
+<<Request base: base: TBP>>=
+  procedure :: call_client_handler => request_base_call_client_handler
+<<Request base: procedures>>=
   !> Call handler for slave communication for handler_id.
   !!
   !! \param[in] handler_id The associated key of the callback object.
   subroutine request_base_call_client_handler (req, handler_id)
     class(request_base_t), intent(inout) :: req
     integer, intent(in) :: handler_id
     call req%handler%client_callback (handler_id, 0)
   end subroutine request_base_call_client_handler
 
+@ %def request_base_call_client_handler
+@
+<<Request base: base: TBP>>=
+  procedure :: await_handler => request_base_await_handler
+<<Request base: procedures>>=
   !> Wait on all handler in request handler manager to finish communication.
   subroutine request_base_await_handler (req)
     class(request_base_t), intent(inout) :: req
     call req%handler%waitall ()
   end subroutine request_base_await_handler
 
+@ %def request_base_await_handler
+@
+<<Request base: base: TBP>>=
+  procedure :: barrier => request_base_barrier
+  procedure(request_base_request_workload), deferred :: request_workload
+  procedure(request_base_release_workload), deferred :: release_workload
+  procedure(request_base_handle_and_release_workload), deferred :: handle_and_release_workload
+<<Request base: procedures>>=
   subroutine request_base_barrier (req)
     class(request_base_t), intent(in) :: req
     integer :: error
     call MPI_BARRIER (req%comm, error)
     if (error /= MPI_SUCCESS) then
        call msg_fatal ("Request: Error occured during MPI_BARRIER synchronisation.")
     end if
   end subroutine request_base_barrier
-end module request_base
-@ %def request_base
-@ \subsection{Request Simple}
+
+@ %def request_base_barrier
+@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Request Simple}
 We implement a local request scheme, without any additional communication, except the callback communication.
 <<[[request_simple.f90]]>>=
 <<File header>>
 module request_simple
   use io_units
   use diagnostics
 
   use array_list
 
   use balancer_base
   use balancer_simple
   use request_base
 
   use mpi_f08 !NODEP!
 
-  implicit none
+<<Standard module head>>
+
+<<Request simple: public>>
+
+<<Request simple: types>>
+
+contains
 
-  private
+<<Request simple: procedures>>
 
+end module request_simple
+@ %def request_simple
+@
+<<Request simple: public>>=
+  public :: request_simple_t
+<<Request simple: types>>=
   type, extends (request_base_t) :: request_simple_t
      integer :: n_workers = 0
      integer :: n_channels = 0
      logical, dimension(:), allocatable :: parallel_grid
    contains
-     procedure :: init => request_simple_init
-     procedure :: write => request_simple_write
-     procedure :: has_workers => request_simple_has_workers
-     procedure :: update => request_simple_update
-     procedure :: get_request_master => request_simple_get_request_master
-     !! deferred.
-     procedure :: request_workload => request_simple_request_workload
-     procedure :: release_workload => request_simple_release_workload
-     procedure :: handle_and_release_workload => request_simple_handle_and_release_workload
+   <<Request simple: simple: TBP>>
   end type request_simple_t
 
-  public :: request_simple_t
-contains
+@ %def request_simple_t
+@
+<<Request simple: simple: TBP>>=
+  procedure :: init => request_simple_init
+<<Request simple: procedures>>=
   subroutine request_simple_init (req, comm, n_channels)
     class(request_simple_t), intent(out) :: req
     type(MPI_COMM), intent(in) :: comm
     integer, intent(in) :: n_channels
     integer :: n_workers
     call req%base_init (comm)
     call MPI_COMM_SIZE (req%comm, req%n_workers)
     req%n_channels = n_channels
     allocate (req%parallel_grid (n_channels), source = .false.)
     call allocate_balancer ()
   contains
     subroutine allocate_balancer ()
       class(balancer_base_t), allocatable :: balancer
       allocate (balancer_simple_t :: balancer)
       select type (balancer)
       type is (balancer_simple_t)
          call balancer%init (n_workers = req%n_workers, n_resources = req%n_channels)
       end select
       call req%add_balancer (balancer)
     end subroutine allocate_balancer
   end subroutine request_simple_init
 
+@ %def request_simple_init
+@
+<<Request simple: simple: TBP>>=
+  procedure :: update => request_simple_update
+<<Request simple: procedures>>=
   !> Update number of channels and parallel grids.
   !!
   !! The simple request object does not utilize the request balancer, as the complexity of the request balancer is not required for the simple approach.
   !! The simple approach assigns each worker several channel by a modular mapping from the set of workers {0, …, N} to the set of channels {1, …, N_c}.
   !! Vetoing on those channel which have a parallel grid (check the definition in vegas.f90), where all workers are assigned.
   !!
   !! w = \phi(c) = (c - 1) mod N, if not P(c), else \forall w to c.
   !!
   !! The information is stored in a dynamic-sized array list, which is filled, reversed and then used in a stack-like manner keeping track of the unassigned channels.
   !! Assigned and finished channels are then moved to the finished stack.
   subroutine request_simple_update (req, parallel_grid)
     class(request_simple_t), intent(inout) :: req
     logical, dimension(:), intent(in) :: parallel_grid
     integer :: me, worker
     call req%reset ()
     call MPI_COMM_RANK (req%comm, me)
     worker = SHIFT_RANK_TO_WORKER(me)
     select type (balancer => req%balancer)
     type is (balancer_simple_t)
        call balancer%update_state (worker, parallel_grid)
     end select
     req%parallel_grid = parallel_grid
   end subroutine request_simple_update
 
+@ %def request_simple_update
+@
+<<Request simple: simple: TBP>>=
+  procedure :: write => request_simple_write
+<<Request simple: procedures>>=
   subroutine request_simple_write (req, unit)
     class(request_simple_t), intent(in) :: req
     integer, intent(in), optional :: unit
     integer :: u, n_size
     u = given_output_unit (unit)
     write (u, "(A)") "[REQUEST_SIMPLE]"
     write (u, "(A,1X,I0)") "N_CHANNELS", req%n_channels
     write (u, "(A,1X,I0)") "N_WORKERS", req%n_workers
     n_size = min (25, req%n_channels)
     write (u, "(A,25(1X,L1))") "PARALLEL_GRID", req%parallel_grid(:n_size)
     call req%base_write (u)
   end subroutine request_simple_write
 
+@ %def request_simple_write
+@
+<<Request simple: simple: TBP>>=
+  procedure :: has_workers => request_simple_has_workers
+<<Request simple: procedures>>=
   logical function request_simple_has_workers (req) result (flag)
     class(request_simple_t), intent(in) :: req
     flag = (req%n_workers > 1)
   end function request_simple_has_workers
 
+@ %def request_simple_has_workers
+@
+<<Request simple: simple: TBP>>=
+  procedure :: get_request_master => request_simple_get_request_master
+<<Request simple: procedures>>=
   integer function request_simple_get_request_master (req, channel) &
        result (rank)
     class(request_simple_t), intent(in) :: req
     integer, intent(in) :: channel
     if (.not. allocated (req%balancer)) then
        call msg_bug ("Error: Balancer is not allocated.")
     end if
     rank = shift_worker_to_rank (req%balancer%get_resource_master (channel))
     !! "Caveat emptor" hits here:
     !! The balancer returns either a valid worker id or (-1) depending on the associated resource (it must be active...)
     !! We have to check whether returned worker index is plausible.
   end function request_simple_get_request_master
 
+@ %def request_simple_get_request_master
+@
+<<Request simple: simple: TBP>>=
+  !! deferred.
+  procedure :: request_workload => request_simple_request_workload
+<<Request simple: procedures>>=
   !> Request workload.
   !!
   !! Depending on parallel_grid, we fill the request object differently.
   !! First, we do not set commnuicator for .not. parallel_grid (group and group master are set to .false., also).
   !! And the callback needs to be executed.
   !! Second, for parallel_grid, we set req%comm to the associated communicator and set group to .true..
   !! However, the overall master has the grid's result, therefore, only the master needs to the callback.
   !! Remark: We can actually intercept the callback for the master to himself; the results are already in the current position.
   subroutine request_simple_request_workload (req, request)
     class(request_simple_t), intent(inout) :: req
     type(request_t), intent(out) :: request
     integer :: rank, worker_id
     call MPI_COMM_RANK (req%comm, rank)
     worker_id = shift_rank_to_worker (rank)
     if (.not. req%balancer%is_pending () &
          .or. .not. req%balancer%is_assignable (worker_id)) then
        request%terminate = .true.
        return
     end if
     call req%balancer%assign_worker (worker_id, request%handler_id)
     associate (channel => request%handler_id)
       if (req%parallel_grid (channel)) then
          request%comm = req%external_comm
          request%group = .true.
          !! The object communicator is master.
          request%group_master = &
               (req%get_request_master (channel) == rank)
          request%callback = req%is_master ()
       else
          request%comm = req%external_comm
          request%group = .false.
          request%group_master = .true.
          request%callback = .true.
       end if
     end associate
   end subroutine request_simple_request_workload
 
+@ %def request_simple_request_workload
+@
+<<Request simple: simple: TBP>>=
+  procedure :: release_workload => request_simple_release_workload
+<<Request simple: procedures>>=
   subroutine request_simple_release_workload (req, request)
     class(request_simple_t), intent(inout) :: req
     type(request_t), intent(in) :: request
     integer :: rank, worker_id
     call MPI_COMM_RANK (req%comm, rank)
     worker_id = shift_rank_to_worker (rank)
     call req%balancer%free_worker (worker_id, request%handler_id)
   end subroutine request_simple_release_workload
 
+@ %def request_simple_release_workload
+@
+<<Request simple: simple: TBP>>=
+  procedure :: handle_and_release_workload => request_simple_handle_and_release_workload
+<<Request simple: procedures>>=
   subroutine request_simple_handle_and_release_workload (req, request)
     class(request_simple_t), intent(inout) :: req
     type(request_t), intent(in) :: request
     call req%call_client_handler (request%handler_id)
     call req%release_workload (request)
   end subroutine request_simple_handle_and_release_workload
-end module request_simple
-@ %def request_simple
-@ \subsection{Request Caller}
-We implement an global queue approach, requiring permanent communication between a governor and the worker.
+
+@ %def request_simple_handle_and_release_workload
+@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Request Caller}
+
+We implement an global queue approach, requiring permanent
+communication between a governor and the worker.
 <<[[request_caller.f90]]>>=
 <<File header>>
 module request_caller
   use kinds, only: default
   use io_units
   use diagnostics
 
   use request_base
   use balancer_base
   use balancer_channel
   use request_state
   use request_callback
 
   use mpi_f08 !NODEP!
 
-  implicit none
+<<Standard module head>>
+
+<<Request caller: public>>
 
-  private
+<<Request caller: types>>
 
+contains
+
+<<Request caller: procedures>>
+
+end module request_caller
+@ %def request_caller
+@
+<<Request caller: public>>=
+  public :: request_caller_t
+<<Request caller: types>>=
   type, extends (request_base_t):: request_caller_t
      private
      integer :: n_channels = 0
      integer :: n_workers = 0
      type(request_state_t) :: state
    contains
-     procedure :: init => request_caller_init
-     procedure :: write => request_caller_write
-     procedure :: has_workers => request_caller_has_workers
-     procedure :: update_balancer => request_caller_update_balancer
-     procedure :: handle_workload => request_caller_handle_workload
-     procedure :: request_workload => request_caller_request_workload
-     procedure :: release_workload => request_caller_release_workload
-     procedure :: handle_and_release_workload => request_caller_handle_and_release_workload
-     procedure :: request_terminate => request_caller_request_terminate
+   <<Request caller: caller: TBP>>
   end type request_caller_t
 
-  public :: request_caller_t
-contains
-
+@ %def request_caller_t
+@
+<<Request caller: caller: TBP>>=
+  procedure :: init => request_caller_init
+<<Request caller: procedures>>=
   subroutine request_caller_init (req, comm, n_channels)
     class(request_caller_t), intent(out) :: req
     type(MPI_COMM), intent(in) :: comm
     integer, intent(in) :: n_channels
     call req%base_init (comm)
     call MPI_COMM_SIZE (req%comm, req%n_workers)
     !! Exclude master rank (0) from set of workers.
     req%n_workers = req%n_workers - 1
     if (.not. req%has_workers ()) then
        call msg_warning ("Must not handle less than 3 ranks in a master/slave global queue.")
        call MPI_ABORT (req%comm, 1)
     end if
     req%n_channels = n_channels
     call req%state%init (comm, req%n_workers)
     if (req%is_master ()) then
        call allocate_balancer ()
     end if
   contains
     subroutine allocate_balancer ()
       class(balancer_base_t), allocatable :: balancer
       allocate (balancer_channel_t :: balancer)
       select type (balancer)
       type is (balancer_channel_t)
          call balancer%init (n_workers = req%n_workers, n_resources = req%n_channels)
       end select
       call req%add_balancer (balancer)
     end subroutine allocate_balancer
   end subroutine request_caller_init
 
+@ %def request_caller_init
+@
+<<Request caller: caller: TBP>>=
+  procedure :: write => request_caller_write
+<<Request caller: procedures>>=
   subroutine request_caller_write (req, unit)
     class(request_caller_t), intent(in) :: req
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     write (u, "(A)") "[REQUEST_CALLER]"
     call req%base_write (u)
     if (req%is_master ()) &
        call req%state%write (u)
   end subroutine request_caller_write
 
+@ %def request_caller_write
+@
+<<Request caller: caller: TBP>>=
+  procedure :: has_workers => request_caller_has_workers
+<<Request caller: procedures>>=
   logical function request_caller_has_workers (req) result (flag)
     class(request_caller_t), intent(in) :: req
     !! n_workers excludes the master rank.
     flag = (req%n_workers > 1)
   end function request_caller_has_workers
 
+@ %def request_caller_has_workers
+@
+<<Request caller: caller: TBP>>=
+  procedure :: update_balancer => request_caller_update_balancer
+<<Request caller: procedures>>=
   subroutine request_caller_update_balancer (req, weight, parallel_grid)
     class(request_caller_t), intent(inout) :: req
     real(default), dimension(:), intent(in) :: weight
     logical, dimension(:), intent(in) :: parallel_grid
     !! \note bug if not allocated?
     if (.not. allocated (req%balancer)) return
     call req%state%reset ()
     call req%reset ()
     select type (balancer => req%balancer)
     type is (balancer_channel_t)
        call balancer%update_state(weight, parallel_grid)
     end select
   end subroutine request_caller_update_balancer
 
+@ %def request_caller_update_balancer
+@
+<<Request caller: caller: TBP>>=
+  procedure :: handle_workload => request_caller_handle_workload
+<<Request caller: procedures>>=
   subroutine request_caller_handle_workload (req)
     class(request_caller_t), intent(inout) :: req
     integer :: handler, tag, source, worker_id
     if (.not. allocated (req%balancer)) then
        call msg_warning ("Request: Error occured, load balancer not allocated.&
           & Terminate all workers.")
        !! We postpone to stop the program here so we can terminate all workers gracefully.
        !! First, we receive their requests, then we overwrite their "original" tag to MPI_TAG_TERMINATE.
        !! Second, we iterate this, until all workers are terminated and return without doing any besides.
     end if
     call req%state%init_request ()
     call req%state%receive_request ()
     do while (.not. req%state%is_terminated ())
        call req%state%await_request ()
        do while (req%state%has_request ())
           call req%state%get_request (source, tag, handler)
           !! Formally differentiate between worker_id and source.
           worker_id = source
           if (.not. allocated (req%balancer)) tag = MPI_TAG_TERMINATE
           select case (tag)
           case (MPI_TAG_REQUEST)
              if (req%balancer%is_assignable (worker_id)) then
                 call req%balancer%assign_worker (worker_id, handler)
                 if (.not. req%balancer%has_resource_group (handler)) then
                    call req%state%update_request (source, MPI_TAG_ASSIGN_SINGLE, handler)
                 else
                    call req%state%update_request (source, MPI_TAG_ASSIGN_GROUP, handler)
                    call provide_request_group (handler, source)
                 end if
              else
                 call req%state%terminate (source)
              end if
           case (MPI_TAG_HANDLER_AND_RELEASE)
              call req%call_handler (handler, source_rank = source)
              call req%balancer%free_worker (worker_id, handler)
           case (MPI_TAG_RELEASE)
              call req%balancer%free_worker (worker_id, handler)
           case (MPI_TAG_TERMINATE)
              call req%state%terminate (source)
           case (MPI_TAG_CLIENT_TERMINATE)
              !! Allow workers to request their own termination.
              call req%state%terminate (source)
           case default
              call msg_warning ()
           end select
        end do
        call req%state%receive_request ()
     end do
     !! If we are here, there should be no leftover communnication.
     !! Hence, we must check whether there is no left-over communication call (from server-side).
     call req%state%free_request ()
   contains
     subroutine provide_request_group (handler_id, dest_rank)
       integer, intent(in) :: handler_id
       integer, intent(in) :: dest_rank
       integer, dimension(:), allocatable :: rank
       !! Rank indices and worker indices are identical, as we skip the master worker deliberately,
       !! we can reuse the worker indices as rank indices.
       call req%balancer%get_resource_group (handler_id, rank)
       call req%state%provide_request_group (dest_rank, rank)
     end subroutine provide_request_group
   end subroutine request_caller_handle_workload
 
+@ %def request_caller_handle_workload
+@
+<<Request caller: caller: TBP>>=
+  procedure :: request_workload => request_caller_request_workload
+<<Request caller: procedures>>=
   subroutine request_caller_request_workload (req, request)
     class(request_caller_t), intent(inout) :: req
     type(request_t), intent(out) :: request
     type(MPI_STATUS) :: status
     call req%state%client_serve (request%handler_id, status)
     request%terminate = .false.
     request%group = .false.
     request%callback = .false.
     request%comm = MPI_COMM_NULL
     select case (status%MPI_TAG)
     case (MPI_TAG_ASSIGN_SINGLE)
        !! Default to req's communicator.
        request%comm = req%external_comm
        request%group_master = .true.
        request%callback = .true.
     case (MPI_TAG_ASSIGN_GROUP)
        request%group = .true.
        call retrieve_request_group (request%handler_id)
        call req%cache%get_comm (request%comm)
        request%group_master = req%cache%is_master ()
        request%callback = req%cache%is_master ()
     case (MPI_TAG_TERMINATE)
        request%terminate = status%MPI_TAG == MPI_TAG_TERMINATE
     end select
   contains
     subroutine retrieve_request_group (handler_id)
       integer, intent(in) :: handler_id
       integer, dimension(:), allocatable :: rank
       !! Here, worker and rank indices are interchangeable.
       call req%state%retrieve_request_group (rank)
       call req%cache%update (handler_id, rank)
     end subroutine retrieve_request_group
   end subroutine request_caller_request_workload
 
+@ %def request_caller_request_workload
+@
+<<Request caller: caller: TBP>>=
+  procedure :: release_workload => request_caller_release_workload
+<<Request caller: procedures>>=
   subroutine request_caller_release_workload (req, request)
     class(request_caller_t), intent(inout) :: req
     type(request_t), intent(in) :: request
     call req%state%client_free (request%handler_id, has_callback = request%group_master)
   end subroutine request_caller_release_workload
 
+@ %def request_caller_release_workload
+@
+<<Request caller: caller: TBP>>=
+  procedure :: handle_and_release_workload => request_caller_handle_and_release_workload
+<<Request caller: procedures>>=
   subroutine request_caller_handle_and_release_workload (req, request)
     class(request_caller_t), intent(inout) :: req
     type(request_t), intent(in) :: request
     if (.not. req%handler%has_handler (request%handler_id)) then
        call msg_bug ("Request: Handler is not registered for this worker.")
     end if
     call req%release_workload (request)
     call req%call_client_handler (request%handler_id)
   end subroutine request_caller_handle_and_release_workload
 
+@ %def request_caller_handle_and_release_workload
+@
+<<Request caller: caller: TBP>>=
+  procedure :: request_terminate => request_caller_request_terminate
+<<Request caller: procedures>>=
   subroutine request_caller_request_terminate (req)
     class(request_caller_t), intent(inout) :: req
     if (req%is_master ()) return
     call req%state%client_terminate ()
   end subroutine request_caller_request_terminate
-end module request_caller
-@ %def request_caller
-@ \subsubsection{Request Caller State}
-We implement the complete communication for [[request_caller]] into a state (or in WK's nomenclature, instance) object.
+
+@ %def request_caller_request_terminate
+@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsubsection{Request Caller State}
+
+We implement the complete communication for [[request_caller]] into a
+state (or in WK's nomenclature, instance) object.
 <<[[request_state.f90]]>>=
 <<File header>>
 module request_state
   use, intrinsic :: iso_fortran_env, only: ERROR_UNIT
   use iterator
   use diagnostics
 
   use mpi_f08 !NODEP!
 
-  implicit none
+<<Standard module head>>
+
+<<Request state: public>>
+
+<<Request state: parameters>>
 
-  private
+<<Request state: types>>
+
+contains
 
+<<Request state: procedures>>
+
+end module request_state
+@ %def request_state
+@
+<<Request state: parameters>>=
   integer, parameter, public :: MPI_EMPTY_HANDLER = 0
 
   integer, parameter, public :: MPI_TAG_NULL = 0, &
        MPI_TAG_REQUEST = 1, &
        MPI_TAG_RELEASE = 2, &
        MPI_TAG_HANDLER_AND_RELEASE = 4, &
        MPI_TAG_TERMINATE = 8, &
        MPI_TAG_CLIENT_TERMINATE = 16, &
        MPI_TAG_ASSIGN_SINGLE = 32, &
        MPI_TAG_ASSIGN_GROUP = 64, &
        MPI_TAG_COMMUNICATOR_GROUP = 128
 
   integer, parameter :: MPI_STATE_ERR = 1
 
+@
+<<Request state: public>>=
+  public :: request_state_t
+<<Request state: types>>=
   type :: request_state_t
      private
      type(MPI_COMM) :: comm
      integer :: n_workers = 0
      integer :: n_workers_done = 0
      !! From MPI-3.1 book
      !! i \in {1, N_workes_done}, max size = N_workers
      type(MPI_Request), dimension(:), allocatable :: request
      type(MPI_Status), dimension(:), allocatable :: status
      integer, dimension(:), allocatable :: indices
      !! i \in {1, N_workers}
      integer, dimension(:), allocatable :: handler
      logical, dimension(:), allocatable :: terminated
      type(iterator_t) :: request_iterator
    contains
-     procedure :: init => request_state_init
-     procedure :: write => request_state_write
-     procedure :: reset => request_state_reset
-     procedure :: is_terminated => request_state_is_terminated
-     procedure, private :: set_terminated => request_state_set_terminated
-     procedure :: terminate => request_state_terminate
-     procedure :: client_terminate => request_state_client_terminate
-     procedure :: init_request => request_state_init_request
-     procedure :: receive_request => request_state_receive_request
-     procedure :: await_request => request_state_await_request
-     procedure :: has_request => request_state_has_request
-     procedure :: get_request => request_state_get_request
-     procedure :: update_request => request_state_update_request
-     procedure :: free_request => request_state_free_request
-     procedure :: provide_request_group => request_state_provide_request_group
-     procedure :: retrieve_request_group => request_state_retrieve_request_group
-     procedure :: client_serve => request_state_client_serve
-     procedure :: client_free => request_state_client_free
+   <<Request state: state: TBP>>
   end type request_state_t
 
-  public :: request_state_t
-contains
+@ %def request_state_t
+@
+<<Request state: state: TBP>>=
+  procedure :: init => request_state_init
+<<Request state: procedures>>=
   subroutine request_state_init (state, comm, n_workers)
     class(request_state_t), intent(out) :: state
     type(MPI_COMM), intent(in) :: comm
     integer, intent(in) :: n_workers
     integer :: rank
     call MPI_COMM_DUP (comm, state%comm)
     state%n_workers = n_workers
     state%n_workers_done = n_workers
     call state%request_iterator%init (1, n_workers)
     allocate (state%request(state%n_workers), source = MPI_REQUEST_NULL)
     allocate (state%status(state%n_workers), source = MPI_STATUS_IGNORE)
     allocate (state%handler(state%n_workers), source = MPI_EMPTY_HANDLER)
     allocate (state%indices(state%n_workers), source = 0)
     allocate (state%terminated(state%n_workers), source = .false.)
     state%indices = [(rank, rank = 1, n_workers)]
   end subroutine request_state_init
 
+@ %def request_state_init
+@
+<<Request state: state: TBP>>=
+  procedure :: write => request_state_write
+<<Request state: procedures>>=
   subroutine request_state_write (state, unit)
     class(request_state_t), intent(in) :: state
     integer, intent(in), optional :: unit
     integer :: u, i
     u = ERROR_UNIT; if (present (unit)) u = unit
     write (u, "(A)") "[REQUEST_STATE]"
     write (u, "(A,1X,I0)") "N_WORKERS", state%n_workers
     write (u, "(A,1X,I0)") "N_WORKERS_DONE", state%n_workers_done
     write (u, "(A)") "RANK | SOURCE | TAG | ERROR | REQUEST_NULL"
     do i = 1, state%n_workers_done
        write (u, "(A,4(1X,I0),1X,L1)") "REQUEST", state%indices(i), &
             state%status(i)%MPI_SOURCE, &
             state%status(i)%MPI_TAG, &
             state%status(i)%MPI_ERROR, &
             (state%request(i) == MPI_REQUEST_NULL)
     end do
     write (u, "(A,999(1X,I0))") "HANDLER", state%handler
     write (u, "(A,999(1X,L1))") "TERMINATED", state%terminated
   end subroutine request_state_write
 
+@ %def request_state_write
+@
+<<Request state: state: TBP>>=
+  procedure :: reset => request_state_reset
+<<Request state: procedures>>=
   subroutine request_state_reset (state)
     class(request_state_t), intent(inout) :: state
     integer :: rank
     state%n_workers_done = state%n_workers
     call state%request_iterator%init (1, state%n_workers)
     state%handler = MPI_EMPTY_HANDLER
     state%indices = [(rank, rank = 1, state%n_workers)]
     state%terminated = .false.
   end subroutine request_state_reset
 
+@ %def request_state_reset
+@
+<<Request state: state: TBP>>=
+  procedure :: is_terminated => request_state_is_terminated
+<<Request state: procedures>>=
   ! pure function request_state_is_terminated (state) result (flag)
   function request_state_is_terminated (state) result (flag)
     class(request_state_t), intent(in) :: state
     logical :: flag
     flag = all (state%terminated)
   end function request_state_is_terminated
 
+@ %def request_state_is_terminated
+@
+<<Request state: state: TBP>>=
+  procedure, private :: set_terminated => request_state_set_terminated
+<<Request state: procedures>>=
   !> Set rank to be terminated (however, do not communicate it).
   !!
   !! This is an EVIL procedure, as it operates only locally on the master
   !! and does not communicate its purpose.
   !! However, in order to allow termination requests from client-side
   !! we need to manipulate the specific rank states.
   subroutine request_state_set_terminated (state, rank)
     class(request_state_t), intent(inout) :: state
     integer, intent(in) :: rank
     state%terminated(rank) = .true.
   end subroutine request_state_set_terminated
 
+@ %def request_state_set_terminated
+@
+<<Request state: state: TBP>>=
+  procedure :: terminate => request_state_terminate
+<<Request state: procedures>>=
   subroutine request_state_terminate (state, rank)
     class(request_state_t), intent(inout) :: state
     integer, intent(in) :: rank
     integer :: error
     call MPI_SEND (MPI_EMPTY_HANDLER, 1, MPI_INTEGER, &
          rank, MPI_TAG_TERMINATE, state%comm, error)
     if (error /= 0) then
        write (msg_buffer, "(A,1X,I3)") "Request: Error occured during terminate, RANK", rank
        call msg_bug ()
     end if
     call state%set_terminated (rank)
   end subroutine request_state_terminate
 
+@ %def request_state_terminate
+@
+<<Request state: state: TBP>>=
+  procedure :: client_terminate => request_state_client_terminate
+<<Request state: procedures>>=
   subroutine request_state_client_terminate (state)
     class(request_state_t), intent(in) :: state
     integer :: error
     call MPI_SEND (MPI_EMPTY_HANDLER, 1, MPI_INTEGER, &
          0, MPI_TAG_CLIENT_TERMINATE, state%comm, error)
     if (error /= 0) then
        write (msg_buffer, "(A,1X,I3)") "Request: Error occured during client-sided terminate"
        call msg_bug ()
     end if
   end subroutine request_state_client_terminate
 
+@ %def request_state_client_terminate
+@
+<<Request state: state: TBP>>=
+  procedure :: init_request => request_state_init_request
+<<Request state: procedures>>=
   !> Init persistent requests.
   !!
   !! Must be called before first receive_request.
   !! Free_request must be called after is_terminated returns true.
   subroutine request_state_init_request (state)
     class(request_state_t), intent(inout) :: state
     integer :: i, rank, error
     do i = 1, state%n_workers_done
        rank = state%indices(i)
        call MPI_RECV_INIT (state%handler(rank), 1, MPI_INTEGER, &
             rank, MPI_ANY_TAG, state%comm, state%request(rank), error)
        if (error /= 0) then
           write (msg_buffer, "(A,2(A,1X,I0))") "Request: Error occured during receive init, &
                & RANK", rank, "HANDLER", state%handler(rank)
           call msg_message ()
           call MPI_ABORT (state%comm, MPI_STATE_ERR)
        end if
     end do
   end subroutine request_state_init_request
 
+@ %def request_state_init_request
+@
+<<Request state: state: TBP>>=
+  procedure :: receive_request => request_state_receive_request
+<<Request state: procedures>>=
   !> Receive requests from non-terminated workers.
   !!
   !! Before receiving new requests, santize arrays of received ranks from terminated ones.
   subroutine request_state_receive_request (state)
     class(request_state_t), intent(inout) :: state
     integer :: i, rank
     integer :: error
     if (state%is_terminated ()) return
     call sanitize_from_terminated_ranks ()
     !! Receive new requests from (still active) workers.
     do i = 1, state%n_workers_done
        rank = state%indices(i)
        call MPI_START (state%request(rank), error)
        if (error /= 0) then
           write (msg_buffer, "(A,2(A,1X,I6))") "Request: Error occured during receive request, &
              & RANK", rank, "HANDLER", state%handler(rank)
           call msg_message ()
           call MPI_ABORT (state%comm, MPI_STATE_ERR)
        end if
     end do
   contains
     subroutine sanitize_from_terminated_ranks ()
       integer :: n_workers_done
       integer, dimension(:), allocatable :: indices
       !! Remove terminated ranks from done workers.
       indices = pack(state%indices(:state%n_workers_done), &
            .not. state%terminated(state%indices(:state%n_workers_done)))
       state%n_workers_done = size (indices)
       state%indices(:state%n_workers_done) = indices
     end subroutine sanitize_from_terminated_ranks
   end subroutine request_state_receive_request
 
+@ %def request_state_receive_request
+@
+<<Request state: state: TBP>>=
+  procedure :: await_request => request_state_await_request
+<<Request state: procedures>>=
   subroutine request_state_await_request (state)
     class(request_state_t), intent(inout) :: state
     integer :: error
     if (state%is_terminated ()) return
     !! We verify that we have active handles associated with request state.
     call MPI_TESTSOME (state%n_workers, state%request, state%n_workers_done, &
          state%indices, state%status, error)
     if (error /= 0) then
        write (ERROR_UNIT, "(A)") "Error occured during await (testing) request..."
        call state%write (ERROR_UNIT)
        call MPI_ABORT (state%comm, MPI_STATE_ERR)
     else if (state%n_workers_done == MPI_UNDEFINED) then
        write (ERROR_UNIT, "(A)") "TEST_WAITSOME returned with MPI_UNDEFINED."
        call state%write (ERROR_UNIT)
        call MPI_ABORT (state%comm, MPI_STATE_ERR)
     end if
     !! Wait a little bit...
     if (state%n_workers_done == 0) then
        !! Proof: REQUEST(i), i \in {1, N_workers}, i is equivalent to rank.
        !! Proof: INDICES(j), STATUS(j), j \in {1, N_workers_done}
        !! Proof: INDICES(j) -> i, injectiv.
        call MPI_WAITSOME (state%n_workers, state%request, state%n_workers_done, &
             state%indices, state%status, error)
        if (error /= 0) then
           write (ERROR_UNIT, "(A)") "Error occured during await request..."
           call state%write (ERROR_UNIT)
           call MPI_ABORT (state%comm, MPI_STATE_ERR)
        end if
     endif
     call state%request_iterator%init (1, state%n_workers_done)
   end subroutine request_state_await_request
 
+@ %def request_state_await_request
+@
+<<Request state: state: TBP>>=
+  procedure :: has_request => request_state_has_request
+<<Request state: procedures>>=
   pure function request_state_has_request (state) result (flag)
     class(request_state_t), intent(in) :: state
     logical :: flag
     flag = state%request_iterator%is_iterable ()
   end function request_state_has_request
 
+@ %def request_state_has_request
+@
+<<Request state: state: TBP>>=
+  procedure :: get_request => request_state_get_request
+<<Request state: procedures>>=
   subroutine request_state_get_request (state, rank, tag, handler)
     class(request_state_t), intent(inout) :: state
     integer, intent(out) :: rank
     integer, intent(out) :: tag
     integer, intent(out) :: handler
     integer :: ndx
     if (.not. state%has_request ()) then
        call msg_bug ("Request: Cannot access missing request.")
     end if
     ndx = state%request_iterator%next ()
     rank = state%indices(ndx)
     if (rank /= state%status(ndx)%MPI_SOURCE) then
        write (msg_buffer, "(A,2(1X,I3))") &
           "Request: RANK and SOURCE mismatch", rank, state%status(ndx)%MPI_SOURCE
        call msg_bug ()
     end if
     tag = state%status(ndx)%MPI_TAG
     handler = state%handler(rank)
   end subroutine request_state_get_request
 
+@ %def request_state_get_request
+@
+<<Request state: state: TBP>>=
+  procedure :: update_request => request_state_update_request
+<<Request state: procedures>>=
   subroutine request_state_update_request (state, rank, tag, handler)
     class(request_state_t), intent(inout) :: state
     integer, intent(in) :: rank
     integer, intent(in) :: tag
     integer, intent(in) :: handler
     integer :: error
     state%handler(rank) = handler
     call MPI_SEND (handler, 1, MPI_INTEGER, &
          rank, tag, state%comm, error)
     if (error /= 0) then
        write (msg_buffer, "(A,3(A,1X,I3))") "Request: Error occured during update, &
           &RANK", rank, "TAG", tag, "HANDLER", handler
        call msg_bug ()
     end if
   end subroutine request_state_update_request
 
+@ %def request_state_update_request
+@
+<<Request state: state: TBP>>=
+  procedure :: free_request => request_state_free_request
+<<Request state: procedures>>=
   subroutine request_state_free_request (state)
     class(request_state_t), intent(inout) :: state
     integer :: rank
     do rank = 1, state%n_workers
        if (state%request(rank) == MPI_REQUEST_NULL) cycle
        call MPI_REQUEST_FREE (state%request(rank))
     end do
   end subroutine request_state_free_request
 
+@ %def request_state_free_request
+@
+<<Request state: state: TBP>>=
+  procedure :: provide_request_group => request_state_provide_request_group
+<<Request state: procedures>>=
   subroutine request_state_provide_request_group (state, dest_rank, worker)
     class(request_state_t), intent(in) :: state
     integer, intent(in) :: dest_rank
     integer, dimension(:), intent(in) :: worker
     call MPI_SEND (worker, size (worker), MPI_INTEGER, &
          dest_rank, MPI_TAG_COMMUNICATOR_GROUP, state%comm)
   end subroutine request_state_provide_request_group
 
+@ %def request_state_provide_request_group
+@
+<<Request state: state: TBP>>=
+  procedure :: retrieve_request_group => request_state_retrieve_request_group
+<<Request state: procedures>>=
   subroutine request_state_retrieve_request_group (state, worker)
     class(request_state_t), intent(inout) :: state
     integer, dimension(:), allocatable, intent(out) :: worker
     type(MPI_STATUS) :: status
     integer :: n_workers
     call MPI_PROBE (0, MPI_TAG_COMMUNICATOR_GROUP, state%comm, status)
     call MPI_GET_COUNT(status, MPI_INTEGER, n_workers)
     allocate (worker (n_workers), source = 0)
     call MPI_RECV (worker, n_workers, MPI_INTEGER, &
          0, MPI_TAG_COMMUNICATOR_GROUP, state%comm, status)
   end subroutine request_state_retrieve_request_group
 
+@ %def request_state_retrieve_request_group
+@
+<<Request state: state: TBP>>=
+  procedure :: client_serve => request_state_client_serve
+<<Request state: procedures>>=
   !> Query for a request (send an request tag, then receive a handler).
   subroutine request_state_client_serve (state, handler_id, status)
     class(request_state_t), intent(in) :: state
     integer, intent(out) :: handler_id
     type(MPI_STATUS), intent(out) :: status
     call MPI_SEND (MPI_EMPTY_HANDLER, 1, MPI_INTEGER, &
          0, MPI_TAG_REQUEST, state%comm)
     call MPI_RECV (handler_id, 1, MPI_INTEGER, &
          0, MPI_ANY_TAG, state%comm, status)
   end subroutine request_state_client_serve
 
+@ %def request_state_client_rate
+@
+<<Request state: state: TBP>>=
+  procedure :: client_free => request_state_client_free
+<<Request state: procedures>>=
   !> Free handler from worker.
   subroutine request_state_client_free (state, handler_id, has_callback)
     class(request_state_t), intent(in) :: state
     integer, intent(in) :: handler_id
     logical, intent(in) :: has_callback
     integer :: tag
     tag = merge (MPI_TAG_HANDLER_AND_RELEASE, MPI_TAG_RELEASE, has_callback)
     call MPI_SEND (handler_id, 1, MPI_INTEGER, &
          0, tag, state%comm)
   end subroutine request_state_client_free
-end module request_state
-@ %def request_state
-@ \subsection{Balancer Base}
+
+@ %def request_state_client_free
+@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Balancer Base}
 \begin{description}
 \item[Balancer Base] Base type for usage for extensions of [[request_base]].
 \end{description}
 <<[[balancer_base.f90]]>>=
 module balancer_base
   use io_units
   use diagnostics
   use array_list
 
-  implicit none
+<<Standard module head>>
+
+<<Balancer base: public>>
+
+<<Balancer base: parameters>>
+
+<<Balancer base: types>>
+
+<<Balancer base: interfaces>>
+
+contains
 
-  private
+<<Balancer base: procedures>>
 
+end module balancer_base
+@ %def balancer_base
+@
+<<Balancer base: parameters>>=
+  integer, parameter, public :: STATE_SINGLE = 1, &
+       STATE_ALL = 2
+
+@
+<<Balancer base: types>>=
   type :: worker_t
      private
      integer :: resource = 0
      integer :: state = 0
      integer :: n_resources = 0
      logical :: assigned = .false.
    contains
-     procedure :: write => worker_write
-     procedure :: is_assigned => worker_is_assigned
-     procedure :: get_resource => worker_get_resource
-     procedure :: get_state => worker_get_state
-     procedure :: add_resource => worker_add_resource
-     procedure :: free => worker_free
+   <<Balancer base: worker: TBP>>
   end type worker_t
 
+@ %def worker_t
+@
+<<Balancer base: types>>=
   type :: resource_t
      private
      integer :: resource_id = 0
      logical :: active = .false.
      integer :: n_assigned_workers = 0
    contains
-     procedure :: write => resource_write
-     procedure :: is_active => resource_is_active
-     procedure :: set_active => resource_set_active
-     procedure :: set_inactive => resource_set_inactive
+   <<Balancer base: resource: TBP>>
   end type resource_t
 
-  integer, parameter, public :: STATE_SINGLE = 1, &
-       STATE_ALL = 2
-
+@ %def resource_t
+@
+<<Balancer base: public>>=
+  public :: resource_state_t
+<<Balancer base: types>>=
   type :: resource_state_t
      integer :: n_workers = 0
      integer :: mode = 0
      type(array_list_t) :: resource_stack
      type(array_list_t) :: finished_stack
    contains
-     procedure :: write => resource_state_write
-     procedure :: init => resource_state_init
-     procedure :: add_resource => resource_state_add_resource
-     procedure :: freeze => resource_state_freeze
-     procedure :: clear => resource_state_clear
-     procedure :: has_resource => resource_state_has_resource
-     procedure :: assign_resource => resource_state_assign_resource
-     procedure :: free_resource => resource_state_free_resource
+   <<Balancer base: resource state: TBP>>
   end type resource_state_t
 
-  !> Dynamic load balancer.
-  !!
-  !! We organize resources and workers in a transparent way using indices.
-  !! These indices replace pointer magic.
-  !!
-  !! The balancer aggregates a dynamic state, however, we allow the state by
-  !! the use of a pointer, to access the static fields of the balancer.
+@ %def resource_state_t
+@
+Dynamic load balancer. We organize resources and workers in a
+transparent way using indices. These indices replace pointer magic.
+The balancer aggregates a dynamic state, however, we allow the state
+by the use of a pointer, to access the static fields of the balancer.
+<<Balancer base: public>>=
+  public :: balancer_base_t
+<<Balancer base: types>>=
   type, abstract :: balancer_base_t
      integer :: n_workers = 0
      integer :: n_resources = 0
      integer :: n_states = 0
      type(worker_t), dimension(:), allocatable :: worker
      type(resource_t), dimension(:), allocatable :: resource
      type(resource_state_t), dimension(:), allocatable :: state
    contains
-     procedure :: base_init => balancer_base_base_init
-     procedure :: base_write => balancer_base_write
-     procedure(balancer_base_deferred_write), deferred :: write
-     procedure :: add_state => balancer_base_add_state
-     procedure, private :: link_worker_and_state => balancer_base_link_worker_and_state
-     procedure :: is_assignable => balancer_base_is_assignable
-     procedure :: is_worker_pending => balancer_base_is_worker_pending
-     procedure :: is_pending => balancer_base_is_pending
-     procedure(balancer_base_has_resource_group), deferred :: has_resource_group
-     procedure(balancer_base_get_resource_group), deferred :: get_resource_group
-     procedure(balancer_base_get_resource_master), deferred :: get_resource_master
-     procedure(balancer_base_assign_worker), deferred :: assign_worker
-     procedure(balancer_base_free_worker), deferred :: free_worker
+   <<Balancer base: base: TBP>>
   end type balancer_base_t
 
+@ %def balancer_base_t
+@
+<<Balancer base: interfaces>>=
   abstract interface
      subroutine balancer_base_deferred_write (balancer, unit)
        import :: balancer_base_t
        class(balancer_base_t), intent(in) :: balancer
        integer, intent(in), optional :: unit
      end subroutine balancer_base_deferred_write
+   end interface
 
+@ %def balancer_base_deferred_write
+@
+<<Balancer base: interfaces>>=
      !> Has resource an associated resource group.
      !!
      !! \note .true. only on an active resource, else .false.
+  abstract interface
      pure logical function balancer_base_has_resource_group (balancer, resource_id) &
           result (flag)
        import :: balancer_base_t
        class(balancer_base_t), intent(in) :: balancer
        integer, intent(in) :: resource_id
      end function balancer_base_has_resource_group
+   end interface
 
+@ %def balancer_base_has_resource_group
+@
+<<Balancer base: interfaces>>=
      !> Get resource group.
      !!
      !! \note Implementation must check against group existence.
      !! \return group (allocated|NOT allocated for (inactive|non-group) resource)
+  abstract interface
      pure subroutine balancer_base_get_resource_group (balancer, resource_id, group)
        import :: balancer_base_t
        class(balancer_base_t), intent(in) :: balancer
        integer, intent(in) :: resource_id
        integer, dimension(:), allocatable, intent(out) :: group
      end subroutine balancer_base_get_resource_group
+   end interface
 
+@ %def balancer_base_get_resource_group
+@
+<<Balancer base: interfaces>>=
      !> Get resource master (worker).
      !!
      !! Return worker as given, however, if extended type is used in a non-local
      !! or in combination with a commnuicative request type, then check on activation status of associated resource.
      !!
      !! \return worker Valid worker index (\in {1, …, N}) only on active resource*, else -1.
+  abstract interface
      pure integer function balancer_base_get_resource_master (balancer, resource_id) &
           result (worker)
        import :: balancer_base_t
        class(balancer_base_t), intent(in) :: balancer
        integer, intent(in) :: resource_id
      end function balancer_base_get_resource_master
+  end interface
 
+@ %def balancer_base_get_resource_master
+@
+<<Balancer base: interfaces>>=
      !> Assign resource to a given worker or retrieve current assigned resource.
      !!
      !! If worker has already a resource assigned, return resource.
      !! If worker has not been assigned a resource, retrieve new resource from state.
      !!
      !! \note Each call must check if a worker is assignable, if not, the procedure must return resource_id = -1.
+  abstract interface
      subroutine balancer_base_assign_worker (balancer, worker_id, resource_id)
        import :: balancer_base_t
        class(balancer_base_t), intent(inout) :: balancer
        integer, intent(in) :: worker_id
        integer, intent(out) :: resource_id
      end subroutine balancer_base_assign_worker
+   end interface
 
-     !> Free assignment of worker.
-     !!
-     !! If worker is not assigned, this procedure is idempotent.
-     !! If worker is assigned, alter state correspondingly.
-     !! \note In order to correctly free a worker from a resource, we have to explicitly keep track of the association status of a worker and a resource.
-     !! This feature is mostly relevant for resources with a worker group.
-     ! The resource may be disassociated from their worker by earlier calls or the former worker may be already assigned to a new resource.
-     !! In the latter case, we are not allowed to free them (as the new resource is still active).
-     !! Therefore, each call must check if a worker and resource are still associated and the resource is still active.
-     !! Only, in this case, disassociating workers and resource is allowed.
+@ %def balancer_base_assign_worker
+@
+Free assignment of worker. If the worker is not assigned, this
+procedure is idempotent. If the worker is assigned, alter state
+correspondingly. In order to correctly free a worker from a resource,
+we have to explicitly keep track of the association status of a worker
+and a resource. This feature is mostly relevant for resources with a
+worker group. The resource may be disassociated from their worker by
+earlier calls or the former worker may be already assigned to a new
+resource. In the latter case, we are not allowed to free them (as the
+new resource is still active). Therefore, each call must check if a
+worker and resource are still associated and the resource is still
+active. Only, in this case, disassociating workers and resource is
+allowed.
+<<Balancer base: interfaces>>=
+  abstract interface
      subroutine balancer_base_free_worker (balancer, worker_id, resource_id)
        import :: balancer_base_t
        class(balancer_base_t), intent(inout) :: balancer
        integer, intent(in) :: worker_id
        integer, intent(in) :: resource_id
      end subroutine balancer_base_free_worker
   end interface
 
-  public :: balancer_base_t, resource_state_t
-  public :: shift_rank_to_worker, shift_worker_to_rank
-contains
+@ %def balancer_base_free_worker
+@
+<<Balancer base: public>>=
+  public :: shift_rank_to_worker
+@ 
+<<Balancer base: procedures>>=
   !> Shift rank index to worker index.
   !! Proof: rank \in {0, …, N - 1}, worker \in {1, …, N}
   elemental integer function shift_rank_to_worker (rank) result (worker)
     integer, intent(in) :: rank
     worker = rank + 1
   end function shift_rank_to_worker
 
+@ %def shift_rank_to_worker
+@
+<<Balancer base: public>>=
+  public :: shift_worker_to_rank
+<<Balancer base: procedures>>=
   !> Shift worker index to rank index.
   !! Proof: rank \in {0, …, N - 1}, worker \in {1, …, N}
   elemental integer function shift_worker_to_rank (worker) result (rank)
     integer, intent(in) :: worker
     rank = worker - 1
   end function shift_worker_to_rank
 
+@ %def shift_worker_to_rank
+@
+<<Balancer base: worker: TBP>>=
+  procedure :: write => worker_write
+<<Balancer base: procedures>>=
   subroutine worker_write (worker, unit)
     class(worker_t), intent(in) :: worker
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     write (u, "(3(A,1X,I3,1X),A,1X,L1)") "RESOURCE", worker%resource, &
          "STATE", worker%state, &
          "N_RESOURCES", worker%n_resources, &
          "ASSIGNED", worker%assigned
   end subroutine worker_write
 
+@ %def worker_write
+@
+<<Balancer base: worker: TBP>>=
+  procedure :: is_assigned => worker_is_assigned
+<<Balancer base: procedures>>=
   elemental logical function worker_is_assigned (worker) result (flag)
     class(worker_t), intent(in) :: worker
     flag = worker%assigned
   end function worker_is_assigned
 
+@ %def worker_is_assigned
+@
+<<Balancer base: worker: TBP>>=
+  procedure :: get_resource => worker_get_resource
+<<Balancer base: procedures>>=
   elemental integer function worker_get_resource (worker) result (resource_id)
     class(worker_t), intent(in) :: worker
     resource_id = worker%resource
   end function worker_get_resource
 
+@ %def worker_get_resource
+@
+<<Balancer base: worker: TBP>>=
+  procedure :: get_state => worker_get_state
+<<Balancer base: procedures>>=
   elemental integer function worker_get_state (worker) result (i_state)
     class(worker_t), intent(in) :: worker
     i_state = worker%state
   end function worker_get_state
 
+@ %def worker_get_state
+@
+<<Balancer base: worker: TBP>>=
+  procedure :: add_resource => worker_add_resource
+<<Balancer base: procedures>>=
   elemental subroutine worker_add_resource (worker, resource_id)
     class(worker_t), intent(inout) :: worker
     integer, intent(in) :: resource_id
     worker%n_resources = worker%n_resources + 1
     worker%assigned = .true.
     worker%resource = resource_id
   end subroutine worker_add_resource
 
+@ %def worker_add_resource
+@
+<<Balancer base: worker: TBP>>=
+  procedure :: free => worker_free
+<<Balancer base: procedures>>=
   elemental subroutine worker_free (worker)
     class(worker_t), intent(inout) :: worker
     worker%assigned = .false.
     worker%resource = 0
   end subroutine worker_free
 
+@ %def worker_free
+@
+<<Balancer base: resource: TBP>>=
+  procedure :: write => resource_write
+<<Balancer base: procedures>>=
   subroutine resource_write (resource, unit)
     class(resource_t), intent(in) :: resource
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     write (u, "(A,1X,I3,1X,A,1X,L1,1X,A,1X,I3)") &
          "RESOURCE_ID", resource%resource_id, &
          "ACTIVE", resource%active, &
          "N_ASSIGNED_WORKERS", resource%n_assigned_workers
   end subroutine resource_write
 
+@ %def resource_write
+@
+<<Balancer base: resource: TBP>>=
+  procedure :: is_active => resource_is_active
+<<Balancer base: procedures>>=
   elemental logical function resource_is_active (resource) result (flag)
     class(resource_t), intent(in) :: resource
     flag = resource%active
   end function resource_is_active
 
+@ %def resource_is_active
+@
+<<Balancer base: resource: TBP>>=
+  procedure :: set_active => resource_set_active
+<<Balancer base: procedures>>=
   subroutine resource_set_active (resource, n_workers)
     class(resource_t), intent(inout) :: resource
     integer, intent(in) :: n_workers
     resource%active = .true.
     resource%n_assigned_workers = n_workers
   end subroutine resource_set_active
 
+@ %def resource_set_active
+@
+<<Balancer base: resource: TBP>>=
+  procedure :: set_inactive => resource_set_inactive
+<<Balancer base: procedures>>=
   subroutine resource_set_inactive (resource)
     class(resource_t), intent(inout) :: resource
     resource%active = .false.
   end subroutine resource_set_inactive
 
+@ %def resource_set_inactive
+@
+<<Balancer base: resource state: TBP>>=
+  procedure :: write => resource_state_write
+<<Balancer base: procedures>>=
   subroutine resource_state_write (state, unit)
     class(resource_state_t), intent(in) :: state
     integer, intent(in), optional :: unit
     integer :: u
     u = given_output_unit (unit)
     write (u, "(A,1X,I0)") "N_STATE_WORKERS", state%n_workers
     select case (state%mode)
     case (STATE_SINGLE)
        write (u, "(A)") "MODE ONE-TO-ONE"
     case (STATE_ALL)
        write (u, "(A)") "MODE ALL-TO-ONE"
     case default
        write (u, "(A)") "UNSUPPORTED MODE"
     end select
     write (u, "(A)") "RESOURCE"
     call state%resource_stack%write (u)
     write (u, "(A)") "FINISHED"
     call state%finished_stack%write (u)
   end subroutine resource_state_write
 
+@ %def resource_state_write
+@
+<<Balancer base: resource state: TBP>>=
+  procedure :: init => resource_state_init
+<<Balancer base: procedures>>=
   subroutine resource_state_init (state, mode, n_workers)
     class(resource_state_t), intent(out) :: state
     integer, intent(in) :: mode
     integer, intent(in) :: n_workers
     state%mode = mode
     state%n_workers = n_workers
     call state%resource_stack%init ()
     call state%finished_stack%init ()
   end subroutine resource_state_init
 
+@ %def resource_state_init
+@
+<<Balancer base: resource state: TBP>>=
+  procedure :: add_resource => resource_state_add_resource
+<<Balancer base: procedures>>=
   subroutine resource_state_add_resource (state, i_resource)
     class(resource_state_t), intent(inout) :: state
     integer, intent(in) :: i_resource
     call state%resource_stack%add (i_resource)
   end subroutine resource_state_add_resource
 
+@ %def resource_state_add_resource
+@
+<<Balancer base: resource state: TBP>>=
+  procedure :: freeze => resource_state_freeze
+<<Balancer base: procedures>>=
   subroutine resource_state_freeze (state)
     class(resource_state_t), intent(inout) :: state
     call state%resource_stack%sort ()
     call state%resource_stack %reverse_order ()
   end subroutine resource_state_freeze
 
+@ %def resource_state_freeze
+@
+<<Balancer base: resource state: TBP>>=
+  procedure :: clear => resource_state_clear
+<<Balancer base: procedures>>=
   subroutine resource_state_clear (state)
     class(resource_state_t), intent(inout) :: state
     call state%resource_stack%clear ()
     call state%finished_stack%clear ()
   end subroutine resource_state_clear
 
+@ %def resource_state_clear
+@
+<<Balancer base: resource state: TBP>>=
+  procedure :: has_resource => resource_state_has_resource
+<<Balancer base: procedures>>=
   elemental function resource_state_has_resource (state) result (flag)
     class(resource_state_t), intent(in) :: state
     logical :: flag
     flag = .not. state%resource_stack%is_empty ()
   end function resource_state_has_resource
 
+@ %def resource_state_has_resoruce
+@
+<<Balancer base: resource state: TBP>>=
+  procedure :: assign_resource => resource_state_assign_resource
+<<Balancer base: procedures>>=
   function resource_state_assign_resource (state) result (i_resource)
     class(resource_state_t), intent(inout) :: state
     integer :: i_resource
     if (state%resource_stack%is_empty ()) then
        i_resource = 0
        call msg_bug ("Error: No leftover resource on stack.")
        return
     end if
     i_resource = state%resource_stack%remove () !! Pop last element from stack.
   end function resource_state_assign_resource
 
+@ %def resource_state_assign_ressource
+@
+<<Balancer base: resource state: TBP>>=
+  procedure :: free_resource => resource_state_free_resource
+<<Balancer base: procedures>>=
   subroutine resource_state_free_resource (state, i_resource)
     class(resource_state_t), intent(inout) :: state
     integer, intent(in) :: i_resource
     if (state%resource_stack%is_element (i_resource)) then
        call msg_bug ("Error: Cannot free resource, still on resource stack.")
     end if
     call state%finished_stack%add (i_resource)
   end subroutine resource_state_free_resource
 
+@ %def resource_state_free_resource
+@
+<<Balancer base: base: TBP>>=
+  procedure :: base_write => balancer_base_write
+  procedure(balancer_base_deferred_write), deferred :: write
+<<Balancer base: procedures>>=
   subroutine balancer_base_write (balancer, unit)
     class(balancer_base_t), intent(in) :: balancer
     integer, intent(in), optional :: unit
     integer :: u, i
     u = given_output_unit (unit)
     write (u, "(A)") "[REQUEST BALANCER]"
     write (u, "(3(A,1X,I3,1X))") "N_WORKERS", balancer%n_workers, &
          "N_RESOURCES", balancer%n_resources, &
          "N_STATES", balancer%n_states
     write (u, "(A)") "[WORKER]"
     do i = 1, balancer%n_workers
        call balancer%worker(i)%write (u)
     end do
     write (u, "(A)") "[RESOURCE]"
     do i = 1, balancer%n_resources
        call balancer%resource(i)%write (u)
     end do
     write (u, "(A)") "[STATES]"
     do i = 1, balancer%n_states
        call balancer%state(i)%write (u)
     end do
   end subroutine balancer_base_write
 
+@ %def balancer_base_write
+@
+<<Balancer base: base: TBP>>=
+  procedure :: base_init => balancer_base_base_init
+<<Balancer base: procedures>>=
   subroutine balancer_base_base_init (balancer, n_workers, n_resources)
     class(balancer_base_t), intent(out) :: balancer
     integer, intent(in) :: n_workers
     integer, intent(in) :: n_resources
     balancer%n_workers = n_workers
     balancer%n_resources = n_resources
     allocate (balancer%worker (n_workers))
     allocate (balancer%resource (n_resources))
     call init_resource ()
   contains
     subroutine init_resource ()
       integer :: i
       do i = 1, balancer%n_resources
          balancer%resource(i)%resource_id = i
       end do
     end subroutine init_resource
   end subroutine balancer_base_base_init
 
+@ %def balancer_base_base_init
+@
+<<Balancer base: base: TBP>>=
+  procedure :: add_state => balancer_base_add_state
+<<Balancer base: procedures>>=
   !> Add partition of workers and link with workers.
   !! We move the allocated partition object into the balancer.
   !! We then assign each partition its respective number of workers in a incrementing linear fashion.
   !! However, we postpone the linking of the resources to the partition, which can be either done dynamically with the balancer state or directly with the appropriate type-bound procedure.
   subroutine balancer_base_add_state (balancer, state)
     class(balancer_base_t), intent(inout) :: balancer
     type(resource_state_t), dimension(:), allocatable, intent(inout) :: state
     balancer%n_states = size (state)
     call move_alloc (state, balancer%state)
     call balancer%link_worker_and_state ()
   end subroutine balancer_base_add_state
 
+@ %def balancer_base_add_state
+@
+<<Balancer base: base: TBP>>=
+  procedure, private :: link_worker_and_state => balancer_base_link_worker_and_state
+<<Balancer base: procedures>>=
   subroutine balancer_base_link_worker_and_state (balancer)
     class(balancer_base_t), intent(inout) :: balancer
     integer :: i, j, i_worker
     if (.not. allocated (balancer%state)) &
          call msg_bug ("Error: resource state not allocated.")
     !! Link worker to a state.
     i_worker = 1
     do i = 1, balancer%n_states
        do j = 1, balancer%state(i)%n_workers
           if (i_worker > balancer%n_workers) then
              call msg_bug ("Balancer: Number of state workers&
                   & exceeding global number of workers")
           end if
           associate (worker => balancer%worker(i_worker))
             worker%state = i
             !! Reset worker attributes.
             worker%resource = 0
             worker%n_resources = 0
             worker%assigned = .false.
           end associate
           i_worker = i_worker + 1
        end do
     end do
   end subroutine balancer_base_link_worker_and_state
 
+@ %def balancer_base_link_worker_and_state
+@
+<<Balancer base: base: TBP>>=
+  procedure :: is_assignable => balancer_base_is_assignable
+<<Balancer base: procedures>>=
   !> Is a worker unassigned, or is a worker assigned, but already assigned to an active resource?
   !!
   !! \note Fence for assign_worker TBP.
   !! The assign_worker TPB must call is_assignable in order to retrieve the worker status.
   !! \param[in] worker_id
   !! \return flag If worker is NOT assigned, return .true. if state has resources.
   !!              If worker is assigned, return .true. if associated resource is active.
   pure logical function balancer_base_is_assignable (balancer, worker_id) result (flag)
     class(balancer_base_t), intent(in) :: balancer
     integer, intent(in) :: worker_id
     integer :: i_state, resource_id
     flag = .false.
     if (balancer%worker(worker_id)%assigned) then
        resource_id = balancer%worker(worker_id)%resource
        flag = balancer%resource(resource_id)%is_active ()
     else
        i_state = balancer%worker(worker_id)%get_state ()
        flag = balancer%state(i_state)%has_resource ()
     end if
   end function balancer_base_is_assignable
 
+@ %def balancer_base_is_assignable
+@
+<<Balancer base: base: TBP>>=
+  procedure :: is_worker_pending => balancer_base_is_worker_pending
+<<Balancer base: procedures>>=
   !> Is a worker still pending.
   !!
   !! Test worker assignment, and if there is a (valid) resource and if it is still active.
   pure logical function balancer_base_is_worker_pending (balancer, worker_id) result (flag)
     class(balancer_base_t), intent(in) :: balancer
     integer, intent(in) :: worker_id
     integer :: resource_id
     flag = balancer%worker(worker_id)%assigned
     if (flag) then
        resource_id = balancer%worker(worker_id)%get_resource ()
        flag = balancer%resource(resource_id)%is_active ()
     end if
   end function balancer_base_is_worker_pending
 
+@ %def balancer_base_is_worker_pending
+@
+<<Balancer base: base: TBP>>=
+  procedure :: is_pending => balancer_base_is_pending
+  procedure(balancer_base_has_resource_group), deferred :: has_resource_group
+  procedure(balancer_base_get_resource_group), deferred :: get_resource_group
+  procedure(balancer_base_get_resource_master), deferred :: get_resource_master
+  procedure(balancer_base_assign_worker), deferred :: assign_worker
+  procedure(balancer_base_free_worker), deferred :: free_worker
+<<Balancer base: procedures>>=
   logical function balancer_base_is_pending (balancer) result (flag)
     class(balancer_base_t), intent(in) :: balancer
     flag = all (balancer%state%has_resource ())
   end function balancer_base_is_pending
 
-end module balancer_base
-@ %def balancer_base
-@ \subsection{Balancer Simple}
+@ %def balancer_base_is_pending
+@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Balancer Simple}
 <<[[balancer_simple.f90]]>>=
 <<File header>>
 module balancer_simple
   use io_units
   use diagnostics
 
   use balancer_base
 
-  implicit none
+<<Standard module head>>
 
-  private
-
-  integer, parameter :: N_BALANCER_SIMPLE_STATES = 1, &
-       BALANCER_SIMPLE_CHANNEL = 1
+<<Balancer simple: parameters>>
 
   !> Simple balancer.
   !!
   !! The simple balancer distribute the channel among the N workers using a modulo prescription.
   !! However, it does assign all workers to a channel capable of grid with parallelizable structure.
   !!
   !! The balancer use local, non-communicative approach; each worker allocates an own instance of the balancer
   !! and fills it with the respecting resources.
   !!
   !! We defer possible checks (or sentinels) to the request module, e.g. such as checking whether all channels are computed globally.
+
+<<Balancer simple: public>>
+
+<<Balancer simple: types>>
+
+contains
+
+<<Balancer simple: procedures>>
+
+end module balancer_simple
+@ %def balancer_simple
+@
+<<Balancer simple: parameters>>=
+  integer, parameter :: N_BALANCER_SIMPLE_STATES = 1, &
+       BALANCER_SIMPLE_CHANNEL = 1
+@ 
+<<Balancer simple: public>>=
+  public :: balancer_simple_t
+<<Balancer simple: types>>=
   type, extends (balancer_base_t) :: balancer_simple_t
      logical, dimension(:), allocatable :: parallel_grid
    contains
-     procedure :: init => balancer_simple_init
-     procedure :: write => balancer_simple_write
-     procedure :: update_state => balancer_simple_update_state
-     procedure :: has_resource_group => balancer_simple_has_resource_group
-     procedure :: get_resource_group => balancer_simple_get_resource_group
-     procedure :: get_resource_master => balancer_simple_get_resource_master
-     procedure, private :: map_channel_to_worker => balancer_simple_map_channel_to_worker
-     procedure :: assign_worker => balancer_simple_assign_worker
-     procedure :: free_worker => balancer_simple_free_worker
+   <<Balancer simple: simple: TBP>>
   end type balancer_simple_t
 
-  public :: balancer_simple_t
-contains
+@ %def balancer_simple_t
+@
+<<Balancer simple: simple: TBP>>=
+  procedure :: init => balancer_simple_init
+<<Balancer simple: procedures>>=
   subroutine balancer_simple_init (balancer, n_workers, n_resources)
     class(balancer_simple_t), intent(out) :: balancer
     integer, intent(in) :: n_workers
     integer, intent(in) :: n_resources
     type(resource_state_t), dimension(:), allocatable :: state
     call balancer%base_init (n_workers, n_resources)
     allocate (balancer%parallel_grid(n_resources), source = .false.)
     allocate (state (N_BALANCER_SIMPLE_STATES))
     call state(BALANCER_SIMPLE_CHANNEL)%init ( &
          mode = STATE_SINGLE, &
          n_workers = balancer%n_workers)
     call balancer%add_state (state)
   end subroutine balancer_simple_init
 
+@ %def balancer_simple_init
+@
+<<Balancer simple: simple: TBP>>=
+  procedure :: write => balancer_simple_write
+<<Balancer simple: procedures>>=
   subroutine balancer_simple_write (balancer, unit)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in), optional :: unit
     integer :: u, n_size
     u = given_output_unit (unit)
     call balancer%base_write (u)
     n_size = min (25, size (balancer%parallel_grid))
     write (u, "(A,25(1X,L1))") "Parallel Grids:", balancer%parallel_grid(:n_size)
   end subroutine balancer_simple_write
 
+@ %def balancer_simple_write
+@
+<<Balancer simple: simple: TBP>>=
+  procedure :: update_state => balancer_simple_update_state
+<<Balancer simple: procedures>>=
   !> Update balancer state.
   !!
   !! Each worker update its own balancer state requiring information about the worker_id.
   subroutine balancer_simple_update_state (balancer, worker_id, parallel_grid)
     class(balancer_simple_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     logical, dimension(:), intent(in) :: parallel_grid
     integer :: ch, worker
     balancer%parallel_grid = parallel_grid
     if (.not. allocated (balancer%state)) then
        call msg_bug ("Error: balancer state not allocated.")
     end if
     associate (state => balancer%state(BALANCER_SIMPLE_CHANNEL))
       call state%clear ()
       do ch = 1, balancer%n_resources
          if (parallel_grid(ch)) then
             call state%add_resource (ch)
          else
             worker = balancer%map_channel_to_worker (ch)
             if (worker == worker_id) then
                call state%add_resource (ch)
             end if
          end if
       end do
       call state%freeze ()
     end associate
   end subroutine balancer_simple_update_state
 
+@ %def balancer_simple_update_state
+@
+<<Balancer simple: simple: TBP>>=
+  procedure :: has_resource_group => balancer_simple_has_resource_group
+<<Balancer simple: procedures>>=
   pure logical function balancer_simple_has_resource_group (balancer, resource_id) &
        result (flag)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
     if (.not. balancer%resource(resource_id)%is_active ()) then
        flag = .false.
        return
     end if
     flag = balancer%parallel_grid (resource_id)
   end function balancer_simple_has_resource_group
 
+@ %def balancer_simple_has_resource_group
+@
+<<Balancer simple: simple: TBP>>=
+  procedure :: get_resource_group => balancer_simple_get_resource_group
+<<Balancer simple: procedures>>=
   pure subroutine balancer_simple_get_resource_group (balancer, resource_id, group)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
     integer, dimension(:), allocatable, intent(out) :: group
     integer :: i
     if (.not. balancer%has_resource_group (resource_id)) return
     group = pack ([(i, i=1,balancer%n_workers)], &
          mask = balancer%worker%get_resource () == resource_id)
   end subroutine balancer_simple_get_resource_group
 
+@ %def balancer_simple_get_resource_group
+@
+<<Balancer simple: simple: TBP>>=
+  procedure :: get_resource_master => balancer_simple_get_resource_master
+<<Balancer simple: procedures>>=
   !> Retrieve resource master holding the results to be communicated.
   !!
   !! As the simple balancer operates locally on each worker, we do not need to check whether a resource is currently active.
   !! All the resources (and their respective order) is fixed at each update of the balancer.
   pure integer function balancer_simple_get_resource_master (balancer, resource_id) &
        result (worker_id)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
     !! \note Do NOT check on resource activation (see interface prescription).
     if (balancer%parallel_grid(resource_id)) then
        worker_id = 1
     else
        worker_id = balancer%map_channel_to_worker (resource_id)
     end if
   end function balancer_simple_get_resource_master
 
+@ %def balancer_simple_get_resource_master
+@
+<<Balancer simple: simple: TBP>>=
+  procedure, private :: map_channel_to_worker => balancer_simple_map_channel_to_worker
+<<Balancer simple: procedures>>=
   pure integer function balancer_simple_map_channel_to_worker (balancer, channel) &
        result (worker)
     class(balancer_simple_t), intent(in) :: balancer
     integer, intent(in) :: channel
     !! Proof: channel \in {1, N_c}, number of workers N, rank \in {0, …, N - 1}
     !! Proof: worker \in {1, …, N}
     !! a = b mod c, then 0 <= a < c
     worker = mod (channel - 1, balancer%n_workers) + 1
   end function balancer_simple_map_channel_to_worker
 
+@ %def balancer_simple_map_channel_to_worker
+@
+<<Balancer simple: simple: TBP>>=
+  procedure :: assign_worker => balancer_simple_assign_worker
+<<Balancer simple: procedures>>=
   subroutine balancer_simple_assign_worker (balancer, worker_id, resource_id)
     class(balancer_simple_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     integer, intent(out) :: resource_id
     integer :: i
     if (.not. balancer%is_assignable (worker_id)) then
        resource_id = -1
        RETURN
     end if
     if (balancer%worker(worker_id)%is_assigned ()) then
        resource_id = balancer%worker(worker_id)%get_resource ()
        RETURN
     end if
     associate (state => balancer%state(BALANCER_SIMPLE_CHANNEL))
       if (.not. state%has_resource ()) then
          resource_id = 0
          return
       end if
       resource_id = state%assign_resource ()
       if (balancer%parallel_grid(resource_id)) then
          do i = 1, balancer%n_workers
             if (balancer%is_worker_pending (i)) then
                write (msg_buffer, "(A,1X,I0,1X,A,1X,I0,1X,A)") "WORKER", i, "ASSIGNED"
                call msg_bug ()
             end if
             call balancer%worker(i)%add_resource (resource_id)
          end do
          call balancer%resource(resource_id)%set_active (n_workers = balancer%n_workers)
       else
          call balancer%worker(worker_id)%add_resource (resource_id)
          call balancer%resource(resource_id)%set_active (n_workers = 1)
       end if
     end associate
   end subroutine balancer_simple_assign_worker
 
+@ %def balancer_simple_assign_worker
+@
+<<Balancer simple: simple: TBP>>=
+  procedure :: free_worker => balancer_simple_free_worker
+<<Balancer simple: procedures>>=
   subroutine balancer_simple_free_worker (balancer, worker_id, resource_id)
     class(balancer_simple_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     integer, intent(in) :: resource_id
     integer :: i
     if (.not. balancer%worker(worker_id)%is_assigned ()) return
     if (.not. resource_id == balancer%worker(worker_id)%get_resource ()) then
        call msg_bug ("Balancer simple: resource and associated resource do not match.")
     end if
     associate (state => balancer%state(BALANCER_SIMPLE_CHANNEL))
       call balancer%resource(resource_id)%set_inactive ()
       call state%free_resource (resource_id)
       if (balancer%parallel_grid(resource_id)) then
          do i = 1, balancer%n_workers
             call balancer%worker(i)%free ()
          end do
       else
          call balancer%worker(worker_id)%free ()
       end if
     end associate
   end subroutine balancer_simple_free_worker
-end module balancer_simple
-@ %def balancer_simple
-@ \subsection{Balancer Channel}
+
+@ %def balancer_simple_free_worker
+@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Balancer Channel}
 <<[[balancer_channel.f90]]>>=
 <<File header>>
 module balancer_channel
   use kinds, only: default
   use io_units
   use diagnostics
 
   use balancer_base
 
-  implicit none
+<<Standard module head>>
+
+<<Balancer channel: public>>
 
-  private
+<<Balancer channel: parameters>>
 
+<<Balancer channel: types>>
+
+contains
+
+<<Balancer channel: procedures>>
+
+end module balancer_channel
+@ %def balancer_channel
+@
+<<Balancer channel: parameters>>=
   real(default), parameter :: BETA = 1.5_default
 
   integer, parameter :: N_BALANCER_CHANNEL_STATE = 2, &
        CHANNEL_STATE = 1, &
        GRID_STATE = 2
 
+@
+<<Balancer channel: public>>=
+  public :: balancer_channel_t
+<<Balancer channel: types>>=
   type, extends(balancer_base_t) :: balancer_channel_t
      private
      integer :: n_parallel_grids = 0
      integer :: n_parallel_channels = 0
      integer :: n_grid_workers = 0
      integer :: n_channel_workers = 0
      logical, dimension(:), allocatable :: parallel_grid
    contains
-     procedure :: init => balancer_channel_init
-     procedure :: write => balancer_channel_write
-     procedure :: update_state => balancer_channel_update_state
-     procedure :: has_resource_group => balancer_channel_has_resource_group
-     procedure :: get_resource_group => balancer_channel_get_resource_group
-     procedure :: get_resource_master => balancer_channel_get_resource_master
-     procedure :: assign_worker => balancer_channel_assign_worker
-     procedure :: free_worker => balancer_channel_free_worker
+   <<Balancer channel: base: TBP>>
   end type balancer_channel_t
 
-  public :: balancer_channel_t
-contains
+@ %def balancer_channel_t
+@
+<<Balancer channel: base: TBP>>=
+  procedure :: init => balancer_channel_init
+<<Balancer channel: procedures>>=
   subroutine balancer_channel_init (balancer, n_workers, n_resources)
     class(balancer_channel_t), intent(out), target :: balancer
     integer, intent(in) :: n_workers
     integer, intent(in) :: n_resources
     call balancer%base_init (n_workers, n_resources)
     allocate (balancer%parallel_grid(n_resources), source = .false.)
   end subroutine balancer_channel_init
 
+@ %def balancer_channel_init
+@
+<<Balancer channel: base: TBP>>=
+  procedure :: write => balancer_channel_write
+<<Balancer channel: procedures>>=
   subroutine balancer_channel_write (balancer, unit)
     class(balancer_channel_t), intent(in) :: balancer
     integer, intent(in), optional :: unit
     integer :: u, n_size
     u = given_output_unit (unit)
     write (u, "(A)") "Channel Balancer."
     write (u, "(A,1X,I3)") "Parallel grids: ", balancer%n_parallel_grids
     write (u, "(A,1X,I3)") "Parallel channels: ", balancer%n_parallel_channels
     write (u, "(A,1X,I3)") "Grid workers: ", balancer%n_grid_workers
     write (u, "(A,1X,I3)") "Channel workers: ", balancer%n_channel_workers
     n_size = min (25, size (balancer%parallel_grid))
     write (u, "(A,25(1X,L1))") "Parallel Grids:", balancer%parallel_grid(:n_size)
     call balancer%base_write (u)
   end subroutine balancer_channel_write
 
+@ %def balancer_channel_write
+@
+<<Balancer channel: base: TBP>>=
+  procedure :: update_state => balancer_channel_update_state
+<<Balancer channel: procedures>>=
   subroutine balancer_channel_update_state (balancer, weight, parallel_grid)
     class(balancer_channel_t), intent(inout) :: balancer
     real(default), dimension(:), intent(in) :: weight
     logical, dimension(:), intent(in) :: parallel_grid
     real(default) :: min_parallel_weight
     balancer%parallel_grid = parallel_grid
     min_parallel_weight = balancer%n_resources**(1._default - 1_default / BETA) &
          / balancer%n_workers**BETA
     balancer%parallel_grid = &
          balancer%parallel_grid .and. (weight >= min_parallel_weight)
     if (balancer%n_resources >= balancer%n_workers) then
        !! Apply full multi-channel parallelization.
        balancer%n_parallel_grids = 0
        balancer%n_parallel_channels = balancer%n_resources
        balancer%parallel_grid = .false.
        balancer%n_grid_workers = 0
        balancer%n_channel_workers = balancer%n_workers
     else
        if (count (balancer%parallel_grid) == balancer%n_resources) then
           !! Apply full VEGAS parallelization.
           balancer%n_parallel_grids = balancer%n_resources
           balancer%n_parallel_channels = 0
           balancer%n_grid_workers = balancer%n_workers
           balancer%n_channel_workers = 0
        else
           !! Apply mixed mode.
           balancer%n_parallel_grids = count (balancer%parallel_grid)
           balancer%n_parallel_channels = balancer%n_resources - balancer%n_parallel_grids
           call compute_mixed_mode (weight)
        end if
     end if
     if(allocated (balancer%state)) then
        deallocate (balancer%state)
     end if
     call allocate_state ()
   contains
     subroutine compute_mixed_mode (weight)
       real(default), dimension(:), intent(in) :: weight
       real(default) :: weight_parallel_grids, &
            ratio_weight, &
            ratio_n_channels, &
            ratio
       !! Apply mixed mode.
       weight_parallel_grids = sum (weight, balancer%parallel_grid)
       !! Overall normalization of weight, \f$\alpha_{\text{grids}} +
       !! \alpha_{\text{channels}} = 1\f$.
       !! \f$\alpha_{\text{channels}} = 1 - \alpha_{\text{grids}}\f$
       ratio_weight = weight_parallel_grids / (1 - weight_parallel_grids)
       ratio_n_channels = real (balancer%n_parallel_grids, default) &
            / (balancer%n_resources - balancer%n_parallel_grids)
       !! The average computation of channel is proportional to its weight.
       !! Reweight number of channels (per mode) by their summed weights.
       !! R = w * N / (w * N + w' * N'); primed refer to parallel grid entities.
       !!   = 1 / (1 + w' / w * N' / N)
       ratio = 1 / (1  + ratio_weight * ratio_n_channels)
       ratio = min (max (ratio, 0.0_default), 1.0_default) !! Safe-guard ratio computation.
       !! In the case of small numbers of workers and a very small ratio,
       !! nint can assign no worker to channel/grid parallelization,
       !! which is still requested by n_parallel_channels/grids.
       !! In that case, we have to enforce: n_worker = n_channel_worker + n_grid_worker
       balancer%n_channel_workers = nint (ratio * balancer%n_workers)
       balancer%n_grid_workers = nint ((1 - ratio) * balancer%n_workers)
       !! In the case of small numbers of workers and a very small ratio,
       !! nint can assign no worker to channel/grid parallelization,
       !! which is still requested by n_parallel_channels/grids.
       !! In that case, we have to enforce: n_worker = n_channel_worker + n_grid_worker
       if (balancer%n_workers >= 2 &
            .AND. (balancer%n_parallel_channels > 0 .and. balancer%n_channel_workers < 1)) then
          balancer%n_channel_workers = 1
          balancer%n_grid_workers = balancer%n_grid_workers - 1
       end if
       !! The grid resources will only be increased to N = 2
       !! if more than 3 workers are present.
       if (balancer%n_workers >= 3 &
            .AND. (balancer%n_parallel_grids > 0 .and. balancer%n_grid_workers < 2)) then
          balancer%n_grid_workers = 2
          balancer%n_channel_workers = balancer%n_channel_workers - 2
       end if
     end subroutine compute_mixed_mode
 
     subroutine allocate_state ()
       type(resource_state_t), dimension(:), allocatable :: state
       integer :: ch
       allocate (state(N_BALANCER_CHANNEL_STATE))
       call state(CHANNEL_STATE)%init ( &
            mode = STATE_SINGLE, &
            n_workers = balancer%n_channel_workers)
       call state(GRID_STATE)%init ( &
            mode = STATE_ALL, &
            n_workers = balancer%n_grid_workers)
       do ch = 1, balancer%n_resources
          if (balancer%parallel_grid(ch)) then
             call state(GRID_STATE)%add_resource (ch)
          else
             call state(CHANNEL_STATE)%add_resource (ch)
          end if
       end do
       call state(CHANNEL_STATE)%freeze ()
       call state(GRID_STATE)%freeze ()
       call balancer%add_state (state)
     end subroutine allocate_state
   end subroutine balancer_channel_update_state
 
+@ %def balancer_channel_update_state
+@
+<<Balancer channel: base: TBP>>=
+  procedure :: has_resource_group => balancer_channel_has_resource_group
+<<Balancer channel: procedures>>=
   pure function balancer_channel_has_resource_group (balancer, resource_id) &
        result (flag)
     class(balancer_channel_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
     logical :: flag
     if (.not. balancer%resource(resource_id)%is_active ()) then
        flag = .false.
        return
     end if
     flag = balancer%parallel_grid(resource_id)
   end function balancer_channel_has_resource_group
 
+@ %def balancer_channel_has_resource_group
+@
+<<Balancer channel: base: TBP>>=
+  procedure :: get_resource_group => balancer_channel_get_resource_group
+<<Balancer channel: procedures>>=
   pure subroutine balancer_channel_get_resource_group (balancer, resource_id, group)
     class(balancer_channel_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
     integer, dimension(:), allocatable, intent(out) :: group
     integer :: i
     if (.not. balancer%has_resource_group (resource_id)) return
     group = pack ([(i, i = 1, balancer%n_workers)], &
          mask = balancer%worker%get_resource () == resource_id)
   end subroutine balancer_channel_get_resource_group
 
+@ %def balancer_channel_get_resource_group
+@
+<<Balancer channel: base: TBP>>=
+  procedure :: get_resource_master => balancer_channel_get_resource_master
+<<Balancer channel: procedures>>=
   pure integer function balancer_channel_get_resource_master (balancer, resource_id) &
        result (worker_id)
     class(balancer_channel_t), intent(in) :: balancer
     integer, intent(in) :: resource_id
     integer :: i
     if (.not. balancer%resource(resource_id)%is_active ()) then
        worker_id = -1
        return
     end if
     !! Linear search.
     !! First element in worker group is defined as master.
     associate (worker => balancer%worker)
       do i = 1, balancer%n_workers
          if (worker(i)%get_resource () == resource_id) then
             worker_id = i
             exit
          end if
       end do
     end associate
   end function balancer_channel_get_resource_master
 
+@ %def balancer_channel_get_resource_master
+@
+<<Balancer channel: base: TBP>>=
+  procedure :: assign_worker => balancer_channel_assign_worker
+<<Balancer channel: procedures>>=
   subroutine balancer_channel_assign_worker (balancer, worker_id, resource_id)
     class(balancer_channel_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     integer, intent(out) :: resource_id
     integer :: i_state
     if (.not. balancer%is_assignable (worker_id)) then
        resource_id = -1
        return
     end if
     if (balancer%worker(worker_id)%is_assigned ()) then
        resource_id = balancer%worker(worker_id)%get_resource ()
        return
     end if
     associate (state => balancer%state)
       i_state = balancer%worker(worker_id)%get_state ()
       if (.not. state(i_state)%has_resource ()) then
          resource_id = 0
          return
       end if
       resource_id = state(i_state)%assign_resource ()
       select case (state(i_state)%mode)
       case (STATE_SINGLE)
          call balancer%worker(worker_id)%add_resource (resource_id)
          call balancer%resource(resource_id)%set_active (n_workers = 1)
       case (STATE_ALL)
          call fill_resource_group (i_state, resource_id)
       end select
     end associate
   contains
     subroutine fill_resource_group (i_state, resource_id)
       integer, intent(in) :: i_state
       integer, intent(in) :: resource_id
       integer :: i, n_workers
       n_workers = 0
       do i = 1, balancer%n_workers
          if (.not. balancer%worker(i)%get_state () == i_state) cycle
          if (balancer%is_worker_pending (i)) then
             write (msg_buffer, "(A,1X,I0,1X,A,1X,I0,1X,A)") "WORKER", i, "STATE", i_state, "ASSIGNED"
             call msg_bug ()
          end if
          call balancer%worker(i)%add_resource (resource_id)
          n_workers = n_workers + 1
       end do
       if (n_workers /= balancer%state(i_state)%n_workers) then
          call msg_bug ("Number of assigned workers unequal to number of state workers.")
       end if
       call balancer%resource(resource_id)%set_active (n_workers = n_workers)
     end subroutine fill_resource_group
   end subroutine balancer_channel_assign_worker
 
-  !> Free worker from associated resource.
-  !!
-  !! Idempotent. Depending on state association, given resource must equal worker's resource (check) for single state.
-  !! For all state, the *current* resource of the worker may differ (grouping behavior!), only in case, that the older resource is inactive, return as idempotent.
-  !! Else, free all worker from resource group.
+@ %def balancer_channel_assign_worker
+@  
+Free worker from associated resource. Idempotent. Depending on state
+association, given resource must equal worker's resource (check) for
+single state. For all state, the *current* resource of the worker may
+differ (grouping behavior!), only in case, that the older resource is
+inactive, return as idempotent. Else, free all worker from resource
+group.
+<<Balancer channel: base: TBP>>=
+  procedure :: free_worker => balancer_channel_free_worker
+<<Balancer channel: procedures>>=
   subroutine balancer_channel_free_worker (balancer, worker_id, resource_id)
     class(balancer_channel_t), intent(inout) :: balancer
     integer, intent(in) :: worker_id
     integer, intent(in) :: resource_id
     integer :: i, i_state
     if (.not. balancer%worker(worker_id)%is_assigned ()) return
     associate (state => balancer%state)
       i_state = balancer%worker(worker_id)%get_state ()
       select case (state(i_state)%mode)
       case (STATE_SINGLE)
          if (.not. resource_id == balancer%worker(worker_id)%get_resource ()) then
             call msg_bug ("Channel balancer: resource and associated resource do not match.")
          end if
          call balancer%resource(resource_id)%set_inactive ()
          call state(i_state)%free_resource (resource_id)
          call balancer%worker(worker_id)%free ()
       case (STATE_ALL)
          if (resource_id /= balancer%worker(worker_id)%get_resource ()) then
             if (balancer%resource(resource_id)%is_active ()) then
                msg_buffer = "Channel balancer: resource is still active,&
                     & but worker is assigned to another resource."
                call msg_bug ()
             else
                !! Special case: Worker was already freed from (now inactive) resource_id (by another call to free_worker),
                !! and in the mean time assigned to a new resource. So, nothing to do.
                return
             end if
          end if
          call balancer%resource(resource_id)%set_inactive ()
          call state(i_state)%free_resource (resource_id)
          do i = 1, balancer%n_workers
             if (.not. balancer%worker(i)%get_state () == i_state) cycle
             call balancer%worker(i)%free ()
          end do
       end select
     end associate
   end subroutine balancer_channel_free_worker
-end module balancer_channel
-@ %def balancer_channel
-@ \subsection{Request Callback}
+
+@ %def balancer_channel_free_worker
+@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Request Callback}
+
 \begin{description}
-\item[request handler] Base type and interface for providing two-sided communication in a callback fashion.
+\item[request handler] Base type and interface for providing two-sided
+  communication in a callback fashion. 
 \item[request handler manager] Manages handler with a binary tree.
 \end{description}
 <<[[request_callback.f90]]>>=
 module request_callback
   use, intrinsic :: iso_fortran_env, only: ERROR_UNIT
   use binary_tree
   use diagnostics
 
   use mpi_f08 !NODEP!
 
-  implicit none
-
-  private
+<<Standard module head>>
 
   !> Request handler.
   !!
   !! A request handler allows to dispatch an object for communication a priori
   !! for a slave and a master.
   !! Typically, each slave register its own request handles, whereas the master
   !! requests all possible requests handles matching those of the slaves.
   !! The requests can then be used later on, e.g. during a computation, a slave
   !! may add a request to the master worker and starts sending an object.
   !! The master worker looks up the appropriate request handler, which then
   !! closes the communication, i.e. by a receive call.
   !! Most important: Only a pointer to the buffer object is stored, therefore, the calling function
   !! has to ensure, that the communication object (buffer) will not be changed
   !! during a request handle (receive or send).
   !!
   !! Remark: The handler allows for a tag offset which allows to uniqify the the communication.
   !!         The problem occurs when multiple callback need to handled simultanously and MPI needs to connect the communication calls accordingly.
   !!         Each message has a tuple of (source, tag, comm) associated, we can uniquify this tuple by a unique tag.
   !!         tag = tag_offset + {1, …, N_requests} where tag_offsets are multiple of N_requests.
   !!         The latter condition should checked by a modulo.
   !! What happens if the communication is out of order (is this a problem? check with standard)?
+
+<<Request callback: public>>
+
+<<Request callback: types>>
+
+<<Request callback: interfaces>>
+
+contains
+
+<<Request callback: procedures>>
+
+end module request_callback
+@ %def request_callback
+@
+<<Request callback: public>>=
+  public :: request_handler_t
+<<Request callback: types>>=
   type, abstract :: request_handler_t
      integer :: n_requests = 0
      integer :: tag_offset = 0
      type(MPI_REQUEST), dimension(:), allocatable :: request
      type(MPI_STATUS), dimension(:), allocatable :: status
      logical :: activated = .false.
      logical :: finished = .false.
    contains
      procedure :: base_write => request_handler_base_write
      procedure(request_handler_write), deferred :: write
      !! \todo{sbrass} implement initialization procedure.
      procedure(request_handler_handle), deferred :: handle
      procedure(request_handler_client_handle), deferred :: client_handle
      procedure :: allocate => request_handler_allocate
      procedure :: get_status => request_handler_get_status
      procedure :: testall => request_handler_testall
      procedure :: waitall => request_handler_waitall
      procedure :: free => request_handler_free
   end type request_handler_t
 
+@ %def request_handler_t
+@
+<<Request callback: public>>=
+  public :: request_handler_manager_t
+<<Request callback: types>>=
   type :: request_handler_manager_t
      private
      type(MPI_COMM) :: comm
      type(binary_tree_t) :: tree
    contains
-     procedure :: init => request_handler_manager_init
-     procedure :: write => request_handler_manager_write
-     procedure :: add => request_handler_manager_add
-     procedure :: clear => request_handler_manager_clear
-     procedure :: has_handler => request_handler_manager_has_handler
-     procedure :: test => request_handler_manager_test
-     procedure :: wait => request_handler_manager_wait
-     procedure :: waitall => request_handler_manager_waitall
-     procedure, private :: fill_status => request_handler_manager_fill_status
-     procedure, private :: handler_at => request_handler_manager_handler_at
-     procedure :: callback => request_handler_manager_callback
-     procedure :: client_callback => request_handler_manager_client_callback
+   <<Request handler: handler manager: TBP>>
   end type request_handler_manager_t
 
+@ %def request_handler_manager_t
+@
+<<Request callback: interfaces>>=
   abstract interface
      subroutine request_handler_write (handler, unit)
        import :: request_handler_t
        class(request_handler_t), intent(in) :: handler
        integer, intent(in), optional :: unit
      end subroutine request_handler_write
+  end interface
 
+@ %def request_handler_write
+@
+<<Request callback: interfaces>>=
+  abstract interface
      !> Handle a request from server side.
      !!
      !! The message tag can be used in order to uniquify the respective messages between master and slave.
      !! E.g. by explicitly setting it, or by using it in a computation i * N_R + j, i \in {1, …, N} and j \in {1, …, N_R}.
      !!
      !! Must set *activated* to .true. when called.
      !! \param[in] source Integer rank of the source in comm.
      !! \param[in] tag Specify the message tag.
      !! \param[in] comm MPI communicator.
      subroutine request_handler_handle (handler, source_rank, comm)
        import :: request_handler_t, MPI_COMM
        class(request_handler_t), intent(inout) :: handler
        integer, intent(in) :: source_rank
        type(MPI_COMM), intent(in) :: comm
      end subroutine request_handler_handle
+  end interface
 
+@ %def request_handler_handle
+@
+<<Request callback: interfaces>>=
+  abstract interface
      !> Handle a request from client side.
      !!
      !! Must set *activated* to .true. when called.
      !! \param[in] rank Integer of the receiver in comm.
      !! \param[in] tag Specify the message tag.
      !! \param[in] comm MPI communicator.
      subroutine request_handler_client_handle (handler, dest_rank, comm)
        import :: request_handler_t, MPI_COMM
        class(request_handler_t), intent(inout) :: handler
        integer, intent(in) :: dest_rank
        type(MPI_COMM), intent(in) :: comm
      end subroutine request_handler_client_handle
   end interface
 
-  public :: request_handler_t, request_handler_manager_t
-contains
+@ %def request_handler_client_handle
+@
+<<Request callback: procedures>>=
   !!
   !! Request handler.
   !!
   subroutine request_handler_base_write (handler, unit)
     class(request_handler_t), intent(in) :: handler
     integer, intent(in), optional :: unit
     integer :: u, i
     u = ERROR_UNIT; if (present (unit)) u = unit
     write (u, "(A,1X,I0)") "N_REQUESTS", handler%n_requests
     write (u, "(A,1X,I0)") "TAG_OFFSET", handler%tag_offset
     write (u, "(A,1X,L1)") "FINISHED", handler%finished
     write (u, "(A,1X,L1)") "ACTIVATED", handler%activated
     write (u, "(A)") "I | SOURCE | TAG | ERROR | REQUEST_NULL"
     do i = 1, handler%n_requests
        write (u, "(A,4(1X,I0),1X,L1)") "REQUEST", i, &
             handler%status(i)%MPI_SOURCE, &
             handler%status(i)%MPI_TAG, &
             handler%status(i)%MPI_ERROR, &
             (handler%request(i) == MPI_REQUEST_NULL)
     end do
   end subroutine request_handler_base_write
 
+@ %def request_handler_base_write
+@
+<<Request callback: procedures>>=
   !> Allocate MPI request and status object.
   !!
   !! Must be called during or after object-initialization.
   !!
   !! \param[inout] handler Handler must be intent inout, as the calling function may already manipulated the extended object.
   !! \param[in] n_requests Number of MPI requests the objects needs to be able to handle.
   !! \param[in] tag_offset First tag to be used, all other must follow in an increasing manner until tag_offset + (N_r + 1).
   !! Proof: tag \in {tag_offset, tag_offset + n_requests}.
   subroutine request_handler_allocate (handler, n_requests, tag_offset)
     class(request_handler_t), intent(inout) :: handler
     integer, intent(in) :: n_requests
     integer, intent(in) :: tag_offset
     allocate (handler%request(n_requests), source = MPI_REQUEST_NULL)
     allocate (handler%status(n_requests))
     handler%n_requests = n_requests
     if (mod (tag_offset, n_requests) /= 0) &
          call msg_bug ("Error during handler allocate, tag_offset is not a multiple of n_requests.")
     !! What is the max.-allowed MPI_TAG?
     handler%tag_offset = tag_offset
     handler%activated = .false.
     handler%finished = .false.
   end subroutine request_handler_allocate
 
+@ %def request_handler_allocate
+@
+<<Request callback: procedures>>=
   !> Get status from request objects in a non-destructive way.
   subroutine request_handler_get_status (handler)
     class(request_handler_t), intent(inout) :: handler
     integer :: i
     logical :: flag
     if (.not. handler%activated) return
     handler%finished = .true.
     do i = 1, handler%n_requests
        call MPI_REQUEST_GET_STATUS (handler%request(i), flag, &
             handler%status(i))
        handler%finished = handler%finished .and. flag
     end do
   end subroutine request_handler_get_status
 
+@ %def request_handler_get_status
+@
+<<Request callback: procedures>>=
   !> Call MPI_WATIALL and raise finished flag.
   subroutine request_handler_waitall (handler)
     class(request_handler_t), intent(inout) :: handler
     integer :: error
     if (.not. handler%activated .or. handler%finished) return
     call MPI_WAITALL (handler%n_requests, handler%request, handler%status, error)
     if (error /= 0) then
        call msg_bug ("Request: Error occured during waitall on handler.")
     end if
     handler%finished = .true.
   end subroutine request_handler_waitall
 
+@ %def request_handler_waitall
+@
+<<Request callback: procedures>>=
   logical function request_handler_testall (handler) result (flag)
     class(request_handler_t), intent(inout) :: handler
     integer :: error
     if (.not. handler%activated .or. .not. handler%finished) then
        call MPI_TESTALL (handler%n_requests, handler%request, handler%finished, &
             handler%status, error)
        ! call print_status ()
        if (error /= 0) then
           call msg_bug ("Request: Error occured during testall on handler.")
        end if
     end if
     flag = handler%finished
   contains
     subroutine print_status ()
       integer :: i
       do i = 1, handler%n_requests
          associate (status => handler%status(i))
            write (ERROR_UNIT, *) status%MPI_SOURCE, status%MPI_TAG, status%MPI_ERROR
          end associate
       end do
     end subroutine print_status
   end function request_handler_testall
 
+@ %def request_handler_testall
+@
+<<Request callback: procedures>>=
   subroutine request_handler_free (handler)
     class(request_handler_t), intent(inout) :: handler
     integer :: i, error
     do i = 1, handler%n_requests
        if (handler%request(i) == MPI_REQUEST_NULL) cycle
        call MPI_REQUEST_FREE (handler%request(i), error)
        if (error /= 0) then
           call msg_bug ("Request: Error occured during free request on handler.")
        end if
     end do
   end subroutine request_handler_free
 
+@ %def request_handler_free
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: init => request_handler_manager_init
+<<Request callback: procedures>>=
   !!
   !! Request handler manager.
   !!
   subroutine request_handler_manager_init (rhm, comm)
     class(request_handler_manager_t), intent(out) :: rhm
     type(MPI_COMM), intent(in) :: comm
     call MPI_COMM_DUP (comm, rhm%comm)
   end subroutine request_handler_manager_init
 
+@ %def request_handler_manager_init 
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: write => request_handler_manager_write
+<<Request callback: procedures>>=
   subroutine request_handler_manager_write (rhm, unit)
     class(request_handler_manager_t), intent(in) :: rhm
     integer, intent(in), optional :: unit
     integer :: u
     u = ERROR_UNIT; if (present (unit)) u = unit
     write (u, "(A)") "[REQUEST_CALLBACK_MANAGER]"
     call rhm%tree%write (u)
   end subroutine request_handler_manager_write
 
+@ %def request_handler_manager_write
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: add => request_handler_manager_add
+<<Request callback: procedures>>=
   subroutine request_handler_manager_add (rhm, handler_id, handler)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer, intent(in) :: handler
     class(*), pointer :: obj
     obj => handler
     call rhm%tree%insert (handler_id, obj)
   end subroutine request_handler_manager_add
 
+@ %def request_handler_manager_add
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: clear => request_handler_manager_clear
+<<Request callback: procedures>>=
   subroutine request_handler_manager_clear (rhm)
     class(request_handler_manager_t), intent(inout) :: rhm
     call rhm%tree%clear ()
   end subroutine request_handler_manager_clear
 
+@ %def request_handler_manager_clear
+@
+<<Request handler: handler manager: TBP>>=
+  procedure, private :: fill_status => request_handler_manager_fill_status
+<<Request callback: procedures>>=
   !> Get status (in a non-destructive way) for all associated handler.
   subroutine request_handler_manager_fill_status (rhm)
     class(request_handler_manager_t), intent(inout) :: rhm
     type(binary_tree_iterator_t) :: iterator
     integer :: handler_id
     class(request_handler_t), pointer :: handler
     call iterator%init (rhm%tree)
     do while (iterator%is_iterable ())
        call iterator%next (handler_id)
        call rhm%handler_at (handler_id, handler)
        call handler%get_status ()
     end do
   end subroutine request_handler_manager_fill_status
 
+@ %def request_handler_manager_fill_status
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: test => request_handler_manager_test
+<<Request callback: procedures>>=
   logical function request_handler_manager_test (rhm, handler_id) result (flag)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer :: handler
     call rhm%handler_at (handler_id, handler)
     flag = handler%testall ()
   end function request_handler_manager_test
 
+@ %def request_handler_manager_test
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: wait => request_handler_manager_wait
+<<Request callback: procedures>>=
   subroutine request_handler_manager_wait (rhm, handler_id)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer :: handler
     call rhm%handler_at (handler_id, handler)
     call handler%waitall ()
   end subroutine request_handler_manager_wait
 
+@ %def request_handler_manager_wait
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: waitall => request_handler_manager_waitall
+<<Request callback: procedures>>=
   subroutine request_handler_manager_waitall (rhm)
     class(request_handler_manager_t), intent(inout) :: rhm
     type(binary_tree_iterator_t) :: iterator
     integer :: handler_id
     call iterator%init (rhm%tree)
     do while (iterator%is_iterable ())
        call iterator%next (handler_id)
        !! Test handler (destructive test on request handler).
        if (.not. rhm%test (handler_id)) &
             call rhm%wait (handler_id)
     end do
   end subroutine request_handler_manager_waitall
 
+@ %def request_handler_manager_waitall
+@
+<<Request handler: handler manager: TBP>>=
+  procedure, private :: handler_at => request_handler_manager_handler_at
+<<Request callback: procedures>>=
   subroutine request_handler_manager_handler_at (rhm, handler_id, handler)
     class(request_handler_manager_t), intent(in) :: rhm
     integer, intent(in) :: handler_id
     class(request_handler_t), pointer, intent(out) :: handler
     class(*), pointer :: obj
     call rhm%tree%search (handler_id, obj)
     select type (obj)
     class is (request_handler_t)
        handler => obj
     class default
        call msg_bug ("Object is not derived from request_handler_t.")
     end select
   end subroutine request_handler_manager_handler_at
 
+@ %def request_handler_manager_handler_at
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: has_handler => request_handler_manager_has_handler
+<<Request callback: procedures>>=
   function request_handler_manager_has_handler (rhm, handler_id) result (flag)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     logical :: flag
     flag = rhm%tree%has_key (handler_id)
   end function request_handler_manager_has_handler
 
+@ %def request_handler_manager_has_handler
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: callback => request_handler_manager_callback
+<<Request callback: procedures>>=
   !> Call server-sided procedure of callback with handler_id.
   !!
   !! \param[in] handler_id
   !! \param[in] source
   subroutine request_handler_manager_callback (rhm, handler_id, source_rank)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     integer, intent(in) :: source_rank
     class(request_handler_t), pointer :: handler
     if (.not. rhm%tree%has_key (handler_id)) return
     call rhm%handler_at (handler_id, handler)
     call handler%handle (source_rank = source_rank, comm = rhm%comm)
   end subroutine request_handler_manager_callback
 
+@ %def request_handler_manager_callback
+@
+<<Request handler: handler manager: TBP>>=
+  procedure :: client_callback => request_handler_manager_client_callback
+<<Request callback: procedures>>=
   !> Call client-sided procedure of callback with handler_id.
   !!
   !! \param[in] handler_id
   !! \param[in] source Destination rank.
   subroutine request_handler_manager_client_callback (rhm, handler_id, dest_rank)
     class(request_handler_manager_t), intent(inout) :: rhm
     integer, intent(in) :: handler_id
     integer, intent(in) :: dest_rank
     class(request_handler_t), pointer :: handler
     if (.not. rhm%tree%has_key (handler_id)) return
     call rhm%handler_at (handler_id, handler)
     call handler%client_handle (dest_rank = dest_rank, comm = rhm%comm)
   end subroutine request_handler_manager_client_callback
-end module request_callback
-@ %def request_callback
+
+@ %def request_handlder_manager_client_callback
+@
Index: trunk/src/vegas/Makefile.am
===================================================================
--- trunk/src/vegas/Makefile.am	(revision 8796)
+++ trunk/src/vegas/Makefile.am	(revision 8797)
@@ -1,282 +1,264 @@
 ## Makefile.am -- Makefile for WHIZARD
 ##
 ## Process this file with automake to produce Makefile.in
 #
 # Copyright (C) 1999-2022 by
 #     Wolfgang Kilian <kilian@physik.uni-siegen.de>
 #     Thorsten Ohl <ohl@physik.uni-wuerzburg.de>
 #     Juergen Reuter <juergen.reuter@desy.de>
 #     with contributions from
 #     cf. main AUTHORS file
 #
 # WHIZARD is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2, or (at your option)
 # any later version.
 #
 # WHIZARD is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 #
 ########################################################################
 
 ## The files in this directory implement the VEGAS algorithm and the
 ## multi-channel implementation with VEGAS as backbone integrator
 
 ## We create a library which is still to be combined with auxiliary libs.
 noinst_LTLIBRARIES = libvegas.la
 check_LTLIBRARIES = libvegas_ut.la
 
 COMMON_F90 =
 
 MPI_F90 = \
-	vegas.f90_mpi \
-	vamp2.f90_mpi \
-	request_base.f90_mpi \
-	request_simple.f90_mpi \
-	request_caller.f90_mpi \
-	request_state.f90_mpi \
-	balancer_base.f90_mpi \
-	balancer_simple.f90_mpi \
-	balancer_channel.f90_mpi \
-	request_callback.f90_mpi
+  vegas.f90_mpi \
+  vamp2.f90_mpi
+
+LOAD_BALANCER_MODULES = \
+  request_base.f90 \
+  request_simple.f90 \
+  request_caller.f90 \
+  request_state.f90 \
+  balancer_base.f90 \
+  balancer_simple.f90 \
+  balancer_channel.f90 \
+  request_callback.f90
 
 SERIAL_F90 = \
-	vegas.f90_serial \
-	vamp2.f90_serial
+  vegas.f90_serial \
+  vamp2.f90_serial
 
 EXTRA_DIST = \
-	$(COMMON_F90) \
-	$(SERIAL_F90) \
-	$(MPI_F90)
+  $(COMMON_F90) \
+  $(SERIAL_F90) \
+  $(MPI_F90) \
+  $(LOAD_BALANCER_MODULES)
 
 nodist_libvegas_la_SOURCES = \
-	$(COMMON_F90) \
-	vegas.f90 \
-	vamp2.f90
+  $(COMMON_F90) \
+  vegas.f90 \
+  vamp2.f90
 
 if FC_USE_MPI
 nodist_libvegas_la_SOURCES += \
-	request_base.f90 \
-	request_simple.f90 \
-	request_caller.f90 \
-	request_state.f90 \
-	balancer_base.f90 \
-	balancer_simple.f90 \
-	balancer_channel.f90 \
-	request_callback.f90
+  request_base.f90 \
+  request_simple.f90 \
+  request_caller.f90 \
+  request_state.f90 \
+  balancer_base.f90 \
+  balancer_simple.f90 \
+  balancer_channel.f90 \
+  request_callback.f90
 endif
 
 DISTCLEANFILES = \
-	vegas.f90 \
-	vamp2.f90 \
-	request_base.f90 \
-	request_simple.f90 \
-	request_caller.f90 \
-	request_state.f90 \
-	balancer_base.f90 \
-	balancer_simple.f90 \
-	balancer_channel.f90 \
-	request_callback.f90
+  vegas.f90 \
+  vamp2.f90
 
 if FC_USE_MPI
 vegas.f90: vegas.f90_mpi
 	-cp -f $< $@
 vamp2.f90: vamp2.f90_mpi
 	-cp -f $< $@
-request_base.f90: request_base.f90_mpi
-	-cp -f $< $@
-request_simple.f90: request_simple.f90_mpi
-	-cp -f $< $@
-request_caller.f90: request_caller.f90_mpi
-	-cp -f $< $@
-request_state.f90: request_state.f90_mpi
-	-cp -f $< $@
-balancer_base.f90: balancer_base.f90_mpi
-	-cp -f $< $@
-balancer_simple.f90: balancer_simple.f90_mpi
-	-cp -f $< $@
-balancer_channel.f90: balancer_channel.f90_mpi
-	-cp -f $< $@
-request_callback.f90: request_callback.f90_mpi
-	-cp -f $< $@
 else
 vegas.f90: vegas.f90_serial
 	-cp -f $< $@
 vamp2.f90: vamp2.f90_serial
 	-cp -f $< $@
 endif
 
 libvegas_ut_la_SOURCES = \
-	vegas_uti.f90 vegas_ut.f90 \
-	vamp2_uti.f90 vamp2_ut.f90
+  vegas_uti.f90 vegas_ut.f90 \
+  vamp2_uti.f90 vamp2_ut.f90
 
 ## Omitting this would exclude it from the distribution
 dist_noinst_DATA = vegas.nw
 
 # Modules and installation
 # Dump module names into file Modules
 execmoddir = $(fmoddir)/whizard
 nodist_execmod_HEADERS = \
   ${nodist_libvegas_la_SOURCES:.f90=.$(FCMOD)}
 
 libvegas_Modules = $(nodist_libvegas_la_SOURCES:.f90=) $(libvegas_ut_la_SOURCES:.f90=)
 Modules: Makefile
 	@for module in $(libvegas_Modules); do \
 		echo $$module >> $@.new; \
 	done
 	@if diff $@ $@.new -q >/dev/null; then \
 		rm $@.new; \
 	else \
 		mv $@.new $@; echo "Modules updated"; \
 	fi
 BUILT_SOURCES = Modules
 
 ## Fortran module dependencies
 # Get module lists from other directories
 module_lists = \
-	../basics/Modules \
-	../utilities/Modules \
-	../testing/Modules \
-	../system/Modules \
-	../rng/Modules \
-	../phase_space/Modules
+  ../basics/Modules \
+  ../utilities/Modules \
+  ../testing/Modules \
+  ../system/Modules \
+  ../rng/Modules \
+  ../phase_space/Modules
 
 $(module_lists):
 	$(MAKE) -C `dirname $@` Modules
 
 Module_dependencies.sed: $(nodist_libvegas_la_SOURCES) $(libvegas_ut_la_SOURCES)
 Module_dependencies.sed: $(module_lists)
 	@rm -f $@
 	echo 's/, *only:.*//' >> $@
 	echo 's/, *&//' >> $@
 	echo 's/, *.*=>.*//' >> $@
 	echo 's/$$/.lo/' >> $@
 	for list in $(module_lists); do \
 		dir="`dirname $$list`"; \
 		for mod in `cat $$list`; do \
 			echo 's!: '$$mod'.lo$$!': $$dir/$$mod'.lo!' >> $@; \
 		done ; \
 	done
 
 DISTCLEANFILES += Module_dependencies.sed
 
 
 # The following line just says
 #    include Makefile.depend
 # but in a portable fashion (depending on automake's AM_MAKE_INCLUDE
 @am__include@ @am__quote@Makefile.depend@am__quote@
 
 Makefile.depend: Module_dependencies.sed
 Makefile.depend: $(nodist_libvegas_la_SOURCES) $(libvegas_ut_la_SOURCES)
 	@rm -f $@
 	for src in $^; do \
 		module="`basename $$src | sed 's/\.f[90][0358]//'`"; \
 		grep '^ *use ' $$src \
 		| grep -v '!NODEP!' \
 		| sed -e 's/^ *use */'$$module'.lo: /' \
 				-f Module_dependencies.sed; \
 	done > $@
 
 DISTCLEANFILES += Makefile.depend
 
 ## Dependencies across directories and packages, if not automatically generated
 rng_tao.lo: ../../vamp/src/tao_random_numbers.$(FCMOD)
 
 # Fortran90 module files are generated at the same time as object files
 .lo.$(FCMOD):
 	@:
 #	touch $@
 
 AM_FCFLAGS = -I../basics -I../utilities -I../testing -I../system -I../combinatorics -I../rng -I../physics -I../fastjet -I../qft -I../matrix_elements -I../types -I../particles -I../beams -I../phase_space -I../expr_base -I../variables -I../../vamp/src -I../mci
 
 ########################################################################
 ## Default Fortran compiler options
 
 ## Profiling
 if FC_USE_PROFILING
 AM_FCFLAGS += $(FCFLAGS_PROFILING)
 endif
 
 ## OpenMP
 if FC_USE_OPENMP
 AM_FCFLAGS += $(FCFLAGS_OPENMP)
 endif
 
 # MPI
 if FC_USE_MPI
 AM_FCFLAGS += $(FCFLAGS_MPI)
 endif
 
 ########################################################################
 ## Non-standard targets and dependencies
 
 ## (Re)create F90 sources from NOWEB source.
 if NOWEB_AVAILABLE
 
 NOMPI_FILTER = -filter "sed 's/defn NOMPI:/defn/'"
 MPI_FILTER = -filter "sed 's/defn MPI:/defn/'"
 
 PRELUDE = $(top_srcdir)/src/noweb-frame/whizard-prelude.nw
 POSTLUDE = $(top_srcdir)/src/noweb-frame/whizard-postlude.nw
 
 vegas.stamp: $(PRELUDE) $(srcdir)/vegas.nw $(POSTLUDE)
 	@rm -f vegas.tmp
 	@touch vegas.tmp
 	for src in $(COMMON_F90) $(libvegas_ut_la_SOURCES); do \
 		$(NOTANGLE) -R[[$$src]] $^ | $(CPIF) $$src; \
 		done
 	for src in $(SERIAL_F90:.f90_serial=.f90); do \
 		$(NOTANGLE) -R[[$$src]] $(NOMPI_FILTER) $^ | $(CPIF) $$src'_serial'; \
 		done
 	for src in $(MPI_F90:.f90_mpi=.f90); do \
 		$(NOTANGLE) -R[[$$src]] $(MPI_FILTER) $^ | $(CPIF) $$src'_mpi'; \
 		done
+	for src in $(LOAD_BALANCER_MODULES); do \
+		$(NOTANGLE) -R[[$$src]] $(MPI_FILTER) $^ | $(CPIF) $$src; \
+		done
 	@mv -f vegas.tmp vegas.stamp
 
-$(COMMON_F90) $(SERIAL_F90) $(MPI_F90) $(libvegas_ut_la_SOURCES): vegas.stamp
+$(COMMON_F90) $(SERIAL_F90) $(MPI_F90) $(LOAD_BALANCER_MODULES) $(libvegas_ut_la_SOURCES): vegas.stamp
 ## Recover from the removal of $@
 	@if test -f $@; then :; else \
 		rm -f vegas.stamp; \
 		$(MAKE) $(AM_MAKEFLAGS) vegas.stamp; \
 	fi
 
 endif
 
 
 ########################################################################
 ## Non-standard cleanup tasks
 ## Remove sources that can be recreated using NOWEB
 if NOWEB_AVAILABLE
 maintainer-clean-noweb:
 	-rm -f *.f90 *.f90_serial *.f90_mpi *.c
 endif
 .PHONY: maintainer-clean-noweb
 
 ## Remove those sources also if builddir and srcdir are different
 if NOWEB_AVAILABLE
 clean-noweb:
 	test "$(srcdir)" != "." && rm -f *.f90 *.f90_serial *.f90_mpi *.c || true
 endif
 .PHONY: clean-noweb
 
 ## Remove F90 module files
 clean-local: clean-noweb
 	-rm -f vegas.stamp vegas.tmp
 	-rm -f *.$(FCMOD)
 if FC_SUBMODULES
-	-rm -f *.smod
+	-rm -f *.smod *.sub
 endif
 
 ## Remove backup files
 maintainer-clean-backup:
 	-rm -f *~
 .PHONY: maintainer-clean-backup
 
 ## Register additional clean targets
 maintainer-clean-local: maintainer-clean-noweb maintainer-clean-backup