#ifndef HAVE_GLOBAL_HPP
#define HAVE_GLOBAL_HPP
// Autogenerated - do not edit by hand !

#ifdef _OPENMP
#include <omp.h>
#endif
#include <algorithm>
#include <cmath>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
#include <set>
#include <sstream>
#include <valarray>
#include <vector>
#include "radix.hpp"
/** \brief Automatic differentiation library designed for TMB
    \details See `TMBad::ADFun`.
*/
namespace TMBad {

typedef unsigned int hash_t;
typedef unsigned int Index;
typedef double Scalar;
typedef std::pair<Index, Index> IndexPair;
typedef std::vector<unsigned int> IndexVector;

template <class T>
std::ostream &operator<<(std::ostream &out, const std::vector<T> &v) {
  out << "{";
  size_t last = v.size() - 1;
  for (size_t i = 0; i < v.size(); ++i) {
    out << v[i];
    if (i != last) out << ", ";
  }
  out << "}";
  return out;
}

/** \brief Union of closed intervals */
template <class T>
struct intervals {
  struct ep : std::pair<T, bool> {
    bool left() const { return !this->second; }
    ep(T x, bool type) : std::pair<T, bool>(x, type) {}
    operator T() { return this->first; }
  };
  std::set<ep> x;
  typedef typename std::set<ep>::iterator iterator;
  /** \brief Insert new interval [a,b]
      \return `false` if union did not change. `true` otherwise.
  */
  bool insert(T a, T b) {
    ep x1(a, false);
    ep x2(b, true);
    iterator it1 = x.upper_bound(x1);
    iterator it2 = x.lower_bound(x2);

    bool insert_x1 = (it1 == x.end()) || it1->left();
    bool insert_x2 = (it2 == x.end()) || it2->left();

    bool change = (it1 != it2) || insert_x1;

    if (it1 != it2) {
      x.erase(it1, it2);
    }

    if (insert_x1) x.insert(x1);
    if (insert_x2) x.insert(x2);
    return change;
  }
  /** \brief Apply a functor to each interval */
  template <class F>
  F &apply(F &f) const {
    for (iterator it = x.begin(); it != x.end();) {
      ep a = *it;
      ++it;
      ep b = *it;
      ++it;
      f(a, b);
    }
    return f;
  }
  struct print_interval {
    void operator()(T a, T b) { Rcout << "[ " << a << " , " << b << " ] "; }
  };
  void print() {
    print_interval f;
    this->apply(f);
    Rcout << "\n";
  }
};

struct Dependencies : std::vector<Index> {
  typedef std::vector<Index> Base;
  std::vector<std::pair<Index, Index> > I;
  Dependencies();
  void clear();
  void add_interval(Index a, Index b);
  void add_segment(Index start, Index size);

  void monotone_transform_inplace(const std::vector<Index> &x);

  template <class F>
  F &apply(F &f) {
    for (size_t i = 0; i < this->size(); i++) f((*this)[i]);
    for (size_t i = 0; i < I.size(); i++) {
      for (Index j = I[i].first; j <= I[i].second; j++) {
        f(j);
      }
    }
    return f;
  }

  template <class F>
  F &apply_if_not_visited(F &f, intervals<Index> &visited) {
    for (size_t i = 0; i < this->size(); i++) f((*this)[i]);
    for (size_t i = 0; i < I.size(); i++) {
      if (visited.insert(I[i].first, I[i].second)) {
        for (Index j = I[i].first; j <= I[i].second; j++) {
          f(j);
        }
      }
    }
    return f;
  }

  bool any(const std::vector<bool> &x) const;
};

/** \brief Define `segment_ref` array to access inside `ForwardArgs`
    or `ReverseArgs` */
enum ArrayAccess { x_read, y_read, y_write, dx_read, dx_write, dy_read };
template <class Args, ArrayAccess What>
struct Accessor {};
template <class Args>
struct Accessor<Args, x_read> {
  typename Args::value_type operator()(Args &args, Index j) {
    return args.x(j);
  }
};
template <class Args>
struct Accessor<Args, y_read> {
  typename Args::value_type operator()(Args &args, Index j) {
    return args.y(j);
  }
};
template <class Args>
struct Accessor<Args, y_write> {
  typename Args::value_type &operator()(Args &args, Index j) {
    return args.y(j);
  }
};
template <class Args>
struct Accessor<Args, dx_read> {
  typename Args::value_type operator()(Args &args, Index j) {
    return args.dx(j);
  }
};
template <class Args>
struct Accessor<Args, dx_write> {
  typename Args::value_type &operator()(Args &args, Index j) {
    return args.dx(j);
  }
};
template <class Args>
struct Accessor<Args, dy_read> {
  typename Args::value_type operator()(Args &args, Index j) {
    return args.dy(j);
  }
};
/** \brief Provide read/write access to an array segment

    This class gives a common interface to vectorized array access
    during forward or reverse sweeps.  The purpose is to reduce the
    amount of manually written loops *and* to provide a compile time
    check for access permissions.
*/
template <class Args, ArrayAccess What>
struct segment_ref {
  typedef typename Args::value_type Type;
  Accessor<Args, What> element_access;
  Args args;
  Index from, n;
  segment_ref(Args &args, Index from, Index n) : args(args), from(from), n(n) {}
  template <class Other>
  operator Other() {
    Other ans(n);
    for (size_t i = 0; i < n; i++) {
      ans[i] = element_access(args, from + i);
    }
    return ans;
  }
  Type operator[](Index i) { return element_access(args, from + i); }
  template <class Other>
  segment_ref &operator=(const Other &other) {
    for (size_t i = 0; i < n; i++) {
      element_access(args, from + i) = other[i];
    }
    return *this;
  }
  template <class Other>
  segment_ref &operator+=(const Other &other) {
    for (size_t i = 0; i < n; i++) {
      element_access(args, from + i) += other[i];
    }
    return *this;
  }
  template <class Other>
  segment_ref &operator-=(const Other &other) {
    for (size_t i = 0; i < n; i++) {
      element_access(args, from + i) -= other[i];
    }
    return *this;
  }
};

/** \brief Argument class to handle array access of operator inputs and outputs.
    - Values (`global::values`) and derivatives (`global::derivs`) are
      stored as two contiguous arrays of the same size.
    - Operator inputs can be scattered and thus require indirect lookup.
    - Operator outputs are always contiguous and thus do not require
      any indirect lookup.
      \warning An `Args` class instance becomes invalid if
      `global::inputs` is modified.
*/
template <class dummy = void>
struct Args {
  /** \brief Array for indirect access of operator inputs */
  const Index *inputs;
  /** \brief Input/output pointers.
      - `ptr.first` points into the `input` array.
      - `ptr.second` points into the actual value/derivative array
  */
  IndexPair ptr;
  /** \brief Get variable index of j'th input to current operator */
  Index input(Index j) const { return inputs[ptr.first + j]; }
  /** \brief Get variable index of j'th output of current operator */
  Index output(Index j) const { return ptr.second + j; }
  Args(const IndexVector &inputs) : inputs(&inputs[0]) {
    ptr.first = 0;
    ptr.second = 0;
  }
};
/** \brief Access input/output values during a forward pass. Write access
    granted for the output value only.
    \warning A `ForwardArgs` class instance becomes invalid if
    `global::inputs` or `global::values` are modified.
*/
template <class Type>
struct ForwardArgs : Args<> {
  typedef std::vector<Type> TypeVector;
  typedef Type value_type;
  Type *values;
  /** \brief j'th input variable of this operator */
  const Type &x(Index j) { return values[input(j)]; }
  /** \brief j'th output variable of this operator */
  Type &y(Index j) { return values[output(j)]; }
  /** \brief pointer version - use with caution. */
  Type *x_ptr(Index j) { return &values[input(j)]; }
  /** \brief pointer version - use with caution. */
  Type *y_ptr(Index j) { return &values[output(j)]; }
  /** \brief segment version */
  segment_ref<ForwardArgs, x_read> x_segment(Index from, Index size) {
    return segment_ref<ForwardArgs, x_read>(*this, from, size);
  }
  /** \brief segment version */
  segment_ref<ForwardArgs, y_write> y_segment(Index from, Index size) {
    return segment_ref<ForwardArgs, y_write>(*this, from, size);
  }
  ForwardArgs(const IndexVector &inputs, TypeVector &values)
      : Args<>(inputs), values(&values[0]) {}
};
/** \brief Access input/output values and derivatives during a reverse
    pass. Write access granted for the input derivative only.
    \warning A `ReverseArgs` class instance becomes invalid if
    `global::inputs`, `global::values` or `global::derivs` are
    modified.
*/
template <class Type>
struct ReverseArgs : Args<> {
  typedef std::vector<Type> TypeVector;
  typedef Type value_type;
  Type *values;
  Type *derivs;
  /** \brief j'th input variable of this operator */
  Type x(Index j) { return values[input(j)]; }
  /** \brief j'th output variable of this operator */
  Type y(Index j) { return values[output(j)]; }
  /** \brief Partial derivative of end result wrt. j'th input variable of
      this operator */
  Type &dx(Index j) { return derivs[input(j)]; }
  /** \brief Partial derivative of end result wrt. j'th output variable of
      this operator */
  Type dy(Index j) { return derivs[output(j)]; }
  /** \brief pointer version - use with caution. */
  Type *x_ptr(Index j) { return &values[input(j)]; }
  /** \brief pointer version - use with caution. */
  Type *y_ptr(Index j) { return &values[output(j)]; }
  /** \brief pointer version - use with caution. */
  Type *dx_ptr(Index j) { return &derivs[input(j)]; }
  /** \brief pointer version - use with caution. */
  Type *dy_ptr(Index j) { return &derivs[output(j)]; }
  /** \brief segment version */
  segment_ref<ReverseArgs, x_read> x_segment(Index from, Index size) {
    return segment_ref<ReverseArgs, x_read>(*this, from, size);
  }
  /** \brief segment version */
  segment_ref<ReverseArgs, y_read> y_segment(Index from, Index size) {
    return segment_ref<ReverseArgs, y_read>(*this, from, size);
  }
  /** \brief segment version */
  segment_ref<ReverseArgs, dx_write> dx_segment(Index from, Index size) {
    return segment_ref<ReverseArgs, dx_write>(*this, from, size);
  }
  /** \brief segment version */
  segment_ref<ReverseArgs, dy_read> dy_segment(Index from, Index size) {
    return segment_ref<ReverseArgs, dy_read>(*this, from, size);
  }
  ReverseArgs(const IndexVector &inputs, TypeVector &values, TypeVector &derivs)
      : Args<>(inputs), values(&values[0]), derivs(&derivs[0]) {
    ptr.first = (Index)inputs.size();
    ptr.second = (Index)values.size();
  }
};

template <>
struct ForwardArgs<bool> : Args<> {
  typedef std::vector<bool> BoolVector;
  BoolVector &values;
  intervals<Index> &marked_intervals;
  bool x(Index j) { return values[input(j)]; }
  BoolVector::reference y(Index j) { return values[output(j)]; }
  ForwardArgs(const IndexVector &inputs, BoolVector &values,
              intervals<Index> &marked_intervals)
      : Args<>(inputs), values(values), marked_intervals(marked_intervals) {}
  /** \brief Helper */
  template <class Operator>
  bool any_marked_input(const Operator &op) {
    if (Operator::implicit_dependencies) {
      Dependencies dep;
      op.dependencies(*this, dep);
      return dep.any(values);
    } else {
      Index ninput = op.input_size();
      for (Index j = 0; j < ninput; j++)
        if (x(j)) return true;
    }
    return false;
  }
  /** \brief Helper */
  template <class Operator>
  void mark_all_output(const Operator &op) {
    Index noutput = op.output_size();
    for (Index j = 0; j < noutput; j++) y(j) = true;
  }
  /** \brief Dense sparsity pattern */
  template <class Operator>
  bool mark_dense(const Operator &op) {
    if (any_marked_input(op)) {
      mark_all_output(op);
      return true;
    }
    return false;
  }
};

template <>
struct ReverseArgs<bool> : Args<> {
  typedef std::vector<bool> BoolVector;
  BoolVector &values;
  intervals<Index> &marked_intervals;
  BoolVector::reference x(Index j) { return values[input(j)]; }
  bool y(Index j) { return values[output(j)]; }
  ReverseArgs(IndexVector &inputs, BoolVector &values,
              intervals<Index> &marked_intervals)
      : Args<>(inputs), values(values), marked_intervals(marked_intervals) {
    ptr.first = (Index)inputs.size();
    ptr.second = (Index)values.size();
  }
  /** \brief Helper */
  template <class Operator>
  bool any_marked_output(const Operator &op) {
    Index noutput = op.output_size();
    for (Index j = 0; j < noutput; j++)
      if (y(j)) return true;
    return false;
  }
  /** \brief Helper */
  template <class Operator>
  void mark_all_input(const Operator &op) {
    if (Operator::implicit_dependencies) {
      Dependencies dep;
      op.dependencies(*this, dep);

      for (size_t i = 0; i < dep.size(); i++) values[dep[i]] = true;

      for (size_t i = 0; i < dep.I.size(); i++) {
        Index a = dep.I[i].first;
        Index b = dep.I[i].second;
        bool insert = marked_intervals.insert(a, b);
        if (insert) {
          for (Index j = a; j <= b; j++) {
            values[j] = true;
          }
        }
      }
    } else {
      Index ninput = op.input_size();
      for (Index j = 0; j < ninput; j++) x(j) = true;
    }
  }
  /** \brief Dense sparsity pattern */
  template <class Operator>
  bool mark_dense(const Operator &op) {
    if (any_marked_output(op)) {
      mark_all_input(op);
      return true;
    }
    return false;
  }
};

std::string tostr(const Index &x);

std::string tostr(const Scalar &x);

struct Writer : std::string {
  static std::ostream *cout;
  Writer(std::string str);
  Writer(Scalar x);
  Writer();

  template <class V>
  std::string vinit(const V &x) {
    std::string y = "{";
    for (size_t i = 0; i < x.size(); i++)
      y = y + (i == 0 ? "" : ",") + tostr(x[i]);
    y = y + "}";
    return y;
  }

  std::string p(std::string x);
  Writer operator+(const Writer &other);
  Writer operator-(const Writer &other);
  Writer operator-();
  Writer operator*(const Writer &other);
  Writer operator/(const Writer &other);

  Writer operator*(const Scalar &other);
  Writer operator+(const Scalar &other);

  void operator=(const Writer &other);
  void operator+=(const Writer &other);
  void operator-=(const Writer &other);
  void operator*=(const Writer &other);
  void operator/=(const Writer &other);

  template <class T>
  friend Writer &operator<<(Writer &w, const T &v) {
    *cout << v;
    return w;
  }
  template <class T>
  friend Writer &operator<<(Writer &w, const std::valarray<T> &x) {
    *cout << w.vinit(x);
    return w;
  }
};

template <>
struct ForwardArgs<Writer> : ForwardArgs<Scalar> {
  typedef std::vector<Scalar> ScalarVector;
  typedef ForwardArgs<Scalar> Base;
  /** \brief Write constants in the source code? */
  bool const_literals;
  /** \brief Enable indirect local addressing mode within loop? */
  bool indirect;
  void set_indirect() {
    indirect = true;
    ptr.first = 0;
    ptr.second = 0;
  }
  Writer xd(Index j) { return "v[" + tostr(input(j)) + "]"; }
  Writer yd(Index j) { return "v[" + tostr(output(j)) + "]"; }
  Writer xi(Index j) { return "v[i[" + tostr(Index(ptr.first + j)) + "]]"; }
  Writer yi(Index j) { return "v[o[" + tostr(Index(ptr.second + j)) + "]]"; }
  Writer x(Index j) { return (indirect ? xi(j) : xd(j)); }
  Writer y(Index j) { return (indirect ? yi(j) : yd(j)); }
  Writer y_const(Index j) {
    TMBAD_ASSERT2(!indirect, "Attempt to write constants within loop?");
    return tostr(Base::y(j));
  }
  ForwardArgs(IndexVector &inputs, ScalarVector &values)
      : ForwardArgs<Scalar>(inputs, values) {
    const_literals = false;
    indirect = false;
  }
};

template <>
struct ReverseArgs<Writer> : Args<> {
  typedef std::vector<Scalar> ScalarVector;
  /** \brief Write constants in the source code? */
  bool const_literals;
  /** \brief Enable indirect local addressing mode within loop? */
  bool indirect;
  void set_indirect() {
    indirect = true;
    ptr.first = 0;
    ptr.second = 0;
  }
  Writer dxd(Index j) { return "d[" + tostr(input(j)) + "]"; }
  Writer dyd(Index j) { return "d[" + tostr(output(j)) + "]"; }
  Writer xd(Index j) { return "v[" + tostr(input(j)) + "]"; }
  Writer yd(Index j) { return "v[" + tostr(output(j)) + "]"; }
  Writer dxi(Index j) { return "d[i[" + tostr(Index(ptr.first + j)) + "]]"; }
  Writer dyi(Index j) { return "d[o[" + tostr(Index(ptr.second + j)) + "]]"; }
  Writer xi(Index j) { return "v[i[" + tostr(Index(ptr.first + j)) + "]]"; }
  Writer yi(Index j) { return "v[o[" + tostr(Index(ptr.second + j)) + "]]"; }
  Writer x(Index j) { return (indirect ? xi(j) : xd(j)); }
  Writer y(Index j) { return (indirect ? yi(j) : yd(j)); }
  Writer dx(Index j) { return (indirect ? dxi(j) : dxd(j)); }
  Writer dy(Index j) { return (indirect ? dyi(j) : dyd(j)); }

  ReverseArgs(IndexVector &inputs, ScalarVector &values) : Args<>(inputs) {
    const_literals = false;
    indirect = false;
    ptr.first = (Index)inputs.size();
    ptr.second = (Index)values.size();
  }
};

struct Position {
  Position(Index node, Index first, Index second);
  Position();
  Index node;
  IndexPair ptr;
  bool operator<(const Position &other) const;
};

/** \brief Utility: sort inplace */
template <class T>
void sort_inplace(std::vector<T> &x) {
  std::sort(x.begin(), x.end());
}

/** \brief Utility: sort unique inplace */
template <class T>
void sort_unique_inplace(std::vector<T> &x) {
  std::sort(x.begin(), x.end());
  typename std::vector<T>::iterator last = std::unique(x.begin(), x.end());
  x.erase(last, x.end());
}

/** \brief Operator graph in compressed row storage */
struct graph {
  std::vector<Index> j;
  std::vector<Index> p;
  graph();
  size_t num_neighbors(Index node);
  Index *neighbors(Index node);
  bool empty();
  size_t num_nodes();
  void print();
  /** \brief Private workspace used by `graph::search`. Must either be
      empty or filled with false when not in use. */
  std::vector<bool> mark;
  /** \brief Used to lookup operator (node) of an independent variable */
  std::vector<Index> inv2op;
  /** \brief Used to lookup operator (node) of a dependent variable */
  std::vector<Index> dep2op;
  /** \brief Number of column indices by row, i.e. number of *outgoing* edges */
  std::vector<Index> rowcounts();
  /** \brief Number of row indices by column, i.e. number of *ingoing* edges */
  std::vector<Index> colcounts();
  /** \brief Perform a breadth-first search
      \param start Nodes that initiate the search
      \param visited On input contains boolean mask of already visited
      nodes which should be skipped from the search. On output the
      result of the search has been marked as visited.
      \param result The result of the search is appended to this vector.
      \note `start` and `result` are allowed to be references to the same
     object.
  */
  void bfs(const std::vector<Index> &start, std::vector<bool> &visited,
           std::vector<Index> &result);
  /** \brief Find sub graph
      \param start On input contains the nodes from which to perform
      the search. On output contains the sorted result of the search.
      \param sort_input  Remove duplicates before searching? Can be set to false
     if `start` is unique. \param sort_output Sort the result of the search?
     Output must be sorted in order to work as a valid subgraph (!) \warning
     `start` really is *nodes* and **not** *variables*.
      - To initiate a search from an independent variable `i` use `inv2op[i]` to
     lookup the corresponding node !
      - To initiate a search from a dependent variable `i` use `dep2op[i]` to
     lookup the corresponding node !
  */
  void search(std::vector<Index> &start, bool sort_input = true,
              bool sort_output = true);
  /** \copydoc search
      \param visited On input contains boolean mask of already visited
      nodes which should be skipped from the search. On output the
      result of the search has been marked as visited.
      \note To perform the search along a *restricted graph* one can
      pass the complement of the restriction as 'already visited'.
  */
  void search(std::vector<Index> &start, std::vector<bool> &visited,
              bool sort_input = true, bool sort_output = true);
  /** \brief Find boundary of subgraph
      \param subgraph Nodes of the subgraph
      \details Returns the set of nodes *outside the subgraph* which
      are connected to some node in the subgraph.
  */
  std::vector<Index> boundary(const std::vector<Index> &subgraph);
  /** \brief Construct a graph
      \param num_nodes Number of nodes
      \param edges Graph edges represented as node pairs
  */
  graph(size_t num_nodes, const std::vector<IndexPair> &edges);
};
struct global;
global *get_glob();

namespace {
template <class CompleteOperator, bool dynamic>
struct constructOperator {};
template <class CompleteOperator>
struct constructOperator<CompleteOperator, false> {
  CompleteOperator *operator()() {
    static CompleteOperator *pOp = new CompleteOperator();
    return pOp;
  }
};
template <class CompleteOperator>
struct constructOperator<CompleteOperator, true> {
  CompleteOperator *operator()() {
    CompleteOperator *pOp = new CompleteOperator();
    return pOp;
  }

  template <class T1>
  CompleteOperator *operator()(const T1 &x1) {
    CompleteOperator *pOp = new CompleteOperator(x1);
    return pOp;
  }

  template <class T1, class T2>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2) {
    CompleteOperator *pOp = new CompleteOperator(x1, x2);
    return pOp;
  }

  template <class T1, class T2, class T3>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2, const T3 &x3) {
    CompleteOperator *pOp = new CompleteOperator(x1, x2, x3);
    return pOp;
  }

  template <class T1, class T2, class T3, class T4>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2, const T3 &x3,
                               const T4 &x4) {
    CompleteOperator *pOp = new CompleteOperator(x1, x2, x3, x4);
    return pOp;
  }
};
}  // namespace

struct global {
  struct ad_plain;
  struct ad_aug;
  typedef ad_aug Replay;
  struct ad_range;
  struct print_config;
  /** \brief The abstract operator for the operation stack `global::opstack`
      - The methods in this class must be implemented for all operators.
     However, in practice most members can be autogenerated by the `Complete`
     class. \note Virtual member functions are associated with a significant
      overhead (vtable lookup). For performance reasons, it is
      therefore important to squeeze as much work as possible into
      these virtual methods.
  */
  struct OperatorPure {
    /** \brief Increment input/output pointers to prepare for the next
        OperatorPure in the stack */
    virtual void increment(IndexPair &ptr) = 0;
    /** \brief Decrement input/output pointers to prepare for the
        previous OperatorPure in the stack */
    virtual void decrement(IndexPair &ptr) = 0;
    /** \brief Update output values of this OperatorPure */
    virtual void forward(ForwardArgs<Scalar> &args) = 0;
    /** \brief Update input derivs of this OperatorPure */
    virtual void reverse(ReverseArgs<Scalar> &args) = 0;
    /** \brief Fast equivalent of combined `forward()` *and* `increment()` */
    virtual void forward_incr(ForwardArgs<Scalar> &args) = 0;
    /** \brief Fast equivalent of combined `decrement()` *and* `reverse()` */
    virtual void reverse_decr(ReverseArgs<Scalar> &args) = 0;
    /** \brief Number of inputs to this OperatorPure */
    virtual Index input_size() = 0;
    /** \brief Number of outputs from this OperatorPure */
    virtual Index output_size() = 0;
    /** \brief Mark forward dependencies \details Calculate \f$y=Jx\f$
        where \f$J\f$ denotes the Jacobian sparsity pattern of the
        operator. Operators are by default assumed to be dense.
    */
    virtual void forward(ForwardArgs<bool> &args) = 0;
    /** \brief Mark reverse dependencies \details Calculate \f$x=J^Ty\f$
        where \f$J\f$ denotes the Jacobian sparsity pattern of the
        operator. Operators are by default assumed to be dense.
    */
    virtual void reverse(ReverseArgs<bool> &args) = 0;
    /** \copydoc forward_incr */
    virtual void forward_incr(ForwardArgs<bool> &args) = 0;
    /** \copydoc reverse_decr */
    virtual void reverse_decr(ReverseArgs<bool> &args) = 0;
    /** \brief Conditionally mark *all* outputs */
    virtual void forward_incr_mark_dense(ForwardArgs<bool> &args) = 0;
    /** \brief Get the indices of variables required by this operator.
        \details Result of this operation (the indices) are *appended*
        to the `dep` argument. Its main purpose is to build the
        computational graph. Under normal circumstances the
        information can be autogenerated from the operator
        inputs. However there is a subtle exception: Operators are
        allowed to take reference inputs. For instance a matrix multiply
        only requires addresses of the first element of the two input
        matrices. Such cases must have special code to provide **all**
        input addresses.
        \note This information is somewhat overlapping with that of
        `reverse(ReverseArgs<Replay>& args)`.
    */
    virtual void dependencies(Args<> &args, Dependencies &dep) = 0;
    /** \brief Replay operation sequence. \copydoc forward */
    virtual void forward(ForwardArgs<Replay> &args) = 0;
    /** \brief Replay operation sequence. \copydoc reverse */
    virtual void reverse(ReverseArgs<Replay> &args) = 0;
    /** \brief Replay operation sequence. \copydoc forward_incr */
    virtual void forward_incr(ForwardArgs<Replay> &args) = 0;
    /** \brief Replay operation sequence. \copydoc reverse_decr */
    virtual void reverse_decr(ReverseArgs<Replay> &args) = 0;
    /** \brief Source code writer. \copydoc forward */
    virtual void forward(ForwardArgs<Writer> &args) = 0;
    /** \brief Source code writer. \copydoc reverse */
    virtual void reverse(ReverseArgs<Writer> &args) = 0;
    /** \brief Source code writer. \copydoc forward_incr */
    virtual void forward_incr(ForwardArgs<Writer> &args) = 0;
    /** \brief Source code writer. \copydoc reverse_decr */
    virtual void reverse_decr(ReverseArgs<Writer> &args) = 0;
    /** \brief Name of this OperatorPure */
    virtual const char *op_name() { return "NoName"; }
    /** \brief Lookup table for operator fusion. Merge this
        OperatorPure with an identical copy. If no match return
        NULL. */
    virtual OperatorPure *self_fuse() = 0;
    /** \brief Lookup table for operator fusion. Merge this
        OperatorPure with another operator. If no match return
        NULL. */
    virtual OperatorPure *other_fuse(OperatorPure *other) = 0;
    /** \brief Return a copy of this OperatorPure. */
    virtual OperatorPure *copy() = 0;
    /** \brief Deallocate this OperatorPure. */
    virtual void deallocate() = 0;
    /** \brief Operator info that is constant for this operator */
    struct op_info {
      bool dynamic;
      bool smart_pointer;
      bool dense;
      bool is_linear;
      bool independent_variable;
      bool dependent_variable;
      bool allow_remap;
    };
    /** \brief Get operator info. */
    virtual op_info info() = 0;
    /** \brief Optional operator_data */
    virtual void *operator_data() = 0;
    /** \brief Operator identifier \details If two operators have
        equal identifier it can be assumed that they represent equal
        mappings, i.e. same input implies same output.
    */
    virtual void *identifier() = 0;
    /** \brief Optional print method */
    virtual void print(print_config cfg) = 0;
    /** \brief Get pointer to operator before it was
        completed. \warning Avoid unless strictly necessary */
    virtual void *incomplete() = 0;
    virtual ~OperatorPure() {}
  };

  /** \brief Operation stack
      \details Essentially just a vector of operations with some
      memory management on top.
      \warning Use inherited members with caution. E.g. `resize(0)`
      wouldn't do the expected. Use `clear()` instead.
  */
  struct operation_stack : std::vector<OperatorPure *> {
    bool any_dynamic;
    operation_stack();
    operation_stack(const operation_stack &x);
    template <bool dynamic>
    void push_back(OperatorPure *x) {
      std::vector<OperatorPure *>::push_back(x);
      if (dynamic) any_dynamic = true;
    }
    void push_back(OperatorPure *x, bool dynamic);
    operation_stack &operator=(const operation_stack &x);
    ~operation_stack();
    void clear();
    void copy_from(const operation_stack &x);
  };

  /** \brief Operation stack */
  operation_stack opstack;
  /** \brief Contiguous workspace for taped variables (same length as
   * `global::derivs`) */
  std::vector<Scalar> values;
  /** \brief Contiguous workspace for derivatives (same length as
   * `global::values`) */
  std::vector<Scalar> derivs;
  /** \brief Pointers into `global::values` determining operator inputs */
  IndexVector inputs;
  /** \brief Pointers into `global::values` determining **independent**
   * variables */
  std::vector<Index> inv_index;
  /** \brief Pointers into `global::values` determining **dependent** variables
   */
  std::vector<Index> dep_index;

  mutable std::vector<IndexPair> subgraph_ptr;
  std::vector<Index> subgraph_seq;
  /** \brief Optional pointer to compiled code. \warning Experimental */
  void (*forward_compiled)(Scalar *);
  /** \brief Optional pointer to compiled code. \warning Experimental */
  void (*reverse_compiled)(Scalar *, Scalar *);

  global();
  /** \brief Clear all workspace without actually freeing the workspace
      \note Intended use: **retaping** without re-allocating */
  void clear();

  /** \brief Release unnecessary workspace to the system \details In
      contrast to `clear` this method does not destroy the object. Its
      intended use is to be called before and/or after a memory
      intensive routine, e.g.  `global::optimize` or
      `global::build_graph`.
      The function attempts to shrink the workspace of the most critical
     containers:
      - `derivs` is freed completely.
      - `subgraph_ptr` is freed completely.
      - `values` is reduced if the potential relative reduction is
     'significant'.
      - `inputs` is reduced if the potential relative reduction is
     'significant'. \param tol Number between zero and one. A reduction is
      considered 'significant' if it is at least `1-tol`.
  */
  void shrink_to_fit(double tol = .9);

  /** \brief Set derivatives to zero
      \param start Specify 'tail sweep' starting from this position
  */
  void clear_deriv(Position start = Position(0, 0, 0));

  /** \brief Reference to i'th input value (parameter) */
  Scalar &value_inv(Index i);
  /** \brief Reference to i'th component of the gradient */
  Scalar &deriv_inv(Index i);
  /** \brief Reference to i'th component of the function value */
  Scalar &value_dep(Index i);
  /** \brief Reference to i'th 'range direction' used to seed the derivative  */
  Scalar &deriv_dep(Index i);
  /** \brief The three pointers defining the begining of the tape */
  Position begin();
  /** \brief The three pointers defining the end of the tape */
  Position end();

  /** \brief Substitute of std::vector<bool> with all elements `true` */
  struct no_filter {
    constexpr bool operator[](size_t i) const;
  };
  /** \brief Generic forward sweep
      \param begin Skip operators up to this point.
      \param node_filter Skip operators not in this mask.
      \note Don't forget to set `args.ptr` when using `begin`.
  */
  template <class ForwardArgs, class NodeFilter>
  void forward_loop(ForwardArgs &args, size_t begin,
                    const NodeFilter &node_filter) const {
    for (size_t i = begin; i < opstack.size(); i++) {
      if (node_filter[i])
        opstack[i]->forward_incr(args);
      else
        opstack[i]->increment(args.ptr);
    }
  }
  /** \copydoc forward_loop */
  template <class ForwardArgs>
  void forward_loop(ForwardArgs &args, size_t begin = 0) const {
    forward_loop(args, begin, no_filter());
  }
  /** \brief Generic reverse sweep
      \param node_filter Skip operators not in this mask.
      \param begin Skip operators up to this point.
  */
  template <class ReverseArgs, class NodeFilter>
  void reverse_loop(ReverseArgs &args, size_t begin,
                    const NodeFilter &node_filter) const {
    for (size_t i = opstack.size(); i > begin;) {
      i--;
      if (node_filter[i])
        opstack[i]->reverse_decr(args);
      else
        opstack[i]->decrement(args.ptr);
    }
  }
  /** \copydoc reverse_loop */
  template <class ReverseArgs>
  void reverse_loop(ReverseArgs &args, size_t begin = 0) const {
    reverse_loop(args, begin, no_filter());
  }
  /** \brief Generic forward sweep along global::subgraph_seq */
  template <class ForwardArgs>
  void forward_loop_subgraph(ForwardArgs &args) const {
    subgraph_cache_ptr();
    for (size_t j = 0; j < subgraph_seq.size(); j++) {
      Index i = subgraph_seq[j];
      args.ptr = subgraph_ptr[i];
      opstack[i]->forward(args);
    }
  }
  /** \brief Generic reverse sweep along global::subgraph_seq */
  template <class ReverseArgs>
  void reverse_loop_subgraph(ReverseArgs &args) const {
    subgraph_cache_ptr();
    for (size_t j = subgraph_seq.size(); j > 0;) {
      j--;
      Index i = subgraph_seq[j];
      args.ptr = subgraph_ptr[i];
      opstack[i]->reverse(args);
    }
  }
  /** \brief Generic clear array along global::subgraph
      \details Applicable for
      - std::vector<Scalar>
      - std::vector<Replay>
      - std::vector<bool>
      \note This function calls `subgraph_cache_ptr()`. It follows that
      complexity is proportional to the full graph for the first
      call. Following calls have complexity proportional to the
      subgraph.
  */
  template <class Vector>
  void clear_array_subgraph(Vector &array,
                            typename Vector::value_type value =
                                typename Vector::value_type(0)) const {
    if (array.size() != values.size()) {
      array.resize(values.size());
      std::fill(array.begin(), array.end(), value);
      return;
    }
    subgraph_cache_ptr();
    for (size_t j = 0; j < subgraph_seq.size(); j++) {
      Index i = subgraph_seq[j];
      size_t noutput = opstack[i]->output_size();
      for (size_t k = 0; k < noutput; k++)
        array[subgraph_ptr[i].second + k] = value;
    }
  }

  /** \brief Full or partial forward sweep through the operation stack. Updates
     `global::values`. \param start Specify 'tail sweep' starting from this
     position
  */
  void forward(Position start = Position(0, 0, 0));
  /** \brief Full or partial reverse sweep through the operation stack. Updates
     `global::derivs`. \param start Specify 'tail sweep' starting from this
     position \note A reverse tail sweep through nodes `opstack[start:end]` will
     in general affect the derivative array outside the desired range, i.e. the
     derivative array should be considered as invalidated for indices smaller
     than `start.ptr.second`.
  */
  void reverse(Position start = Position(0, 0, 0));
  /** \brief Forward sweep along a subgraph. */
  void forward_sub();
  /** \brief Reverse sweep along a subgraph. */
  void reverse_sub();

  /** \brief Full forward dependency sweep through the operation stack. */
  void forward(std::vector<bool> &marks);
  /** \brief Full reverse dependency sweep through the operation stack. */
  void reverse(std::vector<bool> &marks);
  /** \brief Forward sweep along a subgraph.
      Subgraph can be specified through a sequence `subgraph_seq`
      (default) or through a boolean `node_filter`.
  */
  void forward_sub(std::vector<bool> &marks,
                   const std::vector<bool> &node_filter = std::vector<bool>());
  /** \brief Reverse sweep along a subgraph.
      Subgraph can be specified through a sequence `subgraph_seq`
      (default) or through a boolean `node_filter`.
  */
  void reverse_sub(std::vector<bool> &marks,
                   const std::vector<bool> &node_filter = std::vector<bool>());
  /** \brief Full forward dependency sweep through the operation stack.
      \details For each operator mark *all* outputs if *any* inputs
      are marked.  The intended use of this function is to determine a
      valid (complete topoligically sorted) subgraph.
      \note A reverse version of this function seems less useful so we
      do not yet provide it.
      \warning FIXME Indirect dependencies are not yet accounted for.
 */
  void forward_dense(std::vector<bool> &marks);

  struct replay {
    /** \brief Mimic `orig` operation sequence value array `global::values` */
    std::vector<Replay> values;
    /** \brief Mimic `orig` operation sequence derivative array `global::derivs`
     */
    std::vector<Replay> derivs;
    /** \brief Operation sequence to replay **from** */
    const global &orig;
    /** \brief Operation sequence to replay **to** */
    global &target;
    /** \brief Active operation sequence before this replay was initiated */
    global *parent_glob;
    /** \copydoc global::value_inv */
    Replay &value_inv(Index i);
    /** \copydoc global::deriv_inv */
    Replay &deriv_inv(Index i);
    /** \copydoc global::value_dep */
    Replay &value_dep(Index i);
    /** \copydoc global::deriv_dep */
    Replay &deriv_dep(Index i);
    /** \brief Initialize pointers `orig` and `target`
        \details This is a cheap `O(1)` operation
    */
    replay(const global &orig, global &target);
    /** \brief Initiate this replay
        \details
        - Set `this->target` as the current active operation sequence **and**
        - Populate `this->values` with `Replay` copies of all `orig.values`.
        Complexity is `O(n)`.
        \note `target` is still empty after this operation if `ad_aug` is used
       as replay type (the default).
    */
    void start();
    /** \brief Stop this replay
        \details
        Restore the previous active operation sequence
    */
    void stop();
    /** \brief Initialize derivative array used by this replay */
    void clear_deriv();
    /** \brief Forward replay of `replay::orig` operation sequence.
        Updates `replay::values` array **and** `replay::target` operation
       sequence. \param inv_tags Write an independent variable tag for each
       function argument ? \param dep_tags Write a dependent variable tag for
       each function value ? \param node_filter Operators to keep in replay
    */
    void forward(bool inv_tags = true, bool dep_tags = true,
                 Position start = Position(0, 0, 0),
                 const std::vector<bool> &node_filter = std::vector<bool>());
    /** \brief Reverse replay of `replay::orig` operation sequence.
        Updates `replay::derivs` array **and** `replay::target` operation
       sequence. \param dep_tags Write a dependent variable tag for each
       function derivative ? \param inv_tags Write an independent variable tag
       for each derivative weight ? \param node_filter Operators to keep in
       replay
    */
    void reverse(bool dep_tags = true, bool inv_tags = false,
                 Position start = Position(0, 0, 0),
                 const std::vector<bool> &node_filter = std::vector<bool>());
    /** \brief Replay forward sweep along a subgraph. */
    void forward_sub();
    /** \brief Replay reverse sweep along a subgraph. */
    void reverse_sub();
    /** \copydoc global::clear_deriv_sub */
    void clear_deriv_sub();
  };

  /** \brief Replay this operation sequence to itself
      \param inv_tags Keep independent variable tags?
      \param dep_tags Keep dependent variable tags?
  */
  void forward_replay(bool inv_tags = true, bool dep_tags = true);

  /** \brief Cache array pointers required by all subgraph routines
      \note Requires a sweep through the full computational graph. In
      addition a workspace of `sizeof(IndexPair) * opstack.size()` is
      allocated.
  */
  void subgraph_cache_ptr() const;
  /** \brief Convert selected variables to a subgraph sequence
      \details For each variable check if the variable is marked. If
      yes add the (unique) operator, that generated the variable as
      its result, to the subgraph sequence. Although an operator can
      generate multiple variables it is guarantied that an operator is
      only added once.
  */
  void set_subgraph(const std::vector<bool> &marks, bool append = false);
  /** \brief Mark all outputs of a subgraph */
  void mark_subgraph(std::vector<bool> &marks);
  /** \brief Unmark all outputs of a subgraph */
  void unmark_subgraph(std::vector<bool> &marks);
  /** \brief Create a full subgraph (for testing only) */
  void subgraph_trivial();
  /** \brief Clear derivative array along a subgraph.
      \details Clear all *relevant* input variable derivatives by
      noting that each such *input* variable must also be an *output*
      of some operator node.
  */
  void clear_deriv_sub();
  /** \brief Extract a subgraph as a new global object. Fast when
      called many times.

      \details Sub-graph can either be *sorted* (common case) or just
      *topologically sorted* (e.g. for graph permutations). In both
      cases the subgraph may or may not include all independent/dependent
      variables.

      It's important to note that this routine permutes `inv_index`
      and `dep_index` to reflect the original parameter order (which
      is only relevant in the un-sorted case). FIXME: Correct the
      code!

      \param var_remap Workspace containing garbage on output. Can be
      used to avoid re-allocating.
      \param new_glob Target of extraction. Empty by default.
      \note Complexity proportional to subgraph **if workspace is
      allocated**. Otherwise proportional to full graph.
      \note `global::subgraph_seq` must be calculated in advance.
      \warning FIXME: Must copy the dynamic operator table by default
      At least two applications of this function:
      - Tape optimizer. Here we want to delete the old glob after extraction.
      - Fast quadrature. Here we want to keep the old glob.
      \note A non-empty extraction target `new_glob` may be useful for
      function slicing.
      \note Special attention must be payed to variables that affect
      the subgraph trajectory without being part of the
      trajectory. These *boundary* variables can be remapped (using
      `var_remap`) prior to calling this function. Note that by
      default boundary variables are remapped to an arbitrary variable
      (the first variable `values[0]`).
  */
  global extract_sub(std::vector<Index> &var_remap, global new_glob = global());
  /** \brief In-place subgraph extractor \details Slower, but more
      memory efficient, than `extract_sub`. Subset is by boolean
      marked **variables** rather than actual subgraph nodes.
  */
  void extract_sub_inplace(std::vector<bool> marks);
  /** \brief Extract a subgraph as a new global object. Slow when called many
     times. \note `global::subgraph_seq` must be calculated in advance.
  */
  global extract_sub();

  /** \brief Build variable -> operator node lookup table using a
      single forward pass. The resulting sequence is monotonically
      increasing.
      \details For each variable (i.e. pointer into global::values)
      find operator (i.e. pointer into global::opstack) that generated
      the variable as its results.
      \note Size of this table is `values.size()`.
  */
  std::vector<Index> var2op();
  /** \brief Select operators from a subset of variables
      An operator is selected if it generates any of the selected variables.
      This is the fast equivalent of
      `ans[ var2op() [ which(values) ] ] <- true`
  */
  std::vector<bool> var2op(const std::vector<bool> &values);
  /** \brief Get variables produces by a node seqence */
  std::vector<Index> op2var(const std::vector<Index> &seq);
  /** \brief Get variables produced by a node seqence */
  std::vector<bool> op2var(const std::vector<bool> &seq_mark);
  /** \brief General operator -> variable table generator.
      \details For each operator match its output variable in a given
      subset of variables. If a match exists return the index of the
      match, otherwise return a missing value code. If several matches
      exist, return the index corresponding to the *first match* from
      the left.
      \note Size of this table is `opstack.size()`.
  */
  std::vector<Index> op2idx(const std::vector<Index> &var_subset,
                            Index NA = (Index)-1);
  /** \brief General boolean table generator */
  std::vector<bool> mark_space(size_t n, const std::vector<Index> ind);
  /** \brief Boolean representation of independent variable positions */
  std::vector<bool> inv_marks();
  /** \brief Boolean representation of dependent variable positions */
  std::vector<bool> dep_marks();
  /** \brief Boolean representation of sub graph positions */
  std::vector<bool> subgraph_marks();

  struct append_edges {
    size_t &i;
    const std::vector<bool> &keep_var;
    std::vector<Index> &var2op;
    std::vector<IndexPair> &edges;

    std::vector<bool> op_marks;
    size_t pos;
    append_edges(size_t &i, size_t num_nodes, const std::vector<bool> &keep_var,
                 std::vector<Index> &var2op, std::vector<IndexPair> &edges);
    void operator()(Index dep_j);

    void start_iteration();

    void end_iteration();
  };
  /** \brief Build a graph-representation of the operation stack
      \details The graph is defined as follows:
      - Nodes are operators i.e., the number of nodes is `opstack.size()`
      - A connection from Op1 to Op2 is present <==> there exists a *variable*
     which is input to Op2 and output from Op1 \param transpose Flag to reverse
     all edges. \param keep_var Boolean mask of *variables* to be considered as
     potential edges.
  */
  graph build_graph(bool transpose, const std::vector<bool> &keep_var);
  /** \brief Construct operator graph with forward connections
      \details See `build_graph()`
  */
  graph forward_graph(std::vector<bool> keep_var = std::vector<bool>(0));
  /** \brief Construct operator graph with reverse connections
      \details See `build_graph()`
  */
  graph reverse_graph(std::vector<bool> keep_var = std::vector<bool>(0));

  /** \brief Test if two tapes are identical
      \details We say that two tapes are identical if the same input
      is guarantied to result in the same output.
  */
  bool identical(const global &other) const;

  /** \brief Simple hash code of scalar */
  template <class T>
  void hash(hash_t &h, T x) const {
    static const size_t n =
        (sizeof(T) / sizeof(hash_t)) + (sizeof(T) % sizeof(hash_t) != 0);
    hash_t buffer[n];
    std::fill(buffer, buffer + n, 0);
    for (size_t i = 0; i < sizeof(x); i++)
      ((char *)buffer)[i] = ((char *)&x)[i];
    hash_t A = 54059;
    hash_t B = 76963;
    for (size_t i = 0; i < n; i++) h = (A * h) ^ (B * buffer[i]);
  }

  /** \brief Simple hash code of tape
      \details The hash function has the following properties:
      - If x and y have different hash codes then x and y are **not**
     `identical()`.
      - If x and y have equal hash codes then x and y are **likely**
        `identical()`. This situation must always be followed up by a
        collision test.
  */
  hash_t hash() const;

  /** \brief Configuration of hash_sweep */
  struct hash_config {
    /** \brief Use unique code for each independent variable? (see `hash_sweep`)
     */
    bool strong_inv;
    /** \brief Include numerical value as part of hash code for constants? (see
     * `hash_sweep`) */
    bool strong_const;
    /** \brief Use unique hash code for each output of an operator? */
    bool strong_output;
    /** \brief Reduce returned hash values to one per dependent variable? */
    bool reduce;
    /** \brief Deterministic hash codes? */
    bool deterministic;
    /** \brief Optionally control seeding of InvOp in case `strong_inv=true` */
    std::vector<Index> inv_seed;
  };

  /** \brief Calculate hash codes of each dependent variable using a
      single forward sweep
      \details The hash function has the following properties:
      - If x and y have different hash codes then x and y are **not**
     `identical()`.
      - If x and y have equal hash codes then x and y are **likely**
        `identical()`. This situation must always be followed up by a
        collision test.

        In the **weak** case the hash codes attempt to discriminate
        between *mappings*. In the **strong** case the hash codes
        discriminate between sub-expressions.
        For example consider the computational graph of

        (x1+x2) * (x3+x4)

        In the weak case (x1+x2) and (x3+x4) are considered identical
        because the mappings f(x1,x2)=x1+x2 and f(x3,x4)=x3+x4 are
        identical.
        If, in addition, the mappings are evaluated at the exact same
        arguments then they are identical in the strong sense (equal
        sub-expressions).

        The hash function works as follows. Starting out with a scalar
        hash update function `hash(x,h)` we generalize to vectors
        using the recursion

        `hash(x_1,...,x_n) := hash(x_1, hash(x_2,...,x_n) )`

        Hash codes of the operation

        `(y_1,...,y_m) = f(x_1,...,x_n)`

        are defined as

        `hash(y_k) = hash(f, x_1, ..., x_n) + k - 1`

        assuming `f` is represented by its `OperatorPure::identifier()`.

        Note, that dynamic operators by default have a unique
        identifier (their memory address). It follows that the hash
        code of output variables are (*very* likely) unique as well.

        For the reason above, it may seem tempting to skip input
        hashing completely in the dynamic operator case. However, that
        won't work because we allow dynamic operators to have a static
        identifier, see `Operator::add_static_identifier`.

        \param weak If `true` calculate weak hash codes. Otherwise strong.

        \return In the weak case a vector of same length as the number
        of dependent variables. In the strong case return the entire
        vector of variable hash codes (same length as `values`).
  */
  std::vector<hash_t> hash_sweep(hash_config cfg) const;
  /** \brief Handle two common cases */
  std::vector<hash_t> hash_sweep(bool weak = true) const;

  /** \brief Very simple tape optimizer
      \details The tape optimizer attempts to remove variables unless they are
      - Direct input variables
      - Direct output variables
      - Variables that affect the output variables.
      \warning Sparse operators can have input variables that are
      marked for removal while at the same time having input variables
      that must be kept. In this case the redundant input variables
      are remapped to an arbitrary variable (number zero - see
      `global::extract_sub`). Formally this is correct but in practice
      it could result in nan function evaluations (Example: Vectorized
      log function with negative values of `x[0]`).
  */
  void eliminate();

  /** \brief Configuration of print method */
  struct print_config {
    std::string prefix, mark;
    int depth;
    print_config();
  };
  /** \brief Print workspace */
  void print(print_config cfg);
  /** \brief Print workspace with default configuration (works from debugger) */
  void print();

  /** \brief Operator with input/output dimension known at compile time */
  template <int ninput_, int noutput_ = 1>
  struct Operator {
    /** \brief Does this operator require dynamic allocation ? */
    static const bool dynamic = false;
    /** \brief Number of operator inputs */
    static const int ninput = ninput_;
    /** \brief Number of operator outputs */
    static const int noutput = noutput_;
    /** \brief Is output of this operator an independent variable ? */
    static const int independent_variable = false;
    /** \brief Is output of this operator a dependent variable ? */
    static const int dependent_variable = false;
    /** \brief Have `input_size` and `output_size` members been defined ? */
    static const bool have_input_size_output_size = false;
    /** \brief Have `increment` and `decrement` members been defined ? */
    static const bool have_increment_decrement = false;
    /** \brief Have `forward` and `reverse` members been defined ? */
    static const bool have_forward_reverse = true;
    /** \brief Have `forward_incr` and `reverse_incr` members been defined ? */
    static const bool have_forward_incr_reverse_decr = false;
    /** \brief Have `forward_mark` and `reverse_mark` members been defined ? */
    static const bool have_forward_mark_reverse_mark = false;
    /** \brief Have `dependencies` member been defined ? */
    static const bool have_dependencies = false;
    /** \brief Is it safe to remap the inputs of this operator?
        \details In general it is illegal to remap operators that
        assume a certain memory layout of its inputs, e.g. BLAS
        routines. Make sure to set to `false` for those cases.
    */
    static const bool allow_remap = true;
    /**
       \brief Does this operator have implicit dependencies?
       \details Signify that this operator uses indirect inputs
       - **and** that all these inputs are available through the
       `dependencies()` function (in particular `have_dependencies` must be true
       in this case)
       - **and** that these indirect dependencies should be used to auto
       implement the marker routines `forward_mark_reverse_mark`. \note FIXME: I
       think `allow_remap` can eventually be deprecated?
    */
    static const bool implicit_dependencies = false;
    /** \brief Should this operator have a static identifier ? */
    static const bool add_static_identifier = false;
    /** \brief Should this operator replay it self by invoking the copy CTOR ?
     */
    static const bool add_forward_replay_copy = false;
    /** \brief Does this class have an `eval` member from which to define
     * forward ? */
    static const bool have_eval = false;
    /** \brief How many times can this operator be doubled ? */
    static const int max_fuse_depth = 2;
    /** \brief Is this a linear operator ? */
    static const bool is_linear = false;
    /** \brief Is this operator a 'smart pointer' (with reference counting) ? */
    static const bool smart_pointer = false;
    /** \brief How to fuse this operator (self) with another (other) */
    OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
      return NULL;
    }
    /** \brief Return operator specific dynamic information (optional) */
    void *operator_data() { return NULL; }
    /** \brief Print this operator (optional) */
    void print(print_config cfg) {}
  };
  /** \brief Operator that requires dynamic allocation. Compile time
      known input/output size */
  template <int ninput, int noutput>
  struct DynamicOperator : Operator<ninput, noutput> {
    /** \copydoc Operator::dynamic */
    static const bool dynamic = true;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth = 0;
  };
  /** \brief Operator that requires dynamic allocation. Compile time
      known input size */
  template <int ninput>
  struct DynamicOutputOperator : Operator<ninput, -1> {
    /** \copydoc Operator::dynamic */
    static const bool dynamic = true;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth = 0;
    Index noutput;
  };
  template <int noutput = 1>
  struct DynamicInputOperator : Operator<-1, noutput> {
    /** \copydoc Operator::dynamic */
    static const bool dynamic = true;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth = 0;
    Index ninput;
  };
  struct DynamicInputOutputOperator : Operator<-1, -1> {
    /** \copydoc Operator::dynamic */
    static const bool dynamic = true;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth = 0;
    Index ninput_, noutput_;
    DynamicInputOutputOperator(Index ninput, Index noutput);
    Index input_size() const;
    Index output_size() const;
    static const bool have_input_size_output_size = true;
  };
  struct UniqueDynamicOperator : Operator<-1, -1> {
    /** \copydoc Operator::dynamic */
    static const bool dynamic = true;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth = 0;
    /** \brief This is an `Operator::smart_pointer` */
    static const bool smart_pointer = false;
    /** \brief Must implement `Operator::input_size` and `Operator::output_size`
     */
    static const bool have_input_size_output_size = true;
  };
  struct SharedDynamicOperator : UniqueDynamicOperator {
    /** \brief This is an `Operator::smart_pointer` */
    static const bool smart_pointer = true;
  };

  /** \brief Add default implementation of mandatory members:
      `input_size` ans `output_size` */
  template <class OperatorBase>
  struct AddInputSizeOutputSize : OperatorBase {
    AddInputSizeOutputSize() {}
    template <class T1>
    AddInputSizeOutputSize(const T1 &x1) : OperatorBase(x1) {}
    template <class T1, class T2>
    AddInputSizeOutputSize(const T1 &x1, const T2 &x2) : OperatorBase(x1, x2) {}
    template <class T1, class T2, class T3>
    AddInputSizeOutputSize(const T1 &x1, const T2 &x2, const T3 &x3)
        : OperatorBase(x1, x2, x3) {}
    template <class T1, class T2, class T3, class T4>
    AddInputSizeOutputSize(const T1 &x1, const T2 &x2, const T3 &x3,
                           const T4 &x4)
        : OperatorBase(x1, x2, x3, x4) {}
    Index input_size() const { return this->ninput; }
    Index output_size() const { return this->noutput; }
    static const bool have_input_size_output_size = true;
  };

  /** \brief Add default implementation of mandatory members:
      `increment` and `decrement` */
  template <class OperatorBase>
  struct AddIncrementDecrement : OperatorBase {
    AddIncrementDecrement() {}
    template <class T1>
    AddIncrementDecrement(const T1 &x1) : OperatorBase(x1) {}
    template <class T1, class T2>
    AddIncrementDecrement(const T1 &x1, const T2 &x2) : OperatorBase(x1, x2) {}
    template <class T1, class T2, class T3>
    AddIncrementDecrement(const T1 &x1, const T2 &x2, const T3 &x3)
        : OperatorBase(x1, x2, x3) {}
    template <class T1, class T2, class T3, class T4>
    AddIncrementDecrement(const T1 &x1, const T2 &x2, const T3 &x3,
                          const T4 &x4)
        : OperatorBase(x1, x2, x3, x4) {}
    void increment(IndexPair &ptr) {
      ptr.first += this->input_size();
      ptr.second += this->output_size();
    }
    void decrement(IndexPair &ptr) {
      ptr.first -= this->input_size();
      ptr.second -= this->output_size();
    }
    static const bool have_increment_decrement = true;
  };

  /** \brief Add default implementation of mandatory members:
      `forward` and `reverse` **from** `forward_incr` and
      `reverse_decr` */
  template <class OperatorBase>
  struct AddForwardReverse : OperatorBase {
    AddForwardReverse() {}
    template <class T1>
    AddForwardReverse(const T1 &x1) : OperatorBase(x1) {}
    template <class T1, class T2>
    AddForwardReverse(const T1 &x1, const T2 &x2) : OperatorBase(x1, x2) {}
    template <class T1, class T2, class T3>
    AddForwardReverse(const T1 &x1, const T2 &x2, const T3 &x3)
        : OperatorBase(x1, x2, x3) {}
    template <class T1, class T2, class T3, class T4>
    AddForwardReverse(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4)
        : OperatorBase(x1, x2, x3, x4) {}

    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      ForwardArgs<Type> args_cpy(args);
      OperatorBase::forward_incr(args_cpy);
    }
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      ReverseArgs<Type> args_cpy(args);
      OperatorBase::increment(args_cpy.ptr);
      OperatorBase::reverse_decr(args_cpy);
    }
    static const bool have_forward_reverse = true;
  };

  /** \brief Add default implementation of mandatory members:
      `forward_incr` and `reverse_decr` **from** `forward` and
      `reverse` */
  template <class OperatorBase>
  struct AddForwardIncrReverseDecr : OperatorBase {
    AddForwardIncrReverseDecr() {}
    template <class T1>
    AddForwardIncrReverseDecr(const T1 &x1) : OperatorBase(x1) {}
    template <class T1, class T2>
    AddForwardIncrReverseDecr(const T1 &x1, const T2 &x2)
        : OperatorBase(x1, x2) {}
    template <class T1, class T2, class T3>
    AddForwardIncrReverseDecr(const T1 &x1, const T2 &x2, const T3 &x3)
        : OperatorBase(x1, x2, x3) {}
    template <class T1, class T2, class T3, class T4>
    AddForwardIncrReverseDecr(const T1 &x1, const T2 &x2, const T3 &x3,
                              const T4 &x4)
        : OperatorBase(x1, x2, x3, x4) {}

    template <class Type>
    void forward_incr(ForwardArgs<Type> &args) {
      OperatorBase::forward(args);
      OperatorBase::increment(args.ptr);
    }

    template <class Type>
    void reverse_decr(ReverseArgs<Type> &args) {
      OperatorBase::decrement(args.ptr);
      OperatorBase::reverse(args);
    }
    static const bool have_forward_incr_reverse_decr = true;
  };

  /** \brief Add default implementation of mandatory members:
      `forward_mark` and `reverse_mark` */
  template <class OperatorBase>
  struct AddForwardMarkReverseMark : OperatorBase {
    AddForwardMarkReverseMark() {}
    template <class T1>
    AddForwardMarkReverseMark(const T1 &x1) : OperatorBase(x1) {}
    template <class T1, class T2>
    AddForwardMarkReverseMark(const T1 &x1, const T2 &x2)
        : OperatorBase(x1, x2) {}
    template <class T1, class T2, class T3>
    AddForwardMarkReverseMark(const T1 &x1, const T2 &x2, const T3 &x3)
        : OperatorBase(x1, x2, x3) {}
    template <class T1, class T2, class T3, class T4>
    AddForwardMarkReverseMark(const T1 &x1, const T2 &x2, const T3 &x3,
                              const T4 &x4)
        : OperatorBase(x1, x2, x3, x4) {}

    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      OperatorBase::forward(args);
    }
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      OperatorBase::reverse(args);
    }

    void forward(ForwardArgs<bool> &args) { args.mark_dense(*this); }
    void reverse(ReverseArgs<bool> &args) { args.mark_dense(*this); }
    static const bool have_forward_mark_reverse_mark = true;
  };

  /** \brief Add default implementation of mandatory member:
      `dependencies` */
  template <class OperatorBase>
  struct AddDependencies : OperatorBase {
    AddDependencies() {}
    template <class T1>
    AddDependencies(const T1 &x1) : OperatorBase(x1) {}
    template <class T1, class T2>
    AddDependencies(const T1 &x1, const T2 &x2) : OperatorBase(x1, x2) {}
    template <class T1, class T2, class T3>
    AddDependencies(const T1 &x1, const T2 &x2, const T3 &x3)
        : OperatorBase(x1, x2, x3) {}
    template <class T1, class T2, class T3, class T4>
    AddDependencies(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4)
        : OperatorBase(x1, x2, x3, x4) {}
    void dependencies(Args<> &args, Dependencies &dep) const {
      Index ninput_ = this->input_size();
      for (Index j = 0; j < ninput_; j++) dep.push_back(args.input(j));
    }
    static const bool have_dependencies = true;
  };

  /** \brief Add default implementation of mandatory member:
      `forward` **from** optional member `eval` */
  template <class OperatorBase, int ninput>
  struct AddForwardFromEval : OperatorBase {};
  /** \brief Unary case */
  template <class OperatorBase>
  struct AddForwardFromEval<OperatorBase, 1> : OperatorBase {
    AddForwardFromEval() {}
    template <class T1>
    AddForwardFromEval(const T1 &x1) : OperatorBase(x1) {}
    template <class T1, class T2>
    AddForwardFromEval(const T1 &x1, const T2 &x2) : OperatorBase(x1, x2) {}
    template <class T1, class T2, class T3>
    AddForwardFromEval(const T1 &x1, const T2 &x2, const T3 &x3)
        : OperatorBase(x1, x2, x3) {}
    template <class T1, class T2, class T3, class T4>
    AddForwardFromEval(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4)
        : OperatorBase(x1, x2, x3, x4) {}
    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      args.y(0) = this->eval(args.x(0));
    }
  };
  /** \brief Binary case */
  template <class OperatorBase>
  struct AddForwardFromEval<OperatorBase, 2> : OperatorBase {
    AddForwardFromEval() {}
    template <class T1>
    AddForwardFromEval(const T1 &x1) : OperatorBase(x1) {}
    template <class T1, class T2>
    AddForwardFromEval(const T1 &x1, const T2 &x2) : OperatorBase(x1, x2) {}
    template <class T1, class T2, class T3>
    AddForwardFromEval(const T1 &x1, const T2 &x2, const T3 &x3)
        : OperatorBase(x1, x2, x3) {}
    template <class T1, class T2, class T3, class T4>
    AddForwardFromEval(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4)
        : OperatorBase(x1, x2, x3, x4) {}
    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      args.y(0) = this->eval(args.x(0), args.x(1));
    }
  };

  /** \brief Reference counting */
  template <bool flag, class dummy>
  struct ReferenceCounter {
    void increment() {}
    void decrement() {}
    size_t operator()() const { return 0; }
  };
  template <class dummy>
  struct ReferenceCounter<true, dummy> {
    size_t counter;
    ReferenceCounter() : counter(0) {}
    void increment() { counter++; }
    void decrement() { counter--; }
    size_t operator()() const { return counter; }
  };

  /** \brief Utility for member completion */
  template <bool flag, class Yes, class No>
  struct if_else {};
  template <class Yes, class No>
  struct if_else<true, Yes, No> {
    typedef Yes type;
  };
  template <class Yes, class No>
  struct if_else<false, Yes, No> {
    typedef No type;
  };

  /** \brief Generate all mandatory members */
  template <class OperatorBase>
  struct CPL {
    static const bool test1 = !OperatorBase::have_eval;
    /** \copydoc AddForwardFromEval */
    typedef typename if_else<
        test1, OperatorBase,
        AddForwardFromEval<OperatorBase, OperatorBase::ninput> >::type Result1;

    static const bool test2 = Result1::have_input_size_output_size;
    /** \copydoc AddInputSizeOutputSize */
    typedef
        typename if_else<test2, Result1, AddInputSizeOutputSize<Result1> >::type
            Result2;

    static const bool test3 = !Result2::have_dependencies;
    /** \copydoc AddDependencies */
    typedef typename if_else<test3, AddDependencies<Result2>, Result2>::type
        Result3;

    static const bool test4 = Result3::have_increment_decrement;
    /** \copydoc AddIncrementDecrement */
    typedef
        typename if_else<test4, Result3, AddIncrementDecrement<Result3> >::type
            Result4;

    static const bool test5 = Result4::have_forward_mark_reverse_mark;
    /** \copydoc AddForwardMarkReverseMark */
    typedef typename if_else<test5, Result4,
                             AddForwardMarkReverseMark<Result4> >::type Result5;

    static const bool test6 = Result5::have_forward_reverse &&
                              !Result5::have_forward_incr_reverse_decr;
    /** \copydoc AddForwardIncrReverseDecr */
    typedef typename if_else<test6, AddForwardIncrReverseDecr<Result5>,
                             Result5>::type Result6;

    static const bool test7 = Result6::have_forward_incr_reverse_decr &&
                              !Result6::have_forward_reverse;
    /** \copydoc AddForwardReverse */
    typedef typename if_else<test7, AddForwardReverse<Result6>, Result6>::type
        Result7;

    typedef Result7 type;
  };

  /** \brief Fuse two operators */
  template <class Operator1, class Operator2>
  struct Fused : Operator<Operator1::ninput + Operator2::ninput,
                          Operator1::noutput + Operator2::noutput> {
    typename CPL<Operator1>::type Op1;
    typename CPL<Operator2>::type Op2;
    /** \copydoc Operator::independent_variable */
    static const int independent_variable =
        Operator1::independent_variable && Operator2::independent_variable;
    /** \copydoc Operator::dependent_variable */
    static const int dependent_variable =
        Operator1::dependent_variable && Operator2::dependent_variable;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth =
        (Operator1::max_fuse_depth < Operator2::max_fuse_depth
             ? Operator1::max_fuse_depth - 1
             : Operator2::max_fuse_depth - 1);
    /** \copydoc Operator::is_linear */
    static const bool is_linear = Operator1::is_linear && Operator2::is_linear;
    template <class Type>
    void forward_incr(ForwardArgs<Type> &args) {
      Op1.forward_incr(args);
      Op2.forward_incr(args);
    }
    template <class Type>
    void reverse_decr(ReverseArgs<Type> &args) {
      Op2.reverse_decr(args);
      Op1.reverse_decr(args);
    }
    /** \copydoc Operator::have_forward_incr_reverse_decr */
    static const bool have_forward_incr_reverse_decr = true;
    /** \copydoc Operator::have_forward_reverse */
    static const bool have_forward_reverse = false;
    const char *op_name() { return "Fused"; }
  };
  /** \brief Replicate an operator
      \details If an operator occurs many times in a row the
      information can be compressed. Knowing the operator type and the
      number of occurences is sufficient.
      \note When replacing a batch of binary operators by `Rep` the
      memory *reduction* is assymptotically `1/3` (ratio will in
      general depend on the configuration typedefs).
  */
  template <class Operator1>
  struct Rep : DynamicOperator<-1, -1> {
    typename CPL<Operator1>::type Op;
    /** \copydoc Operator::independent_variable */
    static const int independent_variable = Operator1::independent_variable;
    /** \copydoc Operator::dependent_variable */
    static const int dependent_variable = Operator1::dependent_variable;
    /** \copydoc Operator::is_linear */
    static const bool is_linear = Operator1::is_linear;
    Index n;
    Rep(Index n) : n(n) {}
    Index input_size() const { return Operator1::ninput * n; }
    Index output_size() const { return Operator1::noutput * n; }
    /** \brief \copydoc Operator::have_input_size_output_size */
    static const bool have_input_size_output_size = true;
    template <class Type>
    void forward_incr(ForwardArgs<Type> &args) {
      for (size_t i = 0; i < (size_t)n; i++) Op.forward_incr(args);
    }
    template <class Type>
    void reverse_decr(ReverseArgs<Type> &args) {
      for (size_t i = 0; i < (size_t)n; i++) Op.reverse_decr(args);
    }
    /** \brief \copydoc Operator::have_forward_incr_reverse_decr */
    static const bool have_forward_incr_reverse_decr = true;
    /** \brief \copydoc Operator::have_forward_reverse */
    static const bool have_forward_reverse = false;
    /** \brief Attempt to apply input compression to this Rep operator.
        \details Try to convert this operator to a
        `global::RepCompress` operator. If conversion fails return
        `NULL`.
    */
    OperatorPure *compress() {
      TMBAD_ASSERT(false);
      std::vector<Index> &inputs = get_glob()->inputs;
      size_t k = Op.input_size();
      size_t start = inputs.size() - k * n;
      std::valarray<Index> increment(k);
      if (k > 0) {
        for (size_t i = 0; i < (size_t)n - 1; i++) {
          std::valarray<Index> v1(&inputs[start + i * k], k);
          std::valarray<Index> v2(&inputs[start + (i + 1) * k], k);
          if (i == 0) {
            increment = v2 - v1;
          } else {
            bool ok = (increment == (v2 - v1)).min();
            if (!ok) return NULL;
          }
        }
      }

      size_t reduction = (n - 1) * k;
      inputs.resize(inputs.size() - reduction);
      return get_glob()->getOperator<RepCompress<Operator1> >(n, increment);
    }
    OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
      OperatorPure *op1 = get_glob()->getOperator<Operator1>();
      if (op1 == other) {
        this->n++;
        return self;
      }
      return NULL;
    }
    const char *op_name() { return "Rep"; }
  };
  /** \brief Replicate an operator and apply input compression
      \details This operator is like `global::Rep` except that it also
      represents input compression. Input compression means that the
      input indices of any given operator can be obtained by adding a
      constant vector to the inputs of the previous operator.
      \note When replacing a batch of binary operators by
      `RepCompress` the memory *reduction* is assymptotically `2/3`
      (ratio will in general depend on the configuration typedefs).
      \warning TO BE DEPRECATED. HAS IMPLICIT DEPENDENCIES.
  */
  template <class Operator1>
  struct RepCompress : DynamicOperator<-1, -1> {
    /** \copydoc Operator::independent_variable */
    static const int independent_variable = Operator1::independent_variable;
    /** \copydoc Operator::dependent_variable */
    static const int dependent_variable = Operator1::dependent_variable;
    /** \copydoc Operator::is_linear */
    static const bool is_linear = Operator1::is_linear;
    typename CPL<Operator1>::type Op;
    Index n;

    std::valarray<Index> increment_pattern;
    RepCompress(Index n, std::valarray<Index> v) : n(n), increment_pattern(v) {}
    Index input_size() const { return Operator1::ninput; }
    Index output_size() const { return Operator1::noutput * n; }
    /** \brief \copydoc Operator::have_input_size_output_size */
    static const bool have_input_size_output_size = true;
    /** \brief Forward method applicable for Scalar and bool case */
    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      std::valarray<Index> inputs(input_size());
      for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
      ForwardArgs<Type> args_cpy = args;
      args_cpy.inputs = &inputs[0];
      args_cpy.ptr.first = 0;
      for (size_t i = 0; i < (size_t)n; i++) {
        Op.forward(args_cpy);
        inputs += this->increment_pattern;
        args_cpy.ptr.second += Op.output_size();
      }
    }
    /** \brief Reverse method applicable for Scalar and bool case */
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      std::valarray<Index> inputs(input_size());
      for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
      inputs += n * this->increment_pattern;
      ReverseArgs<Type> args_cpy = args;
      args_cpy.inputs = &inputs[0];
      args_cpy.ptr.first = 0;
      args_cpy.ptr.second += n * Op.output_size();
      for (size_t i = 0; i < (size_t)n; i++) {
        inputs -= this->increment_pattern;
        args_cpy.ptr.second -= Op.output_size();
        Op.reverse(args_cpy);
      }
    }
    /** \brief Calculate all inputs */
    void dependencies(Args<> &args, Dependencies &dep) const {
      std::valarray<Index> inputs(input_size());
      for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
      for (size_t i = 0; i < (size_t)n; i++) {
        dep.insert(dep.end(), &inputs[0], &inputs[0] + inputs.size());
        inputs += this->increment_pattern;
      }
    }
    static const bool have_dependencies = true;
    void forward(ForwardArgs<Writer> &args) {
      std::valarray<Index> inputs(Op.input_size());
      for (size_t i = 0; i < (size_t)Op.input_size(); i++)
        inputs[i] = args.input(i);
      std::valarray<Index> outputs(Op.output_size());
      for (size_t i = 0; i < (size_t)Op.output_size(); i++)
        outputs[i] = args.output(i);
      Writer w;
      int ninp = Op.input_size();
      int nout = Op.output_size();

      w << "for (int count = 0, "
        << "i[" << ninp << "]=" << inputs << ", "
        << "di[" << ninp << "]=" << increment_pattern << ", "
        << "o[" << nout << "]=" << outputs << "; "
        << "count < " << n << "; count++) {\n";

      w << "    ";
      ForwardArgs<Writer> args_cpy = args;
      args_cpy.set_indirect();
      Op.forward(args_cpy);
      w << "\n";

      w << "    ";
      w << "for (int k=0; k<" << ninp << "; k++) i[k] += di[k];\n";
      w << "    ";
      w << "for (int k=0; k<" << nout << "; k++) o[k] += " << nout << ";\n";

      w << "  ";
      w << "}";
    }
    void reverse(ReverseArgs<Writer> &args) {
      std::valarray<Index> inputs(Op.input_size());
      for (size_t i = 0; i < (size_t)Op.input_size(); i++)
        inputs[i] = args.input(i);
      inputs += n * increment_pattern;
      std::valarray<Index> outputs(Op.output_size());
      for (size_t i = 0; i < (size_t)Op.output_size(); i++)
        outputs[i] = args.output(i);
      outputs += n * Op.output_size();
      Writer w;
      int ninp = Op.input_size();
      int nout = Op.output_size();

      w << "for (int count = 0, "
        << "i[" << ninp << "]=" << inputs << ", "
        << "di[" << ninp << "]=" << increment_pattern << ", "
        << "o[" << nout << "]=" << outputs << "; "
        << "count < " << n << "; count++) {\n";

      w << "    ";
      w << "for (int k=0; k<" << ninp << "; k++) i[k] -= di[k];\n";
      w << "    ";
      w << "for (int k=0; k<" << nout << "; k++) o[k] -= " << nout << ";\n";

      w << "    ";
      ReverseArgs<Writer> args_cpy = args;
      args_cpy.set_indirect();
      Op.reverse(args_cpy);
      w << "\n";

      w << "  ";
      w << "}";
    }
    /** \brief \copydoc Operator::have_forward_incr_reverse_decr */
    static const bool have_forward_incr_reverse_decr = false;
    /** \brief \copydoc Operator::have_forward_reverse */
    static const bool have_forward_reverse = true;
    /** \brief \copydoc Operator::have_forward_mark_reverse_mark */
    static const bool have_forward_mark_reverse_mark = true;
    const char *op_name() { return "CRep"; }

    struct operator_data_t {
      OperatorPure *Op;
      Index n;
      std::valarray<Index> ip;
      operator_data_t(const RepCompress &x)
          : Op(get_glob()->getOperator<Operator1>()),
            n(x.n),
            ip(x.increment_pattern) {}
      ~operator_data_t() { Op->deallocate(); }
      bool operator==(const operator_data_t &other) {
        return (Op == other.Op) && (ip.size() == other.ip.size()) &&
               ((ip - other.ip).min() == 0);
      }
    };
    void *operator_data() { return new operator_data_t(*this); }
    OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
      if (this->op_name() == other->op_name()) {
        operator_data_t *p1 =
            static_cast<operator_data_t *>(self->operator_data());
        operator_data_t *p2 =
            static_cast<operator_data_t *>(other->operator_data());
        bool match = (*p1 == *p2);
        int other_n = p2->n;
        delete p1;
        delete p2;
        if (match) {
          std::vector<Index> &inputs = get_glob()->inputs;
          size_t reduction = increment_pattern.size();
          inputs.resize(inputs.size() - reduction);
          this->n += other_n;
          other->deallocate();
          return self;
        }
      }
      return NULL;
    }
  };

  /** \brief Operator auto-completion.

      Generate the mandatory members required by `OperatorPure` based on a given
     `Operator`.
  */
  template <class OperatorBase>
  struct Complete : OperatorPure {
    typename CPL<OperatorBase>::type Op;
    Complete() {}
    template <class T1>
    Complete(const T1 &x1) : Op(x1) {}
    template <class T1, class T2>
    Complete(const T1 &x1, const T2 &x2) : Op(x1, x2) {}
    template <class T1, class T2, class T3>
    Complete(const T1 &x1, const T2 &x2, const T3 &x3) : Op(x1, x2, x3) {}
    template <class T1, class T2, class T3, class T4>
    Complete(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4)
        : Op(x1, x2, x3, x4) {}
    ~Complete() {}
    void forward(ForwardArgs<Scalar> &args) { Op.forward(args); }
    void reverse(ReverseArgs<Scalar> &args) { Op.reverse(args); }
    void forward_incr(ForwardArgs<Scalar> &args) { Op.forward_incr(args); }
    void reverse_decr(ReverseArgs<Scalar> &args) { Op.reverse_decr(args); }

    void forward(ForwardArgs<Replay> &args) {
      if (Op.add_forward_replay_copy)
        forward_replay_copy(args);
      else
        Op.forward(args);
    }
    void reverse(ReverseArgs<Replay> &args) { Op.reverse(args); }
    void forward_incr(ForwardArgs<Replay> &args) {
      if (Op.add_forward_replay_copy) {
        forward_replay_copy(args);
        increment(args.ptr);
      } else
        Op.forward_incr(args);
    }
    void reverse_decr(ReverseArgs<Replay> &args) { Op.reverse_decr(args); }

    void forward(ForwardArgs<bool> &args) { Op.forward(args); }
    void reverse(ReverseArgs<bool> &args) { Op.reverse(args); }
    void forward_incr(ForwardArgs<bool> &args) { Op.forward_incr(args); }
    void reverse_decr(ReverseArgs<bool> &args) { Op.reverse_decr(args); }
    void forward_incr_mark_dense(ForwardArgs<bool> &args) {
      args.mark_dense(Op);
      Op.increment(args.ptr);
    };

    void forward(ForwardArgs<Writer> &args) { Op.forward(args); }
    void reverse(ReverseArgs<Writer> &args) { Op.reverse(args); }
    void forward_incr(ForwardArgs<Writer> &args) { Op.forward_incr(args); }
    void reverse_decr(ReverseArgs<Writer> &args) { Op.reverse_decr(args); }
    /** \brief Move a stack allocated instance to the heap and let the
       `operation_stack` manage the memory \details This is only useful for
       dynamic operators and therfore disallowed for static operators.
    */
    std::vector<ad_plain> operator()(const std::vector<ad_plain> &x) {
      TMBAD_ASSERT2(OperatorBase::dynamic,
                    "Stack to heap copy only allowed for dynamic operators");
      Complete *pOp = new Complete(*this);
      TMBAD_ASSERT2(pOp->ref_count() == 0, "Operator already on the heap");
      pOp->ref_count.increment();
      return get_glob()->add_to_stack<OperatorBase>(pOp, x);
    }
    template <class T>
    std::vector<T> operator()(const std::vector<T> &x) {
      std::vector<ad_plain> x_(x.begin(), x.end());
      std::vector<ad_plain> y_ = (*this)(x_);
      std::vector<T> y(y_.begin(), y_.end());
      return y;
    }
    void forward_replay_copy(ForwardArgs<Replay> &args) {
      std::vector<ad_plain> x(Op.input_size());
      for (size_t i = 0; i < x.size(); i++) x[i] = args.x(i);
      std::vector<ad_plain> y =
          get_glob()->add_to_stack<OperatorBase>(this->copy(), x);
      for (size_t i = 0; i < y.size(); i++) args.y(i) = y[i];
    }
    void dependencies(Args<> &args, Dependencies &dep) {
      Op.dependencies(args, dep);
    }
    void increment(IndexPair &ptr) { Op.increment(ptr); }
    void decrement(IndexPair &ptr) { Op.decrement(ptr); }
    Index input_size() { return Op.input_size(); }
    Index output_size() { return Op.output_size(); }
    const char *op_name() { return Op.op_name(); }
    void print(print_config cfg) { Op.print(cfg); }

    template <class Operator_, int depth>
    struct SelfFuse {
      typedef Rep<Operator_> type;
      OperatorPure *operator()() {
        return get_glob()->template getOperator<type>(2);
      }
    };
    template <class Operator_>
    struct SelfFuse<Operator_, 0> {
      OperatorPure *operator()() { return NULL; }
    };
    OperatorPure *self_fuse() {
      return SelfFuse<OperatorBase, OperatorBase::max_fuse_depth>()();
    }
    OperatorPure *other_fuse(OperatorPure *other) {
      return Op.other_fuse(this, other);
    }
    ReferenceCounter<OperatorBase::smart_pointer, void> ref_count;
    OperatorPure *copy() {
      if (Op.smart_pointer) {
        ref_count.increment();
        return this;
      } else if (Op.dynamic)
        return new Complete(*this);
      else
        return this;
    }
    void deallocate() {
      if (!Op.dynamic) return;
      if (Op.smart_pointer) {
        if (ref_count() > 1) {
          ref_count.decrement();
          return;
        }
      }
      delete this;
    }
    OperatorPure::op_info info() {
      OperatorPure::op_info info;
      info.dynamic = Op.dynamic;
      info.smart_pointer = Op.smart_pointer;
      info.independent_variable = Op.independent_variable;
      info.dependent_variable = Op.dependent_variable;
      info.allow_remap = Op.allow_remap;
      info.is_linear = Op.is_linear;

      return info;
    }
    void *identifier() {
      if (Op.add_static_identifier) {
        static void *id = new char();
        return id;
      } else
        return (void *)this;
    }
    void *operator_data() { return Op.operator_data(); }
    void *incomplete() { return &Op; }
  };

  template <class OperatorBase>
  Complete<OperatorBase> *getOperator() const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()();
  }
  template <class OperatorBase, class T1>
  Complete<OperatorBase> *getOperator(const T1 &x1) const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
        x1);
  }
  template <class OperatorBase, class T1, class T2>
  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2) const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
        x1, x2);
  }
  template <class OperatorBase, class T1, class T2, class T3>
  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2,
                                      const T3 &x3) const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
        x1, x2, x3);
  }
  template <class OperatorBase, class T1, class T2, class T3, class T4>
  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2, const T3 &x3,
                                      const T4 &x4) const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
        x1, x2, x3, x4);
  }
  struct InvOp : Operator<0> {
    static const int independent_variable = true;
    template <class Type>
    void forward(ForwardArgs<Type> &args) {}
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {}
    const char *op_name();
  };

  struct DepOp : Operator<1> {
    static const bool is_linear = true;
    static const int dependent_variable = true;
    static const bool have_eval = true;
    template <class Type>
    Type eval(Type x0) {
      return x0;
    }
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      args.dx(0) += args.dy(0);
    }
    const char *op_name();
  };

  struct ConstOp : Operator<0, 1> {
    static const bool is_linear = true;
    template <class Type>
    void forward(ForwardArgs<Type> &args) {}
    void forward(ForwardArgs<Replay> &args);
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {}
    const char *op_name();
    void forward(ForwardArgs<Writer> &args);
  };
  /** \brief Empty operator **without** inputs or outputs */
  struct NullOp : Operator<0, 0> {
    NullOp();
    const char *op_name();
    template <class T>
    void forward(ForwardArgs<T> &args) {}
    template <class T>
    void reverse(ReverseArgs<T> &args) {}
  };
  /** \brief Empty operator **with** inputs and outputs */
  struct NullOp2 : DynamicInputOutputOperator {
    NullOp2(Index ninput, Index noutput);
    const char *op_name();
    template <class T>
    void forward(ForwardArgs<T> &args) {}
    template <class T>
    void reverse(ReverseArgs<T> &args) {}
  };
  /** \brief Reference a variable on another tape

      \details References to other tapes are a handy alternative to
      independent variables, but there are caveats. One must note that
      these references are absolute pointers into an other tape's
      value space. If the other tape is optimized the value indices
      might get remapped and the absolute references would no longer
      be valid!
      It follows that we must have some strict limitations on their use:

      - References to other tapes are only allowed to exist
        temporarily, during tape construction.
      - If a child tape has references to a parent tape the parent
        tape is not complete until all references are 'resolved',
        i.e. the child tape has been replayed on top of the parent.
      - Atomic functions can never be allowed to contain references
        because atomic function replay does not change whats inside
        the atomic function. In other words, references would persist
        during replay and never get resolved.
  */
  struct RefOp : DynamicOperator<0, 1> {
    static const bool dynamic = true;
    global *glob;
    Index i;
    RefOp(global *glob, Index i);
    /** \brief RefOp copies value from other tape */
    void forward(ForwardArgs<Scalar> &args);
    /** \brief When replayed it 'resolves' references if applicable */
    void forward(ForwardArgs<Replay> &args);
    /** \brief Reverse mode updates are forbidden until all references are
     * resolved */
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      TMBAD_ASSERT2(false,
                    "Reverse mode updates are forbidden until all references "
                    "are resolved");
    }
    /** \brief Reverse mode updates are allowed in replay mode */
    void reverse(ReverseArgs<Replay> &args);
    const char *op_name();
  };

  typedef Operator<1> UnaryOperator;
  typedef Operator<2> BinaryOperator;

  OperatorPure *Fuse(OperatorPure *Op1, OperatorPure *Op2);

  static bool fuse;

  /** \brief Enable/disable operator fusion \details This function is
      applicable if TMBad has been compiled with the preprocessor flag
      `-DFUSE`. The function sets/unsets the *global* `fuse` flag.
  */
  void set_fuse(bool flag);

  /** \brief Add `OperatorPure` to stack and trigger operator fusion
      if enabled */
  template <bool dynamic>
  void add_to_opstack(OperatorPure *pOp) {
    if (fuse) {
      while (this->opstack.size() > 0) {
        OperatorPure *OpTry = this->Fuse(this->opstack.back(), pOp);
        if (OpTry == NULL) break;

        this->opstack.pop_back();
        pOp = OpTry;

        this->opstack.any_dynamic = true;
      }
    }

    this->opstack.push_back<dynamic>(pOp);
  }
  /** \brief Add nullary operator to the stack based on its **result**  */
  template <class OperatorBase>
  ad_plain add_to_stack(Scalar result = 0) {
    ad_plain ans;
    ans.index = this->values.size();

    this->values.push_back(result);

    Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
    add_to_opstack<OperatorBase::dynamic>(pOp);

    TMBAD_ASSERT(!((size_t)(values.size()) >=
                   (size_t)std::numeric_limits<unsigned int>::max()));
    return ans;
  }
  /** \brief Add unary operator to the stack based on its **argument** */
  template <class OperatorBase>
  ad_plain add_to_stack(const ad_plain &x) {
    ad_plain ans;
    ans.index = this->values.size();

    this->values.push_back(OperatorBase().eval(x.Value()));

    this->inputs.push_back(x.index);

    Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
    add_to_opstack<OperatorBase::dynamic>(pOp);

    TMBAD_ASSERT(!((size_t)(values.size()) >=
                   (size_t)std::numeric_limits<unsigned int>::max()));
    TMBAD_ASSERT(!((size_t)(inputs.size()) >=
                   (size_t)std::numeric_limits<unsigned int>::max()));
    return ans;
  }
  /** \brief Add binary operator to the stack based on its two **arguments** */
  template <class OperatorBase>
  ad_plain add_to_stack(const ad_plain &x, const ad_plain &y) {
    ad_plain ans;
    ans.index = this->values.size();

    this->values.push_back(OperatorBase().eval(x.Value(), y.Value()));

    this->inputs.push_back(x.index);
    this->inputs.push_back(y.index);

    Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
    add_to_opstack<OperatorBase::dynamic>(pOp);

    TMBAD_ASSERT(!((size_t)(values.size()) >=
                   (size_t)std::numeric_limits<unsigned int>::max()));
    TMBAD_ASSERT(!((size_t)(inputs.size()) >=
                   (size_t)std::numeric_limits<unsigned int>::max()));
    return ans;
  }
  template <class OperatorBase>
  ad_range add_to_stack(ad_range lhs, ad_range rhs) {
    IndexPair ptr((Index)inputs.size(), (Index)values.size());
    Complete<OperatorBase> *pOp =
        this->template getOperator<OperatorBase>(lhs, rhs);
    size_t n = pOp->output_size();
    ad_range ans(values.size(), n);
    inputs.push_back(ad_plain(lhs).index);
    inputs.push_back(ad_plain(rhs).index);
    opstack.push_back<OperatorBase::dynamic>(pOp);
    values.resize(values.size() + n);
    ForwardArgs<Scalar> args(inputs, values);
    args.ptr = ptr;
    pOp->forward(args);

    TMBAD_ASSERT(!((size_t)(values.size()) >=
                   (size_t)std::numeric_limits<unsigned int>::max()));
    TMBAD_ASSERT(!((size_t)(inputs.size()) >=
                   (size_t)std::numeric_limits<unsigned int>::max()));
    return ans;
  }
  /** \brief Add vector operator to the stack based on its **vector argument**
   */
  template <class OperatorBase>
  std::vector<ad_plain> add_to_stack(OperatorPure *pOp,
                                     const std::vector<ad_plain> &x) {
    IndexPair ptr((Index)inputs.size(), (Index)values.size());
    size_t m = pOp->input_size();
    size_t n = pOp->output_size();
    ad_range ans(values.size(), n);
    for (size_t i = 0; i < m; i++) inputs.push_back(x[i].index);
    opstack.push_back<OperatorBase::dynamic>(pOp);
    values.resize(values.size() + n);
    ForwardArgs<Scalar> args(inputs, values);
    args.ptr = ptr;
    pOp->forward(args);

    TMBAD_ASSERT(!((size_t)(values.size()) >=
                   (size_t)std::numeric_limits<unsigned int>::max()));
    TMBAD_ASSERT(!((size_t)(inputs.size()) >=
                   (size_t)std::numeric_limits<unsigned int>::max()));
    std::vector<ad_plain> out(n);
    for (size_t i = 0; i < n; i++) out[i].index = ans.index + i;
    return out;
  }

  struct ad_plain {
    Index index;
    static const Index NA = (Index)-1;
    bool initialized() const;
    bool ontape() const;
    /** \brief Compatibiltiy with ad_aug */
    void addToTape() const;
    /** \brief Get the tape of this ad_plain */
    global *glob() const;
    /** \brief Override this ad_plain
        \details Ignored
    */
    void override_by(const ad_plain &x) const;

    /** \brief Default CTOR \details Default constuction, along with
        copy construction and copy assignment, are **the only
        `ad_plain` operations that do not modify the tape**.
    */
    ad_plain();

    /** \brief CTOR from the `Scalar` */
    ad_plain(Scalar x);
    /** \brief CTOR from `global::ad_aug` */
    ad_plain(ad_aug x);

    /** \brief Copy operation */
    struct CopyOp : Operator<1> {
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0) {
        return x0;
      }
      Replay eval(Replay x0);
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        args.dx(0) += args.dy(0);
      }
      const char *op_name();
    };
    /** \brief Deep copy existing ad_plain. Result will be at the end
        of the active sequence.
        \details In contrast to the default copy CTOR this function
        puts an actual copy instruction on the tape. This is mainly
        useful to ensure a contiguous memory layout of selected
        variables.
    */
    ad_plain copy() const;
    /** \brief Copy value and set derivative to zero

        \details The purpose of this operator is to allow using
        temporaries without tracking their derivatives. The ValOp has
        the following properties:
        - Its reverse mode derivatives are zero.
        - Its ingoing edges are reported to be empty. It follows that sparse
       derivatives are zero.
        - Otherwise, it behaves like the copy operator `CopyOp`.
    */
    struct ValOp : Operator<1> {
      static const bool have_dependencies = true;
      static const bool have_eval = true;
      /** \brief Evaluation is as the CopyOp except in replay mode */
      template <class Type>
      Type eval(Type x0) {
        return x0;
      }
      Replay eval(Replay x0);
      /** \brief Derivatives in the dense case are zero */
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {}
      /** \brief Derivatives in the sparse case are untracked, i.e. zero.
          \note Inputs to `ValOp` are **not** marked for removal by
          the tape optimizer despite the empty `dependencies` (because
          the tape optimizer uses the boolean pattern which has the
          default `mark_dense` implementation).
      */
      void dependencies(Args<> &args, Dependencies &dep) const;
      const char *op_name();
    };
    /** \brief Deep copy existing ad_plain and set derivative to
        zero. Result will be at the end of the active sequence.
    */
    ad_plain copy0() const;

    template <bool left_var, bool right_var>
    struct AddOp_ : BinaryOperator {
      static const bool is_linear = true;
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0, Type x1) {
        return x0 + x1;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        if (left_var) args.dx(0) += args.dy(0);
        if (right_var) args.dx(1) += args.dy(0);
      }
      const char *op_name() { return "AddOp"; }
      OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
        if (other == get_glob()->getOperator<MulOp>()) {
          return get_glob()->getOperator<Fused<AddOp_, MulOp> >();
        }
        return NULL;
      }
    };
    typedef AddOp_<true, true> AddOp;
    ad_plain operator+(const ad_plain &other) const;

    template <bool left_var, bool right_var>
    struct SubOp_ : BinaryOperator {
      static const bool is_linear = true;
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0, Type x1) {
        return x0 - x1;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        if (left_var) args.dx(0) += args.dy(0);
        if (right_var) args.dx(1) -= args.dy(0);
      }
      const char *op_name() { return "SubOp"; }
    };
    typedef SubOp_<true, true> SubOp;
    ad_plain operator-(const ad_plain &other) const;

    template <bool left_var, bool right_var>
    struct MulOp_ : BinaryOperator {
      static const bool have_eval = true;
      static const bool is_linear = !left_var || !right_var;
      template <class Type>
      Type eval(Type x0, Type x1) {
        return x0 * x1;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        if (left_var) args.dx(0) += args.x(1) * args.dy(0);
        if (right_var) args.dx(1) += args.x(0) * args.dy(0);
      }
      const char *op_name() { return "MulOp"; }
    };
    typedef MulOp_<true, true> MulOp;
    ad_plain operator*(const ad_plain &other) const;
    ad_plain operator*(const Scalar &other) const;

    template <bool left_var, bool right_var>
    struct DivOp_ : BinaryOperator {
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0, Type x1) {
        return x0 / x1;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        Type tmp0 = args.dy(0) / args.x(1);
        if (left_var) args.dx(0) += tmp0;
        if (right_var) args.dx(1) -= args.y(0) * tmp0;
      }
      const char *op_name() { return "DivOp"; }
    };
    typedef DivOp_<true, true> DivOp;
    ad_plain operator/(const ad_plain &other) const;

    struct NegOp : UnaryOperator {
      static const bool is_linear = true;
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0) {
        return -x0;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        args.dx(0) -= args.dy(0);
      }
      const char *op_name();
    };
    ad_plain operator-() const;

    ad_plain &operator+=(const ad_plain &other);
    ad_plain &operator-=(const ad_plain &other);
    ad_plain &operator*=(const ad_plain &other);
    ad_plain &operator/=(const ad_plain &other);

    void Dependent();

    void Independent();
    Scalar &Value();
    Scalar Value() const;
    Scalar Value(global *glob) const;
    Scalar &Deriv();
  };
  /** \brief Previous ad context to be restored then this context ends
      \details The value `NULL` signifies that either
      - this glob is not in use **or**
      - this glob is the first in the context stack
  */
  global *parent_glob;
  /** \brief Is this glob present in the context stack? */
  bool in_use;
  /** \brief Enable ad calulations to be piped to this glob
      \note It is not allowed to start the same ad context several times
  */
  void ad_start();
  /** \brief Stop ad calculations from being piped to this glob. */
  void ad_stop();
  void Independent(std::vector<ad_plain> &x);
  /** \brief Range of `ad_plain` with optional extra information */
  struct ad_range : ad_plain {
    size_t n;
    size_t c;
    ad_range(ad_plain x, size_t n);
    ad_range(Index idx, size_t n);
    ad_range(ad_plain x, size_t r, size_t c);
    size_t size() const;
    size_t rows() const;
    size_t cols() const;
  };
  /** \brief Augmented AD type \details `ad_aug` is an augmentation of
      the simple type `ad_plain`. It tries hard to keep tapes small by
      only adding operations when strictly necessary. For instance, it
      detects whether a variable is constant or identical zero to see
      if a reduction is possible to avoid adding the variable to the
      tape. The downside is that `ad_aug` takes up slightly more
      memory.
 */
  struct ad_aug {
    /** \brief If `taped_value` **is** initialized (see
        `ad_plain::initialize`) this is the value of `ad_aug`. */
    mutable ad_plain taped_value;
    /** \brief If `taped_value` is **not** initialized then this is
        the value of `ad_aug`. Otherwise, `value` is un-defined, and
        `glob` is the tape on which this `ad_aug` resides. */
    union {
      Scalar value;
      mutable global *glob;
    } data;
    bool ontape() const;
    bool constant() const;
    /** \brief Get the tape of this ad_aug
        \return Returns the tape address of this variable **if**
        variable belongs to *some* tape.  Otherwise `NULL` is
        returned.
    */
    global *glob() const;
    /** \brief Return the underlying scalar value of this `ad_aug`. */
    Scalar Value() const;
    /** \brief Default CTOR \details Leaves `taped_value` in
        uninitialized state to signify that this variable is
        constant. */
    ad_aug();
    /** \brief CTOR from scalar \details Leaves `taped_value` in
        uninitialized state to signify that this variable is
        constant. */
    ad_aug(Scalar x);
    /** \brief CTOR from `ad_plain` \details Leaves `value` uninitialized. */
    ad_aug(ad_plain x);
    /** \brief Force this variable to be put on the tape. \details If
        variable already is on the tape nothing happens. In particular
        the variable cannot be assumed to be the most recently
        added. */
    void addToTape() const;
    /** \brief Override this ad_plain and set glob to `get_glob()`
        \warning Internal use only
    */
    void override_by(const ad_plain &x) const;
    /** \brief Check if 'glob' exists in the active context stack */
    bool in_context_stack(global *glob) const;
    /** \brief Deep copy existing ad_aug. Result will be last value of
        the current tape. */
    ad_aug copy() const;
    /** \brief Deep copy existing ad_aug *wihout derivatives*. */
    ad_aug copy0() const;
    /** \brief If `true` this variable is identical zero. If `false`
        nothing can be concluded. */
    bool identicalZero() const;
    /** \brief If `true` this variable is identical one. If `false`
        nothing can be concluded. */
    bool identicalOne() const;
    /** \brief If `true` 'this' and 'other' are both constants. If
        `false` nothing can be concluded (they might be constants on
        the tape) */
    bool bothConstant(const ad_aug &other) const;
    /** \brief If `true` 'this' and 'other' are identical. If `false`
        nothing can be concluded (they might be equal, but different,
        expressions on the tape) */
    bool identical(const ad_aug &other) const;
    /** \brief Addition \details Included reductions:
        - 0 + x = x
        - x + 0 = x
    */
    ad_aug operator+(const ad_aug &other) const;
    /** \brief Subtraction \details Included reductions:
        - x - 0 = x
        - 0 - x = -x
        - x - x = 0
    */
    ad_aug operator-(const ad_aug &other) const;
    /** \brief Negation */
    ad_aug operator-() const;
    /** \brief Multiplication \details Included reductions:
        - 0 * x = 0
        - x * 0 = 0
        - 1 * x = x
        - x * 1 = x
    */
    ad_aug operator*(const ad_aug &other) const;
    /** \brief Division \details Included reductions:
        - 0 / x = 0
        - x / 1 = x
    */
    ad_aug operator/(const ad_aug &other) const;
    /** \brief Compound assignment taking advantage of `operator+`
        reductions */
    ad_aug &operator+=(const ad_aug &other);
    /** \brief Compound assignment taking advantage of `operator-`
        reductions */
    ad_aug &operator-=(const ad_aug &other);
    /** \brief Compound assignment taking advantage of `operator*`
        reductions */
    ad_aug &operator*=(const ad_aug &other);
    /** \brief Compound assignment taking advantage of `operator/`
        reductions */
    ad_aug &operator/=(const ad_aug &other);
    /** \brief Set this `ad_aug` as dependent */
    void Dependent();
    /** \brief Set this `ad_aug` as independent */
    void Independent();
    Scalar &Value();
    Scalar &Deriv();
  };
  void Independent(std::vector<ad_aug> &x);
};

template <class S, class T>
std::ostream &operator<<(std::ostream &os, const std::pair<S, T> &x) {
  os << "(" << x.first << ", " << x.second << ")";
  return os;
}

std::ostream &operator<<(std::ostream &os, const global::ad_plain &x);
std::ostream &operator<<(std::ostream &os, const global::ad_aug &x);

/** \brief Enable weak comparison operators of an ad type
    \details By weak comparison we mean that these comparisons are
    performed on the numerical values and *not* on expression
    trees. One must in general be careful using comparison operators
    with ad types because
    - Comparison (branching) can ruin differentiability.
    - Branching requires retaping. That is, we must re-build the
    expression tree for each function evaluation to be sure the
    branching is correctly differentiated.
*/
template <class T>
struct adaptive : T {
  adaptive() {}
  template <class T1>
  adaptive(const T1 &x1) : T(x1) {}
  template <class T1, class T2>
  adaptive(const T1 &x1, const T2 &x2) : T(x1, x2) {}
  template <class T1, class T2, class T3>
  adaptive(const T1 &x1, const T2 &x2, const T3 &x3) : T(x1, x2, x3) {}
  template <class T1, class T2, class T3, class T4>
  adaptive(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4)
      : T(x1, x2, x3, x4) {}
  bool operator==(const T &other) const {
    return this->Value() == other.Value();
  }
  bool operator!=(const T &other) const {
    return this->Value() != other.Value();
  }
  bool operator>=(const T &other) const {
    return this->Value() >= other.Value();
  }
  bool operator<=(const T &other) const {
    return this->Value() <= other.Value();
  }
  bool operator<(const T &other) const { return this->Value() < other.Value(); }
  bool operator>(const T &other) const { return this->Value() > other.Value(); }

  adaptive operator+(const T &other) const {
    return adaptive(T(*this) + other);
  }
  adaptive operator-(const T &other) const {
    return adaptive(T(*this) - other);
  }
  adaptive operator*(const T &other) const {
    return adaptive(T(*this) * other);
  }
  adaptive operator/(const T &other) const {
    return adaptive(T(*this) / other);
  }

  adaptive operator-() const { return adaptive(-(T(*this))); }
};

typedef global::ad_plain ad_plain;
typedef global::ad_aug ad_aug;
typedef global::Replay Replay;
typedef adaptive<ad_aug> ad_adapt;
typedef global::OperatorPure OperatorPure;
typedef global::operation_stack operation_stack;
typedef global::print_config print_config;
/** \brief Construct `ad_plain` from index
    \details `ad_plain` cannot have CTOR from `Index` because it would
    conflict with `Scalar` CTOR. The class `ad_plain_index` is needed to
    allow CTOR from `Index`.
*/
struct ad_plain_index : ad_plain {
  ad_plain_index(const Index &i);
  ad_plain_index(const ad_plain &x);
};
struct ad_aug_index : ad_aug {
  ad_aug_index(const Index &i);
  ad_aug_index(const ad_aug &x);
  ad_aug_index(const ad_plain &x);
};

template <class T>
void Independent(std::vector<T> &x) {
  for (size_t i = 0; i < x.size(); i++) x[i].Independent();
}
template <class T>
void Dependent(std::vector<T> &x) {
  for (size_t i = 0; i < x.size(); i++) x[i].Dependent();
}
template <class T>
Scalar Value(T x) {
  return x.Value();
}
Scalar Value(Scalar x);

/** \brief Is this ad vector available as a contiguous block on the tape?
    \details Template type 'T' can be
    - ad_plain
    - ad_aug
    - ad_adapt
*/
template <class T>
bool isContiguous(std::vector<T> &x) {
  bool ok = true;
  Index j_previous;
  for (size_t i = 0; i < (size_t)x.size(); i++) {
    if (!x[i].ontape()) {
      ok = false;
      break;
    }
    Index j = ad_plain(x[i]).index;
    if (i > 0) {
      if (j != j_previous + 1) {
        ok = false;
        break;
      }
    }
    j_previous = j;
  }
  return ok;
}
/** \brief Get contiguous (deep) copy of this vector
    \details Template type 'T' can be
    - ad_plain
    - ad_aug
    - ad_adapt
*/
template <class T>
std::vector<T> getContiguous(const std::vector<T> &x) {
  std::vector<T> y(x.size());
  for (size_t i = 0; i < (size_t)x.size(); i++) y[i] = x[i].copy();
  return y;
}
/** \brief Make contiguous ad vector
    \details Template type 'T' can be
    - ad_plain
    - ad_aug
    - ad_adapt
*/
template <class T>
void forceContiguous(std::vector<T> &x) {
  if (!isContiguous(x)) x = getContiguous(x);
}
ad_aug operator+(const double &x, const ad_aug &y);
ad_aug operator-(const double &x, const ad_aug &y);
ad_aug operator*(const double &x, const ad_aug &y);
ad_aug operator/(const double &x, const ad_aug &y);

bool operator<(const double &x, const ad_adapt &y);
bool operator<=(const double &x, const ad_adapt &y);
bool operator>(const double &x, const ad_adapt &y);
bool operator>=(const double &x, const ad_adapt &y);
bool operator==(const double &x, const ad_adapt &y);
bool operator!=(const double &x, const ad_adapt &y);
using ::round;
using ::trunc;
using std::ceil;
using std::floor;
Writer floor(const Writer &x);
struct FloorOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return floor(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain floor(const ad_plain &x);
ad_aug floor(const ad_aug &x);
Writer ceil(const Writer &x);
struct CeilOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return ceil(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain ceil(const ad_plain &x);
ad_aug ceil(const ad_aug &x);
Writer trunc(const Writer &x);
struct TruncOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return trunc(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain trunc(const ad_plain &x);
ad_aug trunc(const ad_aug &x);
Writer round(const Writer &x);
struct RoundOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return round(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain round(const ad_plain &x);
ad_aug round(const ad_aug &x);

double sign(const double &x);
Writer sign(const Writer &x);
struct SignOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return sign(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain sign(const ad_plain &x);
ad_aug sign(const ad_aug &x);

double ge0(const double &x);
double lt0(const double &x);
Writer ge0(const Writer &x);
struct Ge0Op : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return ge0(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain ge0(const ad_plain &x);
ad_aug ge0(const ad_aug &x);
Writer lt0(const Writer &x);
struct Lt0Op : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return lt0(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain lt0(const ad_plain &x);
ad_aug lt0(const ad_aug &x);
using ::expm1;
using ::fabs;
using ::log1p;
using std::acos;
using std::asin;
using std::atan;
using std::cos;
using std::cosh;
using std::exp;
using std::log;
using std::sin;
using std::sinh;
using std::sqrt;
using std::tan;
using std::tanh;

Writer fabs(const Writer &x);
struct AbsOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return fabs(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * sign(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain fabs(const ad_plain &x);
ad_aug fabs(const ad_aug &x);
ad_adapt fabs(const ad_adapt &x);
Writer cos(const Writer &x);
ad_aug cos(const ad_aug &x);
Writer sin(const Writer &x);
struct SinOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return sin(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * cos(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain sin(const ad_plain &x);
ad_aug sin(const ad_aug &x);
ad_adapt sin(const ad_adapt &x);
Writer cos(const Writer &x);
struct CosOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return cos(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * -sin(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain cos(const ad_plain &x);
ad_aug cos(const ad_aug &x);
ad_adapt cos(const ad_adapt &x);
Writer exp(const Writer &x);
struct ExpOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return exp(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * args.y(0);
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain exp(const ad_plain &x);
ad_aug exp(const ad_aug &x);
ad_adapt exp(const ad_adapt &x);
Writer log(const Writer &x);
struct LogOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return log(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / args.x(0);
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain log(const ad_plain &x);
ad_aug log(const ad_aug &x);
ad_adapt log(const ad_adapt &x);
Writer sqrt(const Writer &x);
struct SqrtOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return sqrt(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(0.5) / args.y(0);
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain sqrt(const ad_plain &x);
ad_aug sqrt(const ad_aug &x);
ad_adapt sqrt(const ad_adapt &x);
Writer tan(const Writer &x);
struct TanOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return tan(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / (cos(args.x(0)) * cos(args.x(0)));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain tan(const ad_plain &x);
ad_aug tan(const ad_aug &x);
ad_adapt tan(const ad_adapt &x);
Writer cosh(const Writer &x);
ad_aug cosh(const ad_aug &x);
Writer sinh(const Writer &x);
struct SinhOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return sinh(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * cosh(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain sinh(const ad_plain &x);
ad_aug sinh(const ad_aug &x);
ad_adapt sinh(const ad_adapt &x);
Writer cosh(const Writer &x);
struct CoshOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return cosh(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * sinh(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain cosh(const ad_plain &x);
ad_aug cosh(const ad_aug &x);
ad_adapt cosh(const ad_adapt &x);
Writer tanh(const Writer &x);
struct TanhOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return tanh(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / (cosh(args.x(0)) * cosh(args.x(0)));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain tanh(const ad_plain &x);
ad_aug tanh(const ad_aug &x);
ad_adapt tanh(const ad_adapt &x);
Writer expm1(const Writer &x);
struct Expm1 : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return expm1(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * args.y(0) + Type(1.);
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain expm1(const ad_plain &x);
ad_aug expm1(const ad_aug &x);
ad_adapt expm1(const ad_adapt &x);
Writer log1p(const Writer &x);
struct Log1p : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return log1p(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / (args.x(0) + Type(1.));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain log1p(const ad_plain &x);
ad_aug log1p(const ad_aug &x);
ad_adapt log1p(const ad_adapt &x);
Writer asin(const Writer &x);
struct AsinOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return asin(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) +=
        args.dy(0) * Type(1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain asin(const ad_plain &x);
ad_aug asin(const ad_aug &x);
ad_adapt asin(const ad_adapt &x);
Writer acos(const Writer &x);
struct AcosOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return acos(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) +=
        args.dy(0) * Type(-1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain acos(const ad_plain &x);
ad_aug acos(const ad_aug &x);
ad_adapt acos(const ad_adapt &x);
Writer atan(const Writer &x);
struct AtanOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return atan(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / (Type(1.) + args.x(0) * args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain atan(const ad_plain &x);
ad_aug atan(const ad_aug &x);
ad_adapt atan(const ad_adapt &x);
template <class T>
T abs(const T &x) {
  return fabs(x);
}
using std::pow;
Writer pow(const Writer &x1, const Writer &x2);
struct PowOp : global::BinaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x1, Type x2) {
    return pow(x1, x2);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * args.x(1) * pow(args.x(0), args.x(1) - Type(1.));
    args.dx(1) += args.dy(0) * args.y(0) * log(args.x(0));
  }
  const char *op_name();
};
ad_plain pow(const ad_plain &x1, const ad_plain &x2);
ad_aug pow(const ad_aug &x1, const ad_aug &x2);
ad_adapt pow(const ad_adapt &x1, const ad_adapt &x2);
using std::max;
Writer max(const Writer &x1, const Writer &x2);
struct MaxOp : global::BinaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x1, Type x2) {
    return max(x1, x2);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * ge0(args.x(0) - args.x(1));
    args.dx(1) += args.dy(0) * lt0(args.x(0) - args.x(1));
  }
  const char *op_name();
};
ad_plain max(const ad_plain &x1, const ad_plain &x2);
ad_aug max(const ad_aug &x1, const ad_aug &x2);
ad_adapt max(const ad_adapt &x1, const ad_adapt &x2);

using std::min;
Writer min(const Writer &x1, const Writer &x2);
struct MinOp : global::BinaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x1, Type x2) {
    return min(x1, x2);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * ge0(args.x(1) - args.x(0));
    args.dx(1) += args.dy(0) * lt0(args.x(1) - args.x(0));
  }
  const char *op_name();
};
ad_plain min(const ad_plain &x1, const ad_plain &x2);
ad_aug min(const ad_aug &x1, const ad_aug &x2);
ad_adapt min(const ad_adapt &x1, const ad_adapt &x2);
Replay CondExpEq(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpEqOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpEq(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpEq(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpEq(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpNe(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpNeOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpNe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpNe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpNe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpGt(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpGtOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpGt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpGt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpGt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpLt(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpLtOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpLt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpLt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpLt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpGe(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpGeOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpGe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpGe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpGe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpLe(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpLeOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpLe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpLe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpLe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);

struct SumOp : global::DynamicOperator<-1, 1> {
  static const bool is_linear = true;
  static const bool have_input_size_output_size = true;
  static const bool add_forward_replay_copy = true;
  size_t n;
  Index input_size() const;
  Index output_size() const;
  SumOp(size_t n);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    args.y(0) = 0;
    for (size_t i = 0; i < n; i++) {
      args.y(0) += args.x(i);
    }
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    for (size_t i = 0; i < n; i++) {
      args.dx(i) += args.dy(0);
    }
  }
  const char *op_name();
};
template <class T>
T sum(const std::vector<T> &x) {
  return global::Complete<SumOp>(x.size())(x)[0];
}

ad_plain logspace_sum(const std::vector<ad_plain> &x);
struct LogSpaceSumOp : global::DynamicOperator<-1, 1> {
  size_t n;
  static const bool have_input_size_output_size = true;
  Index input_size() const;
  Index output_size() const;
  LogSpaceSumOp(size_t n);
  void forward(ForwardArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    for (size_t i = 0; i < n; i++) {
      args.dx(i) += exp(args.x(i) - args.y(0)) * args.dy(0);
    }
  }
  const char *op_name();
};
ad_plain logspace_sum(const std::vector<ad_plain> &x);
template <class T>
T logspace_sum(const std::vector<T> &x_) {
  std::vector<ad_plain> x(x_.begin(), x_.end());
  return logspace_sum(x);
}

ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
                             const std::vector<Index> &stride, size_t n);
struct LogSpaceSumStrideOp : global::DynamicOperator<-1, 1> {
  std::vector<Index> stride;
  size_t n;
  static const bool have_input_size_output_size = true;

  Index number_of_terms() const;
  template <class Type>
  Type &entry(Type **px, size_t i, size_t j) const {
    return px[j][0 + i * stride[j]];
  }
  template <class Type>
  Type rowsum(Type **px, size_t i) const {
    size_t m = stride.size();
    Type s = (Scalar)(0);
    for (size_t j = 0; j < m; j++) {
      s += entry(px, i, j);
    }
    return s;
  }
  Index input_size() const;
  Index output_size() const;
  LogSpaceSumStrideOp(std::vector<Index> stride, size_t n);
  void forward(ForwardArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    size_t m = stride.size();
    std::vector<Type *> wrk1(m);
    std::vector<Type *> wrk2(m);
    Type **px = &(wrk1[0]);
    Type **pdx = &(wrk2[0]);
    for (size_t i = 0; i < m; i++) {
      px[i] = args.x_ptr(i);
      pdx[i] = args.dx_ptr(i);
    }
    for (size_t i = 0; i < n; i++) {
      Type s = rowsum(px, i);
      Type tmp = exp(s - args.y(0)) * args.dy(0);
      for (size_t j = 0; j < m; j++) {
        entry(pdx, i, j) += tmp;
      }
    }
  }
  /** \brief Dependencies (including indirect)
      \details Stride based operations assume a certain memory layout
      and therefore have *indirect dependencies*.
  */
  void dependencies(Args<> &args, Dependencies &dep) const;
  /** \brief `dependencies` has been implemented */
  static const bool have_dependencies = true;
  /** \brief Add marker routines based on `dependencies` */
  static const bool implicit_dependencies = true;
  /** \brief It is **not* safe to remap the inputs of this operator */
  static const bool allow_remap = false;
  const char *op_name();

  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
};
ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
                             const std::vector<Index> &stride, size_t n);
template <class T>
T logspace_sum_stride(const std::vector<T> &x_,
                      const std::vector<Index> &stride, size_t n) {
  std::vector<ad_plain> x(x_.begin(), x_.end());
  return logspace_sum_stride(x, stride, n);
}
}  // namespace TMBad
#endif  // HAVE_GLOBAL_HPP
