% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/regret.R
\name{regret}
\alias{regret}
\title{Calculate the Regret of a Policy}
\usage{
regret(policy, benchmark, belief = NULL)
}
\arguments{
\item{policy}{a solved POMDP containing the policy to calculate the regret for.}

\item{benchmark}{a solved POMDP with the (optimal) policy. Regret is calculated relative to this
policy.}

\item{belief}{the start belief to use. If \code{NULL}, then the start belief of the \code{benchmark} is used.}
}
\value{
the regret as the difference between the expected long-term reward of the benchmark policy and that of the evaluated policy.
}
\description{
Calculates the regret of a policy relative to a benchmark policy.
}
\details{
Calculates the regret defined as \eqn{J^{\pi^*}(b_0) - J^{\pi}(b_0)}, where \eqn{J^\pi(b_0)} is the expected long-term
reward obtained by following the policy \eqn{\pi} from the start belief \eqn{b_0}. Regret is usually measured against the optimal policy \eqn{\pi^*} as the benchmark.
Since the optimal policy may not be known, regret relative to the best known policy can be used instead.
}
\examples{
data(Tiger)

sol_optimal <- solve_POMDP(Tiger)
sol_optimal

# perform exact value iteration for 10 epochs
sol_quick <- solve_POMDP(Tiger, method = "enum", horizon = 10)
sol_quick

regret(sol_quick, sol_optimal)
}
\seealso{
Other POMDP: 
\code{\link{POMDP_accessors}},
\code{\link{POMDP}()},
\code{\link{add_policy}()},
\code{\link{plot_belief_space}()},
\code{\link{projection}()},
\code{\link{sample_belief_space}()},
\code{\link{simulate_POMDP}()},
\code{\link{solve_POMDP}()},
\code{\link{solve_SARSOP}()},
\code{\link{transition_graph}()},
\code{\link{update_belief}()},
\code{\link{value_function}()},
\code{\link{write_POMDP}()}
}
\author{
Michael Hahsler
}
\concept{POMDP}
