## ---- include=FALSE-----------------------------------------------------------
# Seed the random number generator so repeated runs give the same results
set.seed(seed = 0)

## ----eval=FALSE---------------------------------------------------------------
#  # install.packages("devtools")
#  
#  # Option 1: download and install latest version from GitHub
#  devtools::install_github("nproellochs/ReinforcementLearning")
#  
#  # Option 2: install directly from bundled archive
#  devtools::install_local("ReinforcementLearning_1.0.0.tar.gz")

## ---- message=FALSE-----------------------------------------------------------
library(ReinforcementLearning)

## -----------------------------------------------------------------------------
# Load the tictactoe dataset into the workspace
data("tictactoe")

# Show its first five rows
head(tictactoe, n = 5)

## ---- eval=FALSE--------------------------------------------------------------
#  # Define state and action sets
#  states <- c("s1", "s2", "s3", "s4")
#  actions <- c("up", "down", "left", "right")
#  
#  env <- gridworldEnvironment
#  
#  # Sample N = 1000 random sequences from the environment
#  data <- sampleExperience(N = 1000,
#                           env = env,
#                           states = states,
#                           actions = actions)

## ---- eval=FALSE--------------------------------------------------------------
#  # Load dataset
#  data("tictactoe")
#  
#  # Perform reinforcement learning
#  model <- ReinforcementLearning(data = tictactoe,
#                                 s = "State",
#                                 a = "Action",
#                                 r = "Reward",
#                                 s_new = "NextState",
#                                 iter = 1)
#  

## ---- eval=FALSE--------------------------------------------------------------
#  # Define control object
#  control <- list(alpha = 0.1, gamma = 0.1, epsilon = 0.1)
#  
#  # Pass learning parameters to reinforcement learning function
#  model <- ReinforcementLearning(data, iter = 10, control = control)

## ---- eval=FALSE--------------------------------------------------------------
#  # Print policy
#  computePolicy(model)
#  
#  # Print state-action table
#  print(model)
#  
#  # Print summary statistics
#  summary(model)

## ---- message=FALSE-----------------------------------------------------------
# State labels "s1" through "s4"
states <- paste0("s", seq_len(4))

# Moves available in each state
actions <- c(
  "up",
  "down",
  "left",
  "right"
)

## ---- message=FALSE-----------------------------------------------------------
# Use the built-in 2x2 gridworld environment function as the environment
env <- gridworldEnvironment

# Display the environment function
print(x = env)

## ---- message=FALSE-----------------------------------------------------------
# Draw N = 1000 random sequences from the environment, using the
# state and action sets defined above
data <- sampleExperience(
  N = 1000,
  env = env,
  states = states,
  actions = actions
)

# Preview the first six sampled rows
head(data, n = 6L)

## ---- message=FALSE-----------------------------------------------------------
# Learning parameters, passed to the learner as a control list
control <- list(
  alpha = 0.1,
  gamma = 0.5,
  epsilon = 0.1
)

# Learn from the sampled experience; the column names of `data` holding
# the state, action, reward and next state are given explicitly
model <- ReinforcementLearning(
  data = data,
  s = "State",
  a = "Action",
  r = "Reward",
  s_new = "NextState",
  control = control
)


## ---- message=FALSE-----------------------------------------------------------
# Show the policy computed from the learned model
computePolicy(model)

# Show the learned state-action function
print(x = model)

## ---- message=FALSE-----------------------------------------------------------
# Show summary statistics for the learned model
summary(object = model)


## ---- message=FALSE-----------------------------------------------------------
# Three example states to look up
data_unseen <- data.frame(
  State = c("s1", "s2", "s1"),
  stringsAsFactors = FALSE
)

# Add the action the model predicts for each state as a new column
data_unseen[["OptimalAction"]] <- predict(model, data_unseen[["State"]])

# Show the states alongside their predicted actions
data_unseen

## ---- message=FALSE-----------------------------------------------------------
# Draw N = 1000 further sequences from the environment, this time with
# epsilon-greedy action selection based on the existing model
data_new <- sampleExperience(
  N = 1000,
  env = env,
  states = states,
  actions = actions,
  actionSelection = "epsilon-greedy",
  model = model,
  control = control
)

# Continue learning from the new data, passing in the existing model
model_new <- ReinforcementLearning(
  data = data_new,
  s = "State",
  a = "Action",
  r = "Reward",
  s_new = "NextState",
  control = control,
  model = model
)

## ---- message=FALSE, fig.width=5, fig.height=3--------------------------------
# Show the updated model
print(x = model_new)

# Draw the reinforcement learning curve of the updated model
plot(x = model_new)

## ---- message=FALSE, echo=FALSE-----------------------------------------------
# One-line encoding of an example board
cat("......X.B")

# The same board drawn as a 3x3 grid; rows are joined with newlines and
# no trailing newline is written
cat(paste(
  c(
    "|  .  |  .  |  .   |",
    "|------------------|",
    "|  .  |  .  |  .   |",
    "|------------------|",
    "|  X  |  .  |   B  |"
  ),
  collapse = "\n"
))


## ---- eval=FALSE--------------------------------------------------------------
#  # Load dataset
#  data("tictactoe")
#  
#  # Define reinforcement learning parameters
#  control <- list(alpha = 0.2, gamma = 0.4, epsilon = 0.1)
#  
#  # Perform reinforcement learning
#  model <- ReinforcementLearning(tictactoe, s = "State", a = "Action", r = "Reward",
#                                 s_new = "NextState", iter = 1, control = control)
#  
#  # Calculate optimal policy
#  pol <- computePolicy(model)
#  
#  # Print policy
#  head(pol)
#  

## ---- message=FALSE, echo=FALSE-----------------------------------------------
# Two-row printout: encoded board states on the first row and a quoted
# cell label beneath each; the first row keeps its trailing space
cat(paste(
  '.XXBB..XB XXBB.B.X. .XBB..BXX BXX...B.. ..XB..... XBXBXB... ',
  '     "c1"      "c5"      "c5"      "c4"      "c5"      "c9"',
  sep = "\n"
))


## ---- message=FALSE, echo=FALSE-----------------------------------------------
# 3x3 grid for the board state ".XXBB..XB"; rows are joined with
# newlines and no trailing newline is written
cat(paste(
  c(
    "|  .  |  X  |  X   |",
    "|------------------|",
    "|  B  |  B  |  .   |",
    "|------------------|",
    "|  .  |  X  |   B  |"
  ),
  collapse = "\n"
))

# Grid showing the cell labels c1 to c9 in board order
cat(paste(
  c(
    "|  c1  |  c2  |  c3   |",
    "|---------------------|",
    "|  c4  |  c5  |  c6   |",
    "|---------------------|",
    "|  c7  |  c8  |   c9  |"
  ),
  collapse = "\n"
))