#' @title Self-Sparring Learner
#'
#' @include Learner.R
#'
#' @description
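#' This Learner specializes [Learner] to implement the Self-Sparring algorithm:
#' arms are chosen by sampling from per-arm Beta distributions built from the
#' recorded successes and fails, which are updated from pairwise feedback.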
#'
SelfSparring = R6Class("SelfSparring", inherit = Learner,
  public = list(

    #' @field successes (`numeric(n)`)\cr
    #' Per-arm success weight. Starts at 0 and grows by `eta` each time the
    #' arm wins a pairwise comparison in `update()`.
    successes = NULL,

    #' @field fails (`numeric(n)`)\cr
    #' Per-arm fail weight. Starts at 0 and grows by `eta` each time the
    #' arm loses a pairwise comparison in `update()`.
    fails = NULL,

    #' @field eta (`numeric(1)`)\cr
    #' The learning rate: the amount added to `successes` / `fails` per
    #' observed comparison.
    eta = NULL,

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    #'
    #' @param data_model_specs (`list()`)\cr
    #'   Specification of the data model. The number of arms is read from the
    #'   entry `n` or `num_arms`.
    #' @param aggregation
    #'   Forwarded unchanged to the [Learner] constructor.
    #' @param action_size
    #'   Forwarded unchanged to the [Learner] constructor; this class reads it
    #'   back as `self$action_size` (presumably stored by [Learner] -- confirm).
    #' @param eta (`numeric(1)`)\cr
    #'   The learning rate.
    initialize = function(data_model_specs = list(num_arms = 2, dim = 1), aggregation, action_size, eta) {
      super$initialize(aggregation = aggregation, action_size = action_size)

      # Look the arm count up by exact name first. The previous code used
      # `data_model_specs$n`, which only found `num_arms` through partial
      # matching and silently returned NULL once another entry started with "n".
      num_arms = data_model_specs[["n"]]
      if (is.null(num_arms)) {
        num_arms = data_model_specs[["num_arms"]]
      }
      if (is.null(num_arms)) {
        # Partial-matching fallback keeps every input working that worked before.
        num_arms = data_model_specs$n
      }
      if (is.null(num_arms)) {
        stop("`data_model_specs` must contain the number of arms as `num_arms` (or `n`).",
          call. = FALSE)
      }

      self$successes = rep(0, num_arms)
      self$fails = rep(0, num_arms)
      self$eta = eta
    },

    #' @description
    #' Selects `self$action_size` distinct arms. For every slot a fresh value
    #' is drawn for each arm from `Beta(1 + successes, 1 + fails)` and the
    #' not-yet-selected arm with the largest draw is picked.
    #'
    #' @param data_model
    #'   Not used by this method.
    #'
    #' @return Vector of distinct arm indices of length `self$action_size`.
    action = function(data_model) {
      num_arms = length(self$successes)

      # Asking for more distinct arms than exist can never be satisfied; the
      # previous loop returned the invalid arm 0 or never terminated.
      if (self$action_size > num_arms) {
        stop("`action_size` (", self$action_size, ") exceeds the number of arms (",
          num_arms, ").", call. = FALSE)
      }

      selection = c()
      while (length(selection) < self$action_size) {
        # One draw per arm, in arm order, re-sampled for every slot.
        hat_theta = rbeta(num_arms, 1 + self$successes, 1 + self$fails)
        # Mask arms that are already selected so they cannot win again.
        hat_theta[selection] = -Inf
        # which.max() returns the first maximum, i.e. the lowest index on ties.
        selection = c(selection, which.max(hat_theta))
      }
      selection
    },

    #' @description
    #' Updates `successes` and `fails` from the pairwise feedback of every
    #' pair `(i, j)`, `i < j`, among the first `self$action_size` chosen arms.
    #'
    #' @param chosen_arms
    #'   Arm indices, e.g. as returned by `action()`.
    #' @param data_model
    #'   Object providing `getFeedback(arms, timestep)`. A result equal to
    #'   `TRUE` is treated as a win of the first arm of the pair, anything
    #'   else as a win of the second arm.
    update = function(chosen_arms, data_model) {
      # seq_len() yields an empty loop when fewer than two arms are played;
      # `1:(action_size - 1)` would count down and index out of range.
      for (i in seq_len(max(self$action_size - 1, 0))) {
        for (j in (i + 1):self$action_size) {
          # `self$timestep` is not defined in this class; presumably it is
          # maintained by the parent Learner -- confirm.
          temp = data_model$getFeedback(c(chosen_arms[i], chosen_arms[j]), self$timestep)
          if (temp == TRUE) {
            winner = chosen_arms[i]
            loser = chosen_arms[j]
          } else {
            winner = chosen_arms[j]
            loser = chosen_arms[i]
          }
          self$successes[winner] = self$successes[winner] + self$eta
          self$fails[loser] = self$fails[loser] + self$eta
        }
      }
    }
  )
)