#' @title Maximum-Informative-Pair Learner
#'
#' @include Learner.R
#'
#' @description
#' This learner specializes [Learner] to implement the Maximum-Informative-Pair (MaxInP) algorithm.
#'
MaxInP = R6Class("MaxInP", inherit = Learner,
  public = list(

    #' @field V_mat (`numeric(d, d)`)\cr
    #' Gram matrix: starts as the zero matrix and accumulates the outer
    #' product of every contrast vector observed in `update()`.
    V_mat = NULL,

    #' @field theta_hat (`numeric(d)`)\cr
    #' Current weight vector estimate (initialised to zero).
    theta_hat = NULL,

    #' @field z_t (`list()`)\cr
    #' Contrast vectors (context of first chosen arm minus context of the
    #' second), indexed by timestep.
    z_t = NULL,

    #' @field o_t (`integer()`)\cr
    #' Feedback obtained by the learner, indexed by timestep.
    o_t = NULL,

    #' @field eta (`numeric(1)`)\cr
    #' Confidence width: multiplier of the `V_mat`-inverse weighted norm of a
    #' contrast vector in the optimism test of `action()`.
    eta = NULL,

    #' @field tau_0 (`integer(1)`)\cr
    #' Initial exploration length: while `timestep < tau_0` a random pair of
    #' arms is played.
    tau_0 = NULL,

    #' @field d (`integer(1)`)\cr
    #' Dimensionality of the problem.
    d = NULL,

    #' @field n (`integer(1)`)\cr
    #' Number of arms.
    n = NULL,

    #' @field fullMLE (`logical(1)`)\cr
    #' If `TRUE`, `theta_hat` is re-estimated via `MLE_estimate()` on the whole
    #' history; if `FALSE`, a single gradient step on the latest observation
    #' is taken instead.
    fullMLE = TRUE,

    #' @field alpha (`numeric(1)`)\cr
    #' Learning rate of the SGD variant; the effective step size at a given
    #' timestep is `alpha / sqrt(timestep)`. Only used if `fullMLE` is `FALSE`.
    alpha = NULL,

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    #'
    #' @param data_model_specs (`list()`)\cr
    #'   List with elements `num_arms` (number of arms) and `dim`
    #'   (dimensionality of the context vectors).
    #' @param aggregation
    #'   Passed unchanged to the [Learner] constructor.
    #' @param eta (`numeric(1)`)\cr
    #'   Confidence width.
    #' @param tau_0 (`integer(1)`)\cr
    #'   Initial exploration length.
    #' @param fullMLE (`logical(1)`)\cr
    #'   Whether the full MLE (`TRUE`) or the SGD variant (`FALSE`) is used.
    #' @param alpha (`numeric(1)`)\cr
    #'   Learning rate of the SGD variant; required if `fullMLE` is `FALSE`.
    initialize = function(data_model_specs = list(num_arms = 2, dim = 1), aggregation, eta, tau_0, fullMLE = TRUE, alpha =NULL){
      super$initialize(aggregation = aggregation, action_size = 2)

      self$n         = data_model_specs$num_arms
      self$d         = data_model_specs$dim
      self$V_mat     = matrix(0, nrow = self$d, ncol = self$d)
      self$theta_hat = numeric(self$d)
      self$z_t       = list()
      self$o_t       = c()
      self$eta       = eta
      self$tau_0     = tau_0
      self$fullMLE   = fullMLE
      self$alpha     = alpha
    },

    #' @description
    #' Chooses the pair of arms to be compared at the current timestep.
    #'
    #' While `timestep < tau_0` a pair is sampled uniformly at random without
    #' replacement. Afterwards `theta_hat` is refreshed (full MLE or one SGD
    #' step) and, among all pairs `(i, j)` with `i < j` whose optimistic
    #' utility difference is positive, the pair with the largest
    #' `V_mat`-inverse weighted norm of its contrast vector is selected.
    #'
    #' `self$timestep` is not defined in this class; it is presumably
    #' maintained by [Learner].
    #'
    #' @param data_model
    #'   Object providing `getContext(timestep)`. The result is indexed
    #'   column-wise by arm, so it is presumably a `d x n` matrix
    #'   (TODO confirm against the data model).
    #'
    #' @return Vector with the two selected arm indices. If no pair passes the
    #'   optimism test the selection is empty (`NULL`).
    action = function(data_model) {

      # Initial exploration phase: play a uniformly random pair.
      if (self$timestep < self$tau_0) {
        return(sample(seq_len(self$n), 2, replace = FALSE))
      }

      X_t = data_model$getContext(self$timestep)

      if (self$fullMLE) {
        # Warm-start the MLE at the previous estimate.
        self$theta_hat = MLE_estimate(
          feedback       = self$o_t,
          covariates     = self$z_t,
          comparison     = plogis,
          comparison_der = dlogis,
          theta_start    = self$theta_hat
        )
      } else {
        # Without a learning rate the update below would silently collapse
        # theta_hat to numeric(0), so fail loudly instead.
        if (is.null(self$alpha)) {
          stop("`alpha` must be set when `fullMLE = FALSE`.", call. = FALSE)
        }
        prev = self$timestep - 1
        step = self$alpha / sqrt(self$timestep)
        self$theta_hat = self$theta_hat + step * gradLikelihood(
          theta      = self$theta_hat,
          covariates = self$z_t[[prev]],
          comparison = plogis,
          feedback   = self$o_t[prev]
        )
      }

      # V_mat does not change during the search, so invert it only once.
      # NOTE(review): solve() errors if V_mat is still singular, e.g. when
      # fewer than d linearly independent contrasts have been observed.
      V_inv = solve(self$V_mat)

      selection = c()
      temp_max  = -Inf

      for (i in seq_len(self$n - 1)) {
        for (j in (i + 1):self$n) {
          tup_diff = X_t[, i] - X_t[, j]
          # Uncertainty of the pair: norm of the contrast w.r.t. V_mat^{-1}.
          tup_norm = sqrt(drop(t(tup_diff) %*% V_inv %*% tup_diff))
          # Optimistic estimate of the utility difference between i and j.
          optimistic_gap = drop(t(tup_diff) %*% self$theta_hat) + self$eta * tup_norm
          if (optimistic_gap > 0 && tup_norm > temp_max) {
            temp_max  = tup_norm
            selection = c(i, j)
          }
        }
      }

      selection
    },

    #' @description
    #' Queries the feedback for the chosen pair and updates the learner's
    #' history (`z_t`, `o_t`) and the Gram matrix `V_mat`.
    #'
    #' @param chosen_arms
    #'   Vector whose first two elements are the indices of the compared arms.
    #' @param data_model
    #'   Object providing `getFeedback(arms, timestep)` and
    #'   `getContext(timestep)`.
    update = function(chosen_arms, data_model) {

      feedback = data_model$getFeedback(c(chosen_arms[1], chosen_arms[2]), self$timestep)
      X_t      = data_model$getContext(self$timestep)

      # Contrast vector of the played pair, computed once and reused below.
      contrast = X_t[, chosen_arms[1]] - X_t[, chosen_arms[2]]

      self$z_t[[self$timestep]] = contrast
      self$o_t[self$timestep]   = feedback

      # update estimates
      self$V_mat = self$V_mat + outer(contrast, contrast)
    }
  )
)