# arm parameters p <- c(0.4, 0.5, 0.9, 0.8, 0.7); K <- length(p); runUCB <- function(n){ # plays one run of bandit problem with parameters p # returns the sequence of binary rewards c <- 1/2; # what is the optimal constant? S <- array(0, K); N <- array(0, K); rew <- array(0, n); for(t in 1:n){ if (t<=K) {A <- t;} else{ # confidence must increase with t ucb <- mapply(function(s, n) s/n + sqrt(c*log(t)/n), S, N); A <- which.max(ucb); } rew[t] <- runif(1)