# creation of the sample
alpha <- 0.2; beta <- 0.8;
# the following assumes alpha < 0.5 < beta
rexemple <- function(n){
  res <- list();
  res$x <- cbind(runif(n), runif(n));
  res$y <- numeric(n);
  u <- runif(n);
  below <- 2 * res$x[,1] + res$x[,2] < 1.5;   # below the true frontier
  res$y[below]  <- as.numeric(u[below]  < beta);
  res$y[!below] <- as.numeric(u[!below] < alpha);
  res
}
n <- 1000;
train <- rexemple(n);

# method 1: Bayes classifier (predicts 1 below the true frontier 2*x1 + x2 = 1.5)
bayesClassifier <- function(x){ as.numeric(2 * x[,1] + x[,2] < 1.5) }
# Monte Carlo estimate of a classifier's error on a fresh large sample
evaluateClassifError <- function(classifier){
  test <- rexemple(50000);
  mean(test$y != classifier(test$x))
}
bayesError <- evaluateClassifError(bayesClassifier)

# method 2: k-nn
require(class)
test <- rexemple(50000); # could also use LHS here
test$guessed <- knn(train$x, test$x, train$y, k = 1)
knnError <- mean(test$y != test$guessed)
plot(test$x[,1], test$x[,2],
     col = c("pink", "cyan")[as.numeric(test$guessed)],
     pch = 19, xlim = c(-0.2, 1.2), ylim = c(-0.2, 1.2))
points(train$x[,1], train$x[,2], col = c("red", "blue")[1 + train$y], pch = 19)
lines(c(0, 1), c(1.5, -0.5), col = "black")

# graph legend and result prints
title("simulated data classification")
legend(0.8, 1.2, c("true frontier", "k-nn zone 1", "k-nn zone 0"),
       col = c("black", "cyan", "pink"), lty = 1);
cat("Bayes classification error:\t", bayesError,
    "\nknn classifier error:\t", knnError, "\n")

# choice of k by cross-validation
K <- 10; L <- floor(n / K);
Ik <- 1:(n - L - 1);                # candidate values of k
err <- array(0, length(Ik));
for(i in 1:K){
  plageTest <- (i - 1) * L + (1:L);   # indices of the i-th validation fold
  rtrain <- list(x = train$x[-plageTest, ], y = train$y[-plageTest]);
  vtest  <- list(x = train$x[plageTest, ],  y = train$y[plageTest]);
  for(k in 1:length(Ik)){
    vtest$guessed <- knn(rtrain$x, vtest$x, rtrain$y, k = Ik[k])
    err[k] <- err[k] + mean(vtest$y != vtest$guessed) / K   # average over the K folds
  }
}
x11()   # open a new plotting window (X11)
plot(Ik, err, type = 'l');
kopt <- Ik[which.min(err)];
kopt
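
# A quick check of the Bayes error (an added sketch, assuming the generative
# model above): P(Y = 1 | x) equals beta below the frontier and alpha above it,
# so the pointwise risk of the Bayes classifier is min(p, 1 - p) = 0.2 on both
# sides, and bayesError should be close to the exact value computed here.
pBelow <- 0.5   # the frontier 2*x1 + x2 = 1.5 splits the unit square into two equal areas
exactBayesError <- (1 - beta) * pBelow + alpha * (1 - pBelow)   # = 0.2 here
cat("exact Bayes error:", exactBayesError, "\n")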
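
# Precision of the Monte Carlo error estimates (a short added note): with a
# test sample of size m, an estimated error rate e has standard error
# sqrt(e * (1 - e) / m), i.e. roughly +/- 0.004 around 0.2 for m = 50000.
m <- 50000
cat("knn error:", knnError, "+/-", 2 * sqrt(knnError * (1 - knnError) / m),
    "(approx. 95% interval)\n")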
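
# The comment "could also use LHS here" refers to Latin hypercube sampling of
# the test inputs; a minimal sketch with the lhs package (an extra dependency,
# assumed installed; randomLHS(n, k) returns n space-filling points in [0,1]^k).
require(lhs)
mLHS <- 50000
xLHS <- randomLHS(mLHS, 2)
uLHS <- runif(mLHS)
belowLHS <- 2 * xLHS[,1] + xLHS[,2] < 1.5
yLHS <- as.numeric(ifelse(belowLHS, uLHS < beta, uLHS < alpha))  # same labelling model as rexemple
guessedLHS <- knn(train$x, xLHS, train$y, k = 1)
cat("knn error on the LHS test sample:", mean(yLHS != guessedLHS), "\n")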
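
# An alternative to the explicit fold loop (a sketch, not the original method):
# class::knn.cv computes leave-one-out predictions on the training set directly,
# so no manual splitting is needed. The grid kGrid is an arbitrary choice here.
kGrid <- 1:100
looErr <- sapply(kGrid, function(k) mean(knn.cv(train$x, train$y, k = k) != train$y))
cat("k chosen by leave-one-out:", kGrid[which.min(looErr)], "\n")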