## The Naïve Bayes and kNN classifiers

library(e1071)

## Naive Bayes classifier for discrete predictors: we again use the 1984
## Congressional Voting Records data
# Note: refusals to vote have been treated as missing values!

data(HouseVotes84, package = "mlbench")

model <- naiveBayes(Class ~ ., data = HouseVotes84)

# predict the outcome of the first 20 records
predict(model, HouseVotes84[1:20, -1])

# same, but displaying the posterior probabilities
predict(model, HouseVotes84[1:20, -1], type = "raw")

# now predict all of them: this yields the resubstitution error, which is
# optimistic (see the split-based sketch at the end of this script)
pred <- predict(model, HouseVotes84[, -1])

# form and display the confusion matrix & overall accuracy
tab <- table(pred, HouseVotes84$Class)
tab
sum(tab[row(tab) == col(tab)]) / sum(tab)

## using Laplace smoothing (here a pseudo-count of 3 is added to every cell):
model <- naiveBayes(Class ~ ., data = HouseVotes84, laplace = 3)
pred <- predict(model, HouseVotes84[, -1])
tab <- table(pred, HouseVotes84$Class)
sum(tab[row(tab) == col(tab)]) / sum(tab)

# Exercise: redo the example treating refusals to vote as another (third)
# value (a possible starting sketch is given at the end of this script)

## Example with metric predictors: naiveBayes models each numeric predictor
## as a Gaussian within each class (inspect m$tables)

data(iris)
m <- naiveBayes(Species ~ ., data = iris)
tab <- table(predict(m, iris[, -5]), iris[, 5])
sum(tab[row(tab) == col(tab)]) / sum(tab)

## The kNN classifier

library(class)
data(iris3)

# 1. using a separate test set: the first 25 flowers of each species for
#    training, the remaining 25 for testing
train <- rbind(iris3[1:25, , 1], iris3[1:25, , 2], iris3[1:25, , 3])
test  <- rbind(iris3[26:50, , 1], iris3[26:50, , 2], iris3[26:50, , 3])
cl <- factor(c(rep("s", 25), rep("c", 25), rep("v", 25)))

myknn <- knn(train, test, cl, k = 3, prob = TRUE)
attributes(myknn)   # 'prob' holds the proportion of votes for the winning class

tab <- table(myknn, cl)
sum(tab[row(tab) == col(tab)]) / sum(tab)

# one can use 'knn1' when k = 1

# 2. using LOOCV (leave-one-out cross-validation) on the whole data set
train <- rbind(iris3[, , 1], iris3[, , 2], iris3[, , 3])
cl <- factor(c(rep("s", 50), rep("c", 50), rep("v", 50)))

myknn.cv <- knn.cv(train, cl, k = 3, prob = TRUE)
tab <- table(myknn.cv, cl)
sum(tab[row(tab) == col(tab)]) / sum(tab)
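## Choosing k: knn.cv can also be used to compare several values of k. A
## minimal sketch, assuming we scan the odd values 1..15 (an arbitrary
## choice); it reuses the 'train' and 'cl' objects defined just above.

ks <- seq(1, 15, by = 2)
accs <- sapply(ks, function(k) {
  pred.k <- knn.cv(train, cl, k = k)   # LOOCV predictions for this k
  sum(pred.k == cl) / length(cl)       # LOOCV accuracy for this k
})
data.frame(k = ks, LOOCV.accuracy = accs)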
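## The Naive Bayes accuracies computed above are resubstitution (training)
## accuracies, which tend to be optimistic. A minimal sketch of an honest
## estimate, assuming a random 2/3 - 1/3 learn/test split (the proportions
## and the seed are arbitrary choices, and the names are illustrative):

set.seed(1234)                          # arbitrary seed, for reproducibility
N <- nrow(HouseVotes84)
learn <- sample(N, round(2 * N / 3))    # indices of the learning set
model.split <- naiveBayes(Class ~ ., data = HouseVotes84[learn, ])
pred.split  <- predict(model.split, HouseVotes84[-learn, -1])
tab.split   <- table(pred.split, HouseVotes84$Class[-learn])
tab.split
sum(tab.split[row(tab.split) == col(tab.split)]) / sum(tab.split)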
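## A possible starting point for the exercise above (a sketch, not the only
## way): recode the missing votes as an explicit third level "refused" before
## refitting; the variable names here are illustrative.

HV3 <- HouseVotes84
for (j in 2:ncol(HV3)) {                # column 1 is the Class label
  v <- as.character(HV3[[j]])
  v[is.na(v)] <- "refused"              # refusals become a value of their own
  HV3[[j]] <- factor(v, levels = c("n", "y", "refused"))
}
model.HV3 <- naiveBayes(Class ~ ., data = HV3)
pred.HV3  <- predict(model.HV3, HV3[, -1])
tab.HV3   <- table(pred.HV3, HV3$Class)
sum(tab.HV3[row(tab.HV3) == col(tab.HV3)]) / sum(tab.HV3)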