# 11. Classification & Regression Trees
#Week-11
# Video: https://youtu.be/6SMrjEwFiQY
library(DAAG)
library(party)
library(rpart)
library(rpart.plot)
library(mlbench)
library(pROC)
library(tree)
library(caret)
# 1. Classification Tree ----
# Fits an rpart classification tree for `yesno` on the spam7 data, reports an
# in-sample confusion matrix, and draws the ROC curve on the held-out half.
str(spam7)
mydata <- spam7

# Split data: each row is assigned to partition 1 (train) or 2 (test) with
# equal probability; the fixed seed keeps the split reproducible.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
set.seed(1234)
ind <- sample(2, nrow(mydata), replace = TRUE, prob = c(0.5, 0.5))
train <- mydata[ind == 1, ]
test <- mydata[ind == 2, ]

# Tree: model yesno from every other column. The method is stated explicitly
# rather than left for rpart to infer from the response type.
tree <- rpart(yesno ~ ., data = train, method = "class")
rpart.plot(tree)
printcp(tree)
plotcp(tree)

# Confusion matrix - train (computed on the data the tree was fitted on, so
# it is an optimistic view of accuracy)
p <- predict(tree, train, type = "class")
confusionMatrix(p, train$yesno)

# ROC on the test partition
p1 <- predict(tree, test, type = "prob")
# Keep the second probability column, i.e. the second level of yesno
# (presumably the positive / spam class - confirm with levels(test$yesno)).
p1 <- p1[, 2]
# A single score against a two-class response needs only one curve, so build
# it directly with roc() instead of unwrapping a multiclass.roc() result; this
# also avoids creating a variable called `roc` that hides the pROC function.
r1 <- pROC::roc(test$yesno, p1, percent = TRUE)
plot.roc(
  r1,
  print.auc = TRUE,
  auc.polygon = TRUE,
  grid = c(0.1, 0.2),
  grid.col = c("green", "red"),
  max.auc.polygon = TRUE,
  auc.polygon.col = "lightblue",
  print.thres = TRUE,
  main = "ROC Curve"
)
# 2. Regression Tree ----
# Fits an rpart regression tree for `medv` on the BostonHousing data and
# computes fitted values for the training partition.
data("BostonHousing")
mydata <- BostonHousing

# Split data: each row is assigned to partition 1 (train) or 2 (test) with
# equal probability; the fixed seed keeps the split reproducible.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
set.seed(1234)
ind <- sample(2, nrow(mydata), replace = TRUE, prob = c(0.5, 0.5))
train <- mydata[ind == 1, ]
test <- mydata[ind == 2, ]

# Regression tree: model medv from every other column. The method is stated
# explicitly rather than left for rpart to infer from the response type.
tree <- rpart(medv ~ ., data = train, method = "anova")
rpart.plot(tree)
printcp(tree)
plotcp(tree)

# Predict: fitted values on the training partition (in-sample predictions)
p <- predict(tree, train)