라일락 꽃이 피는 날

[R] Support Vector Machine (SVM) 본문

데이터 분석/R

[R] Support Vector Machine (SVM)

eunki 2021. 7. 2. 18:59
728x90

데이터 불러오기

# Read the wine data; the first row of the CSV supplies the column names.
rawdata <- read.csv(file = "wine.csv", header = TRUE)

# Convert the Class column to a factor.
rawdata[["Class"]] <- as.factor(rawdata[["Class"]])

# Print the structure (column names and types) of the loaded data frame.
str(rawdata)

 

 


트레이닝-테스트 셋 분리 (7:3)

# Work on a copy so the raw data stays untouched.
analdata <- rawdata

# Fix the RNG seed so the same rows are drawn on every run.
set.seed(2020)

# 70% of the rows go to training. floor() spells out the truncation that
# sample() would otherwise apply silently to a fractional size.
n_train <- floor(nrow(analdata) * 0.7)
datatotal <- sort(sample(nrow(analdata), n_train))

# Index the same object the row numbers were drawn from, so the split stays
# correct even if analdata is later filtered or transformed.
train <- analdata[datatotal, ]
test <- analdata[-datatotal, ]

# Select predictors and target by name instead of by hard-coded position, so
# the split does not depend on Class being the 14th column and matches the
# `Class ~ .` formula used for model fitting.
predictor_cols <- setdiff(names(analdata), "Class")

train_x <- train[, predictor_cols, drop = FALSE]
train_y <- train[["Class"]]

test_x <- test[, predictor_cols, drop = FALSE]
test_y <- test[["Class"]]

 



선형 서포트 벡터 머신

# Resampling scheme: repeated cross-validation, 5 repeats.
# number = 10 is the trainControl() default for CV methods, written out here.
ctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5
)

# Fit a linear-kernel SVM with Class as the response and every other column of
# the training set as a predictor. Predictors are centered and scaled inside
# each resample, and the tuning value is selected by accuracy.
svm_linear_fit <- train(
  Class ~ .,
  data = train,
  method = "svmLinear",
  preProcess = c("center", "scale"),
  metric = "Accuracy",
  trControl = ctrl
)

# Print the resampling summary of the fitted model.
svm_linear_fit

 

→ Accuracy : 0.9708541, Kappa : 0.9559268

 

 


예측

# Predict the class of every test-set row with the linear SVM.
pred_test <- predict(object = svm_linear_fit, newdata = test)

# Cross-tabulate predicted against actual classes and print the summary
# statistics.
confusionMatrix(data = pred_test, reference = test[["Class"]])

 

→ Accuracy : 0.9444, Kappa : 0.9117

 

 

변수중요도

# Variable importance for the linear SVM, left on its raw (unscaled) scale.
# NOTE(review): caret appears to have no SVM-specific importance measure, so
# varImp() presumably falls back to its model-independent, per-predictor
# filter here -- confirm against the caret documentation.
importance_linear <- varImp(object = svm_linear_fit, scale = FALSE)

# Print the importance table.
importance_linear

# Plot the importance scores.
plot(x = importance_linear)

 



비선형 서포트 벡터 머신

# Resampling scheme: repeated cross-validation, 5 repeats.
# number = 10 is the trainControl() default for CV methods, written out here.
ctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5
)

# Fit a polynomial-kernel SVM with Class as the response and every other
# column of the training set as a predictor. Predictors are centered and
# scaled inside each resample, and tuning values are selected by accuracy.
svm_poly_fit <- train(
  Class ~ .,
  data = train,
  method = "svmPoly",
  preProcess = c("center", "scale"),
  metric = "Accuracy",
  trControl = ctrl
)

# Print the resampling summary, including the selected tuning values.
svm_poly_fit

 

→ degree = 1, scale = 0.01, C = 0.5일 때 정확도가 가장 높다. (degree = 1이므로 선택된 다항 커널은 사실상 선형 커널과 같다.)

 

 

 

# Plot the fitted caret model object.
# NOTE(review): for a train object this presumably shows resampled accuracy
# across the tuning grid -- confirm against caret's plot.train documentation.
plot(svm_poly_fit) 

 



예측

# Predict the class of every test-set row with the polynomial SVM.
# This overwrites the pred_test produced for the linear model.
pred_test <- predict(object = svm_poly_fit, newdata = test)

# Cross-tabulate predicted against actual classes and print the summary
# statistics.
confusionMatrix(data = pred_test, reference = test[["Class"]])

 

→ Accuracy : 0.9259, Kappa : 0.8848

 

 

변수중요도

# Variable importance for the polynomial SVM, left on its raw (unscaled) scale.
# NOTE(review): caret appears to have no SVM-specific importance measure, so
# varImp() presumably falls back to its model-independent, per-predictor
# filter here -- confirm against the caret documentation.
importance_poly <- varImp(object = svm_poly_fit, scale = FALSE)

# Print the importance table.
importance_poly

# Plot the importance scores.
plot(x = importance_poly)

728x90