# Data
# install.packages("gdata")
library(gdata)

# NOTE(review): machine-specific working directory; kept unchanged because the
# relative "csv" paths below are resolved against it.
setwd("C:\\Users\\Jürgen Fassman\\Documents\\lowerhousing\\Abgabe 3")

# Read every file found in the "csv" folder; fields are separated by "*".
files <- list.files("csv")
if (length(files) == 0L) {
  stop("No input files found in the 'csv' directory.", call. = FALSE)
}
alldata <- lapply(files, function(file_name) {
  read.csv(
    paste0("csv\\", file_name),
    header = TRUE, sep = "*", stringsAsFactors = FALSE
  )
})

# Per file: sort the rows by Id, ServerTimestamp and timestamp, then split
# them into one data frame per Id.
allsplitdata <- lapply(alldata, function(dataset) {
  ordereddata <- dataset[
    order(dataset$Id, dataset$ServerTimestamp, dataset$timestamp),
  ]
  split(ordereddata, ordereddata$Id)
})

# Keep only the files that contain at least 100 rows.
dataitems <- vapply(alldata, nrow, integer(1))
goodfiles <- which(dataitems >= 100)
if (length(goodfiles) == 0L) {
  stop("No input file contains at least 100 rows.", call. = FALSE)
}

# Count in how many good files each Id occurs.
listofids <- unlist(lapply(allsplitdata[goodfiles], names), use.names = FALSE)
ids <- unique(listofids)
sumofids <- tabulate(match(listofids, ids), nbins = length(ids))
idswithsums <- cbind(ids, sumofids)

# Keep the Ids that occur in every good file. The original compared the count
# against the literal 41; the per-file lookups below only succeed when that
# literal equals the number of good files, so the count is derived instead.
goodids <- ids[sumofids == length(goodfiles)]
if (length(goodids) == 0L) {
  stop("No Id is present in every good file.", call. = FALSE)
}

# One row per good file: the file index followed by the number of rows
# recorded for each shared Id.
numbers <- as.data.frame(do.call(rbind, lapply(goodfiles, function(file_index) {
  per_id <- allsplitdata[[file_index]]
  row_counts <- vapply(
    goodids,
    function(id) nrow(per_id[[match(id, names(per_id))]]),
    integer(1),
    USE.NAMES = FALSE
  )
  c(file_index, row_counts)
})))
numbers <- setNames(numbers, c("filenumber", goodids))

# Smallest row count of each shared Id across all good files (previously the
# column range 2:9 was hard-coded, i.e. exactly eight shared Ids were assumed).
minimum <- vapply(numbers[-1], min, numeric(1), USE.NAMES = FALSE)
idswithminimum <- cbind(goodids, minimum)

# Keep the Ids that have at least `window_size` rows in every good file, so
# the extraction below never indexes before the start of a series.
window_size <- 10
reallygoodids <- goodids[minimum >= window_size]
if (length(reallygoodids) == 0L) {
  stop("No Id has enough rows in every good file.", call. = FALSE)
}

# Feature matrix: one row per good file, holding the last `window_size`
# entries of the `value` column of every remaining Id, Id after Id.
gooddata <- as.data.frame(do.call(rbind, lapply(goodfiles, function(file_index) {
  per_id <- allsplitdata[[file_index]]
  unlist(lapply(reallygoodids, function(id) {
    series <- per_id[[match(id, names(per_id))]]
    as.numeric(tail(series$value, window_size))
  }), use.names = FALSE)
})))
# Name the feature columns "<Id><sample index>". The 10 must match the number
# of trailing samples extracted per Id when `gooddata` was built.
gooddatanames <- paste0(
  rep(reallygoodids, each = 10),
  rep(1:10, times = length(reallygoodids))
)
gooddata <- setNames(gooddata, gooddatanames)

# Append the success label of every good file, looked up by file index.
successdata <- read.xls("machning_success.xlsx", stringsAsFactors = FALSE)
success <- successdata$Success[goodfiles]
gooddata <- cbind(gooddata, success)

# Supervised Classification
library(e1071)
library(mlbench)
library(leaps)
library(MASS)
library(caret)
library(randomForest)

set.seed(600)
# data.frame() turns the column names into syntactic R names; the hand-picked
# column names further down rely on that conversion.
goodframe <- data.frame(gooddata)
goodframe$success <- as.integer(as.logical(goodframe$success))

# https://machinelearningmastery.com/feature-selection-with-the-caret-r-package/
# Feature selection (recursive feature elimination).
# The label is the last column and everything before it is a feature; the
# indices were previously hard-coded as 1:70 and 71.
# NOTE(review): the response is still integer 0/1 at this point and only
# becomes a factor afterwards - confirm a numeric response is intended here.
n_features <- ncol(goodframe) - 1L
control <- rfeControl(functions = rfFuncs, method = "cv", number = 10)
results <- rfe(
  goodframe[, seq_len(n_features)],
  goodframe[, n_features + 1L],
  sizes = seq_len(n_features),
  rfeControl = control
)
print(results)
predictors(results)
plot(results, type = c("g", "o"))

# Hand-picked predictors (presumably taken from the output above - verify).
# Built through data.frame(goodframe$...) on purpose: this yields the
# "goodframe."-prefixed column names that the modelling code refers to.
reduceddata <- data.frame(
  goodframe$ns.2.s..Channel.MachineAxis.aaLoad.u1.1.7,
  goodframe$ns.2.s..Channel.Spindle.driveLoad8,
  goodframe$ns.2.s..Channel.MachineAxis.aaTorque.u1.1.7,
  goodframe$ns.2.s..Channel.MachineAxis.aaLoad.u1.1.6,
  goodframe$ns.2.s..Channel.Spindle.driveLoad7,
  goodframe$ns.2.s..Channel.MachineAxis.aaLoad.u1.1.8,
  goodframe$ns.2.s..Channel.MachineAxis.aaLoad.u1.1.5,
  goodframe$ns.2.s..Channel.MachineAxis.aaLoad.u1.1.10,
  goodframe$success
)
reduceddata$goodframe.success <- as.factor(reduceddata$goodframe.success)

# Splitting data into test and training data 75/25
set.seed(600)
reduceddata[, "train"] <- ifelse(runif(nrow(reduceddata)) < 0.75, 1, 0)
trainset <- reduceddata[reduceddata$train == 1, ]
testset <- reduceddata[reduceddata$train == 0, ]
# Drop the helper column again so it is not used as a predictor.
trainColNum <- grep("train", names(trainset))
trainset <- trainset[, -trainColNum]
testset <- testset[, -trainColNum]
# Position of the label column, used to separate x and y when tuning.
typeColNum <- grep("success", names(reduceddata))

# SVM
# https://eight2late.wordpress.com/2017/02/07/a-gentle-introduction-to-support-vector-machines-using-r/
# Linear
# Every SVM below is fitted on `trainset` and scored by the share of correct
# predictions, first on the training set and then on the test set.
# The classification mode is passed as `type`; the previous `method = ...` is
# not an argument of svm() and was silently ignored.
set.seed(600)
svm_model <- svm(
  goodframe.success ~ .,
  data = trainset, type = "C-classification", kernel = "linear"
)
svm_model
pred_train <- predict(svm_model, trainset)
mean(pred_train == trainset$goodframe.success)
pred_test <- predict(svm_model, testset)
mean(pred_test == testset$goodframe.success)

# Radial
set.seed(600)
svm_model <- svm(
  goodframe.success ~ .,
  data = trainset, type = "C-classification", kernel = "radial"
)
svm_model
pred_train <- predict(svm_model, trainset)
mean(pred_train == trainset$goodframe.success)
pred_test <- predict(svm_model, testset)
mean(pred_test == testset$goodframe.success)

# Sigmoid
set.seed(600)
svm_model <- svm(
  goodframe.success ~ .,
  data = trainset, type = "C-classification", kernel = "sigmoid"
)
svm_model
pred_train <- predict(svm_model, trainset)
mean(pred_train == trainset$goodframe.success)
pred_test <- predict(svm_model, testset)
mean(pred_test == testset$goodframe.success)

# Polynomial
set.seed(600)
svm_model <- svm(
  goodframe.success ~ .,
  data = trainset, type = "C-classification", kernel = "polynomial"
)
svm_model
pred_train <- predict(svm_model, trainset)
mean(pred_train == trainset$goodframe.success)
pred_test <- predict(svm_model, testset)
mean(pred_test == testset$goodframe.success)

# Tuning for radial: grid search over gamma and cost, then refit with the
# best combination found.
set.seed(600)
tune_out <- tune.svm(
  x = trainset[, -typeColNum],
  y = trainset[, typeColNum],
  gamma = 10^(-3:3),
  cost = c(0.01, 0.1, 1, 10, 100, 1000),
  kernel = "radial"
)
tune_out$best.parameters$cost
tune_out$best.parameters$gamma
svm_model <- svm(
  goodframe.success ~ .,
  data = trainset, type = "C-classification", kernel = "radial",
  cost = tune_out$best.parameters$cost,
  gamma = tune_out$best.parameters$gamma
)
pred_train <- predict(svm_model, trainset)
mean(pred_train == trainset$goodframe.success)
pred_test <- predict(svm_model, testset)
mean(pred_test == testset$goodframe.success)
# Values left in the script after the two accuracy calls above (presumably the
# train / test accuracy printed by an earlier run): 0.9411765 0.8571429

# https://www.r-bloggers.com/understanding-naive-bayes-classifier-using-r/
# Naive Bayes
set.seed(600)
nb_model <- naiveBayes(goodframe.success ~ ., data = trainset)
summary(nb_model)
pred_train <- predict(nb_model, trainset)
# Share of correct Naive Bayes predictions on the training set; `pred_train`
# holds the predictions of `nb_model` computed just before this point.
mean(pred_train == trainset$goodframe.success)
# Share of correct Naive Bayes predictions on the held-out test set.
pred_test <- predict(nb_model, testset)
mean(pred_test == testset$goodframe.success)
# Values left in the script after the two accuracy calls above (presumably the
# train / test accuracy printed by an earlier run): 0.8235294 0.5714286