2017-10-16 51 views
0

基於算法進行預測的函數出錯

我有一個按如下方式組織的數據集:

library(lubridate) 
library(e1071) 
library(rpart) 
library(pROC) 
library(rpart.plot) 
library(RColorBrewer) 
library(ada) 
library(maboost) 
library(adabag) 
library(ROCR) 
library(data.table) 

> head(crypto_data) 
        time btc_price eth_price block_size difficulty estimated_btc_sent estimated_transaction_volume_usd hash_rate 
1: 2017-09-02 21:54:00 4537.834 330.727 142521291 8.88e+11   2.04e+13      923315360 7417412092 
2: 2017-09-02 22:29:00 4577.605 337.804 136524566 8.88e+11   2.03e+13      918188067 7152504517 
3: 2017-09-02 23:04:00 4566.360 336.938 134845546 8.88e+11   2.01e+13      910440916 7240807042 
4: 2017-09-02 23:39:00 4590.031 342.929 133910638 8.88e+11   1.99e+13      901565930 7284958305 
5: 2017-09-03 00:14:00 4676.193 354.171 130678099 8.88e+11   2.01e+13      922422228 7152504517 
6: 2017-09-03 00:49:00 4699.936 352.299 127557140 8.88e+11   1.99e+13      910457430 7064201992 
    miners_revenue_btc miners_revenue_usd minutes_between_blocks n_blocks_mined n_blocks_total n_btc_mined n_tx nextretarget total_btc_sent 
1:    2395   10839520     8.00   168   483207 2.10e+11 241558  483839  1.62e+14 
2:    2317   10482320     8.33   162   483208 2.03e+11 236661  483839  1.60e+14 
3:    2342   10596900     8.22   164   483216 2.05e+11 238682  483839  1.60e+14 
4:    2352   10642439     8.14   165   483220 2.06e+11 237159  483839  1.58e+14 
5:    2316   10611798     8.38   162   483223 2.03e+11 237464  483839  1.58e+14 
6:    2288   10481960     8.41   160   483226 2.00e+11 234472  483839  1.57e+14 
    total_fees_btc totalbtc trade_volume_btc trade_volume_usd targetVar 
1: 29597881711 1.65e+15  102451.92  463497285  buy 
2: 29202300823 1.65e+15  102451.92  463497285  buy 
3: 29234981721 1.65e+15  102451.92  463497285  buy 
4: 28991577368 1.65e+15  102451.92  463497285  buy 
5: 29179041967 1.65e+15   96216.78  440710136  hold 
6: 28844391629 1.65e+15   96216.78  440710136  hold 

然後,我創建了一個功能:

#' Compute the misclassification rate from a confusion matrix.
#'
#' @param conf_mat A confusion table or matrix; it is coerced with
#'   `as.matrix()` before use.
#' @return A list with two elements: `conf_mat` (the coerced matrix) and
#'   `error` (one minus the share of counts lying on the main diagonal).
classification_error <- function(conf_mat) {
  cm <- as.matrix(conf_mat)

  # Counts on the diagonal are the cases where truth and prediction agree.
  n_correct <- sum(diag(cm))
  n_total <- sum(cm)

  list(
    conf_mat = cm,
    error = 1 - n_correct / n_total
  )
}
    # Fit a linear-kernel SVM on a random subset of `inData` and return the
    # classification error on training, validation and test subsets.
    #
    # inData:   table with a `targetVar` column (the class label); every other
    #           column is used as a predictor via the formula `targetVar ~ .`.
    # trainPct: overall fraction of rows to use; it is divided 60/20/20 between
    #           the training, validation and test subsets.
    # Returns a list with trainError, valError and testError, each the result of
    # classification_error().
    #
    # NOTE(review): this is the version that fails as posted; the comments below
    # mark the lines that look wrong.
    predFunc <- function(inData,trainPct){ 



    # Fractions of all rows that go to each subset.
    trainP <- trainPct * .6 
    valP <- trainPct * .2 
    testP <- trainPct * .2 

    #SplitData 
    # NOTE(review): the three sample() calls are independent, so the same row
    # can land in more than one subset (train/validation/test may overlap).
    trainObs <- sample(nrow(inData), trainP * nrow(inData), replace = FALSE) 
    valObs <- sample(nrow(inData), valP * nrow(inData), replace = FALSE) 
    testObs <- sample(nrow(inData), testP * nrow(inData), replace = FALSE) 

    # Create the training/va/test datasets 
    trainDS <- inData[trainObs,] 
    valDS <- inData[valObs,] 
    testDS <- inData[testObs,] 

    # SVM- linear kernel 
    # NOTE(review): e1071::svm() documents `type`, not `method`, for choosing
    # C-classification -- presumably `method` is silently ignored; confirm.
    linearSVM <- svm(targetVar ~ ., data = trainDS, method = "C-classification", kernel = "linear") 


    # linear SVM Predictions 
    # `[, -c("targetVar")]` drops the label column; this negative-by-name form
    # presumably relies on inData being a data.table -- TODO confirm.
    predSVMlin <- predict(linearSVM, trainDS[,-c("targetVar")]) 
    valSVMlin <- predict(linearSVM, valDS[,-c("targetVar")]) 
    testSVMlin <- predict(linearSVM, testDS[,-c("targetVar")]) 

    # SVM Confusion matrix 
    # NOTE(review): if inData is a data.table, `trainDS[,c("targetVar")]` is a
    # one-column table rather than a vector, which would explain table() failing
    # with "'x' must be atomic for 'sort.list'".
    trainConfusion <- table(true = trainDS[,c("targetVar")], pred = predSVMlin) 
    # NOTE(review): the next two lines take the true labels from trainDS although
    # the predictions were made on valDS and testDS.
    valConfusion <- table(true = trainDS[,c("targetVar")], pred = valSVMlin) 
    testConfusion <- table(true = trainDS[,c("targetVar")], pred = testSVMlin) 

    # Linear SVM Classification error 
    trainClassificationError <- classification_error(trainConfusion) 
    valClassificationError <- classification_error(valConfusion) 
    testClassificationError <- classification_error(testConfusion) 

    # NOTE(review): the trailing comma after testError leaves an empty argument
    # in list(), which R rejects when the call is evaluated.
    return(list(trainError = trainClassificationError, 
       valError = valClassificationError, 
       testError = testClassificationError, 
      )) 
} 

然後我調用該函數:`crypt <- predFunc(crypto_data, .7)`,但是當我運行此代碼時,得到以下錯誤:

Error in sort.list(y) : 'x' must be atomic for 'sort.list' Have you called 'sort' on a list? 5. stop("'x' must be atomic for 'sort.list'\nHave you called 'sort' on a list?") 4. sort.list(y) 3. factor(a, exclude = exclude) 2. table(true = trainDs[, c("targetVar")], pred = predSVM) 1. predFunc(crypto_data, 0.7)

基本上,我認爲該函數在創建混淆矩陣時遇到了問題,但我不明白原因,也不知道如何解決。有什麼建議嗎?

+0

請寫明你使用了哪些庫。 – Alex

+0

@Alex 我已經把用到的庫加進問題裏了 – zsad512

回答

0
  1. 當你做線性SVM預測時,你應該使用類似這樣的寫法:

    # Predict on the training rows with the targetVar column removed.
    # `%>%` and select() are not base R; this assumes dplyr is attached
    # (library(dplyr)) -- TODO confirm.
    predSVMlin <- predict(linearSVM, trainDS %>% select(-targetVar))

  2. 在 `# SVM Confusion matrix` 這一部分,你可以使用 `trainDS$targetVar`,這樣得到的是一個(原子)向量,而不是單列的數據框。

  3. 這取決於你計算的是什麼指標,但最後兩步在我看來還不清楚,你能解釋一下嗎?

  4. 我找不到 `classification_error` 函數,請說明它來自哪裏,或者把它的源代碼貼出來。

+0

請參閱我的編輯,我添加了'classification_error'函數 – zsad512

0

看到你的評論之後,我可以給你一個解決方案:

#' Misclassification rate of a confusion matrix.
#'
#' @param conf_mat Confusion table or matrix (coerced via `as.matrix()`).
#' @return A list holding `conf_mat`, the coerced matrix, and `error`, the
#'   proportion of counts that fall off the main diagonal.
classification_error <- function(conf_mat) {
  mat <- as.matrix(conf_mat)

  # Accuracy is the diagonal mass over the total mass; error is its complement.
  accuracy <- sum(diag(mat)) / sum(mat)

  list(conf_mat = mat, error = 1 - accuracy)
}

#' Fit a linear-kernel SVM on a random split and report classification errors.
#'
#' A fraction `trainPct` of the rows is drawn at random and divided 60/20/20
#' into disjoint training, validation and test subsets. A C-classification SVM
#' with a linear kernel is fitted on the training subset, and the
#' misclassification rate is computed on each subset with
#' `classification_error()`.
#'
#' The split uses `sample()`, so call `set.seed()` beforehand for
#' reproducible results.
#'
#' @param inData A data.frame or data.table with a `targetVar` column holding
#'   the class label; all other columns are used as predictors.
#' @param trainPct Single number in (0, 1]: the overall share of rows to use.
#' @return A list with `trainError`, `valError` and `testError`, each being the
#'   list returned by `classification_error()` (`conf_mat` and `error`).
predFunc <- function(inData, trainPct) {
  stopifnot(
    is.numeric(trainPct), length(trainPct) == 1L,
    trainPct > 0, trainPct <= 1,
    "targetVar" %in% names(inData)
  )

  # Work on a plain data.frame copy: `[` and `$` then behave the same whether
  # the caller passed a data.frame or a data.table, and the caller's object is
  # never modified.
  inData <- as.data.frame(inData)

  # A factor label with levels taken from the full data keeps every confusion
  # table square, even when a subset happens to miss a class.
  inData$targetVar <- factor(inData$targetVar)
  classLevels <- levels(inData$targetVar)

  nObs <- nrow(inData)
  nTrain <- floor(trainPct * .6 * nObs)
  nVal <- floor(trainPct * .2 * nObs)
  # Guard against floating-point rounding pushing the total past nObs.
  nTest <- min(floor(trainPct * .2 * nObs), nObs - nTrain - nVal)
  if (min(nTrain, nVal, nTest) < 1) {
    stop("not enough rows to build train/validation/test subsets",
         call. = FALSE)
  }

  # SplitData: slice one permutation so the three subsets cannot overlap
  # (independent sample() calls would leak training rows into val/test).
  shuffled <- sample(nObs)
  trainObs <- shuffled[seq_len(nTrain)]
  valObs <- shuffled[nTrain + seq_len(nVal)]
  testObs <- shuffled[nTrain + nVal + seq_len(nTest)]

  # Create the training/validation/test datasets
  trainDS <- inData[trainObs, , drop = FALSE]
  valDS <- inData[valObs, , drop = FALSE]
  testDS <- inData[testObs, , drop = FALSE]

  # Fit only on classes actually present in the training subset.
  trainDS$targetVar <- droplevels(trainDS$targetVar)

  # SVM - linear kernel. The svm() argument that selects the machine is
  # `type`; `method` is not one of its parameters.
  linearSVM <- svm(targetVar ~ ., data = trainDS,
                   type = "C-classification", kernel = "linear")

  featureCols <- setdiff(names(inData), "targetVar")

  # Predict one subset and summarise it as confusion matrix + error rate.
  evaluateSplit <- function(splitDS) {
    predicted <- predict(linearSVM, splitDS[, featureCols, drop = FALSE])
    # Re-express predictions on the full set of class levels.
    predicted <- factor(as.character(predicted), levels = classLevels)
    truth <- factor(as.character(splitDS$targetVar), levels = classLevels)
    classification_error(table(true = truth, pred = predicted))
  }

  list(
    trainError = evaluateSplit(trainDS),
    valError = evaluateSplit(valDS),
    testError = evaluateSplit(testDS)
  )
}