2016-07-15 51 views
1

我愛 mlr!在下面的代碼中,我比較了四個分類器的性能。當我使用 Pima Indians Diabetes 數據運行下面的代碼時,出現了一些奇怪的錯誤(MLR:mod not found):

# Script from the question: compares four classifiers on the Pima Indians
# Diabetes data. Kept exactly as posted, since it is the reproduction of
# the reported error.
library(mlbench) 
# NOTE(review): no library(mlr) appears in this listing, and caret exports
# its own train(); the train() calls below may resolve to caret's version
# instead of mlr's -- confirm, and attach mlr last.
library(caret) 
library(randomForest) 
data(PimaIndiansDiabetes) 
# NOTE(review): `data` is not the data set loaded above (that object is
# named PimaIndiansDiabetes); presumably a leftover -- verify.
data2<-data 
## Define the task 
# NOTE(review): this first task (target "Class", positive "B") is
# overwritten by the next assignment; it looks like a leftover from a
# different data set -- confirm it can be dropped.
Class.task = makeClassifTask(id = "USUBJID", data = data2, target = "Class", positive ="B") 

# Task actually used below: predict `diabetes`, with "pos" as the positive class.
Class.task = makeClassifTask(data = PimaIndiansDiabetes, target = "diabetes", positive ="pos") 


# Compute filter values with the "mrmr" method and plot them.
fv = generateFilterValuesData(Class.task, method = "mrmr") 

plotFilterValues(fv) 

# Build a reduced task from the filter values, using a threshold of -0.2.
filtered.task = filterFeatures(Class.task, fval = fv, threshold = -.2) 

#filtered.task = Class.task 

# Random split of the row indices: two thirds for training, the rest for testing.
n = getTaskSize(filtered.task) 
train.set = sample(n, size = round(2/3 * n)) 
test.set = setdiff(seq_len(n), train.set) 

# Learner 1: LDA with probability predictions, fitted on the filtered task.
lrn1 = makeLearner("classif.lda", predict.type = "prob") 
mod1 = train(lrn1, filtered.task, subset = train.set) 
pred1 = predict(mod1, task = filtered.task, subset = test.set) 


# Learner 2: ksvm, fitted on the filtered task.
lrn2 = makeLearner("classif.ksvm", predict.type = "prob") 
mod2 = train(lrn2, filtered.task, subset = train.set) 
pred2 = predict(mod2, task = filtered.task, subset = test.set) 

# Learner 3: random forest. Note that this one is fitted on the unfiltered
# Class.task, unlike learners 1 and 2.
lrn3 = makeLearner("classif.randomForest", predict.type = "prob") 
mod3 = train(lrn3, Class.task, subset = train.set) 
pred3 = predict(mod3, task = Class.task, subset = test.set) 

# Learner 5: xgboost, also fitted on the unfiltered Class.task.
lrn5 = makeLearner("classif.xgboost", predict.type = "prob") 
mod5 = train(lrn5, Class.task, subset = train.set) 
pred5 = predict(mod5, task = Class.task, subset = test.set) 

### Tune wrapper for ksvm 
# Inner resampling is a holdout split; the grid covers C in 2^(-1:1).
# The measures list (auc, mmce) is reused for the benchmark below.
rdesc.inner = makeResampleDesc("Holdout") 
ms = list(auc, mmce) 
ps = makeParamSet(
    makeDiscreteParam("C", 2^(-1:1)) 
) 
ctrl = makeTuneControlGrid() 
# lrn2 is replaced by its tuning wrapper from here on.
lrn2 = makeTuneWrapper(lrn2, rdesc.inner,ms, ps, ctrl, show.info = FALSE) 

# Benchmark all four learners on the filtered task with 5-iteration CV.
lrns = list(lrn1, lrn2,lrn3,lrn5) 
rdesc.outer = makeResampleDesc("CV", iters = 5) 

bmr = benchmark(lrns, tasks = filtered.task, resampling = rdesc.outer, measures = ms, show.info = FALSE) 
bmr 

我得到的錯誤是:

Error in unique.default(x, nmax = nmax) : 
    unique() applies only to vectors 
    > pred1 = predict(mod1, task = filtered.task, subset = test.set) 
    Error in predict(mod1, task = filtered.task, subset = test.set) : 
    object 'mod1' not found 
    > lrn2 = makeLearner("classif.ksvm", predict.type = "prob") 
    > mod2 = train(lrn2, filtered.task, subset = train.set) 
    Error in unique.default(x, nmax = nmax) : 
    unique() applies only to vectors 
    > pred2 = predict(mod2, task = filtered.task, subset = test.set) 
    Error in predict(mod2, task = filtered.task, subset = test.set) : 
    object 'mod2' not found 
    > lrn3 = makeLearner("classif.randomForest", predict.type = "prob") 
    > mod3 = train(lrn3, Class.task, subset = train.set) 
    Error in unique.default(x, nmax = nmax) : 
    unique() applies only to vectors 
    > pred3 = predict(mod3, task = Class.task, subset = test.set) 
    Error in predict(mod3, task = Class.task, subset = test.set) : 
    object 'mod3' not found 
    > 
    > lrn5 = makeLearner("classif.xgboost", predict.type = "prob") 
    > mod5 = train(lrn5, Class.task, subset = train.set) 
    Error in unique.default(x, nmax = nmax) : 
    unique() applies only to vectors 
    > pred5 = predict(mod5, task = Class.task, subset = test.set) 
    Error in predict(mod5, task = Class.task, subset = test.set) : 

我得到了性能結果。有人知道我哪裡做錯了嗎?謝謝!!!

+0

你需要用 `PimaIndiansDiabetes` 代替 `data`。`data` 是一種環境。 –

+0

即使我改成 data2 <- PimaIndiansDiabetes,仍然得到相同的錯誤……不過還是謝謝你的回應。 – tom

回答

2

問題是,您在 caret 之前加載了 mlr——兩者都有 train 函數,後加載的程序包會遮蔽(mask)先加載的程序包中的同名函數。您需要最後加載 mlr 程序包(而且您完全不需要 caret)。

編輯:完整的可運行代碼

library(mlbench) 
library(mlr) 

# Load the Pima Indians Diabetes data set (from mlbench).
data(PimaIndiansDiabetes)

# Classification task ----
# Predict `diabetes`, treating "pos" as the positive class.
Class.task <- makeClassifTask(
  data = PimaIndiansDiabetes,
  target = "diabetes",
  positive = "pos"
)

# Feature filtering ----
# Score the features with the "mrmr" filter, plot the scores, and build a
# reduced task using a threshold of -0.2.
fv <- generateFilterValuesData(Class.task, method = "mrmr")
plotFilterValues(fv)
filtered.task <- filterFeatures(Class.task, fval = fv, threshold = -0.2)

# filtered.task <- Class.task

# Train/test split ----
# Two thirds of the row indices are sampled for training; the remainder is
# held out for testing.
n <- getTaskSize(filtered.task)
train.set <- sample(n, size = round(2 / 3 * n))
test.set <- setdiff(seq_len(n), train.set)

# Individual learners ----
# LDA and ksvm are fitted on the filtered task.
lrn1 <- makeLearner("classif.lda", predict.type = "prob")
mod1 <- train(lrn1, filtered.task, subset = train.set)
pred1 <- predict(mod1, task = filtered.task, subset = test.set)

lrn2 <- makeLearner("classif.ksvm", predict.type = "prob")
mod2 <- train(lrn2, filtered.task, subset = train.set)
pred2 <- predict(mod2, task = filtered.task, subset = test.set)

# Random forest and xgboost are fitted on the unfiltered task.
lrn3 <- makeLearner("classif.randomForest", predict.type = "prob")
mod3 <- train(lrn3, Class.task, subset = train.set)
pred3 <- predict(mod3, task = Class.task, subset = test.set)

lrn5 <- makeLearner("classif.xgboost", predict.type = "prob")
mod5 <- train(lrn5, Class.task, subset = train.set)
pred5 <- predict(mod5, task = Class.task, subset = test.set)

# Tune wrapper for ksvm ----
# Grid search over C in 2^(-1:1), evaluated on a holdout split; lrn2 is
# replaced by the wrapped learner.
rdesc.inner <- makeResampleDesc("Holdout")
ms <- list(auc, mmce)
ps <- makeParamSet(makeDiscreteParam("C", 2^(-1:1)))
ctrl <- makeTuneControlGrid()
lrn2 <- makeTuneWrapper(lrn2, rdesc.inner, ms, ps, ctrl, show.info = FALSE)

# Benchmark ----
# Compare the four learners on the filtered task with 5-iteration CV,
# reporting the measures in `ms`.
lrns <- list(lrn1, lrn2, lrn3, lrn5)
rdesc.outer <- makeResampleDesc("CV", iters = 5)

bmr <- benchmark(
  lrns,
  tasks = filtered.task,
  resampling = rdesc.outer,
  measures = ms,
  show.info = FALSE
)
bmr
+0

嗨拉爾斯!我仍然得到相同的錯誤..謝謝你的回覆! – tom

+0

我添加了完整的代碼。 –

+0

嗨拉爾斯,完美運行!謝謝! – tom