0
這可能是PICNIC,但是當我嘗試構建一個函數時,我會比在控制檯中執行操作時收到一組相當奇怪的行爲。R-在函數外部和內部分配不同的變量類別?
我試圖使用cut2來產生一組間隔並將它們分配給一個變量供以後重用。在函數外部使用會產生合適的向量,但在函數內部,分配會強制使用不同的方式。我試着圍繞它包裝as.vector,但是生成了一個char [210]。
誰能告訴我我做錯了什麼?
感謝, 斯蒂芬 PS全局分配是這樣的間隔可以被修改,後來重用
設置
library("Hmisc")
library("caret")
# functions in use ----------------------------------------------------------------
# functions for splitting data according to Max Kuhn's preferences in caret vignette, code condensed to save space
splitDataset<-function(dataset=rawdata, nPrimaryKeyCol=1, nOutcomeCol=1) {
end<-as.numeric(ncol(dataset))
stopifnot(is.numeric(nPrimaryKeyCol),nPrimaryKeyCol<=end,is.numeric(nOutcomeCol),nOutcomeCol<=end,(nPrimaryKeyCol+nOutcomeCol)<=end)
predstart<-nPrimaryKeyCol+1
predend<-ncol(dataset)-nOutcomeCol
assign(x="keys",value=dataset[, 0:nPrimaryKeyCol],envir = parent.frame())
assign(x="outcomes",value=dataset[,(predend+1):end],envir = parent.frame())
assign(x="predictors",value=dataset[,predstart:predend], envir = parent.frame())
}
partitionDataset<-function(proportion=0.7){
require("caret")
assign(x="inTrain", value=createDataPartition(outcomes,p=proportion, list=FALSE), envir = parent.frame())
assign(x="trainKeys", value=keys[inTrain], envir = parent.frame())
assign(x="trainPredictors",value=predictors[inTrain,],envir = parent.frame())
assign(x="trainOutcomes", value=outcomes[inTrain],envir = parent.frame())
assign(x="testKeys",value=keys[-inTrain],envir = parent.frame())
assign(x="testPredictors", value=predictors[-inTrain,], envir = parent.frame())
assign(x="testOutcomes",value=outcomes[-inTrain], envir = parent.frame())
assign(x="trainPredictors.Bad", value=subset(trainPredictors,trainOutcomes=="bad"), envir = parent.frame())
}
# this is the problem function
equalFreqBins.derive<-function(characteristic,deriveDataset,g=20){
stopifnot(is.numeric(deriveDataset[,characteristic]),is.numeric(g))
dnam<-paste0("interval.",characteristic)
# this is the assignment
intervals<-cut2(deriveDataset[,characteristic],g,onlycuts=TRUE)
assign(
x=dnam,
value=intervals,
envir=as.environment(".GlobalEnv")
)
# return to make it easier to look at variable
return(str(intervals))
}
# data loadup----------------------------------------------------------------
data(GermanCredit)
GermanCredit$outcome<-GermanCredit$Class
GermanCredit$Class<-NULL
levels(GermanCredit$outcome)<-c("bad","good")
basedata<-GermanCredit
splitDataset(basedata,nPrimaryKeyCol=0,nOutcomeCol=1)
partitionDataset(proportion=0.7)
問題再現
# this is outputting a factor of 210
equalFreqBins.derive(characteristic="Age",deriveDataset=trainPredictors.Bad,g=20)
# this is outputting a num [1:20]
intervals<-cut2(trainPredictors.Bad[,"Age"],g=20,onlycuts=TRUE)
str(intervals)
非常感謝! – 2013-05-01 07:04:56