2016-12-01 63 views
0

我正在用 R 的 wordcloud 套件製作文字雲,到目前爲止只成功完成了基本的功能;現在我想依照特定地點分別顯示文字雲,也就是根據另一欄(LOCATION)的值,爲某一欄(TEXT)的文字產生文字雲。例如,假設我有如下的資料:

     TEXT                 LOCATION 
    True or false? link(#Addition, #Classification)           NewYork,USA 
    Gene deFuser: detecting gene fusion events from protein sequences #bmC#bioinformatics Norwich,UK 
    Biologists do have a sense of humor, especially computational bio people     France 
    Semantic Inference using #Chemogenomics Data for Drug Discovery       London,UK 

以下是我使用的程式碼:

library(tm) 
library(SnowballC) 
library(wordcloud) 

# Sample data: one free-text entry per row plus the place it was posted from.
DATA <- c(
  "True or false? link(#Addition, #Classification) ",
  "Gene deFuser: detecting gene fusion events from protein sequences #bmC#bioinformatics",
  " Biologists do have a sense of humor, especially computational bio people",
  "Semantic Inference using #Chemogenomics Data for Drug Discovery"
)
Location <- c("NewYork,USA", "Norwich,UK", " France", "London,UK")

# stringsAsFactors = FALSE keeps both columns as character vectors on
# R < 4.0 as well, where data.frame() would otherwise build factors.
jeopQ <- data.frame(DATA, Location, stringsAsFactors = FALSE)

# Build the corpus and normalise the text: lower-case, strip punctuation and
# digits, drop English stop words, then stem.
jeopCorpus <- Corpus(VectorSource(jeopQ$DATA))
jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))
# content_transformer() already keeps the documents valid, so the former
# tm_map(jeopCorpus, PlainTextDocument) workaround is not needed.
# NOTE(review): that step is reported to corrupt the corpus on recent tm
# releases -- confirm against the installed tm version.
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords("english"))
jeopCorpus <- tm_map(jeopCorpus, stemDocument)

# wordLengths is the documented control option; the older minWordLength name
# is not recognised, which silently left the default minimum of 3 characters.
myDTM <- TermDocumentMatrix(jeopCorpus, control = list(wordLengths = c(1, Inf)))

m <- as.matrix(myDTM)

# Total frequency of each term across all documents, most frequent first.
v <- sort(rowSums(m), decreasing = TRUE)

# Fixed seed so the word placement is reproducible between runs.
set.seed(4363)
# NOTE(review): with only the four sample sentences above, few or no terms
# reach min.freq = 3 -- lower it when trying the code on small data.
wordcloud(names(v), v, max.words = 100, min.freq = 3, scale = c(4, 0.1), random.order = FALSE, rot.per = .5, vfont = c("sans serif", "plain"), colors = palette())

以上是基本的 wordcloud 程式碼。我想要的是:爲地點包含「USA」的資料產生一個單獨的文字雲,並爲地點是「UK」以及「France」的資料各產生一個單獨的文字雲,這可能嗎?

回答

0
# Draw one word cloud per country. DATA and Location are the vectors defined
# in the question's script; tm, SnowballC and wordcloud must be attached.
# stringsAsFactors = FALSE keeps the text as character on R < 4.0 as well.
jeopQ <- data.frame(DATA, Location, stringsAsFactors = FALSE)

# Clean Location: drop everything up to and including the last comma
# ("NewYork,USA" -> "USA"), then trim stray whitespace so that entries
# without a comma (" France") are normalised too.
jeopQ$Location <- trimws(sub(".*,\\s*", "", jeopQ$Location))

# The stop-word list does not depend on the location, so fetch it once.
english_stopwords <- stopwords("english")

# Loop over the distinct locations and plot a separate cloud for each.
for (loc in unique(jeopQ$Location)) {
    # Only the texts that belong to the current location.
    jeopCorpus <- Corpus(VectorSource(jeopQ$DATA[jeopQ$Location == loc]))
    jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))
    # content_transformer() already keeps the documents valid, so the former
    # tm_map(jeopCorpus, PlainTextDocument) workaround is not needed.
    # NOTE(review): that step is reported to corrupt the corpus on recent tm
    # releases -- confirm against the installed tm version.
    jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
    jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
    jeopCorpus <- tm_map(jeopCorpus, removeWords, english_stopwords)
    jeopCorpus <- tm_map(jeopCorpus, stemDocument)

    # wordLengths is the documented control option; the older minWordLength
    # name is not recognised, which silently left the default minimum of 3.
    myDTM <- TermDocumentMatrix(jeopCorpus, control = list(wordLengths = c(1, Inf)))

    m <- as.matrix(myDTM)

    # Total frequency of each term for this location, most frequent first.
    v <- sort(rowSums(m), decreasing = TRUE)

    # Nothing left after cleaning (e.g. only stop words): skip this location.
    if (length(v) == 0) {
        next
    }

    # Same seed for every cloud so each layout is reproducible.
    set.seed(4363)
    wordcloud(names(v), v, max.words = 100, min.freq = 3, scale = c(4, 0.1), random.order = FALSE, rot.per = .5, vfont = c("sans serif", "plain"), colors = palette())
}

enter image description here enter image description here enter image description here

+1

這就是完美!謝謝! – hyeri

+0

@hyeri樂意幫忙 –

相關問題