2016-07-22 51 views
2

我想把MySQL中的幾百個單獨的數據庫讀入R。我可以逐個獲取它們,但我不知道如何寫一個循環、函數或apply調用來一次全部獲取。如何將所有MySQL數據庫讀入R?

以下是我如何分別獲取它們的方法。

library(RMySQL)

## Open one connection to the "bookstore" database. It is reused for every
## query below and deliberately left open for further work.
mydb <- dbConnect(MySQL(), user = "root", password = "nelson",
                  host = "localhost", dbname = "bookstore")

## dbGetQuery() sends the statement, fetches all rows and clears the result
## set in one call, so no result handle is left pending on the connection
## (the dbSendQuery()/fetch() pair never called dbClearResult()).
dub4_4_16 <- dbGetQuery(mydb, "select * from dub")

reg4_4_16 <- dbGetQuery(mydb, "select * from all_sorted")

dub4_5_16 <- dbGetQuery(mydb, "select * from dub4_5")

回答

3

我在使用RMySQL時會用到一組類似下面的函數(基於tcpl包中的函數改寫而來)。這些函數只是對DBI調用的封裝。這是一個非常長的答案,但方法的要點是:先構造查詢字符串,再用apply系列函數逐條執行查詢來加載數據。結果(results)是一個列表,其中包含連接中每個數據庫的每個表所對應的data.table對象。

library(data.table) 
library(RMySQL) 

##----------------------------------------------------------------------------## 
## Accessory functions 
##----------------------------------------------------------------------------## 

## Run `query` against database `db` and return the result as a data.table.
##
## A fresh connection is opened for every call and is always closed again,
## even when the query fails.
##
## Arguments:
##   query            SQL statement passed to dbGetQuery().
##   db               Database name passed to dbConnect() as `dbname`.
##   user, pass, host Connection settings. Change the default values as
##                    necessary, or just pass your values each time the
##                    function is called. (The original this is based on
##                    finds global variables set by the package instead.)
##
## Returns: the query result converted with as.data.table().
getQuery <- function(query, db, user = "dayne",
        pass = "password", host = "localhost") {

    dbcon <- dbConnect(drv = RMySQL::MySQL(),
        user = user,
        password = pass,
        host = host,
        dbname = db)
    ## Registered immediately so the connection is released even if
    ## dbGetQuery() below raises an error.
    on.exit(dbDisconnect(dbcon), add = TRUE)

    result <- dbGetQuery(dbcon, query)
    result <- as.data.table(result)
    result[]

}

## Send a statement whose result rows are not needed (e.g. CREATE / DROP).
##
## A fresh connection is opened for every call and is always closed again.
##
## Arguments:
##   query            SQL statement passed to dbSendQuery().
##   db               Database name passed to dbConnect() as `dbname`;
##                    defaults to "".
##   user, pass, host Connection settings.
##
## Returns: TRUE when the statement was sent without error; otherwise the
## error message captured by try(), as a character string.
sendQuery <- function(query, db = "", user = "dayne",
        pass = "password", host = "localhost") {

    dbcon <- dbConnect(drv = RMySQL::MySQL(),
        user = user,
        password = pass,
        host = host,
        dbname = db)
    ## Runs after everything below, so the result set is still cleared
    ## before the connection is closed, and the connection is released
    ## even if dbClearResult() fails.
    on.exit(dbDisconnect(dbcon), add = TRUE)

    ## Errors are captured rather than raised so that callers iterating
    ## over many statements keep going.
    temp <- try(dbSendQuery(dbcon, query), silent = TRUE)
    if (inherits(temp, "try-error")) {
        return(temp[1])
    }

    dbClearResult(temp)
    TRUE

}

## Append the rows of `dat` to table `tbl` in database `db`.
##
## A fresh connection is opened for every call and is always closed again,
## even when the write fails.
##
## Arguments:
##   dat              Data to write; passed to dbWriteTable() as `value`.
##   tbl              Name of the target table.
##   db               Database name passed to dbConnect() as `dbname`.
##   user, pass, host Connection settings.
##
## Returns: TRUE. A failed write raises the error from dbWriteTable().
appendTable <- function(dat, tbl, db, user = "dayne",
         pass = "password", host = "localhost") {

    dbcon <- dbConnect(drv = RMySQL::MySQL(),
        user = user,
        password = pass,
        host = host,
        dbname = db)
    ## Registered immediately so the connection is released even if
    ## dbWriteTable() below raises an error.
    on.exit(dbDisconnect(dbcon), add = TRUE)

    dbWriteTable(conn = dbcon,
       name = tbl,
       value = dat,
       row.names = FALSE,
       append = TRUE)

    TRUE

}

##----------------------------------------------------------------------------## 
## Create example data 
##----------------------------------------------------------------------------## 

## Three example databases, created with one CREATE DATABASE statement each.
listofdb <- c("db1", "db2", "db3")
q1 <- paste0("CREATE DATABASE ", listofdb, ";")
sapply(q1, sendQuery)

## Five example tables (tb1..tb5), each with a single DOUBLE column `val`.
listoftables <- paste0("tb", 1:5)
q2fmt <- "CREATE TABLE %s (val DOUBLE)"
q2 <- sprintf(q2fmt, listoftables)

## Create every example table in database `db` and append ten random
## normal values to each one. Returns the result of the append step.
createtables <- function(db) {
    sapply(q2, sendQuery, db = db)
    sapply(listoftables,
     appendTable,
     db = db,
     dat = data.table(val = rnorm(10)))
}

## The function is named `createtables`; writing it with a space inside the
## identifier is a syntax error.
sapply(listofdb, createtables)


##----------------------------------------------------------------------------## 
## Do the work 
##----------------------------------------------------------------------------## 

## Load all of the tables from the different databases
my_db_list <- getQuery("SHOW DATABASES;", db = "")$Database
## SHOW DATABASES also lists MySQL's own system schemas; drop them so only
## user databases are processed.
my_db_list <- setdiff(my_db_list,
                      c("information_schema", "mysql",
                        "performance_schema", "sys"))
my_db_list
# [1] "db1" "db2" "db3"

table_list <- lapply(my_db_list, getQuery, query = "SHOW TABLES;")
names(table_list) <- my_db_list
table_list
# $db1
# Tables_in_db1
# 1:   tb1
# 2:   tb2
# 3:   tb3
# 4:   tb4
# 5:   tb5
#
# $db2
# Tables_in_db2
# 1:   tb1
# 2:   tb2
# 3:   tb3
# 4:   tb4
# 5:   tb5
#
# $db3
# Tables_in_db3
# 1:   tb1
# 2:   tb2
# 3:   tb3
# 4:   tb4
# 5:   tb5

## Add the db name to each of the table_list data.tables and collapse
table_list <- lapply(names(table_list),
        function(x) table_list[[x]][ , db := x])
## The first column is named differently in every item (Tables_in_<db>),
## so bind by position explicitly rather than by name.
table_list <- rbindlist(table_list, use.names = FALSE)
setnames(table_list, c("tbl", "db"))

## Load all tables from all databases
table_list[ , full_name := paste(db, tbl, sep = ".")]

## Backtick-quote identifiers (doubling any embedded backtick) so database
## and table names containing special characters or reserved words still
## form valid SQL.
quote_id <- function(x) paste0("`", gsub("`", "``", x, fixed = TRUE), "`")
get_tables <- paste0("SELECT * FROM ",
                     quote_id(table_list$db), ".", quote_id(table_list$tbl),
                     ";")

## One data.table per table, named "<db>.<tbl>".
results <- lapply(get_tables, getQuery, db = "")
names(results) <- table_list$full_name

results[["db1.tb5"]]
#   val
# 1: -0.09380952
# 2: 0.81556657
# 3: 1.18589086
# 4: 0.19746379
# 5: 0.91738280
# 6: 1.30142674
# 7: 1.42089957
# 8: -0.16475130
# 9: 0.40345353
# 10: -1.31012033

##----------------------------------------------------------------------------## 
## Remove example data 
##----------------------------------------------------------------------------## 

## One DROP DATABASE statement per example database, each sent on its own
## connection via sendQuery().
cleanup <- paste0("DROP DATABASE ", listofdb, ";")
sapply(cleanup, function(stmt) sendQuery(stmt))
相關問題