2011-03-08 39 views

回答

7

用 Python 來做非常簡單。

#!/usr/bin/env python 

from couchdbkit import Server, Database 
from couchdbkit.loaders import FileSystemDocsLoader 
from csv import DictReader 
import sys, subprocess, math, os 



def parseDoc(doc):
    """Convert string values that are really numbers into ints or floats.

    The mapping is modified in place and also returned. Only values that are
    ``str`` instances are examined; everything else is left untouched.

    Conversion rules, applied to each string value in order:

    * If ``int()`` accepts the text (including signed values such as
      ``"-5"``), the value becomes an ``int``.
    * Otherwise, if ``float()`` accepts it and the result is finite, the
      value becomes a ``float``.
    * Otherwise the original string is kept. This includes ``"nan"`` and
      ``"inf"``, which have no JSON representation.

    Args:
        doc: A dict-like row, e.g. as produced by ``csv.DictReader``.

    Returns:
        The same ``doc`` object, with numeric-looking strings converted.
    """
    # Iterate over a snapshot so that reassigning values is safe regardless
    # of how the mapping implements items().
    for key, value in list(doc.items()):
        if not isinstance(value, str):
            continue

        # Try int first: unlike str.isdigit(), int() understands a leading
        # sign and never accepts text it cannot actually convert.
        try:
            doc[key] = int(value)
            continue
        except ValueError:
            pass

        try:
            number = float(value)
        except ValueError:
            # Not numeric at all; keep the original string.
            continue

        # NaN and +/-Infinity are not valid JSON numbers, so leave those
        # values as their original text.
        if not (math.isnan(number) or math.isinf(number)):
            doc[key] = number
    return doc


def upload(db, docs):
    """Send one batch of documents to the database and start a new batch.

    Args:
        db: Database object exposing ``bulk_save``.
        docs: List of documents to save in a single bulk request.

    Returns:
        A new empty list, for the caller to use as the next batch.
    """
    db.bulk_save(docs)
    # Hand back a fresh list; the caller rebinds its batch variable to it.
    return []


def uploadFile(fname, uri, dbname):
    """Bulk-upload the rows of a CSV file to a CouchDB database.

    The first line of the CSV file must contain the keys; every following
    line holds the values for those keys and becomes one document. See the
    Python ``csv`` module for other options, such as using a tab delimiter.

    Rows whose ``_id`` already exists in the database get the current
    ``_rev`` attached so that the upload updates the stored document. Rows
    without an ``_id`` are uploaded as new documents.

    Args:
        fname: Path of the CSV file to read.
        uri: URI of the CouchDB server.
        dbname: Name of the database, obtained via ``get_or_create_db``.
    """
    print('Upload contents of %s to %s/%s' % (fname, uri, dbname))

    # Connect to the db.
    theServer = Server(uri)
    db = theServer.get_or_create_db(dbname)

    # Used for bulk uploading: documents are sent in batches of this size.
    checkpoint = 100
    docs = []

    # NOTE: 'rU' (universal newlines) is the Python 2 spelling. The 'U' flag
    # was removed in Python 3.11; when porting, open the file with
    # newline='' instead, as the csv module documentation recommends.
    with open(fname, 'rU') as csvfile:
        reader = DictReader(csvfile, dialect='excel')

        for row in reader:
            # Remember the id as text before parseDoc runs: CouchDB document
            # ids must be strings, and parseDoc would turn "123" into 123.
            doc_id = row.get('_id')

            # Convert strings that are really numbers into ints and floats.
            newdoc = parseDoc(row)

            if doc_id:
                newdoc['_id'] = doc_id
                # If the doc is already in the database, assign its _rev so
                # that the upload updates the existing doc.
                if db.doc_exist(doc_id):
                    newdoc['_rev'] = db.get_rev(doc_id)
            else:
                # No usable id in this row: drop any empty placeholder so
                # the document is saved as a new one.
                newdoc.pop('_id', None)

            docs.append(newdoc)

            if len(docs) >= checkpoint:
                docs = upload(db, docs)

    # Don't forget the last, partially filled batch.
    if docs:
        upload(db, docs)



if __name__ == '__main__':
    # Command line: <csv file> <server uri> <database name>.
    # Exit with a usage message instead of an IndexError when arguments are
    # missing; any extra arguments are ignored.
    if len(sys.argv) < 4:
        sys.exit('usage: %s <csv-file> <couchdb-uri> <dbname>' % sys.argv[0])

    filename = sys.argv[1]
    uri = sys.argv[2]
    dbname = sys.argv[3]

    uploadFile(filename, uri, dbname)
+1

它可以像mongoDB中的mongoimport一樣簡單嗎? – bbnn 2011-03-09 21:20:53

+0

不錯的一個。感謝分享 :) – 2017-05-29 18:39:01

4

Apache CouchDB只存儲JSON文檔。因此,要導入CSV,您必須將每一行轉換爲各自獨立的JSON文檔,然後照常以HTTP POST方式提交。

您可能需要編寫一個程序來遍歷每一行:將CSV行(值的序列)轉換爲JSON文檔(「鍵:值」對的序列),然後只需使用HTTP將其發送到CouchDB。