2017-06-21 79 views
1

我正在嘗試編寫一個函數,把存放在 Google Cloud Storage 上的 JSON 文件加載到 BigQuery 數據集中。但是,即使我已經明確傳入了架構(schema),程式仍然報錯:「未在作業或表上指定架構」(No schema specified on job or table)。

標題:使用 Python 從 Google Cloud Storage 加載數據 —— 未指定架構的 RuntimeError

import oauth2client 
import uuid 
import time 
from google.cloud import bigquery as bq 
# from oauth2client.client import GoogleCredentials 

# Configuration 
# NOTE(review): the `---` values below are redacted placeholders, not valid
# Python; replace each one with a real string literal before running.
BILLING_PROJECT_ID = --- 
DATASET_NAME = --- 
TABLE_NAME = --- 
BUCKET_NAME = --- 
FILE = --- 
# Cloud Storage URI of the file to load, built from the bucket and file name.
SOURCE = 'gs://{}/{}'.format(BUCKET_NAME, FILE) 

# Column definitions passed to load_data_from_gcs() as the table schema.
SCHEMA = [ 
    bq.SchemaField('question_id', 'INTEGER'), 
    bq.SchemaField('accepted_answer', 'INTEGER'), 
    bq.SchemaField('answer_count', 'INTEGER') 
] 

# CREDENTIALS = GoogleCredentials.get_application_default() 

# Module-level BigQuery client used by every function in this file.
client = bq.Client(project=BILLING_PROJECT_ID) 


# Dataset 
# Check if the dataset exists 
def create_datasets(name):
    """Create the BigQuery dataset ``name`` unless it already exists.

    Uses the module-level ``client``. Prints a confirmation only when the
    dataset is reported as existing after the create call.

    Args:
        name: Name of the dataset to create.
    """
    dataset = client.dataset(name)
    # Explicit checks instead of ``assert``: assertions are stripped under
    # ``python -O``, which would make create() run on an existing dataset.
    if dataset.exists():
        return
    dataset.create()
    if dataset.exists():
        print("Dataset {} created".format(name))


def load_data_from_gcs(dataset_name, table_name, source, schema):
    """Load newline-delimited JSON from Google Cloud Storage into a table.

    The destination table is created with ``schema`` when it does not exist
    yet. Starting the load job against a table that was never created is what
    produced the "no schema specified on job or table" error.

    Args:
        dataset_name: Name of the dataset that holds the destination table.
        table_name: Name of the destination table.
        source: ``gs://bucket/object`` URI of the file to load.
        schema: List of ``SchemaField`` objects describing the table columns.

    Raises:
        RuntimeError: Raised by ``wait_for_job`` when the job ends with errors.
    """
    dataset = client.dataset(dataset_name)
    table = dataset.table(table_name)
    table.schema = schema
    # Assigning table.schema is not enough on its own: the table has to be
    # created before the job begins, otherwise the load is rejected.
    if not table.exists():
        table.create()

    job_name = str(uuid.uuid4())
    job = client.load_table_from_storage(job_name, table, source)
    job.source_format = 'NEWLINE_DELIMITED_JSON'

    job.begin()
    wait_for_job(job)

    print('Loaded {} rows into {}:{}.'.format(
        job.output_rows, dataset_name, table_name))


def wait_for_job(job):
    """Block until ``job`` reports the state ``'DONE'``.

    The job is reloaded once per second until it is done.

    Args:
        job: Object exposing ``reload()``, ``state``, ``error_result`` and
            ``errors``.

    Raises:
        RuntimeError: If the finished job has a truthy ``error_result``; the
            exception carries ``job.errors``.
    """
    job.reload()
    while job.state != 'DONE':
        time.sleep(1)
        job.reload()

    if job.error_result:
        raise RuntimeError(job.errors)


# Run the load with the module-level configuration values.
load_data_from_gcs(
    dataset_name=DATASET_NAME,
    table_name=TABLE_NAME,
    source=SOURCE,
    schema=SCHEMA,
)

回答

0

我已經解決了這個問題:原因是我忘了在啟動載入作業(job.begin())之前先調用 table.create()。

相關問題