我試圖遷移到postgresql的JSON數據如下。將此JSON數據遷移到postgresql時發生錯誤
JSON Data:
{
"wsgi.multiprocess": true,
"HTTP_REFERER": "http://localhost:9000/",
"SCRIPT_NAME": "",
"REQUEST_METHOD": "GET",
"PATH_INFO": "/api/impressions/i/",
"HTTP_ORIGIN": "http://localhost:9000",
"SERVER_PROTOCOL": "HTTP/1.1",
"QUERY_STRING": "",
"CONTENT_LENGTH": "",
"HTTP_USER_AGENT": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36",
"HTTP_CONNECTION": "keep-alive",
"HTTP_COOKIE": "_ga=GA1.3.1851235816.1425597711; sessionid=ihukujut48uhatb1rqtzaed78jszqsyk; csrftoken=8F2CcluTFgGUdCV3mfgnhqxfh2crgDKj; customer=\"AmsrbY7bSj5wiDQPM7xcRa:1YdLVd:nKyRyZNx5aoLLmVRL4o9aN267vI\"",
"SERVER_NAME": "app.adomattic.com",
"REMOTE_ADDR": "182.186.59.228",
"HTTP_X_FIREPHP_VERSION": "0.0.6",
"wsgi.url_scheme": "http",
"SERVER_PORT": "80",
"uwsgi.node": "stage",
"HTTP_PUBLISHER_KEY": "ng2HM6ThZehtWHR2tgonBg",
"HTTP_DNT": "1",
"HTTP_HOST": "app.adomattic.com",
"wsgi.multithread": false,
"HTTP_CACHE_CONTROL": "max-age=0",
"REQUEST_URI": "/api/impressions/i/",
"HTTP_ACCEPT": "application/json, text/plain, */*",
"wsgi.run_once": false,
"REMOTE_PORT": "50740",
"HTTP_ACCEPT_LANGUAGE": "en-US,en;q=0.8,ur;q=0.6",
"uwsgi.version": "1.9.17.1-debian",
"CONTENT_TYPE": "",
"DOCUMENT_ROOT": "/usr/share/nginx/html",
"CSRF_COOKIE": "8F2CcluTFgGUdCV3mfgnhqxfh2crgDKj",
"HTTP_ACCEPT_ENCODING": "gzip, deflate, sdch"
}
要遷移這些數據,首先我創造我的數據庫中的表:
CREATE TABLE filtered_data
(
ROW_ID INT
,MULTIPROCESS VARCHAR(10)
,HTTP_REFERER VARCHAR(100)
,SCRIPT_NAME VARCHAR(20)
,REQUEST_METHOD VARCHAR(10)
,PATH_INFO VARCHAR(40)
,HTTP_ORIGIN VARCHAR(100)
,SERVER_PROTOCOL VARCHAR(30)
,QUERY_STRING VARCHAR(50)
,CONTENT_LENGTH VARCHAR(20)
,HTTP_USER_AGENT VARCHAR(400)
,HTTP_CONNECTION VARCHAR(30)
,HTTP_COOKIE VARCHAR(500)
,SERVER_NAME VARCHAR(30)
,REMOTE_ADDR VARCHAR(30)
,FIREPHP_VERSION VARCHAR(20)
,URL_SCHEME VARCHAR(10)
,SERVER_PORT INT
,NODE VARCHAR(20)
,PUBLISHER_KEY VARCHAR(30)
,HTTP_DNT INT
,HTTP_HOST VARCHAR(30)
,MULTITHREAD VARCHAR(10)
,CACHE_CONTROL VARCHAR(20)
,REQUEST_URI VARCHAR(30)
,HTTP_ACCEPT VARCHAR(50)
,RUN_ONCE VARCHAR(10)
,REMOTE_PORT INT
,HTTP_ACCEPT_LANGUAGE VARCHAR(30)
,UWSGI_VERSION VARCHAR(30)
,CONTENT_TYPE VARCHAR(20)
,DOCUMENT_ROOT VARCHAR(40)
,CSRF_COOKIE VARCHAR(50)
,HTTP_ACCEPT_ENCODING VARCHAR(50)
);
創建此表之後,我用下面的查詢複製JSON數據到這個表:
COPY raw_data(DATA) FROM 'metadata.txt' DELIMITERS '#' CSV;
該語句將整個文件加載到跨越多行的單個列中。
然後,我使用下列插入查詢到的JSON數據拆分成各個列:
INSERT INTO filtered_data
SELECT
row_id
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.multiprocess:',2), ', HTTP_REFERER:',1)) AS MULTIPROCESS
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_REFERER:',2), ', SCRIPT_NAME:',1)) AS HTTP_REFERER
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'SCRIPT_NAME:',2), ', REQUEST_METHOD:',1)) AS SCRIPT_NAME
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'REQUEST_METHOD:',2), ', PATH_INFO:',1)) AS REQUEST_METHOD
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'PATH_INFO:',2), ', HTTP_ORIGIN:',1)) AS PATH_INFO
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ORIGIN:',2), ', SERVER_PROTOCOL:',1)) AS HTTP_ORIGIN
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'SERVER_PROTOCOL:',2), ', QUERY_STRING:',1)) AS SERVER_PROTOCOL
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'QUERY_STRING:',2), ', CONTENT_LENGTH:',1)) AS QUERY_STRING
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'CONTENT_LENGTH:',2), ', HTTP_USER_AGENT:',1)) AS CONTENT_LENGTH
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_USER_AGENT:',2), ', HTTP_CONNECTION:',1)) AS HTTP_USER_AGENT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_CONNECTION:',2), ', HTTP_COOKIE:',1)) AS HTTP_CONNECTION
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_COOKIE:',2), ', SERVER_NAME:',1)) AS HTTP_COOKIE
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'SERVER_NAME:',2), ', REMOTE_ADDR:',1)) AS SERVER_NAME
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'REMOTE_ADDR:',2), ', HTTP_X_FIREPHP_VERSION:',1)) AS REMOTE_ADDR
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_X_FIREPHP_VERSION:',2), ', wsgi.url_scheme:',1)) AS FIREPHP_VERSION
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.url_scheme:',2), ', SERVER_PORT:',1)) AS URL_SCHEME
,CAST(SPLIT_PART(SPLIT_PART(DATA, ', SERVER_PORT:',2), ', uwsgi.node:',1) AS INT) AS SERVER_PORT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'uwsgi.node:',2), ', HTTP_PUBLISHER_KEY:',1)) AS NODE
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_PUBLISHER_KEY:',2), ', HTTP_DNT:',1)) AS PUBLISHER_KEY
,CAST(SPLIT_PART(SPLIT_PART(DATA, ', HTTP_DNT:',2), ', HTTP_HOST:',1) AS INT) AS HTTP_DNT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_HOST:',2), ', wsgi.multithread:',1)) AS HTTP_HOST
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.multithread:',2), ', HTTP_CACHE_CONTROL:',1)) AS MULTITHREAD
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_CACHE_CONTROL:',2), ', REQUEST_URI:',1)) AS CACHE_CONTROL
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'REQUEST_URI:',2), ', HTTP_ACCEPT:',1)) AS REQUEST_URI
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ACCEPT:',2), ', wsgi.run_once:',1)) AS HTTP_ACCEPT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.run_once:',2), ', REMOTE_PORT:',1)) AS RUN_ONCE
,CAST(SPLIT_PART(SPLIT_PART(DATA, ', REMOTE_PORT:',2), ', HTTP_ACCEPT_LANGUAGE:',1) AS INT) AS REMOTE_PORT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ACCEPT_LANGUAGE:',2), ', uwsgi.version:',1)) AS HTTP_ACCEPT_LANGUAGE
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'uwsgi.version:',2), ', CONTENT_TYPE:',1)) AS UWSGI_VERSION
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'CONTENT_TYPE:',2), ', DOCUMENT_ROOT:',1)) AS CONTENT_TYPE
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'DOCUMENT_ROOT:',2), ', CSRF_COOKIE":',1)) AS DOCUMENT_ROOT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'CSRF_COOKIE:',2), ', HTTP_ACCEPT_ENCODING":',1)) AS CSRF_COOKIET
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ACCEPT_ENCODING:',2), ', HTTP_ACCEPT_ENCODING":',1)) AS HTTP_ACCEPT_ENCODING
FROM raw_data;
但是,當我運行插入查詢,我得到一個錯誤 -
ERROR: invalid input syntax for integer: ""
。 我只有三個字段作爲整數,但都有有效的值。爲什麼我得到這個錯誤?
你爲什麼不只是轉換是JSON的一個對象,做一個基於對象的插入到數據庫(即ORM或類似的)? – 2015-04-06 03:03:35
@MikeBrant你可以給我一個例子或一個我可以參考的頁面嗎? – Ambidextrous 2015-04-07 01:16:05