我試圖使用node.js將csv
文件導入neo4j
。我必須將數據插入多個collection/table
,所以我必須使用node.js腳本插入數據。但我的問題是,插入CSV數據時無法防止數據重複。使用node.js在neo4j中導入CSV
樣品CSV數據:
name
-------------
Afghanistan
Afghanistan
Aland
Albania
Albania
Bangladesh
Bangladesh
index.js
cp = require('child_process');
child = cp.fork(__dirname + "/background-import-csv-file.js");
child.on('message', function(msg) {
console.log("background-insert-process said : ", msg);
});
file = path.resolve(__dirname, `./file/simplemaps.csv`);
child.send(file);
在background-import-csv-file.js
,我有兩種不同的方式來編寫代碼。
首先無極基於(background-import-csv-file.js
):
cp = require('child_process');
csv = require('fast-csv');
Q = require('q');
DB = require("./common/driver");
Country = require('./collection/country');
process.on("message", (file) => {
stream = fs.createReadStream(file);
csv
.fromStream(stream, { headers: true })
.on("data", function(data) {
let countryData = { "name": data.name };
neo = new DB();
country = new Country(neo);
country.insert(countryData)
.then(resp => process.send(resp.msg))
.catch(err => process.send(err))
})
.on("end",() => process.send("file read complete"));
});
./collection/country.js
:
Q = require('q');
Country = function Country(neo) {
this.country = "Country"; this.neo = neo;
};
Country.prototype.find = function find(filters) {
query = `MATCH (a:Country { name: '${filters.name}' }) RETURN {country:properties(a)}`;
return this.neo.run(query, filters).then(resp => resp);
}
Country.prototype.create = function create(data) {
query = `CREATE (ax:Country { name: '${data.name}' }) RETURN ax `;
return this.neo.run(query, {}).then(resp => resp[0].properties).catch(err => err)
}
Country.prototype.insert = function insert(country) {
filter = { name: country.name };
return Q(this.find(filter))
.then(resp => resp.length > 0 ? Q.resolve({ msg: `country: [${country.name}] is already exist` }) : Q.resolve(this.create(country)) )
.then(resp => resp)
.catch(e => Q.reject(e));
}
module.exports = Country;
./common/driver.js
neo4j = require('neo4j-driver').v1;
function DB() {
this.driver = neo4j.driver(); this.session = this.driver.session();
}
DB.prototype.run = function run(query, data) {
return this.session.run(query, data)
.then(response => response.records.map(
record => record._fields[0] ?
record._fields.length ? record._fields[0] : {} : {}
)).catch(err => new Error(err));
}
module.exports = DB;
當我在終端運行index.js
,在databas e,我有2 Afghanistan
,1 Aland
,2 Albania
和2 Bangladesh
。但我需要在我的數據庫中有1 Afghanistan
,1 Aland
,1 Albania
和1 Bangladesh
。當我分析代碼時,發現在插入數據之前,我正在檢查數據(Country.prototype.find = function find(filters)
),如果它已經存在與否,但它總是返回空結果。這就是爲什麼它插入多個數據。如果我再次運行index.js
,則不會將新數據插入到數據庫中。爲了解決這個問題,我已經試過以下CQL
:
MERGE (c:Country { name: '${data.name}' }) RETURN c
它插入唯一的數據,但它殺了這麼多的時間。然後,我寫了下面的代碼:
事件驅動(background-import-csv-file.js
):
process.on("message", (file) => {
stream = fs.createReadStream(file);
csv
.fromStream(stream, { headers: true })
.on("data", function(data) {
countryData = { "name": data.name };
neo = new DB();
country = new Country(neo);
country.find(countryData);
country.on('find', resp => resp.length > 0 ? Q.resolve({ msg: `country: [${country.name}] is already exist` }) : Q.resolve(country.create(countryData)) );
country.on('create', resp => console.log(resp));
})
.on("end",() => process.send("file read complete"));
});
./collection/country.js
:
EventEmitter = require('events').EventEmitter;
util = require('util');
Country = function Country(neo) {
this.neo = neo; EventEmitter.call(this);
};
util.inherits(Country, EventEmitter);
Country.prototype.find = function find(filters) {
query = `MATCH (a:Country { name: '${filters.name}' }) RETURN {country:properties(a)}`;
return this.neo.run(query, {}).then(resp => this.emit('find', resp));
}
Country.prototype.create = function create(data) {
query = `CREATE (ax:Country { name: '${data.name}' }) RETURN ax `;
return this.neo.run(query, {}).then(resp => this.emit('create', resp[0].properties)).catch(err => err)
}
而這一次,它顯示了同樣的結果。我錯過了什麼?任何建議都將非常有用。
注意:我正在使用fast-csv
進行csv解析,並使用Q
作爲承諾。
什麼是 「勒貝爾」 是什麼意思?我沒有看到一個顯而易見的理由,爲什麼用一個簡單的Cypher查詢就無法做到這一點。 – cybersam