I have a script that converts CSV to JSON. There are approximately 1,000,000 records to parse and build JSON objects from, and these objects are then written to a MongoDB database. However, the script fails with an out-of-memory error at 100,000 records unless I pass the --max-old-space-size flag with 10,000 MB of RAM allocated.
Below is the code. Could anybody suggest modifications to make this script more memory-efficient?
/*
* @desc Builds JSON from Ofcom CSV source(s) for postcode broadband data
* @author Jacob Clark
*/
var fs = require("fs");
var MongoClient = require('mongodb').MongoClient
/**
 * BroadbandData constructor.
 *
 * Stores the list of CSV sources and initialises the state used by the
 * prototype methods: the column names, an in-memory record list and the
 * (not yet opened) MongoDB handle.
 *
 * @constructor
 * @param {Array} sources - kept by reference on `this.sources`; getData()
 *                          passes each entry to fs.readFileSync
 */
function BroadbandData(sources){
  // Column names; buildJSON() assigns them by position to the
  // comma-separated fields of each line.
  var columnNames = [
    "Postcode(No Spaces)",
    "Postcode Data Status",
    "Lines < 2Mbps(Y/N)",
    "Average Speed/Mbps",
    "Median Speed/Mbps",
    "Maximum Speed/Mbps",
    "NGA Available(Y/N)",
    "Number of Connections"
  ];

  this.sources = sources;
  this.header = columnNames;
  // Filled by persist() when it is called with the 'object' target.
  this.broadbandDataJSON = [];
  // Set by connectToMongoDB() once the connection is open.
  this.mongoDB = null;
}
/**
 * Connect to MongoDB, store the database handle on the instance, then hand
 * control to `callback`.
 *
 * @param {Function} callback   - invoked as callback(dataArrays, instance)
 *                                after the handle has been stored
 * @param {*}        dataArrays - passed through to `callback` unchanged
 * @throws the error reported by MongoClient.connect, if any; `callback` is
 *         not invoked in that case
 */
BroadbandData.prototype.connectToMongoDB = function(callback, dataArrays){
  // Local binding rather than an implicit global, so separate instances
  // cannot overwrite each other's reference (and strict mode does not throw).
  var self = this;
  MongoClient.connect('mongodb://127.0.0.1:27017/UKBroadbandCoverageAndSpeed', function(err, db) {
    // Fail loudly here instead of letting later code run against a missing db handle.
    if(err) throw(err);
    self.mongoDB = db;
    callback(dataArrays, self);
  });
};
/**
 * Read every source synchronously and split its text into lines.
 *
 * Each file is loaded completely into memory before it is split, so the
 * returned structure holds the full text of all sources at once.
 *
 * @returns {Array} one array of line strings per source, in the order of
 *                  `this.sources`; a file ending in a newline yields a
 *                  trailing empty string
 */
BroadbandData.prototype.getData = function(){
  var data = [];
  var sources = this.sources;
  // Object.keys visits own entries only, unlike for...in, which also walks
  // enumerable properties inherited from the prototype chain.
  var keys = Object.keys(sources);
  for(var i = 0; i < keys.length; i++){
    var text = fs.readFileSync(sources[keys[i]]).toString();
    // Accept both LF and CRLF line endings so no stray "\r" ends up in the last field.
    data.push(text.split(/\r?\n/));
  }
  return data;
};
/**
 * Turn every CSV line into an object keyed by `bd.header` and persist it.
 *
 * The function does not use `this`: the script passes it as a bare callback,
 * so the instance arrives as the second argument.
 *
 * @param {Array}  dataArrays - array of arrays of CSV line strings
 * @param {Object} bd         - object providing `header` (column names) and
 *                              `persist(record, target)`
 * @returns {undefined}
 */
BroadbandData.prototype.buildJSON = function(dataArrays, bd){
  for(var a = 0; a < dataArrays.length; a++){
    var lines = dataArrays[a];
    for(var l = 0; l < lines.length; l++){
      // Drop the carriage return left behind when a CRLF file was split on "\n".
      var line = lines[l].replace(/\r$/, '');
      // Blank lines (e.g. after the file's final newline) are not records.
      if(line === '') continue;

      var fields = line.split(",");
      var record = {};
      for(var f = 0; f < fields.length; f++){
        // Fields are matched to column names purely by position.
        record[bd.header[f]] = fields[f];
      }
      bd.persist(record, "mongodb");
    }
  }
};
/**
 * Persistence: store one record in the requested target.
 *
 * @param {Object} obj    - the record to store
 * @param {String} source - 'mongodb' inserts the record into the 'data'
 *                          collection of `this.mongoDB`; 'object' appends it
 *                          to `this.broadbandDataJSON`; any other value does
 *                          nothing
 * @returns {undefined}
 */
BroadbandData.prototype.persist = function(obj, source){
  if(source == 'mongodb'){
    this.mongoDB.collection('data').insert(obj, function(insertError, insertedDocs) {
      // An insert failure is rethrown from inside the driver callback.
      if(insertError) throw(insertError);
    });
    return;
  }

  if(source == 'object'){
    this.broadbandDataJSON.push(obj);
  }
};
// Entry point: build the importer for the two Ofcom 2013 CSV parts.
var bd = new BroadbandData([
  "../data/ofcom-uk-fixed-broadband-postcode-level-data-2013/ofcom-part1-fixed-broadband-postcode-level-data-2013.csv",
  "../data/ofcom-uk-fixed-broadband-postcode-level-data-2013/ofcom-part2-fixed-broadband-postcode-level-data-2013.csv"
]);

// getData() runs synchronously here, before the connection attempt starts;
// its result is forwarded to buildJSON together with the instance.
bd.connectToMongoDB(
  bd.buildJSON,
  bd.getData()
);