I'm trying to get a CSV dump of some data (~500 MB) from MongoDB. I thought streams would be the way to go, to avoid building up an array in memory and then generating the CSV all at once.
But, it seems the stream that mongoose creates and the one that csv expects are not the same thing.
// Stream produced by the mongoose query over all Subscriber documents.
var subscriberStream = Subscriber.find().stream();

// No-op stub: csv calls setEncoding() on its source stream.
subscriberStream.setEncoding = function () {};

// Print every record csv emits, followed by its index.
function logRecord(record, index) {
  console.log(record);
  console.log(index);
}

csv().from.stream(subscriberStream).on('record', logRecord);
Without the setEncoding()
stub above, I get an error when csv calls setEncoding on the stream. With the stub in place, I get this error instead:
TypeError: Object #<Object> has no method 'indexOf'
at [object Object].stringify (/home/project/node_modules/csv/lib/stringifier.js:98:35)
So, is this even the right approach? If so, what is the problem with the streams?
As zeMirco said: to get a CSV dump of a collection, I'd use the mongoexport
tool that comes with MongoDB. Here's an example of exporting a collection called "users" in a database "mydatabase" to CSV format:
$ mongoexport --csv --host localhost:27017 --db mydatabase --collection users --fields name,email,age -o output.csv
And you'll get something that looks like this:
$ cat output.csv
name,email,age
renold,renold.ronaldson@gmail.com,21
jacob,xXxjacobxXx@hotmail.com,16
Something like this should work. Replace process.stdout
with a filestream to write it to a file.
var csv = require('csv');
var through = require('through');
var Model = require('...');
// Declared with `var` so that `_` does not leak as an implicit global.
var _ = require('underscore');

var modelStream = Model.find().stream();

// documents -> arrays of field values -> CSV text -> stdout
modelStream
  .pipe(through(write, end))
  .pipe(csv())
  .pipe(process.stdout);

/**
 * Called by `through` once the model stream has ended.
 *
 * Supplying a custom end handler replaces through's default one, so this
 * handler has to signal end-of-stream itself with `queue(null)`. Without it
 * the downstream streams never see 'end', and a file stream substituted for
 * process.stdout would never be closed.
 */
function end() {
  this.queue(null);
  console.log('done');
}

/**
 * Converts one document into an array of its field values and queues it
 * for the CSV stage.
 *
 * @param {Object} doc - Document emitted by the model stream; must provide toObject().
 */
function write(doc) {
  this.queue(_.values(doc.toObject({ getters: true, virtuals: false })));
}
If you want to download the CSV from a web server by accessing a URL, and you're using Express, you can do this:
var through = require('through');
var csv = require('csv');
var MyModel = require('./my_model');

/**
 * GET /download_csv/
 *
 * Streams every MyModel document to the client as a CSV attachment, one row
 * per document, without buffering the whole result set in memory.
 */
app.get('/download_csv/', function (req, res) {
  res.setHeader('Content-disposition', 'attachment; filename=attendances.csv');
  res.contentType('csv');
  // The header row is written by hand before any data rows are piped in.
  res.write('property 1,property 2\n');

  var modelStream = MyModel.find().stream();

  // An 'error' event with no listener is thrown by EventEmitter and would
  // take the whole server down. The headers are already sent, so the best
  // we can do is log and abort the connection so the client does not
  // mistake a partial file for a complete one.
  modelStream.on('error', function (err) {
    console.error('CSV export failed:', err);
    res.destroy();
  });

  modelStream
    .pipe(through(write, end))
    .pipe(csv.stringify())
    .pipe(res);

  /**
   * Called by `through` once the model stream has ended.
   *
   * Ends the through stream rather than calling res.end() directly, so the
   * end-of-stream signal travels through the stringifier and pipe() closes
   * the response only after all buffered rows have been flushed.
   */
  function end() {
    this.queue(null);
    console.log('done outputting file');
  }

  /**
   * Picks the exported columns from one document and queues them as a row.
   *
   * @param {Object} doc - Document emitted by the model stream; must provide toObject().
   */
  function write(doc) {
    var myObject = doc.toObject({ getters: true, virtuals: false });
    this.queue([
      myObject.property_1,
      myObject.property_2
    ]);
  }
});
NOTE: This is using the latest version of the csv module (v0.4) whereas the previous answers are using an older version of the module.