I wrote a simple mapping script for a database with around 5M documents. The find statement finds all documents when I run it standalone (so that can't be the cause). The script also ends as expected, with no error message thrown. But here's the point: it ends too soon — after about 200k documents. My question is: why?
/**
 * Streams every input document whose `changed` field is >= config.min through
 * mapScript and upserts the mapped result into the output collection, keyed
 * by `meta.identkey`.
 *
 * Documents are handled strictly one at a time: the next document is only
 * requested from the cursor once the current one has been written, has failed
 * to write, or has failed to map. When the cursor yields null, the success
 * and error counts plus the elapsed time are logged and the process exits
 * with code 0. A cursor error is logged and the process exits with code 1.
 *
 * Reads `config`, `mapScript` and `startTime` from the enclosing scope.
 *
 * @param {Object} connectionInputDB - connection whose collection(name) yields the collection to read from
 * @param {Object} connectionOutputDB - connection whose collection(name) yields the collection to upsert into
 */
function performMapping (connectionInputDB, connectionOutputDB) {
    var collectionInput = connectionInputDB.collection(config.input.collection),
        collectionOutput = connectionOutputDB.collection(config.output.collection),
        successCounter = 0,
        errorCounter = 0,
        cursor = collectionInput.find({ 'changed': { '$gte': config.min } }),
        onNextObject = function (error, obj) {
            var mappedObject;

            if (error) {
                console.log(error);
                process.exit(1);
                return;
            }

            // NOTE(review): null is treated as "cursor exhausted" — confirm the
            // driver in use never yields null before the last document.
            if (obj === null) {
                console.log('Success:', successCounter);
                console.log('Error:', errorCounter);
                console.log('Time:', (new Date().getTime() - startTime) / 1000);
                process.exit(0);
                return;
            }

            try {
                mappedObject = mapScript(obj);
                collectionOutput.update(
                    { 'meta.identkey': mappedObject.meta.identkey },
                    mappedObject,
                    { upsert: true },
                    function (error) {
                        if (error) {
                            console.log('Error:', error);
                            errorCounter += 1;
                        } else {
                            successCounter += 1;
                        }
                        // Always advance, even after a failed write; otherwise a
                        // single write error silently ends the whole run before
                        // the summary is ever printed.
                        cursor.nextObject(onNextObject);
                    }
                );
            } catch (ex) {
                // Guard the lookup so a document without `data` cannot make the
                // error handler itself throw.
                console.log('Error:', obj && obj.data ? obj.data.identkey : undefined, ex);
                errorCounter += 1;
                cursor.nextObject(onNextObject);
            }
        };

    cursor.nextObject(onNextObject);
}
I think nextObject doesn't work as expected. And the strangest part: it's always a different number of successful inserts/updates.
Okay, I think I found the answer: nextObject is not reliable. When the cursor is paused, it may return some empty responses before it starts returning documents again. So if I use something like this:
if(obj === null) {
it will stop unexpectedly. Source: http://stackoverflow.com/a/25155237/2768213