in nodejs, how to stop a FOR loop until mongodb call returns

Please look at the code snippet below. I have an array of JSON objects called 'stuObjList'. I want to loop through the array to find specific JSON objects with a certain flag set, and then make a db call to retrieve more data.

Of course, the FOR loop doesn't wait for the db call to return and reaches the end with j == length. When the db call finally returns, the index 'j' is beyond the last array index. I understand how node.js works and that this is the expected behavior.

My question is: what is the workaround here? How can I achieve what I am trying to achieve? Thanks, --su

...............
...............
...............
else
{
  console.log("stuObjList.length: " + stuObjList.length);
  // 'j' is declared with 'var', so there is exactly one 'j' shared by the loop
  // and by every findOne callback created inside it.
  var j = 0;
  for(j = 0; j < stuObjList.length; j++)
  {
    // Only entries whose 'honor_student' property is neither null nor undefined
    // trigger a database lookup.
    if(stuObjList[j]['honor_student'] != null)
    {     
      // NOTE(review): the ';' inside the query object literal below is a syntax
      // error; it is left untouched here because this is the code as asked.
      db.collection("students").findOne({'_id' : stuObjList[j]['_id'];}, function(err, origStuObj)
      {
        // This callback reads the shared 'j' when it runs, not the value 'j' had
        // when findOne was called. As described in the question, the loop has
        // already finished by then, so 'j' equals stuObjList.length and
        // stuObjList[j] is undefined.
        var marker = stuObjList[j]['_id'];
        var major = stuObjList[j]['major'];
      });
    }

    // Inside the loop body 'j' is always less than stuObjList.length (that is
    // the loop condition), so this check never passes and 'callback' is never
    // invoked from here.
    if(j == stuObjList.length)
    {
      process.nextTick(function()
      {
        callback(stuObjList);
      });
    }
  }
}
});

"async" is a very popular module for abstracting away asynchronous looping and making your code easier to read and maintain. For example:

var async = require('async');

// Looks up the full database record of every honor student in 'stuObjList'.
//
// stuObjList - array of student objects; an element counts as an honor student
//              when its 'honor_student' property is truthy.
// callback   - called once as callback(err, honorStudents). 'err' is whatever
//              the first failing lookup reported (not set on success);
//              'honorStudents' holds the records fetched so far.
function getHonorStudentsFrom(stuObjList, callback) {

    // Records returned by the database, in the order the lookups completed.
    var collected = [];

    // Per-element worker handed to async.forEach(). It must call 'next' exactly
    // once: with no argument on success, or with an error, which makes
    // async.forEach() stop iterating and pass that error to the final handler.
    function lookUpStudent(student, next) {
        if( student.honor_student ) {
            db.collection("students").findOne({'_id' : student._id}, function(lookupErr, record)
            {
                if(lookupErr) {
                    next(lookupErr);
                } else {
                    collected.push(record);
                    next();
                }
            });
        } else {
            // Not an honor student: nothing to fetch, go straight to the next element.
            next();
        }
    }

    // Run the worker for each element, then hand the outcome to the caller.
    // The caller should check whether 'err' is null/undefined to know if
    // something went wrong.
    async.forEach(stuObjList, lookUpStudent, function(iterationErr) {
        callback(iterationErr, collected);
    });
}

So you could use it like this:

// Receives the outcome of getHonorStudentsFrom(): an error (if any) and the
// list of honor student records.
function handleHonorStudents(err, honorStudents) {
    if(err) {
        // Handle the error
        return;
    }

    // Do something with honorStudents
}

getHonorStudentsFrom(studentObjs, handleHonorStudents);

Note that .forEach() will call your iterator function for each element in stuObjList "in parallel" (i.e., it won't wait for one iterator function to finish being called for one array element before calling it on the next array element). This means that you can't really predict the order in which the iterator functions--or more importantly, the database calls--will run. End result: unpredictable order of honor students. If the order matters, use the .forEachSeries() function.

Ah the beauty and frustration of thinking asynchronously. Try this:

...............
...............
...............
else
{
  console.log("stuObjList.length: " + stuObjList.length);
  var j = 0, found = false, step;
  for(j = 0; j < stuObjList.length; j++)
  {
    if(stuObjList[j]['honor_student'] != null)
    {     
      found = true;
      step = j;
      db.collection("students").findOne({'_id' : stuObjList[j]['_id'];}, function(err, origStuObj)
      {
        var marker = stuObjList[step]['_id']; // because j's loop has moved on
        var major = stuObjList[step]['major'];
        process.nextTick(function()
        {
          callback(stuObjList);
        });
      });
    }

  }
  if (!found) {
    process.nextTick(function()
    {
      callback(stuObjList);
    });
  }
}
});

If you find your "when I'm done" steps are getting complicated, extract them to another function, and just call it from each spot. In this case since it was only 2 lines, it seemed fair to duplicate.

Given the requirement, you can also use Underscore's "filter" method (http://documentcloud.github.com/underscore/#filter) to select the honor students first:

// Select the students that need a lookup: those whose 'honor_student' property
// is neither null nor undefined.
var honor_students = _.filter(stuObjList, function(stud) { return stud['honor_student'] != null; });
if (honor_students.length === 0) {
  // Nothing to fetch, so no db callback will ever fire; finish right away.
  process.nextTick(function() { callback(stuObjList); });
} else {
  var honor_students_with_more_data = [];
  for (var i = 0; i < honor_students.length; i++) {
    // 'i' is only read here, synchronously, while building the query; the
    // callback below never touches it, so it does not matter that the loop
    // has moved on by the time the db call returns.
    db.collection("students").findOne({'_id' : honor_students[i]['_id']}, function(err, origStuObj) {
      // do something with retrieved data
      // NOTE: 'err' is not handled here; a failed lookup still counts towards
      // completion so that 'callback' is always reached.
      var student_with_more_data = origStuObj;
      honor_students_with_more_data.push(student_with_more_data);
      // One result is pushed per lookup, so the lengths match exactly when the
      // last outstanding lookup has returned.
      if (honor_students_with_more_data.length === honor_students.length) {
        process.nextTick(function() { callback(stuObjList); });
      }
    });
  }
}