node.js request.js/cheerio.js returning empty json

I'm working on an app that scrapes a website and exposes the data as a REST API. I'm using Node, Express, request and cheerio. I seem to be getting the correct values in the array, but just before returning, the array is still empty (outside the request function scope).

I could not work out what I'm missing. Could you please take a look at my code and let me know?

in app.js I specified the route,

    // Mount the handler returned by timetable.timetable(url) on the /timetable path.
    app.use('/timetable', timetable.timetable(url));

in timetable.js

var classes = require('../lib/classes');
var schedule = require('../lib/schedule');
/**
 * Builds the handler for the timetable route.
 *
 * @param timeTableURL - passed to `request`; the fetched html is parsed with cheerio.
 * @returns a function taking (req, res) that replies with res.json(moreInfo).
 */
exports.timetable = function(timeTableURL) {

return function(req, res) {

    request( timeTableURL, function srapeWebsite(error, response, html ) {
        var webHtml = '';
        var moreInfo = []; 
        // webHtml stays '' when the request fails or the status is not 200.
        if( !error && response.statusCode === 200 ) {
            webHtml = cheerio.load(html);
        }
        // schedule.getInfo returns an array of json objects where
        // json = {
        //    day,
        //    number,
        //    url
        // }
        var info = schedule.getInfo(webHtml);

        // Starts one classes.getMoreInfo call per entry; each result is pushed
        // from that call's callback, whenever the callback gets invoked.
        for (var index = 0; index < info.length; index++) {     
            var json = info[index];
            classes.getMoreInfo(json, function (moreInfoJson) {
                //console.log shows correct information here
                moreInfo.push(moreInfoJson);
            // NOTE(review): the next line closes the callback with `};` - the
            // closing `)` of the getMoreInfo call is missing.
            };
        }
        // however moreInfo is empty here..hence not getting anything
        // NOTE(review): res.json runs as soon as the loop has finished; nothing
        // here waits for the getMoreInfo callbacks to have been invoked.
        res.json(moreInfo);
    } );
  };
};

The idea is, for each of the json objects in the array, to get more information from the url specified.

so the end result would be,

    // Shape of one entry in the end result: the day plus the rows collected for it.
    var finalJson = {
       day : '',
       json : []
    };

in classes.js

exports.getMoreInfo =  function (info, callback) {

var infoDay = info.day;
var infoNumber = parseInt(info.number);
var moreInfoURL = info.moreInfoUrl;
var stuff = [];

var moreInfo = {};
moreInfo.day = infoDay;
moreInfo.json = [];

if (infoNumber > 0 ) {
    request(moreInfoURL, function(error, response,html) {
        var moreInfoHtml = '';
        if( !error && response.statusCode === 200 ) {
            moreInfoHtml = cheerio.load(html) ;

        }
        var $ = moreInfoHtml;

        $('table tbody tr').each ( function getWhatisNeeded () {
            var json = getJson ( $(this) );
            stuff.push(json);

        });

        moreInfo.json = stuff;
        return callback(moreInfo);
    });
  }
}

Your call to classes.getMoreInfo is async.

What happens is that you do not wait for the call to return, and call res.json with an empty array.

EDIT: How to resolve this issue? One approach is to use promises (see for example https://github.com/malko/D.js).

getMoreInfo should return a deferred object:

// Sketch only: the `...` lines stand for the parts of the function left out here.
// Promise-returning variant: the caller receives deferred.promise instead of
// passing a callback.
exports.getMoreInfo =  function (info) {
    var deferred = D();
    ...
    request(..., function(){
        // Resolve the deferred with moreInfo from inside the request callback.
        deferred.resolve(moreInfo);
    });
    ...
    return deferred.promise;
}

and timetable.js should look more or less like this:

// Start every lookup first, collecting the promise each one returns.
var promises = [];
for (var index = 0; index < info.length; index++) {
    promises.push(classes.getMoreInfo(info[index]));
}
// Reply only once the combined promise has been fulfilled.
D.all(promises).then(function(array_of_results){
    res.json(array_of_results);
});

I did not test the code, so I might have a bug here, but this is a good guideline on how to resolve async issues in node.js and in general. Get yourself familiar with promises; it will make problems like this much easier to solve.

Thanks for the tip 'jonjon'..I used Async.map to fix it ( sorry was going to post it yesterday..but you beat me to it)..I'll also try out Promises as well..I might as well learn both techniques..

this is what I did to fix it..

In app.js

// Register timetable.timetable itself as the handler for GET /timetable.
app.get('/timetable', timetable.timetable);

In timetable.js

var url = '...';
exports.timetable = function ( request, response ) {

    //scrape html to get info
    scrape.getHtml( url, function ( error, html ) {
    // schedule.getInfo returns an array of json objects where
    // json = {
    //    day,
    //    number,
    //    url
    // }
    var info = schedule.getInfo(cheerio.load(html));

    //getmoreinfo
    async.map(info, classes.getMoreInfo, function( error,moreInfo ) {
        if(!error) {
            response.json(moreInfo);
        }else {
            response.send("error encountered");
        }
    });
  });
}

In classes.js

   exports.getMoreInfo =  function (info, callback) {

   var infoDay = info.day;
   var infoNumber = parseInt(info.number);
   var moreInfoURL = info.moreInfoUrl;
   var stuff = [];

   var moreInfo = {};
   moreInfo.day = infoDay;
   moreInfo.json = [];

   if (infoNumber > 0 ) {
   request(moreInfoURL, function(error, response,html) {
    var moreInfoHtml = '';
    if( !error && response.statusCode === 200 ) {
        moreInfoHtml = cheerio.load(html) ;

    }
    var $ = moreInfoHtml;

    $('table tbody tr').each ( function getWhatisNeeded () {
        var json = getJson ( $(this) );
        stuff.push(json);

    });

    moreInfo.json = stuff;
    return callback(null, moreInfo);
  });
 }
}

I'm getting what I'm after now..however it seems to take 5 seconds..will need to test more anyway..