How to render a page only after all web-scraping requests are completed in Node.js?

I am making anywhere between 1 to 10 web requests using jsdom (web-scraping library for Node.js). It goes something like this:

app.get('/results', function(req, res) {

  // NOTE: each jsdom.env() call is asynchronous -- it returns immediately
  // and invokes its callback later, once the page has been fetched and
  // jQuery injected.
  jsdom.env(
    "http://website1.com",
    ["http://code.jquery.com/jquery.js"],
    function (errors, window) {
      // scrape website #1
    }
  );

  jsdom.env(
    "http://website2.com",
    ["http://code.jquery.com/jquery.js"],
    function (errors, window) {
      // scrape website #2
    }
  );

  jsdom.env(
    "http://website3.com",
    ["http://code.jquery.com/jquery.js"],
    function (errors, window) {
      // scrape website #3
    }
  );

  // PROBLEM: this runs right away, before any of the three callbacks
  // above have fired, so `items` has not been populated yet.
  res.render('results', { items: items });
});

How do I run res.render() ONLY after all jsdom requests have been completed and after I have gathered all the information that I need? In a synchronous world this obviously would not be a problem, but since javascript is asynchronous, res.render() will be run before any of jsdom callbacks are finished.

Naive solution

The "naive" solution you could employ for a small number of scrapes is to nest everything: start each scrape inside the callback of the previous one, and put the call to render inside the innermost callback.

scrape
  cb: scrape
     cb: scrape
        cb: render all results

That becomes tedious and illegible, of course. (And everything runs in series, not parallel, which won't be very fast.)

Better solution

The better solution would be to write a function that counts the number of returned results and calls render when all of them have returned. Here is one implementation:

/**
 * Builds a collector for `total` parallel asynchronous tasks.
 *
 * Returns a function that each task calls exactly once with its result.
 * Once all `total` results have arrived, `finalCallback` is invoked a
 * single time with the array of results.
 *
 * @param {number} total - how many results to wait for
 * @param {function(Array)} finalCallback - called once with all results
 * @returns {function(*)} per-task completion callback
 *
 * Caveats: results arrive in completion order, not submission order,
 * and there is no error handling -- a task that never reports leaves
 * `finalCallback` uncalled forever.
 */
function parallel_cb(total, finalCallback) {
    var done = 0;
    var results = [];
    return function(result) {
        done += 1;
        results.push(result);
        // Strict comparison; fires only on the final (total-th) call.
        if (total === done) finalCallback(results);
    }
}

To use it in your example:

app.get('/results', function(req, res) {
    // Shared collector: every scrape reports through this one function,
    // and the render happens exactly once, after the final report.
    var collectResult = parallel_cb(
        sitesToScrape.count, // or 3 in this case
        function(items) {
            res.render('results', { items: items });
        }
    );

    jsdom.env(
      "http://nodejs.org/dist/",
      ["http://code.jquery.com/jquery.js"],
      function (errors, window) {
        // scrape this page, then hand the result to the collector
        collectResult(result_from_scrape);
      }
    );

    jsdom.env(
      "http://nodejs.org/dist/",
      ["http://code.jquery.com/jquery.js"],
      function (errors, window) {
        // second scrape reports through the same collector
        collectResult(result_from_scrape);
      }
    );

    jsdom.env(
      "http://nodejs.org/dist/",
      ["http://code.jquery.com/jquery.js"],
      function (errors, window) {
        // third and final scrape -- this call triggers the render
        collectResult(result_from_scrape);
      }
    );
});

The best solution

Instead of writing your own, you should really learn to use an existing parallel / async library as suggested by @almypal in the comment to your question.

With async you could do something much neater as described in the docs: https://github.com/caolan/async#parallel

Or if all your scrapes actually look for the same elements in the resulting pages, you could even do a parallel map over an array of URLs to scrape: https://github.com/caolan/async#maparr-iterator-callback

Each of your scrapes can use the callback function provided by async's parallel method, to return the results of its scrape. The final [optional] callback will contain your call to render with all the items.

EDIT: The example you asked for

This is your code, directly translated to the async library:

var async = require("async");

app.get('/results', function(req, res) {
    // async.parallel runs every function in the array concurrently and
    // gathers their results (in array order) for the final callback.
    async.parallel(
      [
        // Each task receives a completion callback (`done`) from async;
        // calling it signals "this task finished" and passes
        // (error, result) along.
        function (done) {
          jsdom.env(
            "http://nodejs.org/dist/",
            ["http://code.jquery.com/jquery.js"],
            function (errors, window) {
              // do some scraping

              // async's convention: error first, result second.
              done(null, result_from_scrape); // null signals no error
            }
          );
        },
        function (done) {
          jsdom.env(
            "http://nodejs.org/dist/",
            ["http://code.jquery.com/jquery.js"],
            function (errors, window) {
              // more scraping
              done(null, result_from_scrape);
            }
          );
        },
        function (done) {
          jsdom.env(
            "http://nodejs.org/dist/",
            ["http://code.jquery.com/jquery.js"],
            function (errors, window) {
              // even more scraping
              done(null, result_from_scrape);
            }
          );
        }
      ],
      // Final callback: runs once, after every task above has reported.
      function (err, results) {
        // If any task passed a non-null error, async short-circuits and
        // delivers it here as `err`.
        if (err) res.render('error_template', {error: err});
        else res.render('results', { items: results });
      });
});