I'm using the crawler module for Node.js (npm install crawler) in the usual way, but I'm seeing some strange behavior:
// Minimal reproduction: create a Crawler whose callback queues the href of
// every <a> element matched on the page, then seed it with a single URL.
var Crawler = require('crawler').Crawler;
var c = new Crawler({
// This will be called for each crawled page
callback: function (error, result, $) {
// $ is a jQuery instance scoped to the server-side DOM of the page
// Return early, queueing nothing, when error is truthy.
if (error) return;
$('a').each(function (index, a) {
// error, result and $ are undefined in here
// NOTE(review): this inner function never references error, result or $,
// so the observation above presumably comes from inspecting those names
// in a debugger rather than from code that uses them — confirm how it
// was observed.
c.queue(a.href);
});
}
});
// Seed the crawl with the start URL.
c.queue('http://google.com');
Why aren't the parent function's parameters visible inside the $.each callback? To access the $ function inside the $.each callback, I have to resort to ugly workarounds like:
// Workaround variant of the per-page callback (a property of the options
// object passed to new Crawler). Unlike the first example, the function handed
// to .each() references $ itself, via $(a).hasClass(...).
callback: function (error, result, $) {
  var $ = $; // really?
  // Return early, queueing nothing, when error is truthy.
  if (error) return;
  $('a').each(function (index, a) {
    if ($(a).hasClass('whatever')) { // $ is now defined and this line won't throw an error
      // ...
    }
    c.queue(a.href);
  });
}
Could you explain what's going on here?
EDIT: Another strange thing I noticed: if we edit the first example in the following way, the behavior changes slightly, but not consistently:
// Variant of the first example: a nested helper, hello, that reads $ is
// declared and called before the <a> elements are iterated.
var c = new Crawler({
callback: function (error, result, $) {
// Return early, queueing nothing, when error is truthy.
if (error) return;
// hello is the only nested function in this callback that references $.
function hello() {
console.log('$ is', $); // only $ is defined here
}
hello(); // error, result and $ are all defined here
$('a').each(function (index, a) {
// $ is now defined, but error and result are not
// NOTE(review): neither nested function references error or result, while
// hello references $; presumably that difference decides which names are
// visible when inspected at this point — confirm against the engine's
// closure/debugger behavior.
c.queue(a.href);
});
}
});
// Seed the crawl with the start URL.
c.queue('http://google.com');
Why would defining and calling the hello function capture $ for the $.each callback? And why is only $ captured, while error and result are not?