I'm building a scraper with node.io.
The page I want to scrape has new content every minute. I would like to run my job again and again, once every minute. (OK, I could do that with a bash script, but I would like to stay in JavaScript.) This is a basic job:
var nodeio = require('node.io'), options = {timeout: 10};
exports.job = new nodeio.Job(options, {
input: ['hello', 'foobar', 'weather'],
run: function (keyword) {
this.getHtml('http://www.google.com/search?q=' + encodeURIComponent(keyword), function (err, $) {
var results = $('#resultStats').text.toLowerCase();
this.emit(keyword + ' has ' + results);
});
}
});
How could I do that? I'm a beginner in node.js; I tried wrapping the job in setInterval, without success.
Try this (run with "node <myfile.js>" instead of "node.io <myfile.js>"):
var nodeio = require('node.io'), options = {timeout: 10};

// Plain job definition (not yet a nodeio.Job); a fresh Job is built from it
// on every timer tick below.
var job = {
    input: ['hello', 'foobar', 'weather'],
    run: function (keyword) {
        this.getHtml('http://www.google.com/search?q=' + encodeURIComponent(keyword), function (err, $) {
            // Do not emit a result for a request that failed.
            if (err) {
                this.exit(err);
                return;
            }
            // Placeholder value; to scrape the real figure use:
            //   $('#resultStats').text.toLowerCase()
            var results = 'test';
            this.emit(keyword + ' has ' + results);
        });
    }
};

// Re-run the job on a fixed interval. The delay is in milliseconds:
// 5000 = every 5 seconds; use 60000 to run once a minute.
setInterval(function () {
    // A callback must be passed so that node.io's default (which calls
    // process.exit()) is not used; report errors instead of dropping them.
    nodeio.start(new nodeio.Job(options, job), options, function (err) {
        if (err) {
            console.error(err);
        }
    });
}, 5000);
The problem you were running into is the following block of code in node.io that exits node when you don't provide a callback when running the job:
//Default behaviour is to exit once the job is complete
// If the caller supplied no callback, fall back to this handler: it reports
// any error through utils.status.error and then terminates the process.
callback = callback || function (err) {
if (err) {
utils.status.error(err);
}
process.exit(); // runs whether or not there was an error
};