I'm having a problem making a large number of requests with the 'HEAD' method.
I've set the async.queue concurrency to 20 and the timeout to 3000 ms. Still, when I run it I see 10-15 successes, then some timeouts mixed with a few more successes, and then it hangs... nothing happens after that.
If I remove the timeout I get about 10 successes and then it hangs... And I don't get an error message either.
The request code:
/**
 * Sends an HTTP HEAD request for `link` and, unless the server answers
 * 404 or 500, appends `{ loc, lastmod? }` to `parser.allObjects`.
 *
 * The optional `callback` is invoked exactly once when the request is over,
 * whatever the outcome. Without that signal an `async.queue` worker never
 * releases its slot, and the queue stalls once every slot is occupied.
 *
 * NOTE(review): the request always goes through the plain `http` module, so
 * `https:` links are not handled here — confirm the caller only passes http.
 *
 * @param {string} link - Absolute URL to probe.
 * @param {function(?Error)} [callback] - Called with `null` on a completed
 *   response (including 404/500) or with an Error on timeout/network failure.
 */
function getHeader(link, callback)
{
    var correctUrl = url.parse(link);
    var options = {
        method: 'HEAD',
        host: correctUrl.hostname,
        // Honour an explicit port in the link; fall back to the HTTP default.
        port: correctUrl.port || 80,
        // `path` keeps the query string; `pathname` would silently drop it
        // and request a different resource.
        path: correctUrl.path
    };
    var finished = false;

    // Timeout, error and response can each fire for the same request (a
    // destroyed request also emits 'error'), so completion is reported once.
    function done(err) {
        if (finished) return;
        finished = true;
        if (typeof callback === 'function') callback(err || null);
    }

    var req = http.request(options, function(res) {
        // Consume the response so the underlying socket can be released.
        res.resume();
        if(res.statusCode == 404 || res.statusCode == 500) return done(null);
        var x = {
            loc : link
        };
        if(typeof(res.headers['last-modified']) != "undefined")
        {
            x.lastmod = dateConverter(res.headers['last-modified']);
            console.log("Added lastmodify: " + x.lastmod);
        }
        console.log(res.headers);
        parser.allObjects.push(x);
        done(null);
    });
    req.setTimeout(3000, function() {
        console.log("Timeout reached. Link:" + link);
        // Report first so the caller sees the timeout rather than the
        // generic socket error that follows the destroy.
        done(new Error("Timeout reached. Link:" + link));
        req.destroy();
    });
    req.on('error', function (e) {
        console.log('problem with request: ' + e.message);
        done(e);
    });
    req.end();
}
And the queue is here:
// Work queue that runs getHeader for each pushed link. The second argument
// to async.queue is parser.serverMight (currently 20, decreased from 50).
var queue = async.queue(function (link, next) {
    getHeader(link, function (headErr) {
        // Forward a failure as-is; otherwise signal success with no arguments.
        return headErr ? next(headErr) : next();
    });
}, parser.serverMight);

// Once the queue has drained, hand the collected objects to formXml.
queue.drain = function () {
    formXml(null, parser.allObjects);
};

// Enqueue the links; the callback only logs a reported error.
queue.push(toRequest, function (taskErr) {
    if (taskErr) {
        console.log(taskErr);
    }
});
Any help is highly appreciated, thanks.
Heh, I found it myself; maybe this will help someone. The mistake was very simple: I never invoked the callback from the getHeader function, I just used return. That's why the queue couldn't start the next round of tasks. The httpreq version takes less space, so I'll let it stay.
Here is how the correct code looks:
/**
 * Fetches `link` through httpreq and records its header information.
 *
 * - Request error: forwarded to `callback(err)`.
 * - Status 404 or 500: the link is removed from `parser.allHrefs`, a message
 *   is logged and `callback(null)` is called.
 * - Anything else: `{ loc, lastmod? }` is pushed onto `parser.allObjects`
 *   and `callback(null)` is called.
 *
 * NOTE(review): `remove` is not a standard Array method — presumably it is
 * defined elsewhere in the project; verify what it does when `indexOf`
 * returns -1.
 *
 * @param {string} link - URL to fetch.
 * @param {function(?Error)} callback - Completion callback; invoked on every path.
 */
function getHeader(link, callback)
{
    httpreq.get(link, function(requestError, response) {
        if(requestError) return callback(requestError);

        var status = response.statusCode;
        if(status != 404 && status != 500)
        {
            // Collect header info for a usable response.
            var entry = {};
            entry.loc = link;
            var lastModified = response.headers['last-modified'];
            if(lastModified !== undefined)
            {
                entry.lastmod = dateConverter(lastModified);
            }
            console.log("Success adding header:" + entry.loc);
            parser.allObjects.push(entry);
            return callback(null);
        }

        // Missing or failing page: drop it from the list of known links.
        var position = parser.allHrefs.indexOf(link);
        parser.allHrefs.remove(position);
        console.log("Faced status code 404 || 500. url deleted: " + link);
        return callback(null);
    });
}
P.S.: For some reason 'httpreq' (which requests the full response body) makes this faster than 'http' (which requests only HEAD)...