So in this (simplified) bit of code, when someone hits my node server I make a GET request to another website and print the HTML page title to the console. Works fine:
var http = require("http");
var cheerio = require('cheerio');
var port = 8081;
var s = http.createServer(function (req, res) {
    var opts = {
        method: 'GET',
        port: 80,
        hostname: "pwoing.com",
        path: "/"
    };
    // fetch the remote page, buffer the whole body, then parse it with cheerio
    http.request(opts, function (response) {
        console.log("Content-length: ", response.headers['content-length']);
        var str = '';
        response.on('data', function (chunk) {
            str += chunk;
        });
        response.on('end', function () {
            var dom = cheerio.load(str);
            var title = dom('title');
            console.log("PAGE TITLE: ", title.html());
        });
    }).end();
    res.end("Done.");
}).listen(port, '127.0.0.1');
However, in the actual app, users can specify a URL to hit. That means my node server could end up downloading 20 GB movie files or whatever. Not good. The content-length header is no use for stopping this either, as it isn't sent by all servers. The question then:
How can I tell it to stop the GET request after, say, the first 10KB are received?
Cheers!
You could abort the request once you have read enough data:
http.request(opts, function (response) {
    var request = this; // 'this' is the ClientRequest inside the response callback
    console.log("Content-length: ", response.headers['content-length']);
    var str = '';
    response.on('data', function (chunk) {
        str += chunk;
        if (str.length > 10000) {
            // stop the transfer once roughly 10 KB is buffered
            request.abort();
        }
    });
    response.on('end', function () {
        console.log('done', str.length);
        ...
    });
}).end();
This will abort the request at around 10,000 bytes rather than at exactly 10 KB, since the data arrives in chunks of varying sizes.
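If you want a hard cap rather than an approximate one, you can also trim the final chunk before appending it, so str never grows past the limit. A rough sketch along the same lines (the LIMIT value and the final logging are just illustrative; it reuses the same opts object as above, and on newer Node versions request.destroy() replaces the deprecated request.abort()):

var LIMIT = 10000; // illustrative cap, counted in characters of the decoded body

http.request(opts, function (response) {
    var request = this;
    var str = '';
    response.on('data', function (chunk) {
        // trim the chunk so str never grows past LIMIT, then stop the transfer
        str += chunk.toString().slice(0, LIMIT - str.length);
        if (str.length >= LIMIT) {
            console.log('limit reached, aborting at', str.length, 'characters');
            request.abort(); // request.destroy() on current Node versions
        }
    });
    response.on('end', function () {
        // only reached if the body fit under LIMIT and the response completed normally
        console.log('done', str.length);
    });
}).end();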