Noob here. I've got an HTTP request that pulls all of the content from a specific webpage. However, all I need is a specific string: "Most recent instantaneous value: ".
In fact, what I actually need is to store the value that follows "value:".
Here is my code:
// Sends a request to the USGS water-data page and dumps the response
// (status, headers, and every body chunk) to the console.
var http = require("http");
// Request target: host, port, path (including the query string) and HTTP method.
var options = {
host: 'waterdata.usgs.gov',
port: 80,
path: '/ga/nwis/uv?cb_72036=on&cb_00062=on&format=gif_default&period=1&site_no=02334400',
// NOTE(review): the request is sent as a POST with a dummy body (see the
// req.write calls below); confirm whether a plain GET is what is intended.
method: 'POST'
};
// Create the request; the callback receives the response object `res`.
var req = http.request(options, function(res) {
console.log('STATUS: ' + res.statusCode);
console.log('HEADERS: ' + JSON.stringify(res.headers));
// Have body chunks delivered as UTF-8 strings.
res.setEncoding('utf8');
// Each 'data' event carries one piece of the body; here it is only logged,
// nothing is stored or searched.
res.on('data', function (chunk) {
console.log('BODY: ' + chunk);
});
});
// Log request-level failures.
req.on('error', function(e) {
console.log('problem with request: ' + e.message);
});
// write data to request body
req.write('data\n');
req.write('data\n');
// Signal that the request is complete.
req.end();
I realize I don't need all the console.log statements, but do I need to keep console.log('BODY: ' + chunk); so that all of the data downloads?
Never do it the way I'm doing it in this quick'n'dirty example. There are plenty of modules for DOM traversal, HTML/XML parsing, etc. They are a lot safer than a simple regex. But just so you get the general idea:
var http = require("http");
// Request target for the USGS water-data page: host, port, and the path
// whose query string selects the site and the parameters to show.
var options = {
  host: "waterdata.usgs.gov",
  port: 80,
  path: "/ga/nwis/uv?cb_72036=on&cb_00062=on&format=gif_default&period=1&site_no=02334400"
};
/**
 * Find the reading that follows the "Most recent instantaneous value:" label
 * in a piece of page text and hand it to a callback.
 *
 * The value is the first run of non-whitespace characters after the label,
 * so it is found whether it is followed by a space, a tab, a line break, or
 * the end of the text, and regardless of how much whitespace separates it
 * from the label.
 *
 * @param {string} body - Text to search; nothing happens when it is empty or missing.
 * @param {function(string): void} cb - Called once with the captured value
 *   when the label is found; not called otherwise.
 * @returns {void}
 */
function extract (body, cb) {
  if (!body) {
    return;
  }
  // \s* and \S+ instead of literal spaces: the page markup may put a newline
  // or nothing at all after the value.
  var matches = body.match(/Most recent instantaneous value:\s*(\S+)/);
  if (matches) {
    cb(matches[1]);
  }
}
// Fetch the page and print the extracted value.
// The response arrives in arbitrarily sized chunks, so the label and its
// value can be split across two 'data' events. Collect the whole body first
// and search it once when the response has ended.
http.get(options, function(res) {
  var body = '';
  res.setEncoding('utf8');
  res.on('data', function (chunk) {
    body += chunk;
  });
  res.on('end', function () {
    extract(body, function(v){ console.log(v); });
  });
}).on('error', function(e) {
  // Request-level failures are reported here.
  console.log('problem with request: ' + e.message);
});
Somehow I also got a different page when sending a POST instead of a GET request. So I changed that bit...
Regarding your second question: No, you don't need to keep any of the console.log() statements. Just use callbacks and everything is fine! :-)