I'm using the Node.js module xml2js. My XML file has the following form:
<?xml version="1.0" encoding="UTF-8" ?>
<?xml-stylesheet type="text/xsl"?>
<?xml-stylesheet type="text/css" media="screen" href="some url" ?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">
<channel>
<item>
<pubDate>Fri, 19 Sep 2014 18:00:08 GMT</pubDate>
<guid isPermaLink="false">http://www.example0.com</guid>
</item>
<item>
<pubDate>Fri, 19 Sep 2014 17:52:25 GMT</pubDate>
<guid isPermaLink="false">http://www.example1.com</guid>
</item>
</channel>
</rss>
I want to get all the URLs under <item><guid isPermaLink="false"> as an array.
I'm trying the code below, but it only handles a locally stored XML file, and I'm still unable to get the URLs:
// Reads foo.xml from this script's directory, parses it with xml2js and
// dumps the parsed object to the console.
var fs = require('fs'),
    xml2js = require('xml2js');
var parser = new xml2js.Parser();

// When the parser signals 'end', print the parsed result.
parser.addListener('end', function(result) {
    console.dir(result);
    console.log('Done.');
});

fs.readFile(__dirname + '/foo.xml', function(err, data) {
    // Without this guard a missing/unreadable file would hand `undefined`
    // to the parser instead of reporting the real I/O error.
    if (err) {
        console.error(err);
        return;
    }
    parser.parseString(data);
});
You can use the sax-js module to extract the URLs you need.
The xml2js module you mentioned uses sax-js internally.
Here is the code (a rough cut):
'use strict';

// Streams foo.xml (next to this script) through a sax-js parser and prints
// the text content of every <guid isPermaLink="false"> element.
var sax = require('sax');
var fs = require('fs');

var filePath = __dirname + '/' + 'foo.xml';

// True between the opening of a matching <guid> and the moment its text has
// been printed (or the element has been closed).
var isTextPending = false;

// First argument `true` selects the parser's strict mode.
var saxStream = sax.createStream(true);

saxStream.on('error', function (e) {
    console.error(e);
});

saxStream.on('text', function (text) {
    if (isTextPending) {
        console.log(text);
        isTextPending = false;
    }
});

saxStream.on('opentag', function (node) {
    if (node.name === 'guid' && node.attributes.isPermaLink === 'false') {
        isTextPending = true;
    }
});

// An empty or self-closing <guid> produces no text of its own. Clear the flag
// when the element closes so the next unrelated text node (for example the
// whitespace between tags) is not printed by mistake.
saxStream.on('closetag', function (name) {
    if (name === 'guid') {
        isTextPending = false;
    }
});

fs.createReadStream(filePath)
    // A missing/unreadable file is reported on the read stream, not on the
    // parser; without a handler the 'error' event would be thrown.
    .on('error', function (e) {
        console.error(e);
    })
    .pipe(saxStream);
And the output is:
http://www.example0.com
http://www.example1.com
UPD:
To fetch the XML from the internet instead of reading a local file, use the request module and pipe the response into the same saxStream:
// Fetches the XML over HTTP and streams the response body into the existing
// saxStream parser instead of reading it from disk.
var request = require('request');
var href = 'http://SOME_URL.xml';
request(href)
    // Connection failures are emitted on the request object; without a
    // handler the unhandled 'error' event would be thrown.
    .on('error', function (e) {
        console.error(e);
    })
    .pipe(saxStream);