I am trying to write a JavaScript algorithm that loops through my MongoDB database, finds pairs of documents with identical URLs, and analyzes their header objects.
Below is a snippet of an example of this object:
{
"url": "www.professionalsupplementcenter.com",
"date": "Tue Mar 26 2013 15:08:31 GMT-0400 (EDT)",
"headers": {
"server": "Microsoft-IIS/7.5",
"x-aspnet-version": "4.0.30319",
"x-powered-by": "ASP.NET"
}
}
Specifically, I will have two collections in MongoDB, each holding HTTP header scrapes, taken two months apart. Let's call them todayScrape and twoMonthFromNowScrape. Across these two collections, I want to compare the header objects (server, x-aspnet-version, and x-powered-by) and see whether there have been any upgrades in these version numbers, e.g. IIS/7.5 upgraded to IIS/8.0 (in the future).
I do not have any code to display, because I don't know how to implement this system at all. I have no idea where to begin. I want to first compare the two URLs; then, when the program identifies that the same URL exists in both collections, it would do a specific comparison of the three header fields. It could then report whenever these attributes have changed by scanning them in order, and trigger some event such as console.log("There has been a change") when the strings are not identical.
Can anyone give me any suggestions on where to begin? I have been stuck on this issue for a couple of days, and being stuck has been disheartening. I would really like to begin implementing it, but I need some assistance.
The first thing you need to do is run npm install mongodb (if you haven't already). Then, in your app.js file:
// Compares HTTP header scrapes stored in two collections of the 'scraperapp'
// database: for every document in 'scrape' that has a url, the document with
// the same url in 'scrapeFuture' is looked up and the selected header fields
// are compared. A line is logged for each field whose value differs.
var mongodb = require('mongodb');

// Header fields to compare, paired with the label used in the log message
// (the labels are kept exactly as originally printed).
var HEADER_CHECKS = [
    {name: 'server', label: 'servers'},
    {name: 'x-aspnet-version', label: 'x-aspnet-versions'},
    {name: 'x-powered-by', label: 'x-powered-by'}
];

var Server = new mongodb.Server('127.0.0.1', 27017, {});
new mongodb.Db('scraperapp', Server, {w: 1}).open(function(err, db){
    if(err){
        console.error('Could not open database:', err);
        return;
    }

    var scrape = new mongodb.Collection(db, 'scrape');
    var scrapeFuture = new mongodb.Collection(db, 'scrapeFuture');

    // Single exit point: report a failure (if any) and release the
    // connection so the process can terminate.
    var finish = function(failure){
        if(failure) console.error('Scrape comparison failed:', failure);
        db.close();
    };

    scrape.find({url: {$exists: true}}).toArray(function(err, today_docs){
        if(err) return finish(err);
        if(!today_docs) return finish();

        // Processes the document at index i, then moves on to i + 1 from
        // inside the query callback so lookups run one at a time.
        var scrapeFn = function(i){
            // Also covers an empty result set, where index 0 does not exist.
            if(i >= today_docs.length) return finish();

            var today_doc = today_docs[i];
            scrapeFuture.findOne({url: today_doc.url}, function(err, future_doc){
                if(err) return finish(err);

                // A url missing from the future scrape is skipped; it must
                // not stop the walk over the remaining documents.
                if(future_doc){
                    // Tolerate documents that were stored without headers.
                    var today_headers = today_doc.headers || {};
                    var future_headers = future_doc.headers || {};
                    HEADER_CHECKS.forEach(function(check){
                        if(today_headers[check.name] !== future_headers[check.name])
                            console.log(today_doc.url + ':' + check.label + ' different');
                    });
                }

                scrapeFn(i + 1);
            });
        };
        scrapeFn(0);
    });
});