I am trying to save data from an external API into a mongodb using nodejs. The script feels really lightweight to me, but for some reason it's using a lot of RAM (from top):
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
2626 root 20 0 756m 113m 7148 S 6.5 11.4 3:11.74 nodejs
This is what the script does in psuedo code:
each 5 seconds
fetch 3 JSON lists through an API
for all new items in list
store in mongo
[edit] The JSON lists are aprox. 10kb each. So I don't think it has to do with keeping that in memory until I processed the items. [/edit]
the (light) dependencies are:
I wrote it as simple functions, when returning they should give back all the memory they used right?
Here is the whole script:
var querystring = require("querystring");
var https = require('https');
var fetch = function(cur, callback) {
cur = cur.toLowerCase().replace('/', '_');
var options = {
host: 'data.fxbtc.com',
path: '/api?op=query_last_trades&count=100&symbol=' + cur,
method: 'GET',
headers: {
'User-Agent': 'Mozilla/4.0 (compatible; node.js client)'
}
};
var req = https.request(options, function(res) {
res.setEncoding('utf8');
var buffer = '';
res.on('data', function(data) {
buffer += data;
});
res.on('end', function() {
try {
var json = JSON.parse(buffer);
} catch (err) {
return callback(err);
}
callback(null, json);
});
});
req.end();
}
var currencies = [
'BTC/CNY',
'LTC/CNY',
'LTC/BTC'
];
var LAST_TRADE = {
'BTC/CNY': 0,
'LTC/CNY': 0,
'LTC/BTC': 0
}
var _ = require('underscore');
var mongo = require('mongodb');
var moment = require('moment');
var init = function(next) {
mongo.connect('mongodb://127.0.0.1:27017/coindata', next);
}
var now = function() {
return moment().format('YYYY-MM-DD HH:mm:ss');
}
console.log(now(), 'STARTING');
setInterval(function() {
console.log(now(), 'alive')
}, 60000)
var collections = {};
var forever = function(err, db) {
if(err) throw err;
_.each(currencies, function(cur, i) {
collections[cur] = db.collection('fxbtc_' + cur);
collections[cur].ensureIndex({fid: 1}, {unique: true}, console.log);
setTimeout(function() {
console.log(now(), 'registering', cur);
setInterval(check(cur), 5 * 1000);
}, i * 1000);
});
}
var check = function(cur) {
return function() {
fetch(cur, function(err, trades) {
if(err) return console.log(now(), 'ERROR-FETCH', err);
trades = _.map(trades.datas, function(trade) {
return {
date: new Date(trade.date * 1000),
price: parseFloat(trade.rate),
amount: parseFloat(trade.vol),
fid: parseInt(trade.ticket)
}
});
trades = _.filter(trades, function(trade) {
return trade.fid > LAST_TRADE[cur];
});
var fids = _.pluck(trades, 'fid');
fids.push(LAST_TRADE[cur]);
LAST_TRADE[cur] = _.max(fids);
if(!trades.length)
return;
console.log(now(), 'storing:', trades.length, 'in', cur);
collections[cur].insert(trades, function(err, docs) {
if(err && err.code !== 11000) console.log(now(), 'ERROR-STORE', err);
});
});
}
}
init(forever);
Are there any obvious memory leaks in this script? How do I go about finding the source of all the used memory?
The project I am working on is polling a lot of different API services (15+) and storing all latest changes.
My initial thought was to write a small script for each different service, which has a loop inside that should stay up forever. The problem (as described above) is that somehow the memory would grow to 40 - 120mb (depending on a couple of things) per service and my system would run out of RAM.
This is how I solved it now:
instead of keeping a process per service alive I rewrote all scripts to run only once and wrote a master script that is responsible for running each script per service script after x amount of time:
var cp = require('child_process');
var moment = require('moment');
var i = 0;
var watch = function(options) {
i++;
setTimeout(function() {
var fid = 0;
setInterval(function() {
var worker = cp.fork('./process_' + options.exchange + '.js');
worker.send(fid);
worker.once('message', function(new_fid) {
fid = new_fid;
worker.kill();
});
}, options.interval);
}, i * 3000);
}
And then I register all different services like so:
watch({exchange: 'bitcurex', interval: +moment.duration(9, 'minutes')});
It has been running for a little over 10 hours or now with little to no memory footprint (I can't find it in top).