Node.js async directory traversal with accumulator

I want to get a list of the unique file names contained in a directory, including its subdirectories, using Node, but I am having trouble combining the results of each callback. I want to avoid operating on duplicates, which is what happens if I just log the filename from the fs.stat callback.

// Shared accumulator: each key is a base filename that has been seen.
var distinct = {};

/**
 * Walks `root` recursively with the callback-based fs API and records the
 * base name of every non-directory entry as a key on `distinct`.
 * Nothing is returned and no completion signal is given to the caller.
 */
function getNames(root) {
    fs.readdir(root, function(err, list) {
        list.forEach(function(name) {
            var fullPath = root + '/' + name;
            fs.stat(fullPath, function(statErr, info) {
                var isDir = !statErr && info.isDirectory();
                if (isDir) {
                    // descend into the subdirectory
                    getNames(fullPath);
                    return;
                }
                distinct[path.basename(fullPath)] = true;
            });
        });
    });
}

// perform various operations on unique filename list
// NOTE: this statement executes synchronously, whereas getNames() only fills
// `distinct` from inside its fs.readdir/fs.stat callbacks, so the keys are
// logged before those callbacks have had a chance to run.
console.log(Object.keys(distinct));

Of course this calls the console.log() function too early and gives undesired results. How can I build a set of filenames to work on? Is there a nice way of doing this using the async methods, i.e., without having to use readdirSync and statSync?

The async module will be your friend here.

// Shared accumulator: each key is a base filename that has been seen, so
// duplicates found in different subdirectories collapse into one entry.
var distinct = {};

/**
 * Recursively collects the unique base names of all non-directory entries
 * under `root`.
 *
 * @param {string} root - directory to traverse.
 * @param {function(Error=, Object=)} callback - invoked once, either with an
 *   error from fs.readdir/fs.stat, or with a falsy error and the `distinct`
 *   accumulator after every entry below `root` has been processed.
 */
function getNames(root, callback) {
    fs.readdir(root, function(err, list) {
        if (err) return callback(err);

        async.each(list, function(file, done) {
            // readdir yields bare entry names; resolve them against root,
            // otherwise stat would look them up relative to the cwd.
            var fullPath = path.join(root, file);
            fs.stat(fullPath, function(err, stat) {
                if (err) return done(err);
                // a subdirectory is finished only when its own traversal is
                if (stat.isDirectory()) return getNames(fullPath, done);
                distinct[path.basename(fullPath)] = true;
                done();
            });
        }, function(err) {
            // called when all done or error
            callback(err, distinct);
        });
    });
}

Then you can use it like

// getNames reports through a Node-style callback: the error comes first,
// the accumulated set of names second.
getNames('path/to/dir', function(err, distinct) {
    if (err) throw err;
    // logic
});

Might I suggest using promises?

/**
 * Recursively collects the unique base names of all non-directory entries
 * under `root`.
 *
 * @param {string} root - directory to traverse.
 * @returns {Promise<string[]>} promise for a flat array of unique file names;
 *   it rejects with the first fs.readdir/fs.stat error encountered.
 */
function getNames(root) {
    // Declared locally: as implicit globals these would be shared (and
    // overwritten) by every concurrent recursive call.
    var readdir = Q.nfbind(fs.readdir);
    var stat = Q.nfbind(fs.stat);

    return readdir(root)
        .then(function(list) {
            // map() gives each entry its own `file` binding, so the deferred
            // stat handler sees the path it was created for, not the last one.
            var pending = list.map(function(entry) {
                var file = path.join(root, entry);
                return stat(file).then(function(stats) {
                    return stats.isDirectory() ? getNames(file) : [entry];
                });
            });
            return Q.all(pending);
        })
        .then(function(groups) {
            // Each element is an array of names (one per file, many per
            // subdirectory); flatten and de-duplicate via object keys.
            var seen = {};
            groups.forEach(function(names) {
                names.forEach(function(name) {
                    seen[name] = true;
                });
            });
            return Object.keys(seen);
        });
}
// Print the resolved value on success, or the rejection reason on failure.
getNames('someDir')
    .then(function(names) {
        console.log(names);
    })
    .catch(function(reason) {
        console.error(reason);
    });

The problem here is console.log is firing before any of those callbacks in your main function. Your function needs to be an async wrapper around those internally-called functions.

This is the basic idea

/**
 * Async wrapper: runs doSomethingAsync and forwards its outcome to `done`
 * using the Node callback convention.
 *
 * @param {function(Error|null, *)} done - receives the error (and whatever
 *   value accompanied it) on failure, or `null` and the value on success.
 */
function myFunc(done) {
  doSomethingAsync(function(err, someValue) {
    if (err) {
      return done(err, someValue);
    }
    done(null, someValue);
  });
}

Then you use it like this

// Rethrow a failure; otherwise print the value the wrapper produced.
myFunc(function onDone(error, result) {
  if (error) {
    throw error;
  }
  console.log(result);
});