Refactoring nested callbacks, node.js, async

function indexArticles(callback) {
  fs.readdir("posts/", function(err, files) {
    async.map(files, readPost, function(err, markdown) {
      async.map(markdown, parse, function(err, results) {
        async.sortBy(results, function(obj, callback) {
          callback(err, obj.date);
        }, function(err, sorted) {
          callback( {"articles": sorted.reverse()} );
        });
      });
    });
  });
}

I'm trying to figure out how to make this prettier -- as you can tell I'm using caolan's async library, but I'm not sure which of the control flow structures to use. It seems like if I use async.waterfall, for example, that results in quite a bit more code, with each step having to be wrapped in an anonymous function. For example, this is just the first two lines of the nested version with waterfall:

function indexArticles(callback) {
  async.waterfall([
    function(callback) {
      fs.readdir("posts/", function(err, files) {
        callback(err, files)
      })
    },

    function(files, callback) {
      async.map(files, readPost, function(err, markdown) {
        callback(err, markdown)
      })
    }])
}

How would you improve this?

If there were a way to partially apply arguments NOT only from the left, then I could see doing, for example,

function indexArticles(callback) {
  async.waterfall([
    async.apply(fs.readdir, "posts/"),
    async.apply(async.map, __, readPost),
    async.apply(async.map, __, parse),
    // etc...
  ])
}

This is an interesting problem, as you need to bind arguments both to the left and to the right of your iterator functions, so neither bind/ nor bindRight (of which there are a few implementaions on StackOverflow) will work for you. There's a few options for you here:

(1) First, in your async.waterfall example, you have:

function(callback) {
  fs.readdir("posts/", function(err, files) {
    callback(err, files)
  })
}

which is the same as:

function(callback) {
  fs.readdir("posts/", callback)
}

Using Function.bind and this method, your entire function indexArticles could be written:

function indexArticles(callback) {
  async.waterfall([
    fs.readdir.bind(this, 'posts/'),
    function(files, cb) { async.map(files, readPost, cb); },
    function(text, cb) { async.map(text, parse, cb); },
    function(results, cb) { async.sortBy(results, function(obj, callback) {
      callback(null, obj.date);
    }, cb) }
  ], function(err, sorted) {
    callback( {"articles": sorted.reverse()} );
  });
};

Which is a bit shorter.

(2) If you really want to avoid the wrapping functions, you can use a type of partial function application. First, at the top of your file (or in a module, etc), define a function called partial:

var partial = function(fn) {
  var args = Array.prototype.slice.call(arguments, 1);
  return function() {
    var currentArg = 0;
    for(var i = 0; i < args.length && currentArg < arguments.length; i++) {
      if (args[i] === undefined)
        args[i] = arguments[currentArg++];
    }
    return fn.apply(this, args);
  };
}

This function takes a function and any number of arguments, and replaces undefined values in the arguments list with the actual arguments when the function is called. You would then use it like this:

function indexArticles(callback) {
  async.waterfall([
    fs.readdir.bind(this, 'posts/'),
    partial(async.map, undefined, readPost, undefined),
    partial(async.map, undefined, parse, undefined),
    partial(async.sortBy, undefined, function(obj, callback) {
      callback(null, obj.date);
    }, undefined)
  ], function(err, sorted) {
    callback( {"articles": sorted.reverse()} );
  });
}

So, partial(async.map, undefined, readPost, undefined) returns a function that, when called by the Async library as fn(files, callback), it fills in files for the first undefined, and callback for the second undefined, ending in a call to async.map(files, readPost, callback).

(3) There is also a version of partial for Function.prototype at this StackOverflow answer, allowing you to use the syntax: async.map.partial(undefined, readPost, undefined); however, I would probably recommend against modifying Function.prototype in this way, and just use partial as a function.

In the end, it's up to you which method is the most readable and maintainable.

Looks like I have some overlap with Brandon's answer, but here's my take:

 var async = require("async")

//dummy function
function passThrough(arg, callback){
  callback(null, arg)
}

//your code rewritten to only call the dummy. 
//same structure, didn't want to think about files and markdown
function indexArticles(callback) {
  passThrough("posts/", function(err, files) {
    async.map(files, passThrough, function(err, markdown) {
      async.map(markdown, passThrough, 
        function(err, results) {
          async.sortBy(results, function(obj, callback) {
            callback(err, obj);
        }, 
        function(err, sorted) {
          callback( {"articles": sorted.reverse()} );
        });
      });
    });
  });
}
indexArticles(console.log)

//version of apply that calls 
//fn(arg, arg, appliedArg, apliedArg, callback)
function coolerApply(fn) {
  var args = Array.prototype.slice.call(arguments, 1);
  return function () {
    var callback = Array.prototype.slice.call(arguments, -1)
    var otherArgs = Array.prototype.slice.call(arguments, 0, -1)
    return fn.apply(
      null, otherArgs.concat(args).concat(callback)
    );
  };
};

//my version of your code that uses coolerAppl
function indexArticles2(callback){
  async.waterfall([
    async.apply(passThrough, "posts/"),
    coolerApply(async.map, passThrough),
    coolerApply(async.map, passThrough),
    coolerApply(async.sortBy, function(obj, callback){callback(null,obj)})
  ],
  function(err, sorted){
    callback({"articles": sorted.reverse()})
  })
}
//does the same thing as indexArticles!
indexArticles2(console.log)

Here's what I've ended up with so far.

function indexArticles(callback) {
  var flow = [
    async.apply(fs.readdir, "posts/"),

    function(data, callback) { async.map(data, readPost, callback); },

    function sortByDate(parsed, callback) {
      var iterator = function(obj, callback) {
        if (obj.date) { callback(null, obj.date); }
        else { callback("Article has no date.") }
      }
      // Note that this sorts in reverse lexicographical order!
      async.sortBy(parsed, iterator,
          function(err, sorted) { callback(err, {"articles": sorted.reverse()} ); }
        );
    }
  ];

  async.waterfall(flow, async.apply(callback))
}

I've recently created a simple abstraction named WaitFor to call async functions in sync mode (based on Fibers): https://github.com/luciotato/waitfor

I've not tested it with the async package, but it should work. If you run into problems, contact me.

Using wait.for and async your code will be:

var wait = require('waitfor');
var async = require('async');

function indexArticles(callback) {
  var files = wait.for(fs.readdir,"posts/");
  var markdown = wait.for(async.map, files, readPost);
  var results = wait.for(async.map, markdown, parse);
  var sorted = wait.for(async.sortBy, results, function(obj, callback) {
                                                  callback(null, obj.date);
                                              });
  callback( null, {"articles": sorted.reverse()} );
}

to call your fn (async-mode):

//execute in a fiber
wait.launchFiber(indexArticles,function(err,data){
       // do something with err,data
       }); 

to call your fn (sync-mode):

//execute in a fiber
function handleRequest(req,res){
    try{
        ...
        data = wait.for(indexArticles); //call indexArticles and wait for results
        // do something with data
        res.end(data.toString());
    }
    catch(err){
        // handle errors
    }
}

// express framework
app.get('/posts', function(req, res) { 
    // handle request in a Fiber, keep node spinning
    wait.launchFiber(handleRequest,req,res);
    });