Often I would want to scan an entire directory tree (a directory, and everything inside it, including files, subdirs, and those subdir contents too, and their subdir contents too, etc etc).
How would one accomplish this with node? The requirements are that it should be asynchronous, to take advantage of super fast non-blocking IO, and that it should not crash when processing too many files at once.
I created a solution to do this myself. It requires the dependency bal-util (so run `npm install bal-util`). Here's the code, first in CoffeeScript and then as the equivalent JavaScript:
The source code of the `scandir` function can be found here: https://github.com/balupton/bal-util/blob/master/src/lib/paths.coffee
# Example: asynchronously scan a directory tree with bal-util's scandir.
# Indentation is significant in CoffeeScript: the body of the `next` callback
# must be indented beneath it, and the option keys sit inside the braces.

# Prepare
balUtil = require('bal-util')

# Scan
balUtil.scandir({
  # Path
  # The directory path to scan
  path: 'some/dir/path'

  # Action
  # a function to use for both the fileAction and dirAction
  action: null

  # File Action
  # a function to run against each file, in the following format:
  #   fileAction(fileFullPath,fileRelativePath,next(err,skip),fileStat)
  fileAction: null

  # Dir Action
  # a function to run against each directory, in the following format:
  #   dirAction(fileFullPath,fileRelativePath,next(err,skip),fileStat)
  dirAction: null

  # Next
  # a function to run after the entire directory has been scanned, in the following format:
  #   next(err,list,tree)
  # the callback arguments are:
  #   err: null, or an error that has occurred
  #   list: a collection of all the child nodes in a list/object format:
  #     {fileRelativePath: 'dir|file'}
  #   tree: a collection of all the child nodes in a tree format:
  #     {dir:{dir:{},file1:true}}
  #     if the readFiles option is true, then files will be returned with their contents instead
  next: (err,list,tree) ->
    console.log('All done:', {err,list,tree})

  # Stat
  # a file stat object for the path if we already have one
  stat: null

  # Recurse
  # a boolean for whether or not to scan subdirectories too
  recurse: null

  # Read Files
  # a boolean for whether or not we should read the file contents
  readFiles: null

  # Ignore Hidden Files
  # a boolean for if we should ignore files starting with a dot
  ignoreHiddenFiles: null

  # Ignore Patterns
  # a regex to match paths against to determine if we should ignore them
  # if true will use balUtilPaths.commonIgnorePatterns
  ignorePatterns: null
})
// Load the bal-util helper library, which provides scandir.
var balUtil = require('bal-util');

// Options handed to scandir, in the same order as the original example.
var scanOptions = {
  // path: the directory path to scan.
  path: 'some/dir/path',

  // action: a single function used as both the fileAction and the dirAction.
  action: null,

  // fileAction: a function run against each file, called as
  //   fileAction(fileFullPath, fileRelativePath, next(err, skip), fileStat)
  fileAction: null,

  // dirAction: a function run against each directory, called as
  //   dirAction(fileFullPath, fileRelativePath, next(err, skip), fileStat)
  dirAction: null,

  // next: completion callback, run once the entire directory has been
  // scanned, called as next(err, list, tree) where:
  //   err  - null, or an error that has occurred
  //   list - all the child nodes in a list/object format:
  //            {fileRelativePath: 'dir|file'}
  //   tree - all the child nodes in a tree format:
  //            {dir:{dir:{},file1:true}}
  //          if the readFiles option is true, files are returned with their
  //          contents instead
  next: function (err, list, tree) {
    var summary = {
      err: err,
      list: list,
      tree: tree
    };
    return console.log('All done:', summary);
  },

  // stat: a file stat object for the path, if we already have one.
  stat: null,

  // recurse: a boolean for whether or not to scan subdirectories too.
  recurse: null,

  // readFiles: a boolean for whether or not we should read the file contents.
  readFiles: null,

  // ignoreHiddenFiles: a boolean for if we should ignore files starting
  // with a dot.
  ignoreHiddenFiles: null,

  // ignorePatterns: a regex to match paths against to determine if we should
  // ignore them; if true, balUtilPaths.commonIgnorePatterns is used.
  ignorePatterns: null
};

// Start the scan.
balUtil.scandir(scanOptions);
It meets the requirement of not crashing when too many files are open at once by limiting the number of open files to 100. This is done by using the bal-util wrappers for common `fs` calls instead of calling `fs` directly; the wrappers check whether we are at our limit and, if so, wait until there is an opportunity. Without such a check, our node.js program would easily crash. You can see this by looking at the `openFile` and `closeFile` calls here: https://github.com/balupton/bal-util/blob/master/src/lib/paths.coffee#L7-45