There are several crawlers available on npm / GitHub; the one I usually use is simplecrawler.
A script I use:
var Crawler = require("simplecrawler");
var fs = require("fs");
var url = require("url");
var path = require("path");

var domain = "http://teu.dominio.com/";
// Destination directory
var outputDirectory = path.join(__dirname, "tua_pasta");

new Crawler(domain).on("fetchcomplete", function (queueItem, responseBuffer, response) {
    var parsed = url.parse(queueItem.url);

    // Map the site root to index.html
    if (parsed.pathname === "/") {
        parsed.pathname = "/index.html";
    }

    var filepath = path.join(outputDirectory, parsed.pathname);
    var dirname = path.dirname(filepath);

    // Create the directory tree if needed, then write the response body.
    // Built-in fs.mkdir supports { recursive: true } since Node 10.12, so the
    // old node-fs package is no longer needed. Writing the raw buffer (no
    // encoding argument) keeps binary files such as images intact.
    fs.mkdir(dirname, { recursive: true }, function (err) {
        if (err) return console.error(err);
        fs.writeFile(filepath, responseBuffer, function (err) {
            if (err) console.error(err);
        });
    });
}).start();
You only need to configure the domain and the destination folder; after that, just run the file (e.g. node crawler.js). If you need to throttle the crawl, see the tuning sketch below.
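If the defaults hit the server too hard, simplecrawler also exposes a few properties for tuning the request rate and crawl depth, plus fetcherror and complete events. A minimal sketch, with illustrative values (adjust them for your target site):

var Crawler = require("simplecrawler");

var crawler = new Crawler("http://teu.dominio.com/");

// Illustrative values - tune for the site you are crawling.
crawler.interval = 500;     // ms between requests
crawler.maxConcurrency = 2; // parallel requests
crawler.maxDepth = 3;       // how many links deep to follow (0 = unlimited)

crawler.on("fetcherror", function (queueItem, response) {
    console.error("Failed to fetch " + queueItem.url);
});

crawler.on("complete", function () {
    console.log("Crawl finished.");
});

crawler.start();

You would attach the same fetchcomplete handler from the script above to this crawler instance before calling start().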