When I parse a page, I get a list of titles. Each title must be looked up in the database and, if it is not there yet, inserted.
Example list:
test
test2
test3
test4
test2
test
test5
test4
As you see, there are some duplicates.
My code:
// For every ".program-table .item" element: parse the title text, look the
// movie up by name and year, and insert it when the SELECT returns no rows.
// NOTE(review): the regular expression in the match() call below is cut off
// in this listing, so that statement is incomplete as shown.
// NOTE(review): both queries report their results through callbacks and
// each() does not wait for them; presumably the lookups for repeated titles
// run before the first insert has finished, which would explain duplicates.
// Query errors are dropped silently by the bare `return` statements.
var $ = cheerio.load(body);
$(".program-table .item").each(function() {
var titleOriginal = $(this).find(".title-original").text().match(/(.+)\s\
var movieYear = titleOriginal[2];
var movieTitle = titleOriginal[1];
connection.query("SELECT id FROM movie WHERE name = ? AND year = ?", [movieTitle, movieYear], function(err, result) {
if (err) return;
if (result.length) {
console.log('Found, ID: %s', result[0].id);
} else {
connection.query("INSERT INTO movie SET name = ?, year = ?", [movieTitle, movieYear], function(err, result) {
if (err) return;
console.log('Not found, adding into database. ID: %s', result.insertId);
});
}
});
});
Result: When the database is empty, or contains no records for these titles, the script adds every title to the database, duplicates included. When I run it a second time, it reports that each title was found and skips it.
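Your .each()
callback does not wait for the query to finish before moving on to the next element, so the SELECT for every item is issued before any INSERT has completed and repeated titles all look "not found". You could use something like async to help manage this: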
var async = require('async');
// ...
var $ = cheerio.load(body);

// Walk the items strictly one at a time: the SELECT (and INSERT, if needed)
// for one item must finish before the next item's SELECT starts, otherwise a
// title that appears twice on the page is looked up twice before either
// INSERT lands and ends up in the table twice. `eachSeries` gives that
// ordering; plain `each` would start every iteratee at once.
// `.get()` turns the cheerio selection into a plain array of elements.
async.eachSeries($('.program-table .item').get(), function(el, cb) {
  // NOTE(review): the pattern in the question was cut off after `(.+)\s\`;
  // this assumes the text looks like "Some Title (1999)" -- confirm against
  // the real markup. Two capture groups are required because both [1] and
  // [2] are read below.
  var titleOriginal = $(el).find('.title-original').text().match(/(.+)\s\((\d+)\)/);
  if (!titleOriginal) {
    // Text did not match the expected shape; skip this item rather than
    // throwing on a null match result.
    return cb();
  }
  var movieTitle = titleOriginal[1];
  var movieYear = titleOriginal[2];

  connection.query('SELECT id FROM movie WHERE name = ? AND year = ?',
    [movieTitle, movieYear],
    function(err, result) {
      if (err) return cb(err);
      if (result.length) {
        console.log('Found, ID: %s', result[0].id);
        return cb();
      }
      connection.query('INSERT INTO movie SET name = ?, year = ?',
        [movieTitle, movieYear],
        function(err, result) {
          if (err) return cb(err);
          console.log('Not found, adding into database. ID: %s', result.insertId);
          cb();
        });
    });
}, function(err) {
  // if `err` is set, we had a database query error ...
  if (err) throw err;
});
I wasn't sure what your titleOriginal
regexp was supposed to be (it looks like it was cut off in your original code), so I guessed.