Node.js で SQLite3 のデータのユニーク化をした
データをいろいろ整理したくて昔入ってしまった重複データを排除していた。
作成日やらをちゃんと管理するのは重要だな。
そして、 rowid
に助けられた。
普通にリクエストしたら取得できてないのは罠すぎるよ。
const path = require('path');
const sqlite3 = require('sqlite3').verbose();
class DataUniqueService {
searchSameData(db, row) {
return new Promise((resolve, reject) => {
const ALL_QUERY = `SELECT rowid FROM mediafile WHERE url = ? AND title = ? AND created_at = ?`;
db.all(ALL_QUERY, [row.url, row.title, row.created_at], (error, rows) => {
if (error) {
reject(error);
}
resolve(rows);
});
});
}
deleteByRowid(db, rowid) {
db.run(`DELETE FROM mediafile WHERE rowid = ${rowid}`);
}
deleteRows(db, rows, url, title) {
for (const r of rows) {
this.deleteByRowid(db, r.rowid);
console.log(`Deleted rowid: `, r.rowid, url, title);
}
}
run(db) {
return new Promise((resolve, reject) => {
db.serialize(() => {
db.each('SELECT * FROM mediafile', (error, row) => {
if (error) {
reject(err);
}
this.searchSameData(db, row).then((rows) => {
if (rows.length > 1) {
console.log('Duplicate data found');
rows.sort((a, b) => a.rowid - b.rowid);
rows.shift();
this.deleteRows(db, rows, row.url, row.title);
}
});
}, (error, count) => {
if (error) {
reject(error);
}
resolve();
});
});
});
}
};
class Db() {
if (dbpath) {
const absolutePath = path.resolve(dbpath);
console.log(`DBPATH: ${absolutePath}`);
this.db = new sqlite3.Database(absolutePath);
} else {
this.db = new sqlite3.Database(path.join(process.cwd(), 'mediafile.sqlite'));
this.db.serialize(() => {
this.db.run("CREATE TABLE IF NOT EXISTS mediafile (title TEXT, url TEXT, created_at INTEGER)");
});
}
this.db.serialize(() => {
this.db.run("CREATE TABLE IF NOT EXISTS version (version INTEGER UNIQUE, created_at INTEGER)");
});
}
class DataUniqueController {
constructor(dbPath) {
this.db = new Db(dbPath).db;
this.service = new DataUniqueService();
}
async main() {
console.log('DataUniqueController start');
await this.service.run(this.db);
console.log('DataUniqueController end');
}
};
module.exports = DataUniqueController;
if (require.main === module) (async () => {
if (process.argv.length < 3) {
const controller = new DataUniqueController();
await controller.main();
} else {
const dbPath = process.argv[2];
const controller = new DataUniqueController(dbPath);
await controller.main();
}
})();