import { Client } from '@elastic/elasticsearch';
|
import xlsx from "node-xlsx";
|
const client = new Client({ node: 'http://localhost:9200' });
|
import sqlite3 from "sqlite3";
|
|
async function createIndex() {
|
try {
|
if (await client.indices.exists({ index: 'books' })) { return; }
|
await client.indices.create({
|
index: 'books',
|
body: {
|
mappings: {
|
properties: {
|
id: { type: 'keyword' },
|
title: { type: 'text' },
|
author: { type: 'text' },
|
isbn: { type: 'text' },
|
},
|
},
|
}
|
});
|
} catch (e) {
|
console.error(e);
|
}
|
}
|
|
function getBooksFromDb() {
|
return new Promise((resolve, reject) => {
|
const db = new sqlite3.Database("./book-list.db");
|
db.all("SELECT Title as title,Author as author,ISBN as isbn FROM t_books", (err, rows) => {
|
if (err) {
|
console.error(err);
|
} else {
|
resolve(rows);
|
}
|
db.close();
|
});
|
});
|
}
|
|
async function indexBooks() {
|
const books = await getBooksFromDb();
|
const bookGroups = [];
|
for (let i = 0; i < books.length; i += 1000) {
|
bookGroups.push(books.slice(i, i + 1000));
|
}
|
|
for (const bookGroup of bookGroups) {
|
const body = bookGroup.map(book => ([{ index: { _index: 'books', _id: book.id } }, book])).flat();
|
try {
|
const response = await client.bulk({ body });
|
console.log(response.errors);
|
} catch (e) {
|
console.error(e);
|
}
|
}
|
}
|
|
async function importBooToEs() {
|
await createIndex();
|
await indexBooks();
|
}
|
|
async function searchBook() {
|
const books = [];
|
const workSheets = xlsx.parse("./fictionnoisbn.xlsx");
|
for (const sheet of workSheets) {
|
books.push(...sheet.data.map(row => ({ id: row[0], title: row[1], author: row[2] })));
|
}
|
let cnt = 0;
|
let bookCnt = 0;
|
for (const book of books) {
|
cnt++;
|
if (cnt % 1000 == 0) {
|
console.log('当前%d', cnt);
|
}
|
if (!book.title) { continue; }
|
const resp = await client.search({
|
index: 'books',
|
size: 1,
|
query: {
|
bool: book.author ? {
|
must: [
|
{
|
match: {
|
title: { query: book.title }
|
}
|
},
|
{
|
match: {
|
author: { query: book.author }
|
}
|
}
|
]
|
} : undefined,
|
match: book.author ? undefined : {
|
'title': book.title,
|
}
|
}
|
});
|
if ((resp.hits.max_score ?? 0) < 25) { continue; }
|
const isbn = resp.hits.hits[0]?._source?.isbn;
|
if (isbn) {
|
book.isbn = isbn;
|
book.title2 = resp.hits.hits[0]?._source?.title;
|
book.author2 = resp.hits.hits[0]?._source?.author;
|
}
|
bookCnt++;
|
if (bookCnt % 1000 == 0) {
|
console.log('已匹配:%s', bookCnt);
|
}
|
}
|
console.log(bookCnt);
|
saveToDb(books);
|
}
|
|
async function saveToDb(books) {
|
const db = new sqlite3.Database("./book-list-result.db");
|
db.serialize(function () {
|
db.run("CREATE TABLE IF NOT EXISTS t_books (id TEXT PRIMARY KEY, Title TEXT, Author TEXT, ISBN TEXT, Title2 TEXT, Author2 TEXT)");
|
db.run("BEGIN TRANSACTION");
|
const stmt = db.prepare("INSERT INTO t_books (id, Title, Author, ISBN, Title2, Author2) VALUES (?,?,?,?,?,?)");
|
for (const book of books.filter(book => book.isbn)) {
|
stmt.run([book.id, book.title, book.author, book.isbn, book.title2, book.author2]);
|
}
|
stmt.finalize();
|
db.run("COMMIT");
|
db.close();
|
});
|
}
|
|
// importBooToEs();
|
searchBook();
|