New file |
| | |
| | | import { Client } from '@elastic/elasticsearch'; |
| | | import xlsx from "node-xlsx"; |
| | | const client = new Client({ node: 'http://localhost:9200' }); |
| | | import sqlite3 from "sqlite3"; |
| | | |
| | | async function createIndex() { |
| | | try { |
| | | if (await client.indices.exists({ index: 'books' })) { return; } |
| | | await client.indices.create({ |
| | | index: 'books', |
| | | body: { |
| | | mappings: { |
| | | properties: { |
| | | id: { type: 'keyword' }, |
| | | title: { type: 'text' }, |
| | | author: { type: 'text' }, |
| | | isbn: { type: 'text' }, |
| | | }, |
| | | }, |
| | | } |
| | | }); |
| | | } catch (e) { |
| | | console.error(e); |
| | | } |
| | | } |
| | | |
| | | function getBooksFromDb() { |
| | | return new Promise((resolve, reject) => { |
| | | const db = new sqlite3.Database("./book-list.db"); |
| | | db.all("SELECT Title as title,Author as author,ISBN as isbn FROM t_books", (err, rows) => { |
| | | if (err) { |
| | | console.error(err); |
| | | } else { |
| | | resolve(rows); |
| | | } |
| | | db.close(); |
| | | }); |
| | | }); |
| | | } |
| | | |
| | | async function indexBooks() { |
| | | const books = await getBooksFromDb(); |
| | | const bookGroups = []; |
| | | for (let i = 0; i < books.length; i += 1000) { |
| | | bookGroups.push(books.slice(i, i + 1000)); |
| | | } |
| | | |
| | | for (const bookGroup of bookGroups) { |
| | | const body = bookGroup.map(book => ([{ index: { _index: 'books', _id: book.id } }, book])).flat(); |
| | | try { |
| | | const response = await client.bulk({ body }); |
| | | console.log(response.errors); |
| | | } catch (e) { |
| | | console.error(e); |
| | | } |
| | | } |
| | | } |
| | | |
| | | async function importBooToEs() { |
| | | await createIndex(); |
| | | await indexBooks(); |
| | | } |
| | | |
| | | async function searchBook() { |
| | | const books = []; |
| | | const workSheets = xlsx.parse("./fictionnoisbn.xlsx"); |
| | | for (const sheet of workSheets) { |
| | | books.push(...sheet.data.map(row => ({ id: row[0], title: row[1], author: row[2] }))); |
| | | } |
| | | let cnt = 0; |
| | | let bookCnt = 0; |
| | | for (const book of books) { |
| | | cnt++; |
| | | if (cnt % 1000 == 0) { |
| | | console.log('当前%d', cnt); |
| | | } |
| | | if (!book.title) { continue; } |
| | | const resp = await client.search({ |
| | | index: 'books', |
| | | size: 1, |
| | | query: { |
| | | bool: book.author ? { |
| | | must: [ |
| | | { |
| | | match: { |
| | | title: { query: book.title } |
| | | } |
| | | }, |
| | | { |
| | | match: { |
| | | author: { query: book.author } |
| | | } |
| | | } |
| | | ] |
| | | } : undefined, |
| | | match: book.author ? undefined : { |
| | | 'title': book.title, |
| | | } |
| | | } |
| | | }); |
| | | if ((resp.hits.max_score ?? 0) < 25) { continue; } |
| | | const isbn = resp.hits.hits[0]?._source?.isbn; |
| | | if (isbn) { |
| | | book.isbn = isbn; |
| | | book.title2 = resp.hits.hits[0]?._source?.title; |
| | | book.author2 = resp.hits.hits[0]?._source?.author; |
| | | } |
| | | bookCnt++; |
| | | if (bookCnt % 1000 == 0) { |
| | | console.log('已匹配:%s', bookCnt); |
| | | } |
| | | } |
| | | console.log(bookCnt); |
| | | saveToDb(books); |
| | | } |
| | | |
| | | async function saveToDb(books) { |
| | | const db = new sqlite3.Database("./book-list-result.db"); |
| | | db.serialize(function () { |
| | | db.run("CREATE TABLE IF NOT EXISTS t_books (id TEXT PRIMARY KEY, Title TEXT, Author TEXT, ISBN TEXT, Title2 TEXT, Author2 TEXT)"); |
| | | db.run("BEGIN TRANSACTION"); |
| | | const stmt = db.prepare("INSERT INTO t_books (id, Title, Author, ISBN, Title2, Author2) VALUES (?,?,?,?,?,?)"); |
| | | for (const book of books.filter(book => book.isbn)) { |
| | | stmt.run([book.id, book.title, book.author, book.isbn, book.title2, book.author2]); |
| | | } |
| | | stmt.finalize(); |
| | | db.run("COMMIT"); |
| | | db.close(); |
| | | }); |
| | | } |
| | | |
| | | // importBooToEs(); |
| | | searchBook(); |