From 6a6078c5d393bffda15e682994811468ff86963e Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期二, 30 七月 2024 01:40:34 +0800 Subject: [PATCH] 增加数据库图书ISBN补全脚本 --- src/book-list-download2.mjs | 104 +++++++++++++++++++++++++++++++++++---------------- 1 files changed, 71 insertions(+), 33 deletions(-) diff --git a/src/book-list-download2.mjs b/src/book-list-download2.mjs index e43cc2f..fea054e 100644 --- a/src/book-list-download2.mjs +++ b/src/book-list-download2.mjs @@ -53,6 +53,19 @@ } } +function addBooks(books) { + db.run("begin transaction"); + for (const book of books) { + db.run("INSERT INTO t_books (Title, Author, Year, Publisher, ISBN) VALUES (?,?,?,?,?)", + [book.title, book.author, book.year, book.publisher, book.isbn], (err) => { + if (!err) { + downloadCnt++; + } + }); + } + db.run("commit"); +} + function addBook(book) { db.run("INSERT INTO t_books (Title, Author, Year, Publisher, ISBN) VALUES (?,?,?,?,?)", [book.title, book.author, book.year, book.publisher, book.isbn], (err) => { @@ -192,7 +205,7 @@ const html = cheerio.load(resp.data); const bookDivs = html('#search-results-list > div > div.col-md-8.col-xs-9.div-o'); for (const _bookDiv of bookDivs) { - const bookDiv=cheerio.load(_bookDiv) + const bookDiv = cheerio.load(_bookDiv) const book = {}; const h3 = bookDiv('h3'); if (h3) { @@ -256,6 +269,26 @@ return Math.random() * (max - min) + min; } +function importFromExcel() { + initDb(); + const file = './76w.xlsx'; + const workSheets = xlsx.parse(file); + const sheet = workSheets[0]; + sheet.data.shift(); + const books = []; + sheet.data.forEach((row) => { + const title = row[0]; + const author = row[1] + const year = row[2]; + const publisher = row[3]; + const isbn = row[4].split(',').sort((a, b) => b.length - a.length)[0]; + + books.push({ title, author, year, publisher, isbn }); + }); + addBooks(books); + closeDb(); +} + // 寮�濮嬫椂闂� const startTime = Date.now(); // 鍥句功鏁伴噺 @@ -263,7 +296,7 @@ // chrome椹卞姩 /** @type {WebDriver} */ let driver; -function main() { +function startTask() { initLogger(); getBook() .catch(e => { @@ -283,38 +316,43 @@ fs.mkdirSync('D:\\book-list-crawler-cache', { recursive: true }); } -// 澶氳繘绋嬫墽琛� -if (isMainThread) { - console.log(`绾跨▼鏁帮細${config.threadSize}`); - initDb(); - let finishCnt = 0; - const threadSize = config.threadSize; - const bookNames = fs.readFileSync('./bookNames.txt', 'utf8').replace(/\r/, '').split('\n'); - for (let i = 0; i < threadSize; i++) { - const worker = new Worker("./src/book-list-download2.mjs", { workerData: {} }); - worker.on("message", (message) => { - if (message.type === 'book') { - addBook(message.data); - } - else if (message.type === 'getBookName') { - const bookName = bookNames.shift(); - if (bookName) - console.log(bookName, `鍓╀簬锛�${bookNames.length}`); - worker.postMessage({ type: "bookName", data: bookName, threadId: message.threadId }); - } else if (message.type === 'finish') { - finishCnt++; - if (finishCnt == threadSize) { - closeDb(); - console.log(`鍏变笅杞�${downloadCnt}鏈紝鑰楁椂锛� ${msFormat(Date.now() - startTime)}銆俙); +function main() { + // 澶氳繘绋嬫墽琛� + if (isMainThread) { + console.log(`绾跨▼鏁帮細${config.threadSize}`); + initDb(); + let finishCnt = 0; + const threadSize = config.threadSize; + const bookNames = fs.readFileSync('./bookNames.txt', 'utf8').replace(/\r/, '').split('\n'); + for (let i = 0; i < threadSize; i++) { + const worker = new Worker("./src/book-list-download2.mjs", { workerData: {} }); + worker.on("message", (message) => { + if (message.type === 'book') { + addBook(message.data); } - } + else if (message.type === 'getBookName') { + const bookName = bookNames.shift(); + if (bookName) + console.log(bookName, `鍓╀簬锛�${bookNames.length}锛屽凡鑾峰彇${downloadCnt}鏈琡); + worker.postMessage({ type: "bookName", data: bookName, threadId: message.threadId }); + } else if (message.type === 'finish') { + finishCnt++; + if (finishCnt == threadSize) { + closeDb(); + console.log(`鍏变笅杞�${downloadCnt}鏈紝鑰楁椂锛� ${msFormat(Date.now() - startTime)}銆俙); + } + } + }); + } + process.on('SIGINT', () => { + closeDb(); + console.log(`杩涚▼琚墜鍔ㄧ粨鏉燂紝鍏变笅杞�${downloadCnt}鏈紝鑰楁椂锛� ${msFormat(Date.now() - startTime)}銆俙); + process.exit(0); }); + } else { + startTask(); } - process.on('SIGINT', () => { - closeDb(); - console.log(`杩涚▼琚墜鍔ㄧ粨鏉燂紝鍏变笅杞�${downloadCnt}鏈紝鑰楁椂锛� ${msFormat(Date.now() - startTime)}銆俙); - process.exit(0); - }); -} else { - main(); } + +// importFromExcel(); +main(); \ No newline at end of file -- Gitblit v1.9.1