| | |
| | | } |
| | | } |
| | | |
/**
 * Inserts a batch of books into `t_books`, bracketed by a single
 * `begin transaction` / `commit` pair.
 *
 * Every successful insert increments the module-level `downloadCnt` counter.
 * A failed insert is logged and skipped; it does not stop the remaining rows.
 *
 * @param {Array<{title: *, author: *, year: *, publisher: *, isbn: *}>} books
 *   Records to insert. A missing or empty list is a no-op.
 * @returns {void}
 */
function addBooks(books) {
  // Nothing to write: do not open and commit an empty transaction.
  if (!books || books.length === 0) {
    return;
  }

  // NOTE(review): begin / inserts / commit are only grouped correctly if `db`
  // executes statements in the order they are issued — confirm that `db` is
  // put into a serialized mode where it is opened.
  db.run("begin transaction");
  for (const book of books) {
    db.run("INSERT INTO t_books (Title, Author, Year, Publisher, ISBN) VALUES (?,?,?,?,?)",
      [book.title, book.author, book.year, book.publisher, book.isbn], (err) => {
        if (err) {
          // Previously dropped silently; report it so lost rows are visible.
          console.error(`Failed to insert book "${book.title}":`, err);
          return;
        }
        downloadCnt++;
      });
  }
  db.run("commit");
}
| | | |
| | | function addBook(book) { |
| | | db.run("INSERT INTO t_books (Title, Author, Year, Publisher, ISBN) VALUES (?,?,?,?,?)", |
| | | [book.title, book.author, book.year, book.publisher, book.isbn], (err) => { |
| | |
| | | const html = cheerio.load(resp.data); |
| | | const bookDivs = html('#search-results-list > div > div.col-md-8.col-xs-9.div-o'); |
| | | for (const _bookDiv of bookDivs) { |
| | | const bookDiv=cheerio.load(_bookDiv) |
| | | const bookDiv = cheerio.load(_bookDiv) |
| | | const book = {}; |
| | | const h3 = bookDiv('h3'); |
| | | if (h3) { |
| | |
| | | return Math.random() * (max - min) + min; |
| | | } |
| | | |
/**
 * Picks the longest entry from a comma-separated ISBN cell.
 *
 * The cell is coerced to a string first, so a numeric cell does not break
 * `split`. A missing cell yields an empty string. When several entries share
 * the maximum length, the first one wins.
 *
 * @param {*} cell Raw cell value from the worksheet row.
 * @returns {string} The longest comma-separated entry, or `''`.
 */
function pickLongestIsbn(cell) {
  if (cell == null) {
    return '';
  }
  return String(cell)
    .split(',')
    .reduce((longest, candidate) => (candidate.length > longest.length ? candidate : longest), '');
}

/**
 * Reads the first worksheet of `./76w.xlsx` and stores its rows as books.
 *
 * Columns are read positionally: title, author, year, publisher, ISBN list.
 * Blank rows are skipped. The database is closed even if parsing or
 * insertion throws.
 *
 * @returns {void}
 */
function importFromExcel() {
  initDb();
  try {
    const file = './76w.xlsx';
    const workSheets = xlsx.parse(file);
    const sheet = workSheets[0];
    // Drop the first row (presumably the column headers — confirm against the file).
    sheet.data.shift();

    const books = [];
    for (const row of sheet.data) {
      // Blank rows have no cells to read; skip them.
      if (!Array.isArray(row) || row.length === 0) {
        continue;
      }
      const [title, author, year, publisher, isbnCell] = row;
      books.push({ title, author, year, publisher, isbn: pickLongestIsbn(isbnCell) });
    }
    addBooks(books);
  } finally {
    closeDb();
  }
}
| | | |
| | | // Start time; used later to report the total elapsed time |
| | | const startTime = Date.now(); |
| | | // Book count |
| | |
| | | // Chrome driver |
| | | /** @type {WebDriver} */ |
| | | let driver; |
| | | function main() { |
| | | function startTask() { |
| | | initLogger(); |
| | | getBook() |
| | | .catch(e => { |
| | |
| | | fs.mkdirSync('D:\\book-list-crawler-cache', { recursive: true }); |
| | | } |
| | | |
| | | // 多进程执行 |
| | | if (isMainThread) { |
| | | console.log(`线程数:${config.threadSize}`); |
| | | initDb(); |
| | | let finishCnt = 0; |
| | | const threadSize = config.threadSize; |
| | | const bookNames = fs.readFileSync('./bookNames.txt', 'utf8').replace(/\r/, '').split('\n'); |
| | | for (let i = 0; i < threadSize; i++) { |
| | | const worker = new Worker("./src/book-list-download2.mjs", { workerData: {} }); |
| | | worker.on("message", (message) => { |
| | | if (message.type === 'book') { |
| | | addBook(message.data); |
| | | } |
| | | else if (message.type === 'getBookName') { |
| | | const bookName = bookNames.shift(); |
| | | if (bookName) |
| | | console.log(bookName, `剩于:${bookNames.length}`); |
| | | worker.postMessage({ type: "bookName", data: bookName, threadId: message.threadId }); |
| | | } else if (message.type === 'finish') { |
| | | finishCnt++; |
| | | if (finishCnt == threadSize) { |
| | | closeDb(); |
| | | console.log(`共下载${downloadCnt}本,耗时: ${msFormat(Date.now() - startTime)}。`); |
| | | function main() { |
| | | // 多进程执行 |
| | | if (isMainThread) { |
| | | console.log(`线程数:${config.threadSize}`); |
| | | initDb(); |
| | | let finishCnt = 0; |
| | | const threadSize = config.threadSize; |
| | | const bookNames = fs.readFileSync('./bookNames.txt', 'utf8').replace(/\r/, '').split('\n'); |
| | | for (let i = 0; i < threadSize; i++) { |
| | | const worker = new Worker("./src/book-list-download2.mjs", { workerData: {} }); |
| | | worker.on("message", (message) => { |
| | | if (message.type === 'book') { |
| | | addBook(message.data); |
| | | } |
| | | } |
| | | else if (message.type === 'getBookName') { |
| | | const bookName = bookNames.shift(); |
| | | if (bookName) |
| | | console.log(bookName, `剩于:${bookNames.length},已获取${downloadCnt}本`); |
| | | worker.postMessage({ type: "bookName", data: bookName, threadId: message.threadId }); |
| | | } else if (message.type === 'finish') { |
| | | finishCnt++; |
| | | if (finishCnt == threadSize) { |
| | | closeDb(); |
| | | console.log(`共下载${downloadCnt}本,耗时: ${msFormat(Date.now() - startTime)}。`); |
| | | } |
| | | } |
| | | }); |
| | | } |
| | | process.on('SIGINT', () => { |
| | | closeDb(); |
| | | console.log(`进程被手动结束,共下载${downloadCnt}本,耗时: ${msFormat(Date.now() - startTime)}。`); |
| | | process.exit(0); |
| | | }); |
| | | } else { |
| | | startTask(); |
| | | } |
| | | process.on('SIGINT', () => { |
| | | closeDb(); |
| | | console.log(`进程被手动结束,共下载${downloadCnt}本,耗时: ${msFormat(Date.now() - startTime)}。`); |
| | | process.exit(0); |
| | | }); |
| | | } else { |
| | | main(); |
| | | } |
| | | |
| | | // importFromExcel(); |
| | | main(); |