| | |
| | | import proxy from "selenium-webdriver/proxy.js"; |
| | | import axios from "axios"; |
| | | import * as fs from "fs"; |
| | | import path from "path"; |
| | | import { Worker, isMainThread, parentPort, workerData } from 'worker_threads'; |
| | | import { HttpsProxyAgent } from "https-proxy-agent"; |
| | | import { resolve } from "path"; |
| | |
| | | }); |
| | | } |
| | | |
/**
 * Reports whether a book is already present in the downloaded-books list.
 *
 * The lookup key is "<id> <isbn>". It is trimmed before the comparison
 * because the names produced by getAlreadyDownloadedBooks() are trimmed;
 * without this, a book with an empty isbn would yield "<id> " (trailing
 * space) and could never match its stored entry.
 *
 * @param {{id: *, isbn: *}} book - Book record; `id` and `isbn` are
 *   interpolated into the lookup key.
 * @returns {boolean} True when the key is found in the module-level
 *   `alreadyDownloadedBooks` list.
 */
function isAlreadyDownloaded(book) {
  const id = `${book.id} ${book.isbn}`.trim();
  return alreadyDownloadedBooks.includes(id);
}
| | | |
| | | async function downloadBooks(books) { |
| | | driver = await createDriver(); |
| | | for (const book of books) { |
| | |
| | | break; |
| | | } |
| | | bookCount++; |
| | | if (isAlreadyDownloaded(book)) { |
| | | skipCount++; |
| | | continue; |
| | | } |
| | | if (book.state && (book.state === "没有搜索结果" || book.state === "没有pdf或text文件" || book.state === "下载完成")) { |
| | | // Skip books whose recorded state is "no search results", "no pdf or text file" or "download complete". |
| | | skipCount++; |
| | |
| | | continue; |
| | | } |
| | | // Wait a while before opening the detail page. NOTE(review): these sleep() calls are not awaited, unlike the ones before the download; if sleep() returns a promise they do not pause the loop — confirm. |
| | | sleep(getRandomNumber(3000, 10000)); |
| | | sleep(getRandomNumber(1000, 30000)); |
| | | // Open the detail page. |
| | | await openBookDetailPage(book, detailPageUrl); |
| | | // Get the download link; books without one are skipped. |
| | | const url = await getDownloadUrl(book); |
| | | if (!url) { continue; } |
| | | // Wait a while before downloading. |
| | | await sleep(getRandomNumber(3000, 10000)); |
| | | await sleep(getRandomNumber(1000, 30000)); |
| | | // Download the file. NOTE(review): the catch below discards any error and successCount is incremented afterwards regardless — confirm this is intended. |
| | | try { |
| | | await downloadFile(book, url); |
| | |
| | | } catch (e) { } |
| | | successCount++; |
| | | // Wait a while before moving on to the next book. NOTE(review): not awaited — see the note on the first wait. |
| | | sleep(getRandomNumber(3000, 10000)); |
| | | sleep(getRandomNumber(1000, 30000)); |
| | | } |
| | | } |
| | | |
| | |
| | | let skipCount = 0; |
| | | // Chrome driver |
| | | let driver; |
| | | let alreadyDownloadedBooks = []; |
| | | |
/**
 * Builds the list of names of books that have already been downloaded.
 *
 * Names come from two places: the non-blank lines of
 * `./alreadyDownloadedBooks.txt` and the entries of the `./downloads`
 * directory. Every name is reduced to its base name without extension and
 * trimmed.
 *
 * A missing list file or downloads directory is treated as empty, so a
 * first run with nothing recorded yet does not crash. Any other I/O error
 * is rethrown.
 *
 * @returns {string[]} Names from the list file first, then from the
 *   downloads directory.
 */
function getAlreadyDownloadedBooks() {
  // Runs `read` and substitutes `fallback` only when the path does not exist.
  const readOrFallback = (read, fallback) => {
    try {
      return read();
    } catch (err) {
      if (err.code === 'ENOENT') {
        return fallback;
      }
      throw err;
    }
  };

  const text = readOrFallback(() => fs.readFileSync('./alreadyDownloadedBooks.txt', 'utf-8'), '');
  const files = readOrFallback(() => fs.readdirSync('./downloads'), []);

  const listed = text
    .replace(/\r/g, '')
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line);

  // Strip directory and extension so list entries and file names compare equal.
  return [...listed, ...files].map((name) => path.basename(name, path.extname(name)).trim());
}
| | | |
| | | function main() { |
| | | initLogger(); |
| | | const books = getBooksFromExcel(config.startRow, config.endRow); |
| | |
| | | // Run in parallel using worker threads |
| | | if (isMainThread) { |
| | | initLogger(); |
| | | const alreadyDownloadedBooks = getAlreadyDownloadedBooks(); |
| | | console.log(`线程数:${config.threadSize}, 开始行:${config.startRow}, 结束行:${config.endRow}`); |
| | | let startRow = config.startRow; |
| | | let endRow = config.endRow; |
| | |
| | | if (er > endRow) { |
| | | er = endRow; |
| | | } |
| | | const worker = new Worker("./src/main.mjs", { workerData: { startRow: sr, endRow: er } }); |
| | | const worker = new Worker("./src/main.mjs", { workerData: { startRow: sr, endRow: er, alreadyDownloadedBooks } }); |
| | | worker.on("message", (message) => { |
| | | if (message.type === 'books') { |
| | | finishBooks.push(...message.data); |
| | |
| | | } else { |
| | | config.startRow = workerData.startRow; |
| | | config.endRow = workerData.endRow; |
| | | alreadyDownloadedBooks = workerData.alreadyDownloadedBooks; |
| | | main(); |
| | | } |
| | | |