From ff0570977629f2f0cfae4b0c4903fff0d7304934 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期二, 11 六月 2024 19:25:39 +0800 Subject: [PATCH] 已下载图书判断逻辑修改,增加从文件读取已下载图书列表 --- src/main.mjs | 30 ++++++++++++++++++++++++++---- 1 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/main.mjs b/src/main.mjs index 0e897a6..a53edc5 100644 --- a/src/main.mjs +++ b/src/main.mjs @@ -4,6 +4,7 @@ import proxy from "selenium-webdriver/proxy.js"; import axios from "axios"; import * as fs from "fs"; +import path from "path"; import { Worker, isMainThread, parentPort, workerData } from 'worker_threads'; import { HttpsProxyAgent } from "https-proxy-agent"; import { resolve } from "path"; @@ -307,6 +308,11 @@ }); } +function isAlreadyDownloaded(book) { + const id = `${book.id} ${book.isbn}`; + return alreadyDownloadedBooks.includes(id); +} + async function downloadBooks(books) { driver = await createDriver(); for (const book of books) { @@ -315,6 +321,10 @@ break; } bookCount++; + if (isAlreadyDownloaded(book)) { + skipCount++; + continue; + } if (book.state && (book.state === "没有搜索结果" || book.state === "没有pdf或text文件" || book.state === "下载完成")) { // 跳过没有搜索结果或没有pdf或text文件的书籍 skipCount++; @@ -343,14 +353,14 @@ continue; } // 等一段时间再打开详情页 - sleep(getRandomNumber(3000, 10000)); + sleep(getRandomNumber(1000, 30000)); // 打开详情页 await openBookDetailPage(book, detailPageUrl); // 获取下载链接 const url = await getDownloadUrl(book); if (!url) { continue; } // 等待一段时间再下载 - await sleep(getRandomNumber(3000, 10000)); + await sleep(getRandomNumber(1000, 30000)); // 下载文件 try { await downloadFile(book, url); @@ -358,7 +368,7 @@ } catch (e) { } successCount++; // 等一段时间再下一个 - sleep(getRandomNumber(3000, 10000)); + sleep(getRandomNumber(1000, 30000)); } } @@ -416,6 +426,16 @@ let skipCount = 0; // chrome驱动 let driver; +let alreadyDownloadedBooks = []; + +function getAlreadyDownloadedBooks() { + const text = 
fs.readFileSync('./alreadyDownloadedBooks.txt', 'utf-8'); + const books = text.replace(/\r/g, '').split('\n').map(it => it.trim()).filter(it => it); + const files = fs.readdirSync('./downloads'); + books.push(...files); + return books.map(it => path.basename(it, path.extname(it)).trim()); +} + function main() { initLogger(); const books = getBooksFromExcel(config.startRow, config.endRow); @@ -440,6 +460,7 @@ // 多进程执行 if (isMainThread) { initLogger(); + const alreadyDownloadedBooks = getAlreadyDownloadedBooks(); console.log(`线程数:${config.threadSize}, 开始行:${config.startRow}, 结束行:${config.endRow}`); let startRow = config.startRow; let endRow = config.endRow; @@ -452,7 +473,7 @@ if (er > endRow) { er = endRow; } - const worker = new Worker("./src/main.mjs", { workerData: { startRow: sr, endRow: er } }); + const worker = new Worker("./src/main.mjs", { workerData: { startRow: sr, endRow: er, alreadyDownloadedBooks } }); worker.on("message", (message) => { if (message.type === 'books') { finishBooks.push(...message.data); @@ -466,6 +487,7 @@ } else { config.startRow = workerData.startRow; config.endRow = workerData.endRow; + alreadyDownloadedBooks = workerData.alreadyDownloadedBooks; main(); } -- Gitblit v1.9.1