From ff0570977629f2f0cfae4b0c4903fff0d7304934 Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: 星期二, 11 六月 2024 19:25:39 +0800
Subject: [PATCH] 已下载图书判断逻辑修改,增加从文件读取已下载图书列表

---
 config.json                |    2 +-
 alreadyDownloadedBooks.txt |    0 
 src/main.mjs               |   30 ++++++++++++++++++++++++++----
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/alreadyDownloadedBooks.txt b/alreadyDownloadedBooks.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/alreadyDownloadedBooks.txt
diff --git a/config.json b/config.json
index cacd154..3a54ac3 100644
--- a/config.json
+++ b/config.json
@@ -9,7 +9,7 @@
   "threadSize": 4,
 
   "//定时结束时间,单位分钟,0表示不设置定时结束时间": "//",
-  "endOfTime": 60,
+  "endOfTime": 0,
   
   "//无头模式": "//",
   "headless": true,
diff --git a/src/main.mjs b/src/main.mjs
index 0e897a6..a53edc5 100644
--- a/src/main.mjs
+++ b/src/main.mjs
@@ -4,6 +4,7 @@
 import proxy from "selenium-webdriver/proxy.js";
 import axios from "axios";
 import * as fs from "fs";
+import path from "path";
 import { Worker, isMainThread, parentPort, workerData } from 'worker_threads';
 import { HttpsProxyAgent } from "https-proxy-agent";
 import { resolve } from "path";
@@ -307,6 +308,11 @@
   });
 }
 
+function isAlreadyDownloaded(book) {
+  // A book counts as downloaded when its "<id> <isbn>" key is present in alreadyDownloadedBooks.
+  return alreadyDownloadedBooks.includes(`${book.id} ${book.isbn}`);
+}
+
 async function downloadBooks(books) {
   driver = await createDriver();
   for (const book of books) {
@@ -315,6 +321,10 @@
       break;
     }
     bookCount++;
+    if (isAlreadyDownloaded(book)) {
+      skipCount++;
+      continue;
+    }
     if (book.state && (book.state === "没有搜索结果" || book.state === "没有pdf或text文件" || book.state === "下载完成")) {
       // 跳过没有搜索结果或没有pdf或text文件的书籍
       skipCount++;
@@ -343,14 +353,14 @@
       continue;
     }
     // 等一段时间再打开详情页
-    sleep(getRandomNumber(3000, 10000));
+    await sleep(getRandomNumber(1000, 30000)); // awaited so the pause actually elapses before the detail page opens
     // 打开详情页
     await openBookDetailPage(book, detailPageUrl);
     // 获取下载链接
     const url = await getDownloadUrl(book);
     if (!url) { continue; }
     // 等待一段时间再下载
-    await sleep(getRandomNumber(3000, 10000));
+    await sleep(getRandomNumber(1000, 30000));
     // 下载文件
     try {
       await downloadFile(book, url);
@@ -358,7 +368,7 @@
     } catch (e) { }
     successCount++;
     // 等一段时间再下一个
-    sleep(getRandomNumber(3000, 10000));
+    await sleep(getRandomNumber(1000, 30000)); // awaited so the pause actually elapses before the next book
   }
 }
 
@@ -416,6 +426,16 @@
 let skipCount = 0;
 // chrome驱动
 let driver;
+let alreadyDownloadedBooks = [];
+
+function getAlreadyDownloadedBooks() {
+  // Collects extension-less names from the list file and ./downloads; a missing file or folder counts as empty.
+  const text = fs.existsSync('./alreadyDownloadedBooks.txt') ? fs.readFileSync('./alreadyDownloadedBooks.txt', 'utf-8') : '';
+  const books = text.replace(/\r/g, '').split('\n').map(it => it.trim()).filter(it => it);
+  const files = fs.existsSync('./downloads') ? fs.readdirSync('./downloads') : [];
+  return books.concat(files).map(it => path.basename(it, path.extname(it)).trim()); // concat: no spread-argument limit
+}
+
 function main() {
   initLogger();
   const books = getBooksFromExcel(config.startRow, config.endRow);
@@ -440,6 +460,7 @@
 // 多进程执行
 if (isMainThread) {
   initLogger();
+  const alreadyDownloadedBooks = getAlreadyDownloadedBooks();
   console.log(`线程数:${config.threadSize}, 开始行:${config.startRow}, 结束行:${config.endRow}`);
   let startRow = config.startRow;
   let endRow = config.endRow;
@@ -452,7 +473,7 @@
     if (er > endRow) {
       er = endRow;
     }
-    const worker = new Worker("./src/main.mjs", { workerData: { startRow: sr, endRow: er } });
+    const worker = new Worker("./src/main.mjs", { workerData: { startRow: sr, endRow: er, alreadyDownloadedBooks } });
     worker.on("message", (message) => {
       if (message.type === 'books') {
         finishBooks.push(...message.data);
@@ -466,6 +487,7 @@
 } else {
   config.startRow = workerData.startRow;
   config.endRow = workerData.endRow;
+  alreadyDownloadedBooks = workerData.alreadyDownloadedBooks;
   main();
 }
 

--
Gitblit v1.9.1