From b1e4e03c682ecff03aa6a6045eea234082acbd59 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期二, 11 六月 2024 23:10:02 +0800 Subject: [PATCH] 修改文件下载超时时长为10分钟 --- src/main.mjs | 86 ++++++++++++++++++++++++++++++++++--------- 1 files changed, 68 insertions(+), 18 deletions(-) diff --git a/src/main.mjs b/src/main.mjs index 0e897a6..cc3d0e5 100644 --- a/src/main.mjs +++ b/src/main.mjs @@ -4,7 +4,8 @@ import proxy from "selenium-webdriver/proxy.js"; import axios from "axios"; import * as fs from "fs"; -import { Worker, isMainThread, parentPort, workerData } from 'worker_threads'; +import path from "path"; +import { Worker, isMainThread, parentPort, workerData, threadId } from 'worker_threads'; import { HttpsProxyAgent } from "https-proxy-agent"; import { resolve } from "path"; @@ -18,7 +19,7 @@ if (!fs.existsSync('./logs')) { fs.mkdirSync('./logs', { recursive: true }); } - logFile = fs.createWriteStream(`./logs/logs-${config.startRow}-${config.endRow}.log`, { flags: 'a', encoding: 'utf8' }); + logFile = fs.createWriteStream(`./logs/logs-${config.startRow}-${config.endRow}-thread${threadId}.log`, { flags: 'a', encoding: 'utf8' }); console.log = function (...text) { text = `${new Date().toLocaleString()} ${text.join(' ') ?? ''}`; _log(text); @@ -258,8 +259,13 @@ return; } await retry(() => { + const timeoutTime = 10 * 60 * 1000; + const source = axios.CancelToken.source(); + const timeout = setTimeout(() => { + source.cancel("timeout"); + }, timeoutTime); return new Promise((resolve, reject) => myAxios - .get(url, { responseType: "stream" }) + .get(url, { responseType: "stream", timeout: timeoutTime, cancelToken: source.token }) .then((response) => { const len = response.headers['content-length']; if (ext !== "pdf" && ext !== "txt" && len > 200 * 1024 * 1024) { @@ -274,6 +280,7 @@ const out = fs.createWriteStream(filepath); stream.pipe(out); stream.on("end", () => { + clearTimeout(timeout); book.state = `涓嬭浇瀹屾垚`; book.format = ext; book.file = filepath; @@ -282,6 +289,7 @@ resolve(true); }); stream.on("error", (err) => { + clearTimeout(timeout); console.error(err); book.state = "涓嬭浇澶辫触"; book.url = url; @@ -296,6 +304,7 @@ }); }) .catch((e) => { + clearTimeout(timeout); console.error(e); book.state = "涓嬭浇澶辫触"; book.url = url; @@ -307,14 +316,43 @@ }); } +function isAlreadyDownloaded(book) { + const id = `${book.id} ${book.isbn}`; + return alreadyDownloadedBooks.includes(id); +} + +function nextBook() { + return new Promise(resolve => { + const cb = (message) => { + if (message.type === 'book') { + resolve(message.data); + parentPort.removeListener('message', cb); + } + }; + parentPort.on('message', cb); + parentPort.postMessage({ type: 'get-book', threadId }); + + }); +} + async function downloadBooks(books) { driver = await createDriver(); - for (const book of books) { + + for (; ;) { + const book = await nextBook(); + if (!book) { + break; + } + books.push(book); if (config.endOfTime && Date.now() - startTime > 1000 * 60 * config.endOfTime) { // 瀹氭椂閫�鍑� break; } bookCount++; + if (isAlreadyDownloaded(book)) { + skipCount++; + continue; + } if (book.state && (book.state === "娌℃湁鎼滅储缁撴灉" || book.state === "娌℃湁pdf鎴杢ext鏂囦欢" || book.state === "涓嬭浇瀹屾垚")) { // 璺宠繃娌℃湁鎼滅储缁撴灉鎴栨病鏈塸df鎴杢ext鏂囦欢鐨勪功绫� skipCount++; @@ -343,14 +381,14 @@ continue; } // 绛変竴娈垫椂闂村啀鎵撳紑璇︽儏椤� - sleep(getRandomNumber(3000, 10000)); + sleep(getRandomNumber(1000, 30000)); // 鎵撳紑璇︽儏椤� await openBookDetailPage(book, detailPageUrl); // 鑾峰彇涓嬭浇閾炬帴 const url = await getDownloadUrl(book); if (!url) { continue; } // 绛夊緟涓�娈垫椂闂村啀涓嬭浇 - await sleep(getRandomNumber(3000, 10000)); + await sleep(getRandomNumber(1000, 30000)); // 涓嬭浇鏂囦欢 try { await downloadFile(book, url); @@ -358,7 +396,7 @@ } catch (e) { } successCount++; // 绛変竴娈垫椂闂村啀涓嬩竴涓� - sleep(getRandomNumber(3000, 10000)); + sleep(getRandomNumber(1000, 30000)); } } @@ -378,7 +416,7 @@ } const buffer = xlsx.build([{ name: "Sheet1", data }]); - fs.writeFile("./銆愮浜屾壒浜屾澶勭悊鍚庛�戜氦浠樻竻鍗�.xlsx", buffer, (err) => { }); + fs.writeFileSync("./銆愮浜屾壒浜屾澶勭悊鍚庛�戜氦浠樻竻鍗�.xlsx", buffer, (err) => { }); console.log("淇濆瓨瀹屾垚: ./銆愮浜屾壒浜屾澶勭悊鍚庛�戜氦浠樻竻鍗�.xlsx"); } @@ -416,9 +454,19 @@ let skipCount = 0; // chrome椹卞姩 let driver; +let alreadyDownloadedBooks = []; + +function getAlreadyDownloadedBooks() { + const text = fs.readFileSync('./alreadyDownloadedBooks.txt', 'utf-8'); + const books = text.replace(/\r/g, '').split('\n').map(it => it.trim()).filter(it => it); + const files = fs.readdirSync('./downloads'); + books.push(...files); + return books.map(it => path.basename(it, path.extname(it)).trim()); +} + function main() { initLogger(); - const books = getBooksFromExcel(config.startRow, config.endRow); + const books = []; downloadBooks(books) .then(() => { console.log(`鍏ㄩ儴瀹屾垚锛屽叡涓嬭浇${bookCount}鏈紝鎴愬姛涓嬭浇${successCount}鏈紝璺宠繃${skipCount}鏈紝澶辫触${bookCount - skipCount - successCount}鏈紝鑰楁椂锛� ${msFormat(Date.now() - startTime)}銆俙); @@ -440,32 +488,34 @@ // 澶氳繘绋嬫墽琛� if (isMainThread) { initLogger(); - console.log(`绾跨▼鏁帮細${config.threadSize}, 寮�濮嬭锛�${config.startRow}, 缁撴潫琛岋細${config.endRow}`); - let startRow = config.startRow; - let endRow = config.endRow; + const alreadyDownloadedBooks = getAlreadyDownloadedBooks(); + const { startRow, endRow, threadSize } = config; + console.log(`绾跨▼鏁帮細${threadSize}, 寮�濮嬭锛�${startRow}, 缁撴潫琛岋細${endRow}`); let finishCnt = 0; const finishBooks = []; - const threadSize = config.threadSize; - const thBookSize = (endRow - startRow) / threadSize + const thBookSize = (endRow - startRow) / threadSize; + const books = getBooksFromExcel(startRow, endRow); + for (let sr = startRow; sr < endRow; sr += thBookSize) { let er = sr + thBookSize; if (er > endRow) { er = endRow; } - const worker = new Worker("./src/main.mjs", { workerData: { startRow: sr, endRow: er } }); + const worker = new Worker("./src/main.mjs", { workerData: { startRow: sr, endRow: er, alreadyDownloadedBooks } }); worker.on("message", (message) => { if (message.type === 'books') { finishBooks.push(...message.data); finishCnt++; - if (finishCnt >= config.threadSize) { + if (finishCnt >= threadSize) { saveBooks(finishBooks); } + } else if (message.type === 'get-book') { + worker.postMessage({ type: "book", data: books.shift() }); } }); } } else { - config.startRow = workerData.startRow; - config.endRow = workerData.endRow; + alreadyDownloadedBooks = workerData.alreadyDownloadedBooks; main(); } -- Gitblit v1.9.1