| | |
| | | import * as fs from "fs"; |
| | | import { HttpsProxyAgent } from "https-proxy-agent"; |
| | | |
| | | /*-------------读取配置---------------*/ |
| | | let config = JSON.parse(fs.readFileSync('./config.json')); |
| | | |
| | | /* ------------日志-------------- */ |
| | | const _log = console.log; |
| | | const logFile = fs.createWriteStream('./logs.log'); |
| | | const logFile = fs.createWriteStream('./logs.log', { flags: 'a', encoding: 'utf8' }); |
| | | console.log = function (text) { |
| | | text = `${new Date().toLocaleString()} ${text ?? ''}`; |
| | | _log(text); |
| | |
| | | */ |
| | | async function createDriver() { |
| | | const opts = new ChromeOptions(); |
| | | if (config.headless) { |
| | | opts.addArguments("--headless");//开启无头模式 |
| | | } |
| | | if (config.disableGpu) { |
| | | opts.addArguments("--disable-gpu");//禁止gpu渲染 |
| | | } |
| | | opts.addArguments("--ignore-ssl-error"); // 忽略ssl错误 |
| | | opts.addArguments("--no-sandbox"); // 禁用沙盒模式 |
| | | opts.addArguments("blink-settings=imagesEnabled=false"); //禁用图片加载 |
| | |
| | | return text.replace(/[^\u4e00-\u9fa5\w \d]/g, ""); |
| | | } |
| | | |
| | | const driver = await createDriver(); |
| | | |
| | | async function sleep(ms) { |
| | | return new Promise((resolve) => { |
| | |
| | | |
| | | async function downloadFile(book, url) { |
| | | console.log(`下载文件: ${url}`); |
| | | const ext = url.split(".").pop(); |
| | | const filepath = `./downloads/${book.id} ${book.isbn}.${ext}`; |
| | | if (fs.existsSync(filepath)) { |
| | | book.state = `下载完成`; |
| | | book.format = ext; |
| | | book.file = filepath; |
| | | book.url = url; |
| | | console.log(`下载完成:${filepath}`); |
| | | return; |
| | | } |
| | | await retry(() => { |
| | | return new Promise((resolve, reject) => myAxios |
| | | .get(url, { responseType: "stream" }) |
| | | .then((response) => { |
| | | const stream = response.data; |
| | | const ext = url.split(".").pop(); |
| | | const filepath = `./downloads/${book.id} ${book.isbn}.${ext}`; |
| | | stream.pipe(fs.createWriteStream(filepath)); |
| | | const out = fs.createWriteStream(filepath); |
| | | stream.pipe(out); |
| | | stream.on("end", () => { |
| | | book.state = `下载完成`; |
| | | book.format = ext; |
| | |
| | | book.url = url; |
| | | console.log(`下载完成:${filepath}`); |
| | | resolve(true); |
| | | }); |
| | | stream.on("error", (err) => { |
| | | console.error(err); |
| | | book.state = "下载失败"; |
| | | book.url = url; |
| | | console.log(`下载失败: ${book.id} ${book.title}`); |
| | | reject(false); |
| | | try { |
| | | out.close(); |
| | | fs.unlink(filepath,(e)=>console.error(e)); |
| | | } catch (e) { |
| | | console.error(e); |
| | | } |
| | | }); |
| | | }) |
| | | .catch((e) => { |
| | |
| | | console.log(`下载失败: ${book.id} ${book.title}`); |
| | | reject(false); |
| | | })); |
| | | }).catch(e => { |
| | | return false |
| | | }); |
| | | } |
| | | |
| | | async function downloadBooks(books) { |
| | | for (const book of books) { |
| | | if (book.state && (book.state === "没有搜索结果" || book.state === "没有pdf或text文件")) { |
| | | bookCount++; |
| | | if (book.state && (book.state === "没有搜索结果" || book.state === "没有pdf或text文件" || book.state === "下载完成")) { |
| | | // 跳过没有搜索结果或没有pdf或text文件的书籍 |
| | | skipCount++; |
| | | continue; |
| | | } |
| | | bookCount++; |
| | | console.log(`开始下载: ${book.id} ${book.title}`); |
| | | // 打开搜索页面并搜索 |
| | | if (!await openSearchPage(book)) { |
| | |
| | | // 等待一段时间再下载 |
| | | await sleep(getRandomNumber(3000, 10000)); |
| | | // 下载文件 |
| | | await downloadFile(book, url); |
| | | console.log(`下载完成: ${book.id} ${book.title}`); |
| | | try { |
| | | await downloadFile(book, url); |
| | | console.log(`下载完成: ${book.id} ${book.title}`); |
| | | }catch(e){} |
| | | successCount++; |
| | | // 等一段时间再下一个 |
| | | sleep(getRandomNumber(3000, 10000)); |
| | | } |
| | | await driver.close(); |
| | | await driver.quit(); |
| | | } |
| | | |
| | | function saveBooks(books) { |
| | | console.log("保存下载状态数据"); |
| | | const workSheets = xlsx.parse("【第二批二次处理后】交付清单.xlsx"); |
| | | const sheet = workSheets[0]; |
| | | const data = sheet.data.slice(2); |
| | | const data = sheet.data; |
| | | for (const book of books) { |
| | | const index = data.findIndex((row) => row[0] === book.id); |
| | | if (index > -1) { |
| | |
| | | let successCount = 0; |
| | | // 图书数量 |
| | | let bookCount = 0; |
| | | |
| | | // 跳过的数量,已经下载过或没有搜索到的数量 |
| | | let skipCount = 0; |
| | | const driver = await createDriver(); |
| | | function main() { |
| | | const range = JSON.parse(fs.readFileSync('./config.json')); |
| | | const books = getBooksFromExcel(range.startRow, range.endRow); |
| | | const books = getBooksFromExcel(config.startRow, config.endRow); |
| | | downloadBooks(books) |
| | | .then(() => { |
| | | console.log(`全部完成,共下载${bookCount}本,成功下载${successCount}本,失败${bookCount - successCount}本,耗时: ${msFormat(Date.now() - startTime)}。`); |
| | | console.log(`全部完成,共下载${bookCount}本,成功下载${successCount}本,跳过${skipCount}本,失败${bookCount - skipCount - successCount}本,耗时: ${msFormat(Date.now() - startTime)}。`); |
| | | }) |
| | | .finally(() => { |
| | | .catch(e => { |
| | | console.error(e); |
| | | }) |
| | | .finally(async () => { |
| | | saveBooks(books); |
| | | logFile.close(); |
| | | try { |
| | | await driver.close(); |
| | | await driver.quit(); |
| | | }catch(e){} |
| | | }); |
| | | } |
| | | |