From 2db5b8679770d0e2542d9fae8f276b28eb9ed82c Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: 星期二, 11 六月 2024 21:24:12 +0800
Subject: [PATCH] 增加图书清单下载脚本

---
 src/book-list-download.mjs |  174 +++++++++++++++++++++++++++++++++++++
 src/main.mjs               |    2 
 package.json               |    6 +-
 README.md                  |    2 
 4 files changed, 179 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 9a26d05..f9762ec 100644
--- a/README.md
+++ b/README.md
@@ -30,5 +30,5 @@
 # 启动下载
 启动命令：
 ```shell
-yarn dev
+yarn download
 ```
\ No newline at end of file
diff --git a/package.json b/package.json
index ae8994e..2d513ff 100644
--- a/package.json
+++ b/package.json
@@ -5,8 +5,8 @@
   "license": "MIT",
   "type": "module",
   "scripts": {
-    "dev": "node src/main.mjs",
-    "build": "ncc build src/main.mjs -o dist"
+    "download": "node src/main.mjs",
+    "book-list": "node src/book-list-download.mjs"
   },
   "devDependencies": {},
   "dependencies": {
@@ -15,4 +15,4 @@
     "node-xlsx": "^0.24.0",
     "selenium-webdriver": "^4.21.0"
   }
-}
+}
\ No newline at end of file
diff --git a/src/book-list-download.mjs b/src/book-list-download.mjs
new file mode 100644
index 0000000..d07ae42
--- /dev/null
+++ b/src/book-list-download.mjs
@@ -0,0 +1,174 @@
+import * as fs from "fs";
+import xlsx from "node-xlsx";
+import axios from "axios";
+import { HttpsProxyAgent } from "https-proxy-agent";
+
+/* ---------- axios proxy ---------- */
+// Requests go through a local HTTP proxy agent; axios' own proxy option is
+// switched off so that only the agent below is used.
+const httpsAgent = new HttpsProxyAgent(`http://127.0.0.1:10809`);
+const myAxios = axios.create({
+  proxy: false,
+  httpsAgent,
+});
+
+/* ---------- logging ---------- */
+let logFile;
+
+/**
+ * Wraps console.log so that every message gets a timestamp prefix and is also
+ * appended to ./logs/book-list-logs.log (the directory is created if missing).
+ */
+function initLogger() {
+  const _log = console.log;
+  if (!fs.existsSync('./logs')) {
+    fs.mkdirSync('./logs', { recursive: true });
+  }
+  logFile = fs.createWriteStream(`./logs/book-list-logs.log`, { flags: 'a', encoding: 'utf8' });
+  console.log = function (...text) {
+    const line = `${new Date().toLocaleString()} ${text.join(' ')}`;
+    _log(line);
+    logFile.write(line + '\n');
+  };
+}
+
+/**
+ * Returns a random number in [min, max).
+ * @param {number} min lower bound
+ * @param {number} max upper bound
+ * @returns {number} random value (not rounded)
+ */
+function getRandomNumber(min, max) {
+  return Math.random() * (max - min) + min;
+}
+
+/**
+ * Resolves after the given delay.
+ * @param {number} ms delay in milliseconds
+ * @returns {Promise<void>}
+ */
+function sleep(ms) {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+/**
+ * Lists every year from start to end, both included.
+ * @param {number} start first year
+ * @param {number} end last year
+ * @returns {number[]} consecutive years
+ */
+function genYears(start, end) {
+  return Array.from({ length: end - start + 1 }, (_, i) => start + i);
+}
+
+/**
+ * Runs func and, if it rejects, waits and runs it again.
+ * @param {Function} func async operation to run
+ * @param {number} maxTry retries allowed after the first failed attempt
+ * @param {number} delay pause between attempts in milliseconds
+ * @returns {Promise<*>} result of the first successful call
+ * @throws the last error once the retries are used up
+ */
+async function retry(func, maxTry = 3, delay = 3000) {
+  try {
+    return await func();
+  } catch (e) {
+    if (maxTry > 0) {
+      await sleep(delay);
+      return await retry(func, maxTry - 1, delay);
+    } else {
+      throw e;
+    }
+  }
+}
+
+/**
+ * Fetches one page of the archive.org "books" collection listing, sorted by
+ * title and filtered by first title letter and publication-year range.
+ * @param {number} pageSize hits per page
+ * @param {number} page page number (main() starts at 1)
+ * @param {string} code first letter of the title
+ * @param {number} startYear lower year bound, sent as "gte"
+ * @param {number} endYear upper year bound, sent as "lte"
+ * @returns {Promise<object>} the axios response
+ */
+async function getBookList(pageSize, page, code, startYear = 2023, endYear = 2024) {
+  // NOTE: startYear and endYear must differ, otherwise the two keys collide.
+  const filterMap = {
+    year: { [startYear]: "gte", [endYear]: "lte" },
+    firstTitle: { [code]: "inc" },
+  };
+  const url = new URL("https://archive.org/services/search/beta/page_production/");
+  url.searchParams.set("user_query", "");
+  url.searchParams.set("page_type", "collection_details");
+  url.searchParams.set("page_target", "books");
+  url.searchParams.set("hits_per_page", String(pageSize));
+  url.searchParams.set("page", String(page));
+  url.searchParams.set("filter_map", JSON.stringify(filterMap));
+  url.searchParams.set("sort", "titleSorter:asc");
+  url.searchParams.set("aggregations", "false");
+  url.searchParams.set("uid", "R:1e845903aec74dee14bd-S:8cde5bf234b86bf96a75-P:1-K:h-T:1718106108852");
+  return await myAxios.get(url.href);
+}
+
+/**
+ * Downloads the book list for titles A-Z within the configured year range and
+ * writes it to an .xlsx file in the current directory.
+ */
+async function main() {
+  const years = genYears(2023, 2024);
+  const startYear = years[0];
+  const endYear = years[years.length - 1];
+  const codeList = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"];
+  const bookList = [["id", "isbn", "主标题", "副标题", "作者", "出版社", "出版时间", "中图法分类", "格式", "语言", "简介", "页数"]];
+  const pageSize = 100;
+  try {
+    // The year range is part of every query, so looping per year would only
+    // fetch the same rows again; iterate over the title letters alone.
+    for (const code of codeList) {
+      let page = 1;
+      let total = 0;
+      do {
+        console.log(`正在获取 ${startYear}-${endYear} 年 ${code} 分类 ${page} 页`);
+        let resp;
+        try {
+          resp = await retry(() => getBookList(pageSize, page, code, startYear, endYear));
+        } catch (e) {
+          // Give up on this letter instead of requesting the same page forever.
+          console.log(`获取失败：${startYear}-${endYear} 年 ${code} 分类 ${page} 页 ${e?.message ?? e}`);
+          break;
+        }
+        const { total: _total, hits } = resp.data.response.body.hits;
+        total = _total;
+        for (const hit of hits) {
+          const { identifier, title, creator } = hit.fields;
+          // creator is joined when it is a list; any other value is used as-is.
+          const author = Array.isArray(creator) ? creator.join(", ") : creator;
+          console.log(`${identifier} | ${title} | ${author ?? ''}`);
+          bookList.push([identifier, "", title, "", author, "", "", "", "", "English"]);
+        }
+        // Random 3-80 second pause before the next request.
+        await sleep(getRandomNumber(3000, 80000));
+        page++;
+        // page now points at the next page, so (page - 1) pages are done.
+      } while ((page - 1) * pageSize < total);
+    }
+  } catch (e) {
+    console.error(e);
+  }
+  // Row 0 is the header, so anything beyond it means books were collected.
+  if (bookList.length > 1) {
+    console.log(`获取完成：${startYear}-${endYear} A-Z，获取到书籍 ${bookList.length - 1} 条`);
+    // Save to Excel
+    const buffer = xlsx.build([{ name: "Sheet1", data: bookList }]);
+    fs.writeFileSync(`./书单 ${startYear}-${endYear} A-Z.xlsx`, buffer);
+  } else {
+    console.log("没有获取到书籍");
+  }
+}
+
+initLogger();
+main()
+  .finally(() => {
+    logFile.close();
+  });
\ No newline at end of file
diff --git a/src/main.mjs b/src/main.mjs
index a53edc5..822d5c7 100644
--- a/src/main.mjs
+++ b/src/main.mjs
@@ -388,7 +388,7 @@
     }
 
     const buffer = xlsx.build([{ name: "Sheet1", data }]);
-    fs.writeFile("./【第二批二次处理后】交付清单.xlsx", buffer, (err) => { });
+    fs.writeFileSync("./【第二批二次处理后】交付清单.xlsx", buffer);
     console.log("保存完成: ./【第二批二次处理后】交付清单.xlsx");
 }
 
--
Gitblit v1.9.1