From 2db5b8679770d0e2542d9fae8f276b28eb9ed82c Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: Tue, 11 Jun 2024 21:24:12 +0800
Subject: [PATCH] 增加图书清单下载脚本

---
 src/book-list-download.mjs |  113 +++++++++++++++++++++++++++++++++++++
 src/main.mjs               |    2 
 package.json               |    6 +-
 README.md                  |    2 
 4 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 9a26d05..f9762ec 100644
--- a/README.md
+++ b/README.md
@@ -30,5 +30,5 @@
 # 启动下载
 启动命令：
 ```shell
-yarn dev
+yarn download
 ```
\ No newline at end of file
diff --git a/package.json b/package.json
index ae8994e..2d513ff 100644
--- a/package.json
+++ b/package.json
@@ -5,8 +5,8 @@
   "license": "MIT",
   "type": "module",
   "scripts": {
-    "dev": "node src/main.mjs",
-    "build": "ncc build src/main.mjs -o dist"
+    "download": "node src/main.mjs",
+    "book-list": "node src/book-list-download.mjs"
   },
   "devDependencies": {},
   "dependencies": {
@@ -15,4 +15,4 @@
     "node-xlsx": "^0.24.0",
     "selenium-webdriver": "^4.21.0"
   }
-}
+}
\ No newline at end of file
diff --git a/src/book-list-download.mjs b/src/book-list-download.mjs
new file mode 100644
index 0000000..d07ae42
--- /dev/null
+++ b/src/book-list-download.mjs
@@ -0,0 +1,113 @@
+import * as fs from "fs";
+import xlsx from "node-xlsx";
+import axios from "axios";
+import { HttpsProxyAgent } from "https-proxy-agent";
+
+/* ---------- axios proxy ---------- */
+// Every HTTPS request made through `myAxios` goes via the HTTP proxy at 127.0.0.1:10809,
+// using the agent below. `proxy: false` is passed so that axios does not layer proxy
+// handling of its own on top of the agent (see the axios request-config documentation).
+const httpsAgent = new HttpsProxyAgent("http://127.0.0.1:10809");
+const myAxios = axios.create({ httpsAgent, proxy: false });
+
+/* ----------- logging ----------- */
+let logFile;
+/** Replaces console.log so each call is timestamped, printed, and appended to ./logs/book-list-logs.log. */
+function initLogger() {
+  const printToConsole = console.log;
+  const logDir = './logs';
+  if (!fs.existsSync(logDir)) fs.mkdirSync(logDir, { recursive: true });
+  logFile = fs.createWriteStream(`./logs/book-list-logs.log`, { flags: 'a', encoding: 'utf8' });
+  console.log = (...parts) => {
+    const line = `${new Date().toLocaleString()} ${parts.join(' ')}`;
+    printToConsole(line);
+    logFile.write(`${line}\n`);
+  };
+}
+
+/**
+ * Picks a random floating-point value between the two bounds by scaling Math.random().
+ * @param {number} min lower bound
+ * @param {number} max upper bound
+ * @returns {number} the random value
+ */
+function getRandomNumber(min, max) {
+  return min + (max - min) * Math.random();
+}
+
+function sleep(ms) { // returns a promise that resolves (with no value) once `ms` milliseconds have passed
+  return new Promise((done) => { setTimeout(done, ms); });
+}
+
+function genYears(start, end) { // every year from start to end, both ends included; empty when end < start
+  return [...Array.from({ length: end - start + 1 }).keys()].map((offset) => start + offset);
+}
+
+/** Awaits func(); after a failure waits `delay` ms and runs it again, for at most `maxTry` re-runs. */
+async function retry(func, maxTry = 3, delay = 3000) {
+  // Iterative form: `remaining` counts the re-runs still allowed (func runs maxTry + 1 times at most).
+  for (let remaining = maxTry; ; remaining -= 1) {
+    try {
+      return await func();
+    } catch (failure) {
+      if (!(remaining > 0)) throw failure; // out of re-runs: surface the latest error
+      await sleep(delay);
+    }
+  }
+}
+
+async function getBookList(pageSize, page, code) { // requests page `page` (`pageSize` hits per page) of the archive.org book search for first-title value `code`
+  const url = `https://archive.org/services/search/beta/page_production/?user_query=&page_type=collection_details&page_target=books&hits_per_page=${pageSize}&page=${page}&filter_map=%7B%22year%22%3A%7B%222023%22%3A%22gte%22%2C%222024%22%3A%22lte%22%7D%2C%22firstTitle%22%3A%7B%22${code}%22%3A%22inc%22%7D%7D&sort=titleSorter%3Aasc&aggregations=false&uid=R%3A1e845903aec74dee14bd-S%3A8cde5bf234b86bf96a75-P%3A1-K%3Ah-T%3A1718106108852`;
+  return await myAxios.get(url); // resolves to the axios response; NOTE: the 2023-2024 year filter and the uid are hard-coded in the URL above
+}
+
+/** Crawls archive.org book listings for first-title letters A-Z and saves the rows to an .xlsx file. */
+async function main() {
+  // Years only label the logs/file name: getBookList() takes no year, so a per-year loop fetched every page twice.
+  const years = genYears(2023, 2024);
+  const yearRange = `${years[0]}-${years[years.length - 1]}`;
+  const codeList = [..."ABCDEFGHIJKLMNOPQRSTUVWXYZ"];
+  const bookList = [["id", "isbn", "主标题", "副标题", "作者", "出版社", "出版时间", "中图法分类", "格式", "语言", "简介", "页数"]];
+  const pageSize = 100;
+  try {
+    for (const code of codeList) {
+      let page = 1;
+      let total = 0;
+      do {
+        console.log(`正在获取 ${yearRange} 年 ${code} 分类 ${page} 页`);
+        const resp = await retry(() => getBookList(pageSize, page, code)).catch((e) => {
+          console.log(`获取失败：${yearRange} 年 ${code} 分类 ${page} 页 ${e?.message ?? e}`);
+        });
+        if (resp) {
+          const { total: _total, hits } = resp.data.response.body.hits;
+          total = _total;
+          for (const hit of hits) {
+            const { identifier, title, creator } = hit.fields;
+            const author = Array.isArray(creator) ? creator.join(", ") : creator; // tolerate a plain-string creator
+            console.log(`${identifier} | ${title} | ${author ?? ''}`);
+            bookList.push([identifier, "", title, "", author, "", "", "", "", "English"]);
+          }
+        }
+        // A page that still fails after the retries is skipped; always advancing keeps the loop finite and throttled.
+        page++;
+        await sleep(getRandomNumber(3000, 80000));
+      } while (pageSize * (page - 1) < total); // `page` is already the NEXT page, so pages 1..page-1 are done
+    }
+  } catch (e) {
+    console.error(e);
+  }
+  if (bookList.length > 1) { // more than the header row => there is something to save
+    console.log(`获取完成：${yearRange} A-Z，获取到书籍 ${bookList.length - 1} 条`);
+    // Save to Excel (writeFileSync is synchronous and takes no callback).
+    const buffer = xlsx.build([{ name: "Sheet1", data: bookList }]);
+    fs.writeFileSync(`./书单 ${yearRange} A-Z.xlsx`, buffer);
+  } else {
+    console.log("没有获取到书籍");
+  }
+}
+
+// Script entry point: install the file logger first so that main()'s console.log output is
+// captured, then close the log stream once main() has settled, whether it fulfilled or rejected.
+initLogger();
+const crawl = main();
+crawl.finally(() => logFile.close());
\ No newline at end of file
diff --git a/src/main.mjs b/src/main.mjs
index a53edc5..822d5c7 100644
--- a/src/main.mjs
+++ b/src/main.mjs
@@ -388,7 +388,7 @@
   }
 
   const buffer = xlsx.build([{ name: "Sheet1", data }]);
-  fs.writeFile("./銆愮浜屾壒浜屾澶勭悊鍚庛�戜氦浠樻竻鍗�.xlsx", buffer, (err) => { });
+  fs.writeFileSync("./銆愮浜屾壒浜屾澶勭悊鍚庛�戜氦浠樻竻鍗�.xlsx", buffer, (err) => { });
   console.log("淇濆瓨瀹屾垚: ./銆愮浜屾壒浜屾澶勭悊鍚庛�戜氦浠樻竻鍗�.xlsx");
 }
 

--
Gitblit v1.9.1