From 70f88e715c980d0a6d411cdfbac4a13e90f03daf Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: 星期三, 12 六月 2024 19:32:36 +0800
Subject: [PATCH] 增加日志分析

---
 config.json          |    2 
 failed-books.txt     |    8 ++++
 package.json         |    3 +
 src/parse-log.mjs    |   74 +++++++++++++++++++++++++++++++++++++
 failed-book-urls.txt |    8 ++++
 5 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/config.json b/config.json
index 59e371b..3a54ac3 100644
--- a/config.json
+++ b/config.json
@@ -3,7 +3,7 @@
   "startRow": 1,
   
   "//缁撴潫琛屽彿锛屼粠0寮�濮嬶紝涓嶅寘鍚琛�": "//",
-  "endRow": 4,
+  "endRow": 2001,
 
   "//绾跨▼鏁�": "//",
   "threadSize": 4,
diff --git a/failed-book-urls.txt b/failed-book-urls.txt
new file mode 100644
index 0000000..b08d200
--- /dev/null
+++ b/failed-book-urls.txt
@@ -0,0 +1,8 @@
+https://archive.org/compress/DnaEm1CapabilitiesOfNuclearWeapons/formats=TEXT%20PDF,IMAGE%20CONTAINER%20PDF&file=/DnaEm1CapabilitiesOfNuclearWeapons.zip
+https://archive.org/download/annualcatalogueo1835wmrp_0/annualcatalogueo1835wmrp_0.pdf
+https://archive.org/download/cu31924017199948/cu31924017199948.pdf
+https://archive.org/download/CAT31285342/CAT31285342.pdf
+https://archive.org/download/17thannualcatalo1902germ/17thannualcatalo1902germ.pdf
+https://archive.org/download/CAT31284692/CAT31284692.pdf
+https://archive.org/download/CAT31322806/CAT31322806.pdf
+https://archive.org/download/charliealexander00robe/charliealexander00robe.pdf
\ No newline at end of file
diff --git a/failed-books.txt b/failed-books.txt
new file mode 100644
index 0000000..07b3d1d
--- /dev/null
+++ b/failed-books.txt
@@ -0,0 +1,8 @@
+10800062 https://archive.org/compress/DnaEm1CapabilitiesOfNuclearWeapons/formats=TEXT%20PDF,IMAGE%20CONTAINER%20PDF&file=/DnaEm1CapabilitiesOfNuclearWeapons.zip
+10908504 https://archive.org/download/annualcatalogueo1835wmrp_0/annualcatalogueo1835wmrp_0.pdf
+10519421 https://archive.org/download/cu31924017199948/cu31924017199948.pdf
+10893593 https://archive.org/download/CAT31285342/CAT31285342.pdf
+10830912 https://archive.org/download/17thannualcatalo1902germ/17thannualcatalo1902germ.pdf
+10893203 https://archive.org/download/CAT31284692/CAT31284692.pdf
+11181828 https://archive.org/download/CAT31322806/CAT31322806.pdf
+10538507 https://archive.org/download/charliealexander00robe/charliealexander00robe.pdf
\ No newline at end of file
diff --git a/package.json b/package.json
index 2d513ff..0e24ac0 100644
--- a/package.json
+++ b/package.json
@@ -6,7 +6,8 @@
   "type": "module",
   "scripts": {
     "download": "node src/main.mjs",
-    "book-list": "node src/book-list-download.mjs"
+    "book-list": "node src/book-list-download.mjs",
+    "parse-log": "node src/parse-log.mjs"
   },
   "devDependencies": {},
   "dependencies": {
diff --git a/src/parse-log.mjs b/src/parse-log.mjs
new file mode 100644
index 0000000..3875c1c
--- /dev/null
+++ b/src/parse-log.mjs
@@ -0,0 +1,74 @@
+import * as fs from 'fs';
+
+const books = []; // Book records in first-seen order; appended to by main(), read by the report step at the bottom.
+const bookMap = {}; // bookId -> first record registered for that id; main() uses it to avoid duplicate entries in books.
+
+function main() { // Scans ./logs and records what each log segment says about a book in books / bookMap.
+  // List the entries of ./logs (Dirent objects, because withFileTypes is true).
+  const logFiles = fs.readdirSync('./logs', { withFileTypes: true });
+  // Visit every listed entry. NOTE(review): entries are not filtered with isFile() before being read.
+  for (const file of logFiles) {
+    // Read the whole log file into memory as UTF-8 text.
+    const log = fs.readFileSync(`./logs/${file.name}`, 'utf8');
+    // Split the log into per-book segments at the marker string. NOTE(review): the marker looks mis-encoded in this patch - confirm it matches the logger output.
+    const bookLogs = log.split('寮�濮嬩笅杞�');
+    for (const bookLog of bookLogs) {
+      const book = {};
+      const lines = bookLog.split('\n');
+      // Examine every line of the segment.
+      for (const line of lines) {
+        // Try each pattern in turn; the first one that matches consumes the line (continue).
+        let reg, group;
+        reg = /^: (\d+) .*/g; // line starting with a colon, a space, digits and a space: the digits are the book id
+        group = reg.exec(line);
+        if (group) {
+          const bookId = group[1];
+          book.bookId = bookId;
+          if (!bookMap[bookId]) { // register only the first record seen for this id
+            bookMap[bookId] = book;
+            books.push(book);
+          } // NOTE(review): for a repeated id this segment's record is not stored, so fields parsed later in the segment never reach books.
+          continue;
+        }
+        reg = /.* 鎵撳紑璇︽儏: (.*)$/g; // captures the rest of the line after the label as the detail URL
+        group = reg.exec(line);
+        if (group) {
+          const detailUrl = group[1];
+          book.detailUrl = detailUrl;
+          continue;
+        }
+        reg = /.* 涓嬭浇鏂囦欢: (.*)$/g; // captures the rest of the line after the label as the download URL
+        group = reg.exec(line);
+        if (group) {
+          const downloadUrl = group[1];
+          book.downloadUrl = downloadUrl;
+          continue;
+        }
+        reg = /.* 涓嬭浇瀹屾垚: (\d+) /g; // captures the digits after the label as the id of the finished book
+        group = reg.exec(line);
+        if (group) {
+          const bookId = group[1];
+          if (book.bookId === bookId) { // mark as downloaded only when the id matches this segment's book
+            book.download = true;
+          }
+          continue;
+        }
+      }
+
+    }
+  }
+}
+
+try { // Run the scan; the finally block below writes both report files even if main() throws.
+  main();
+} catch (e) {
+  console.error(e); // Log the error without rethrowing; whatever was collected so far is still reported.
+} finally {
+  const failedBooks = books.filter(book => !book.download && book.downloadUrl) // records with a download URL but no download flag
+    .map(book => book.bookId + " " + (book.downloadUrl ?? '')).join('\n'); // one line per record: bookId, a space, downloadUrl
+  fs.writeFileSync('./failed-books.txt', failedBooks);
+
+  const failedBookUrls = books.filter(book => !book.download && book.downloadUrl) // same selection as for failed-books.txt
+    .map(book => book.downloadUrl).join('\n'); // download URLs only, one per line
+  fs.writeFileSync('./failed-book-urls.txt', failedBookUrls);
+}
\ No newline at end of file

--
Gitblit v1.9.1