From 70f88e715c980d0a6d411cdfbac4a13e90f03daf Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: 星期三, 12 六月 2024 19:32:36 +0800
Subject: [PATCH] 增加日志分析

---
Note: the src/parse-log.mjs hunk below was revised after the original commit
(mis-decoded log markers restored, retry/completion matching fixed, non-file
log entries skipped), so the blob hash on its index line is stale.

 config.json          |    2 
 failed-books.txt     |    8 ++++
 package.json         |    3 +
 src/parse-log.mjs    |  107 +++++++++++++++++++++++++++++++++++++++++++++++++
 failed-book-urls.txt |    8 ++++
 5 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/config.json b/config.json
index 59e371b..3a54ac3 100644
--- a/config.json
+++ b/config.json
@@ -3,7 +3,7 @@
   "startRow": 1,
 
   "//缁撴潫琛屽彿锛屼粠0寮�濮嬶紝涓嶅寘鍚琛�": "//",
-  "endRow": 4,
+  "endRow": 2001,
 
   "//绾跨▼鏁�": "//",
   "threadSize": 4,
diff --git a/failed-book-urls.txt b/failed-book-urls.txt
new file mode 100644
index 0000000..b08d200
--- /dev/null
+++ b/failed-book-urls.txt
@@ -0,0 +1,8 @@
+https://archive.org/compress/DnaEm1CapabilitiesOfNuclearWeapons/formats=TEXT%20PDF,IMAGE%20CONTAINER%20PDF&file=/DnaEm1CapabilitiesOfNuclearWeapons.zip
+https://archive.org/download/annualcatalogueo1835wmrp_0/annualcatalogueo1835wmrp_0.pdf
+https://archive.org/download/cu31924017199948/cu31924017199948.pdf
+https://archive.org/download/CAT31285342/CAT31285342.pdf
+https://archive.org/download/17thannualcatalo1902germ/17thannualcatalo1902germ.pdf
+https://archive.org/download/CAT31284692/CAT31284692.pdf
+https://archive.org/download/CAT31322806/CAT31322806.pdf
+https://archive.org/download/charliealexander00robe/charliealexander00robe.pdf
\ No newline at end of file
diff --git a/failed-books.txt b/failed-books.txt
new file mode 100644
index 0000000..07b3d1d
--- /dev/null
+++ b/failed-books.txt
@@ -0,0 +1,8 @@
+10800062 https://archive.org/compress/DnaEm1CapabilitiesOfNuclearWeapons/formats=TEXT%20PDF,IMAGE%20CONTAINER%20PDF&file=/DnaEm1CapabilitiesOfNuclearWeapons.zip
+10908504 https://archive.org/download/annualcatalogueo1835wmrp_0/annualcatalogueo1835wmrp_0.pdf
+10519421 https://archive.org/download/cu31924017199948/cu31924017199948.pdf
+10893593 https://archive.org/download/CAT31285342/CAT31285342.pdf
+10830912 https://archive.org/download/17thannualcatalo1902germ/17thannualcatalo1902germ.pdf
+10893203 https://archive.org/download/CAT31284692/CAT31284692.pdf
+11181828 https://archive.org/download/CAT31322806/CAT31322806.pdf
+10538507 https://archive.org/download/charliealexander00robe/charliealexander00robe.pdf
\ No newline at end of file
diff --git a/package.json b/package.json
index 2d513ff..0e24ac0 100644
--- a/package.json
+++ b/package.json
@@ -6,7 +6,8 @@
   "type": "module",
   "scripts": {
     "download": "node src/main.mjs",
-    "book-list": "node src/book-list-download.mjs"
+    "book-list": "node src/book-list-download.mjs",
+    "parse-log": "node src/parse-log.mjs"
   },
   "devDependencies": {},
   "dependencies": {
diff --git a/src/parse-log.mjs b/src/parse-log.mjs
new file mode 100644
index 0000000..3875c1c
--- /dev/null
+++ b/src/parse-log.mjs
@@ -0,0 +1,107 @@
+import * as fs from 'fs';
+
+// Directory containing the downloader's log files.
+const LOG_DIR = './logs';
+// Text the downloader logs when it starts a book; every log is split on it.
+const START_MARKER = '开始下载';
+// Remainder of the start line: ": <bookId> ...".
+const BOOK_ID_RE = /^: (\d+) /;
+// "... 打开详情: <url>" (detail page opened).
+const DETAIL_URL_RE = /.* 打开详情: (.*)$/;
+// "... 下载文件: <url>" (file download requested).
+const DOWNLOAD_URL_RE = /.* 下载文件: (.*)$/;
+// "... 下载完成: <bookId> ..." (download finished).
+const DOWNLOAD_DONE_RE = /.* 下载完成: (\d+) /;
+
+// Every book seen in the logs, in first-seen order.
+const books = [];
+// Book records keyed by book id, so repeated attempts share one record.
+const bookMap = {};
+
+/**
+ * Return the record for a book id, registering a new one on first sight.
+ *
+ * @param {string} bookId - Id captured from a log line.
+ * @returns {object} The record shared by every attempt at that book.
+ */
+function getBook(bookId) {
+  let book = bookMap[bookId];
+  if (!book) {
+    book = { bookId };
+    bookMap[bookId] = book;
+    books.push(book);
+  }
+  return book;
+}
+
+/**
+ * Fold the contents of one log file into `books` / `bookMap`.
+ *
+ * @param {string} log - Full text of a log file.
+ */
+function parseLog(log) {
+  for (const bookLog of log.split(START_MARKER)) {
+    // Text before the first marker belongs to no book, so start with none.
+    let book = null;
+    // Accept CRLF too: `.` never matches "\r", so a stray one would stop the
+    // `(.*)$` URL patterns from matching at all.
+    for (const line of bookLog.split(/\r?\n/)) {
+      let group = BOOK_ID_RE.exec(line);
+      if (group) {
+        // Reuse the existing record so a retry updates the tracked book
+        // instead of an object that is never reported.
+        book = getBook(group[1]);
+        continue;
+      }
+      group = DETAIL_URL_RE.exec(line);
+      if (group) {
+        if (book) book.detailUrl = group[1];
+        continue;
+      }
+      group = DOWNLOAD_URL_RE.exec(line);
+      if (group) {
+        if (book) book.downloadUrl = group[1];
+        continue;
+      }
+      group = DOWNLOAD_DONE_RE.exec(line);
+      if (group) {
+        // The line names its own book, so credit that book even when other
+        // output has been logged between its start and its completion.
+        const finished = bookMap[group[1]];
+        if (finished) finished.download = true;
+      }
+    }
+  }
+}
+
+/**
+ * Parse every regular file in LOG_DIR.
+ *
+ * @throws {Error} If the directory or one of its files cannot be read.
+ */
+function main() {
+  for (const file of fs.readdirSync(LOG_DIR, { withFileTypes: true })) {
+    // readFileSync throws on a directory; skip anything that is not a file.
+    if (!file.isFile()) continue;
+    parseLog(fs.readFileSync(`${LOG_DIR}/${file.name}`, 'utf8'));
+  }
+}
+
+try {
+  main();
+} catch (e) {
+  console.error(e);
+  // Report the failure to the caller, but still write what was parsed.
+  process.exitCode = 1;
+} finally {
+  // A book failed when a download was requested but never completed.
+  const failed = books.filter((book) => !book.download && book.downloadUrl);
+  fs.writeFileSync(
+    './failed-books.txt',
+    failed.map((book) => `${book.bookId} ${book.downloadUrl}`).join('\n'),
+  );
+  fs.writeFileSync(
+    './failed-book-urls.txt',
+    failed.map((book) => book.downloadUrl).join('\n'),
+  );
+}
-- 
Gitblit v1.9.1