lyg
2024-06-12 70f88e715c980d0a6d411cdfbac4a13e90f03daf
增加日志分析
2个文件已修改
3个文件已添加
95 ■■■■■ 已修改文件
config.json 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
failed-book-urls.txt 8 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
failed-books.txt 8 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
package.json 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/parse-log.mjs 74 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
config.json
@@ -3,7 +3,7 @@
  "startRow": 1,
  
  "//结束行号,从0开始,不包含该行": "//",
  "endRow": 4,
  "endRow": 2001,
  "//线程数": "//",
  "threadSize": 4,
failed-book-urls.txt
New file
@@ -0,0 +1,8 @@
https://archive.org/compress/DnaEm1CapabilitiesOfNuclearWeapons/formats=TEXT%20PDF,IMAGE%20CONTAINER%20PDF&file=/DnaEm1CapabilitiesOfNuclearWeapons.zip
https://archive.org/download/annualcatalogueo1835wmrp_0/annualcatalogueo1835wmrp_0.pdf
https://archive.org/download/cu31924017199948/cu31924017199948.pdf
https://archive.org/download/CAT31285342/CAT31285342.pdf
https://archive.org/download/17thannualcatalo1902germ/17thannualcatalo1902germ.pdf
https://archive.org/download/CAT31284692/CAT31284692.pdf
https://archive.org/download/CAT31322806/CAT31322806.pdf
https://archive.org/download/charliealexander00robe/charliealexander00robe.pdf
failed-books.txt
New file
@@ -0,0 +1,8 @@
10800062 https://archive.org/compress/DnaEm1CapabilitiesOfNuclearWeapons/formats=TEXT%20PDF,IMAGE%20CONTAINER%20PDF&file=/DnaEm1CapabilitiesOfNuclearWeapons.zip
10908504 https://archive.org/download/annualcatalogueo1835wmrp_0/annualcatalogueo1835wmrp_0.pdf
10519421 https://archive.org/download/cu31924017199948/cu31924017199948.pdf
10893593 https://archive.org/download/CAT31285342/CAT31285342.pdf
10830912 https://archive.org/download/17thannualcatalo1902germ/17thannualcatalo1902germ.pdf
10893203 https://archive.org/download/CAT31284692/CAT31284692.pdf
11181828 https://archive.org/download/CAT31322806/CAT31322806.pdf
10538507 https://archive.org/download/charliealexander00robe/charliealexander00robe.pdf
package.json
@@ -6,7 +6,8 @@
  "type": "module",
  "scripts": {
    "download": "node src/main.mjs",
    "book-list": "node src/book-list-download.mjs"
    "book-list": "node src/book-list-download.mjs",
    "parse-log": "node src/parse-log.mjs"
  },
  "devDependencies": {},
  "dependencies": {
src/parse-log.mjs
New file
@@ -0,0 +1,74 @@
import * as fs from 'fs';
// Every book seen in the logs, in first-seen order. Read by the report step
// that runs after main().
const books = [];
// Book id -> its record in `books`, so that repeated attempts for the same
// book share a single record.
const bookMap = {};

/**
 * Scans every log file directly under ./logs and fills `books` / `bookMap`.
 *
 * A log is cut into per-book sections at each "开始下载" marker. Within a
 * section the following lines are recognised:
 *   - ": <id> ..."            -> the book id (text right after the marker)
 *   - "... 打开详情: <url>"    -> stored as `detailUrl`
 *   - "... 下载文件: <url>"    -> stored as `downloadUrl`
 *   - "... 下载完成: <id> ..." -> sets `download = true` when the id matches
 *
 * Each record has the shape `{ bookId, detailUrl?, downloadUrl?, download? }`.
 *
 * @throws {Error} Propagates any error from reading ./logs or a log file.
 */
function main() {
  // Built once instead of once per line. No `g` flag: each pattern is applied
  // a single time per line, so the stateful `lastIndex` is not wanted.
  const startPattern = /^: (\d+) .*/;
  const detailPattern = /.* 打开详情: (.*)$/;
  const downloadPattern = /.* 下载文件: (.*)$/;
  const donePattern = /.* 下载完成: (\d+) /;

  // List everything in the log directory.
  const logEntries = fs.readdirSync('./logs', { withFileTypes: true });
  for (const entry of logEntries) {
    // Reading a directory as a file throws EISDIR and would abort the scan.
    if (entry.isDirectory()) {
      continue;
    }
    const log = fs.readFileSync(`./logs/${entry.name}`, 'utf8');
    // One section per download attempt.
    const bookLogs = log.split('开始下载');
    for (const bookLog of bookLogs) {
      // Scratch record for this section; it is only tracked once an id line
      // is seen, and is swapped for the existing record on a repeated id.
      let book = {};
      // Accept both LF and CRLF. A leftover "\r" would stop the `(.*)$`
      // patterns from matching, because `.` does not match "\r".
      const lines = bookLog.split(/\r?\n/);
      for (const line of lines) {
        let group = startPattern.exec(line);
        if (group) {
          const bookId = group[1];
          const known = bookMap[bookId];
          if (known) {
            // Repeated attempt: keep updating the tracked record so that a
            // later success clears an earlier failure.
            book = known;
          } else {
            book.bookId = bookId;
            bookMap[bookId] = book;
            books.push(book);
          }
          continue;
        }
        group = detailPattern.exec(line);
        if (group) {
          book.detailUrl = group[1];
          continue;
        }
        group = downloadPattern.exec(line);
        if (group) {
          book.downloadUrl = group[1];
          continue;
        }
        group = donePattern.exec(line);
        if (group) {
          // Only trust a completion line that names the book of this section.
          if (book.bookId === group[1]) {
            book.download = true;
          }
          continue;
        }
      }
    }
  }
}
// Run the scan, then always write the two reports, even if the scan threw
// part-way through, so that whatever was collected so far is still saved.
try {
  main();
} catch (err) {
  console.error(err);
} finally {
  // A book counts as failed when a download URL was logged for it but no
  // matching completion line was seen.
  const failed = books.filter((entry) => entry.downloadUrl && !entry.download);

  // "<bookId> <downloadUrl>" per line.
  const idAndUrlLines = failed.map((entry) => `${entry.bookId} ${entry.downloadUrl}`);
  fs.writeFileSync('./failed-books.txt', idAndUrlLines.join('\n'));

  // Bare download URL per line.
  const urlLines = failed.map((entry) => entry.downloadUrl);
  fs.writeFileSync('./failed-book-urls.txt', urlLines.join('\n'));
}