From 6df7cbf23a2c8b632dfc35602086807c7fc6ff9b Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: 星期三, 17 七月 2024 12:27:37 +0800
Subject: [PATCH] Merge branch 'main' of http://182.92.203.7:2001/r/~lyg/book-crawler into main

---
Review notes (not part of the original commit):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy. Context-line indentation is a best guess, so apply with
  `git apply --ignore-whitespace`.
- Mis-encoded (UTF-8 read as GBK) Chinese string literals were restored.
- Added lines were amended during review (null-safe title check, guarded
  spreadsheet row lookup, per-line JSON error handling, CRLF-safe splitting),
  so the commit/blob hashes above and below no longer describe this content.

 src/parse-isbn-log.mjs   |   74 ++++++++++++++++++++++++++++++++++++++++++++++++++
 package.json             |    3 +
 src/book-isbn-search.mjs |    6 +++
 3 files changed, 82 insertions(+), 1 deletions(-)

diff --git a/package.json b/package.json
index c54a6e4..c7c8afe 100644
--- a/package.json
+++ b/package.json
@@ -9,7 +9,8 @@
     "book-list": "node src/book-list-download.mjs",
     "parse-log": "node src/parse-log.mjs",
     "trans": "node src/trans.mjs",
-    "book-isbn": "node src/book-isbn-search.mjs"
+    "book-isbn": "node src/book-isbn-search.mjs",
+    "parse-isbn-log": "node src/parse-isbn-log.mjs"
   },
   "devDependencies": {},
   "dependencies": {
diff --git a/src/book-isbn-search.mjs b/src/book-isbn-search.mjs
index a86b02a..22a74d5 100644
--- a/src/book-isbn-search.mjs
+++ b/src/book-isbn-search.mjs
@@ -102,6 +102,9 @@
         }
         book.publisher = data.metadata.publisher;
         book.pubDate = data.metadata.date;
+        if (book.isbn) {
+          console.log('ISBN: ',JSON.stringify(book));
+        }
       })
       .catch(() => {
         book.state = "打开详情页失败";
@@ -133,6 +136,9 @@
 
   for (; ;) {
     const book = await nextBook();
+    if (book && !book.title) {
+      continue;
+    }
     if (!book) {
       break;
     }
diff --git a/src/parse-isbn-log.mjs b/src/parse-isbn-log.mjs
new file mode 100644
index 0000000..23c98c4
--- /dev/null
+++ b/src/parse-isbn-log.mjs
@@ -0,0 +1,74 @@
+import * as fs from 'fs';
+import xlsx from "node-xlsx";
+
+const LOG_DIR = './book-isbn-logs';
+const EXCEL_FILE = "fiction-noisbn.xlsx";
+// Captures the JSON payload of a log line that ends with " ISBN: <json>".
+const ISBN_LINE = /.* ISBN: (.*)$/;
+
+// Book objects recovered from the logs; filled by main(), read by updateWorkbook().
+const books = [];
+
+/**
+ * Scan every file in LOG_DIR and push the object parsed from each
+ * "... ISBN: <json>" line onto `books`.
+ */
+function main() {
+  const logFiles = fs.readdirSync(LOG_DIR, { withFileTypes: true });
+  for (const file of logFiles) {
+    // Sub-directories would make readFileSync throw EISDIR.
+    if (!file.isFile()) {
+      continue;
+    }
+    const log = fs.readFileSync(`${LOG_DIR}/${file.name}`, 'utf8');
+    // Split on the marker first, as the original did (presumably one chunk per book).
+    const bookLogs = log.split('开始下载');
+    for (const bookLog of bookLogs) {
+      // Accept CRLF: "." never matches "\r", so a trailing "\r" would defeat "$".
+      const lines = bookLog.split(/\r?\n/);
+      for (const line of lines) {
+        const group = ISBN_LINE.exec(line);
+        if (!group) {
+          continue;
+        }
+        try {
+          books.push({ ...JSON.parse(group[1]) });
+        } catch (e) {
+          // A truncated or corrupt line must not discard the rest of the logs.
+          console.error(`Skipping unparsable ISBN line in ${file.name}:`, e.message);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Write each collected ISBN into column index 5 of the row in EXCEL_FILE whose
+ * first cell matches the book id, then save the workbook in place.
+ */
+function updateWorkbook() {
+  const workSheets = xlsx.parse(EXCEL_FILE);
+  const sheet = workSheets[0];
+  for (const book of books) {
+    // Loose "==" kept: cell and logged id may differ in type (number vs string) - TODO confirm.
+    const row = sheet.data.find(r => r[0] == book.id);
+    if (!row) {
+      console.warn("No spreadsheet row for book id:", book.id);
+      continue;
+    }
+    row[5] = book.isbn;
+  }
+  // Only the first sheet is passed to build(); other sheets are not written back.
+  const buffer = xlsx.build([sheet]);
+  fs.writeFileSync(EXCEL_FILE, buffer);
+  console.log("保存完成: ", EXCEL_FILE);
+}
+
+try {
+  main();
+} catch (e) {
+  console.error(e);
+} finally {
+  // Runs even if main() threw, so ISBNs collected before the failure are still saved.
+  updateWorkbook();
+}
-- 
Gitblit v1.9.1