From 92c1b3abe15b82486427ef2e9e2455524e0c6c84 Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: Fri, 14 Jun 2024 23:01:48 +0800
Subject: [PATCH] 回写图书信息到Excel

---
 src/main.mjs |   43 +++++++++++--------------------------------
 1 file changed, 11 insertions(+), 32 deletions(-)

diff --git a/src/main.mjs b/src/main.mjs
index 4e7605b..bbf02b6 100644
--- a/src/main.mjs
+++ b/src/main.mjs
@@ -361,10 +361,9 @@
       }
     }
 
-    /* if (pdfUrl) {
+    if (pdfUrl) {
       return pdfUrl;
-    } else  */
-    if (textUrl) {
+    } else if (textUrl) {
       return textUrl;
     } else {
       book.state = "没有text文件";
@@ -395,7 +394,7 @@
 async function downloadFile(book, url) {
   console.log(`下载文件: ${url}`);
   const ext = url.split(".").pop().toLowerCase();
-  const filepath = `./downloads/${book.id} ${book.isbn}.txt`;
+  const filepath = `./downloads/${book.id} ${book.isbn}.${ext}`;
   if (fs.existsSync(filepath)) {
     book.state = `下载完成`;
     book.format = ext;
@@ -432,27 +431,6 @@
           book.format = ext;
           book.file = filepath;
           book.url = url;
-          console.log(`下载完成：${filepath}`);
-          setTimeout(() => {
-            if (ext === "gz" || ext === "zip") {
-              unzip(_filepath, filepath);
-              fs.unlinkSync(_filepath);
-            }
-            let text = fs.readFileSync(filepath, 'utf-8');
-            text = getTextFromHtml(text);
-            fs.writeFileSync(filepath, text, 'utf-8');
-            try {
-              fs.writeFileSync(filepath + '.result.txt', cleanText(text), 'utf-8');
-            } catch (e) {
-              reject(e);
-              try {
-                out.close();
-                fs.unlink(filepath, (e) => console.error(e));
-              } catch (e) {
-                console.error(e);
-              }
-            }
-          }, 1000);
           resolve(true);
         });
         stream.on("error", (err) => {
@@ -504,13 +482,10 @@
 
 function getBookInfo(book) {
   return retry(async () => {
-    const publisher = await driver.executeScript(`return document.querySelector("span[itemprop=publisher]").textContent`);
-    const datePublished = await driver.executeScript(`return document.querySelector("span[itemprop=datePublished]").textContent`);
-    let pages = await driver.executeScript(`return document.querySelector("span[data-id=resultsCount]").textContent`);
-    pages = pages.split(' / ')[1];
-    book.publisher = publisher;
-    book.pubDate = datePublished;
-    book.pages = pages;
+    book.publisher = await driver.executeScript(`return document.querySelector("span[itemprop=publisher]").textContent`).catch(e => 0);
+    book.pubDate = await driver.executeScript(`return document.querySelector("span[itemprop=datePublished]").textContent`).catch(e => 0);
+    let pages = await driver.executeScript(`return document.querySelector("span[data-id=resultsCount]").textContent`).catch(e => 0);
+    if (pages) { book.pages = pages.split(' / ')[1]; }
   });
 }
 
@@ -573,6 +548,7 @@
     try {
       await downloadFile(book, url);
       console.log(`下载完成: ${book.id} ${book.title}`);
+      console.log('finish: ' + JSON.stringify(book));
     } catch (e) { }
     successCount++;
     // 等一段时间再下一个
@@ -588,6 +564,9 @@
   for (const book of books) {
     const index = data.findIndex((row) => row[0] === book.id);
     if (index > -1) {
+      data[index][5] = book.publisher;
+      data[index][6] = book.pubDate;
+      data[index][11] = book.pages;
       data[index][12] = book.state;
       data[index][13] = book.format;
       data[index][14] = book.file;

--
Gitblit v1.9.1