From 92c1b3abe15b82486427ef2e9e2455524e0c6c84 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期五, 14 六月 2024 23:01:48 +0800 Subject: [PATCH] 回写图书信息到Excel --- src/main.mjs | 43 +++++++++++-------------------------------- 1 files changed, 11 insertions(+), 32 deletions(-) diff --git a/src/main.mjs b/src/main.mjs index 4e7605b..bbf02b6 100644 --- a/src/main.mjs +++ b/src/main.mjs @@ -361,10 +361,9 @@ } } - /* if (pdfUrl) { + if (pdfUrl) { return pdfUrl; - } else */ - if (textUrl) { + } else if (textUrl) { return textUrl; } else { book.state = "娌℃湁text鏂囦欢"; @@ -395,7 +394,7 @@ async function downloadFile(book, url) { console.log(`涓嬭浇鏂囦欢: ${url}`); const ext = url.split(".").pop().toLowerCase(); - const filepath = `./downloads/${book.id} ${book.isbn}.txt`; + const filepath = `./downloads/${book.id} ${book.isbn}.${ext}`; if (fs.existsSync(filepath)) { book.state = `涓嬭浇瀹屾垚`; book.format = ext; @@ -432,27 +431,6 @@ book.format = ext; book.file = filepath; book.url = url; - console.log(`涓嬭浇瀹屾垚锛�${filepath}`); - setTimeout(() => { - if (ext === "gz" || ext === "zip") { - unzip(_filepath, filepath); - fs.unlinkSync(_filepath); - } - let text = fs.readFileSync(filepath, 'utf-8'); - text = getTextFromHtml(text); - fs.writeFileSync(filepath, text, 'utf-8'); - try { - fs.writeFileSync(filepath + '.result.txt', cleanText(text), 'utf-8'); - } catch (e) { - reject(e); - try { - out.close(); - fs.unlink(filepath, (e) => console.error(e)); - } catch (e) { - console.error(e); - } - } - }, 1000); resolve(true); }); stream.on("error", (err) => { @@ -504,13 +482,10 @@ function getBookInfo(book) { return retry(async () => { - const publisher = await driver.executeScript(`return document.querySelector("span[itemprop=publisher]").textContent`); - const datePublished = await driver.executeScript(`return document.querySelector("span[itemprop=datePublished]").textContent`); - let pages = await driver.executeScript(`return document.querySelector("span[data-id=resultsCount]").textContent`); - pages = pages.split(' / ')[1]; - book.publisher = publisher; - book.pubDate = datePublished; - book.pages = pages; + book.publisher = await driver.executeScript(`return document.querySelector("span[itemprop=publisher]").textContent`).catch(e => 0); + book.pubDate = await driver.executeScript(`return document.querySelector("span[itemprop=datePublished]").textContent`).catch(e => 0); + let pages = await driver.executeScript(`return document.querySelector("span[data-id=resultsCount]").textContent`).catch(e => 0); + if (pages) { book.pages = pages.split(' / ')[1]; } }); } @@ -573,6 +548,7 @@ try { await downloadFile(book, url); console.log(`涓嬭浇瀹屾垚: ${book.id} ${book.title}`); + console.log('finish: ' + JSON.stringify(book)); } catch (e) { } successCount++; // 绛変竴娈垫椂闂村啀涓嬩竴涓� @@ -588,6 +564,9 @@ for (const book of books) { const index = data.findIndex((row) => row[0] === book.id); if (index > -1) { + data[index][5] = book.publisher; + data[index][6] = book.pubDate; + data[index][11] = book.pages; data[index][12] = book.state; data[index][13] = book.format; data[index][14] = book.file; -- Gitblit v1.9.1