From 8e0bd8ab4b5099625016c5b8f4c1cf4bba9cddcc Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期五, 14 六月 2024 22:58:34 +0800 Subject: [PATCH] pdf版 --- src/main.mjs | 40 ++++++++-------------------------------- 1 files changed, 8 insertions(+), 32 deletions(-) diff --git a/src/main.mjs b/src/main.mjs index 4e7605b..e6bca2b 100644 --- a/src/main.mjs +++ b/src/main.mjs @@ -361,10 +361,9 @@ } } - /* if (pdfUrl) { + if (pdfUrl) { return pdfUrl; - } else */ - if (textUrl) { + } else if (textUrl) { return textUrl; } else { book.state = "娌℃湁text鏂囦欢"; @@ -395,7 +394,7 @@ async function downloadFile(book, url) { console.log(`涓嬭浇鏂囦欢: ${url}`); const ext = url.split(".").pop().toLowerCase(); - const filepath = `./downloads/${book.id} ${book.isbn}.txt`; + const filepath = `./downloads/${book.id} ${book.isbn}.${ext}`; if (fs.existsSync(filepath)) { book.state = `涓嬭浇瀹屾垚`; book.format = ext; @@ -432,27 +431,6 @@ book.format = ext; book.file = filepath; book.url = url; - console.log(`涓嬭浇瀹屾垚锛�${filepath}`); - setTimeout(() => { - if (ext === "gz" || ext === "zip") { - unzip(_filepath, filepath); - fs.unlinkSync(_filepath); - } - let text = fs.readFileSync(filepath, 'utf-8'); - text = getTextFromHtml(text); - fs.writeFileSync(filepath, text, 'utf-8'); - try { - fs.writeFileSync(filepath + '.result.txt', cleanText(text), 'utf-8'); - } catch (e) { - reject(e); - try { - out.close(); - fs.unlink(filepath, (e) => console.error(e)); - } catch (e) { - console.error(e); - } - } - }, 1000); resolve(true); }); stream.on("error", (err) => { @@ -504,13 +482,10 @@ function getBookInfo(book) { return retry(async () => { - const publisher = await driver.executeScript(`return document.querySelector("span[itemprop=publisher]").textContent`); - const datePublished = await driver.executeScript(`return document.querySelector("span[itemprop=datePublished]").textContent`); - let pages = await driver.executeScript(`return document.querySelector("span[data-id=resultsCount]").textContent`); - pages = pages.split(' / ')[1]; - book.publisher = publisher; - book.pubDate = datePublished; - book.pages = pages; + book.publisher = await driver.executeScript(`return document.querySelector("span[itemprop=publisher]").textContent`).catch(e=>0); + book.pubDate = await driver.executeScript(`return document.querySelector("span[itemprop=datePublished]").textContent`).catch(e=>0); + let pages = await driver.executeScript(`return document.querySelector("span[data-id=resultsCount]").textContent`).catch(e=>0); + if (pages) { book.pages = pages.split(' / ')[1]; } }); } @@ -573,6 +548,7 @@ try { await downloadFile(book, url); console.log(`涓嬭浇瀹屾垚: ${book.id} ${book.title}`); + console.log('finish: '+JSON.stringify(book)); } catch (e) { } successCount++; // 绛変竴娈垫椂闂村啀涓嬩竴涓� -- Gitblit v1.9.1