From ce8cb9c851fa66c7c2902ceb57e369d3cecf1a28 Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: Thu, 01 Aug 2024 01:48:56 +0800
Subject: [PATCH] 复制bt下载的文件,bt任务控制

---
 src/book-download.mjs |  188 ++++++-----------------------------------------
 1 files changed, 24 insertions(+), 164 deletions(-)

diff --git a/src/book-download.mjs b/src/book-download.mjs
index 861e2bf..386e84e 100644
--- a/src/book-download.mjs
+++ b/src/book-download.mjs
@@ -48,135 +48,6 @@
   return pages;
 }
 
-function allWords() {
-  const words = {};
-  wordsjs.usPlaces = usPlaceList;
-  wordsjs.usPeronNameList = usPeronNameList;
-  for (const key in wordsjs.default) {
-    if (Object.hasOwnProperty.call(wordsjs.default, key)) {
-      for (const word of wordsjs.default[key]) {
-        words[word] = true;
-      }
-    }
-  }
-  return words;
-}
-
-const wordsMap = allWords();
-
-/**
- * 缁熻鍗曡瘝鏁伴噺
- * @param {string} str 瀛楃涓�
- * @returns 鍗曡瘝鏁伴噺
- */
-function countWordSize(str) {
-  let count = 0;
-  str = str.replace(/[ ]{2,}/g, ' ');
-  for (let i = 0; i < str.length; i++) {
-    if (str[i] === ' ') {
-      count++;
-    }
-  }
-  return count;
-}
-
-/**
- * 鑾峰彇閿欒鍗曡瘝姣斾緥
- * @param {string} text 鏂囨湰
- * @returns 閿欒鍗曡瘝姣斾緥
- */
-function incorrectWordRatio(text) {
-  text = text.replace(/[ ]+/g, ' ').replace(/([a-zA-Z])[\.!?,;"')]/g, "$1");
-  const words = text.split(' ');
-  const incorrectWordCnt = words.filter(word => !wordsMap[word.toLocaleLowerCase()] && !/\d+/g.test(word)).length;
-  return incorrectWordCnt / words.length;
-}
-
-/**
- * 绗﹀彿鍗犳瘮 0 ~ 1
- * @param {string} text 鏂囨湰
- */
-function symbolRatio(text) {
-  // 闈炲瓧姣嶆暟瀛楀瓧绗﹀崰姣�
-  return (text.match(/[^a-zA-Z0-9 ]/g) || []).length / text.length;
-}
-
-/**
- * 娓呯悊鏂囨湰
- * @param {string} text 瑕佹竻鐞嗙殑鏂囨湰
- */
-function cleanText(text) {
-  text = text.replace(/(\r)/g, '');
-  const googlePage = text.substring(0, 10000);
-  if (googlePage.includes('google')) {
-    text = googlePage.replace(/^(.|\n)*books[ ]*\.[ ]*google[ ]*\.[ ]*com/ig, '') + text.substring(10000);
-  }
-  // if (!/.{170,}/g.test(text) || text.includes('google')) {
-  text = text.replace(/[ ]{2,}/g, ' ')
-  if (!/.{170,}/g.test(text)) {
-    // 姣忚涓嶈秴杩�170涓瓧绗�
-    text = text.replace(/(.{170,})\n/g, '$1');
-  }
-  text = text.replace(/\n+/g, '\n');
-  text = text.replace(/-\n/g, '-');
-  const lines = text.split('\n');
-  const result = [];
-  for (const line of lines) {
-    // 绗﹀彿姣斿お楂樼殑涓嶈
-    const incorrectRatio = incorrectWordRatio(line);
-    if (symbolRatio(line) > 0.2) {
-      if (incorrectRatio > 0.65) {
-        continue;
-      }
-    }
-    // 鍘婚櫎绌烘牸鍚� 杩炵画閲嶅鍗曚釜瀛楃3娆″強浠ヤ笂涓嶈
-    const wordSize = countWordSize(line);
-    if (/([\D])\1{2,}/.test(line.replace(/[ ]+/g, ''))) {
-      if (wordSize < 5 || incorrectRatio > 0.65) {
-        continue;
-      }
-    }
-    // 杩炵画涓変釜鏍囩偣绗﹀彿鍙婁互涓�,閿欒鐜囧ぇ浜�0.65涓嶈
-    if (incorrectRatio > 0.65 && /([\.,'";:|!@#$%^&*\(\)<>?`~鈥�*卢禄芦]){3,}/.test(line)) {
-      continue;
-    }
-    // 鍗曡瘝鏁伴噺澶皯鐨勪笉瑕�
-    if (wordSize > 5 && incorrectRatio > 0.65) {
-      continue;
-    }
-    // 鏈塯oogle鐨勪笉瑕�
-    if (/.*(google).*/ig.test(line)) {
-      continue;
-    }
-    // 鍙湁涓�涓瓧绗︿笉瑕�
-    const ret = line.trim().replace(/[鈻犫��*卢禄芦^-]/g, '');
-    if (ret.length <= 1) {
-      continue;
-    }
-    if (ret == 'Digitized by') {
-      continue;
-    }
-    result.push(ret);
-  }
-  text = result.join('\n');
-  // }
-  return text;
-}
-
-/**
- * 瑙e帇鏂囨湰鏂囦欢
- * @param {string} zipFile 鍘嬬缉鏂囦欢璺緞
- * @param {string} txtFile 鏂囨湰鏂囦欢璺緞
- */
-function unzip(zipFile, txtFile) {
-  const tmpdir = `./tmpdir/${threadId}`;
-  execFileSync('./7za.exe', ['x', '-aoa', zipFile, `-o${tmpdir}`])
-  const file = fs.readdirSync(tmpdir).map(file => ({ size: fs.statSync(`${tmpdir}/${file}`), name: file }))
-    .sort((a, b) => a.size.size - b.size.size).pop();
-  fs.cpSync(`${tmpdir}/${file.name}`, txtFile, { overwrite: true });
-  fs.rmSync(`${tmpdir}`, { recursive: true });
-}
-
 /**
  * 鑾峰彇瑕佷笅杞界啛鍥句功淇℃伅
  * @param {number} startRow 璧峰琛岋紝鍖呭惈
@@ -210,23 +81,6 @@
   return books;
 }
 
-/**
- * 鏍煎紡鍖栧叧閿瓧
- * @param {string} text 瑕佹悳绱㈢殑鍏抽敭瀛�
- * @param {boolean} titleWithNumbers 鏄惁鏍囬涓寘鍚暟瀛�
- * @returns 澶勭悊鍚庣殑鍏抽敭瀛�
- */
-function formatKw(text, titleWithNumbers) {
-  if (titleWithNumbers) {
-    text = text;
-  } else {
-    text = text.replace(/[\d]/g, "");
-  }
-  text = text.split(' ').slice(0, 6).join("+");
-  return text;
-}
-
-
 async function sleep(ms) {
   return new Promise((resolve) => {
     setTimeout(resolve, ms);
@@ -251,17 +105,23 @@
  * @param {*} book 
  */
 async function getBookDetailPageUrl(book) {
-  const url = `https://libgen.vg/index.php?req=${book.title}&columns%5B%5D=t&topics%5B%5D=f&res=25&filesuns=all`;
+  const url = `https://libgen.rs/fiction/?${new URLSearchParams({ q: book.title, criteria: 'title', language: '', format: '' })}`;
   return await retry(async () => {
     const resp = await myAxios.get(url, { headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36' } })
-    const group = /.*href="(edition.php\?id=\d+)".*/g.exec(resp.data);
+    // Scrape the raw HTML with a regex rather than a cheerio selector: the first
+    // line containing a library.lol fiction link yields the detail-page URL, and
+    // a page without such a link yields ''. Dots are escaped to match literally.
+    const group = /.*href="(http:\/\/library\.lol\/fiction\/[0-9a-zA-Z]+)".*/g.exec(resp.data);
     if (group) {
-      return `https://libgen.vg/${group[1]}`;
+      return group[1];
     } else {
       return ''
     }
   })
-    .catch(() => '');
+    .catch((e) => {
+      console.error(e.message);
+      return '';
+    });
 }
 
 async function openBookDetailPage(book, detailPageUrl) {
@@ -270,7 +130,6 @@
     const resp = await myAxios.get(detailPageUrl, { headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36' } });
     const html = cheerio.load(resp.data);
     const trList = html('tr');
-    const files = [];
     let epubUrl = null;
     let pdfUrl = null;
     for (const tr of trList) {
@@ -304,9 +163,9 @@
 async function getDownloadUrl(book, url) {
   return await retry(async () => {
     const resp = await myAxios.get(url, { headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36' } });
-    const group = /.*href="(get.php\?md5=[0-9a-f]+.*)".*/g.exec(resp.data);
+    const group = /.*href="(\S+)".*>GET<.*/g.exec(resp.data);
     if (group) {
-      return `https://libgen.vg/${group[1]}`;
+      return `${group[1]}`;
     } else {
       return '';
     }
@@ -321,7 +180,7 @@
 async function downloadFile(book, url) {
   console.log(`涓嬭浇鏂囦欢: ${url}`);
   await retry(() => {
-    const timeoutTime = 10 * 60 * 1000;
+    const timeoutTime = 1 * 60 * 1000;
     const source = axios.CancelToken.source();
     const timeout = setTimeout(() => {
       source.cancel("timeout");
@@ -333,17 +192,18 @@
         let ext = response.headers['content-disposition'].split('filename=')[1].split('.').pop() ?? '';
         ext = ext.substring(0, ext.length - 1);
 
-        const filepath = `./downloads/${book.id} ${book.isbn}.${ext}`;
+        const filepath = `./downloads/${book.id}.${ext}`;
         book.url = url;
         if (fs.existsSync(filepath)) {
           book.state = `涓嬭浇瀹屾垚`;
           book.format = ext;
           book.file = filepath;
           console.log(`涓嬭浇瀹屾垚锛�${filepath}`);
+          resolve(true);
           return;
         }
         const stream = response.data;
-        const _filepath = `./downloads/${book.id} ${book.isbn}.${ext}`;
+        const _filepath = `./downloads/${book.id}.${ext}`;
         const out = fs.createWriteStream(_filepath);
         stream.pipe(out);
         stream.on("end", async () => {
@@ -352,7 +212,7 @@
           book.format = ext;
           book.file = filepath;
           book.url = url;
-          book.pages = await getPdfPages(filepath).catch(e => 0);
+          // book.pages = await getPdfPages(filepath).catch(e => 0);
           resolve(true);
         });
         stream.on("error", (err) => {
@@ -379,7 +239,7 @@
           reject(false);
         }
       }));
-  }).catch(e => {
+  }, 1).catch(e => {
     book.state = "涓嬭浇澶辫触";
     console.log(`涓嬭浇澶辫触: ${book.id} ${book.title} ${url}`);
     return false
@@ -439,12 +299,12 @@
     // 绛変竴娈垫椂闂村啀鎵撳紑璇︽儏椤�
     sleep(getRandomNumber(500, 1000));
     // 鎵撳紑璇︽儏椤碉紝骞惰幏鍙栦笅杞介摼鎺�
-    const filePageUrl = await openBookDetailPage(book, detailPageUrl);
-    if (!filePageUrl) {
-      console.log(`娌℃湁鏂囦欢: ${book.id} ${book.title}`);
-      continue;
-    }
-    const url = await getDownloadUrl(book, filePageUrl);
+    // const filePageUrl = await openBookDetailPage(book, detailPageUrl);
+    // if (!filePageUrl) {
+    //   console.log(`娌℃湁鏂囦欢: ${book.id} ${book.title}`);
+    //   continue;
+    // }
+    const url = await getDownloadUrl(book, detailPageUrl);
     if (!url) {
       console.log(`娌℃湁鏂囦欢: ${book.id} ${book.title}`);
       continue;

--
Gitblit v1.9.1