New file |
| | |
| | | import * as fs from "fs"; |
| | | import xlsx from "node-xlsx"; |
| | | import axios from "axios"; |
| | | import { HttpsProxyAgent } from "https-proxy-agent"; |
| | | |
/* ---------- axios proxy ---------- */
// Agent that tunnels HTTPS requests through the local HTTP proxy on port 10809.
const httpsAgent = new HttpsProxyAgent("http://127.0.0.1:10809");

// Dedicated axios instance: its built-in `proxy` option is switched off and
// the agent above is supplied for HTTPS connections instead.
const myAxios = axios.create({ httpsAgent, proxy: false });
| | | |
/* ----------- logging ----------- */
// Write stream for the log file; assigned by initLogger().
let logFile;

/**
 * Replaces `console.log` with a wrapper that prefixes each message with the
 * local date/time, prints it through the original `console.log`, and appends
 * the same line to `./logs/book-list-logs.log` (the directory is created if
 * it does not exist yet).
 */
function initLogger() {
  const logDir = './logs';
  const printToConsole = console.log;

  const dirExists = fs.existsSync(logDir);
  if (!dirExists) {
    fs.mkdirSync(logDir, { recursive: true });
  }

  // Flag 'a' appends, so earlier runs' log lines are kept.
  logFile = fs.createWriteStream('./logs/book-list-logs.log', { flags: 'a', encoding: 'utf8' });

  console.log = (...parts) => {
    const line = `${new Date().toLocaleString()} ${parts.join(' ')}`;
    printToConsole(line);
    logFile.write(`${line}\n`);
  };
}
| | | |
/**
 * Picks a random floating-point number between two bounds.
 * @param {number} min Lower bound (inclusive).
 * @param {number} max Upper bound (exclusive, because Math.random() is always below 1).
 * @returns {number} A random value in the range [min, max).
 */
function getRandomNumber(min, max) {
  const span = max - min;
  return min + span * Math.random();
}
| | | |
/**
 * Pauses an async flow for the given time.
 * @param {number} ms Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
| | | |
/**
 * Builds the list of consecutive years from `start` to `end`, both inclusive.
 * @param {number} start First year of the range.
 * @param {number} end Last year of the range.
 * @returns {number[]} `[start, start + 1, ..., end]`; empty when `end < start`.
 */
function genYears(start, end) {
  const count = end - start + 1;
  const toYear = (_unused, offset) => start + offset;
  return Array.from({ length: count }, toYear);
}
| | | |
/**
 * Runs `func` and, when it throws or rejects, tries again after a pause.
 *
 * `maxTry` counts the retries made after the first attempt, so `func` is
 * invoked at most `maxTry + 1` times.
 *
 * @param {() => any} func Operation to run; may be sync or async.
 * @param {number} [maxTry=3] Number of retries allowed after the first failure.
 * @param {number} [delay=3000] Pause in milliseconds before each retry.
 * @returns {Promise<any>} Result of the first successful call.
 * @throws Whatever the final attempt threw once the retries are used up.
 */
async function retry(func, maxTry = 3, delay = 3000) {
  let retriesLeft = maxTry;
  for (;;) {
    try {
      return await func();
    } catch (err) {
      // No retries left: surface the error of this last attempt.
      if (!(retriesLeft > 0)) {
        throw err;
      }
      retriesLeft -= 1;
      await sleep(delay);
    }
  }
}
| | | |
/**
 * Requests one page of the archive.org "books" collection listing.
 *
 * The query filters on year >= 2023 and year <= 2024 plus a `firstTitle`
 * filter for `code`, and sorts by title ascending.
 *
 * @param {number} pageSize Value sent as `hits_per_page`.
 * @param {number} page Page number sent as `page`.
 * @param {string} code Key for the `firstTitle` filter (presumably the title's
 *   first letter — confirm against the API).
 * @returns {Promise<object>} The axios response for the GET request.
 */
async function getBookList(pageSize, page, code) {
  // Build the filter as data and let JSON.stringify / URLSearchParams do the
  // escaping, so a `code` containing quotes, `&` or `%` cannot corrupt the query.
  const filterMap = {
    year: { 2023: "gte", 2024: "lte" },
    firstTitle: { [code]: "inc" },
  };
  const query = new URLSearchParams([
    ["user_query", ""],
    ["page_type", "collection_details"],
    ["page_target", "books"],
    ["hits_per_page", String(pageSize)],
    ["page", String(page)],
    ["filter_map", JSON.stringify(filterMap)],
    ["sort", "titleSorter:asc"],
    ["aggregations", "false"],
    // NOTE(review): looks like a client/session id captured from a browser request — confirm it is still required.
    ["uid", "R:1e845903aec74dee14bd-S:8cde5bf234b86bf96a75-P:1-K:h-T:1718106108852"],
  ]);
  return await myAxios.get(`https://archive.org/services/search/beta/page_production/?${query}`);
}
| | | |
/**
 * Crawls the archive.org book listing for every first-letter bucket A–Z and
 * writes the collected rows to an .xlsx workbook in the current directory.
 *
 * Each bucket is fetched page by page (100 hits per page) with a random pause
 * between requests. A page that still fails after the retries is logged and
 * skipped. Any other error stops the crawl, and whatever was collected up to
 * that point is still saved.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // The years only label the log lines and the output file. The year filter
  // itself is part of the query that getBookList sends and already spans the
  // whole range, so looping per year would re-download identical pages and
  // duplicate every row.
  const years = genYears(2023, 2024);
  const firstYear = years[0];
  const lastYear = years[years.length - 1];
  const codeList = [..."ABCDEFGHIJKLMNOPQRSTUVWXYZ"];
  const bookList = [["id", "isbn", "主标题", "副标题", "作者", "出版社", "出版时间", "中图法分类", "格式", "语言", "简介", "页数"]];
  const pageSize = 100;

  try {
    for (const code of codeList) {
      let page = 1;
      let total = 0;
      do {
        console.log(`正在获取 ${firstYear}-${lastYear} 年 ${code} 分类 ${page} 页`);
        let resp;
        try {
          resp = await retry(() => getBookList(pageSize, page, code));
        } catch (e) {
          // Best effort: record why the page failed and move on to the next one.
          console.log(`获取失败:${firstYear}-${lastYear} 年 ${code} 分类 ${page} 页 ${e?.message ?? e}`);
        }

        if (resp) {
          const { total: reportedTotal, hits } = resp.data.response.body.hits;
          total = reportedTotal;
          for (const hit of hits) {
            const { identifier, title, creator } = hit.fields;
            // `creator` may be missing; also tolerate a plain string instead of an array.
            const author = Array.isArray(creator) ? creator.join(", ") : creator;
            console.log(`${identifier} | ${title} | ${author ?? ''}`);
            bookList.push([identifier, "", title, "", author, "", "", "", "", "English"]);
          }
        }

        // Always advance, even after a failure, so a bad page cannot be retried forever.
        // If the very first page failed, `total` is still 0 and the loop ends for this code.
        page++;
        // NOTE(review): the upper bound is 80 s — confirm it is not a typo for 8000.
        await sleep(getRandomNumber(3000, 80000));

        // `page` now points at the NEXT page; keep going while the pages
        // requested so far (page - 1 of them) do not cover `total` hits.
      } while (pageSize * (page - 1) < total);
    }
  } catch (e) {
    console.error(e);
  }

  const bookCount = bookList.length - 1; // the first row is the header
  if (bookCount > 0) {
    console.log(`获取完成,${firstYear}-${lastYear} A-Z,获取到书籍 ${bookCount} 条`);
    // Save to Excel.
    const buffer = xlsx.build([{ name: "Sheet1", data: bookList }]);
    fs.writeFileSync(`./书单 ${firstYear}-${lastYear} A-Z.xlsx`, buffer);
  } else {
    console.log("没有获取到书籍");
  }
}
| | | |
// Script entry point: install the file logger, run the crawl, then close the
// log stream whether the crawl succeeded or not.
initLogger();
main()
  .catch((err) => {
    // main() saves the workbook outside its own try/catch, so a failure there
    // would otherwise surface as an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
  })
  .finally(() => {
    logFile.close();
  });