import * as fs from "fs";
|
import xlsx from "node-xlsx";
|
import axios from "axios";
|
import { HttpsProxyAgent } from "https-proxy-agent";
|
|
/* ---------- axios through a local proxy ---------- */

// Agent that sends HTTPS traffic through the proxy at http://127.0.0.1:10809.
const httpsAgent = new HttpsProxyAgent("http://127.0.0.1:10809");

// Shared axios instance: axios' own `proxy` option is switched off and the
// agent above is used for HTTPS requests instead.
const myAxios = axios.create({ httpsAgent, proxy: false });
|
|
/* ----------- logging --------------- */

// Append-mode write stream for the log file; assigned by initLogger().
let logFile;

/**
 * Replaces console.log with a wrapper that prefixes every message with the
 * local date/time, prints it through the original console.log and appends the
 * same line to ./logs/book-list-logs.log.
 *
 * The ./logs directory is created when it does not exist. Calling this
 * function again after the logger has been set up does nothing, so the wrapper
 * is never stacked and the log stream is never reopened.
 */
function initLogger() {
  if (logFile) {
    return;
  }

  const originalLog = console.log;

  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync('./logs', { recursive: true });
  logFile = fs.createWriteStream(`./logs/book-list-logs.log`, { flags: 'a', encoding: 'utf8' });

  console.log = function (...args) {
    const line = `${new Date().toLocaleString()} ${args.join(' ')}`;
    originalLog(line);
    logFile.write(`${line}\n`);
  };
}
|
|
/**
 * Returns a random floating-point number derived from Math.random().
 * @param {number} min lower bound
 * @param {number} max upper bound
 * @returns {number} `min` plus a random fraction of `max - min`
 */
function getRandomNumber(min, max) {
  const span = max - min;
  return span * Math.random() + min;
}
|
|
/**
 * Waits for the given time.
 * @param {number} ms delay passed to setTimeout, in milliseconds
 * @returns {Promise<void>} promise resolved by the timer callback
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
/**
 * Lists the consecutive values from `start` to `end`, both included.
 * @param {number} start first value
 * @param {number} end last value
 * @returns {number[]} `start`, `start + 1`, ... up to `end`; empty when `end < start`
 */
function genYears(start, end) {
  const count = end - start + 1;
  const years = [];
  for (const offset of Array.from({ length: count }).keys()) {
    years.push(start + offset);
  }
  return years;
}
|
|
/**
 * Calls `func` and awaits its result, calling it again after a pause whenever
 * it throws or rejects.
 *
 * `maxTry` counts the attempts allowed after the first one fails, so `func`
 * runs at most `maxTry + 1` times.
 *
 * @param {Function} func operation to run; its return value is awaited
 * @param {number} [maxTry=3] number of further attempts allowed after a failure
 * @param {number} [delay=3000] pause in milliseconds before each further attempt
 * @returns {Promise<*>} the value produced by the first successful call
 * @throws the error of the last attempt once no further attempts are left
 */
async function retry(func, maxTry = 3, delay = 3000) {
  let retriesLeft = maxTry;
  for (;;) {
    try {
      return await func();
    } catch (err) {
      if (!(retriesLeft > 0)) {
        throw err;
      }
    }
    await sleep(delay);
    retriesLeft -= 1;
  }
}
|
|
/**
 * Builds the `filter_map` object for the search request.
 *
 * Without a year it reproduces the previous fixed filter (2023 through 2024).
 * With a year it restricts the results to that single year.
 *
 * @param {string} code first-title letter to include
 * @param {number} [year] single year to restrict the results to
 * @returns {object} filter map ready to be JSON-encoded
 */
function buildFilterMap(code, year) {
  // NOTE(review): a single year is sent as one key carrying both bounds
  // (`["gte", "lte"]`), because two bounds cannot share one key otherwise.
  // Confirm against the archive.org search service that a constraint array
  // is accepted.
  const yearFilter = year == null
    ? { 2023: "gte", 2024: "lte" }
    : { [year]: ["gte", "lte"] };
  return { year: yearFilter, firstTitle: { [code]: "inc" } };
}

/**
 * Requests one page of the archive.org "books" collection listing, sorted by
 * title, through the shared axios instance.
 *
 * @param {number} pageSize number of hits per page
 * @param {number} page page number to request
 * @param {string} code first-title letter used as filter
 * @param {number} [year] year to restrict the results to; when omitted the
 *   former fixed range 2023-2024 is used
 * @returns {Promise<object>} the axios response
 */
async function getBookList(pageSize, page, code, year) {
  const filterMap = encodeURIComponent(JSON.stringify(buildFilterMap(code, year)));
  const url = `https://archive.org/services/search/beta/page_production/?user_query=&page_type=collection_details&page_target=books&hits_per_page=${pageSize}&page=${page}&filter_map=${filterMap}&sort=titleSorter%3Aasc&aggregations=false&uid=R%3A1e845903aec74dee14bd-S%3A8cde5bf234b86bf96a75-P%3A1-K%3Ah-T%3A1718106108852`;
  return myAxios.get(url);
}

/**
 * Downloads every result page for one year / first-letter combination and
 * appends one row per hit to `bookList`.
 *
 * A page that still fails after the retries is logged and skipped; the page
 * counter always advances, so the loop cannot spin on the same page.
 *
 * @param {number} year year being fetched
 * @param {string} code first-title letter being fetched
 * @param {Array<Array>} bookList rows collected so far (mutated in place)
 * @returns {Promise<void>}
 */
async function collectBooks(year, code, bookList) {
  const pageSize = 100;
  let page = 1;
  let total = 0;

  do {
    console.log(`正在获取 ${year} 年 ${code} 分类 ${page} 页`);

    let resp;
    try {
      resp = await retry(() => getBookList(pageSize, page, code, year));
    } catch (e) {
      console.log(`获取失败:${year} 年 ${code} 分类 ${page} 页 - ${e?.message ?? e}`);
    }

    if (resp) {
      const { total: reportedTotal, hits } = resp.data.response.body.hits;
      total = reportedTotal;
      for (const hit of hits) {
        const { identifier, title, creator } = hit.fields;
        // Join only when creator is an array; any other value is used as-is.
        const author = Array.isArray(creator) ? creator.join(", ") : creator;
        console.log(`${identifier} | ${title} | ${author ?? ''}`);
        // The Year column holds the year the request was filtered by.
        bookList.push([title, author, year, null, null]);
      }
    }

    page += 1;
    await sleep(getRandomNumber(3000, 80000));

    // `page` now points at the next page, so `page - 1` pages were requested.
  } while (pageSize * (page - 1) < total);
}

/**
 * Crawls the listing for every year from 1950 to 2024 and every first letter
 * A-Z, then writes the collected rows to an Excel workbook in the current
 * directory. Nothing is written when no book was collected.
 *
 * An error thrown while crawling is reported with console.error and stops the
 * crawl; rows collected up to that point are still saved.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const years = genYears(1950, 2024);
  const codeList = [..."ABCDEFGHIJKLMNOPQRSTUVWXYZ"];
  const bookList = [["Title", "Author", "Year", "Publisher", "ISBN"]];

  try {
    for (const year of years) {
      for (const code of codeList) {
        await collectBooks(year, code, bookList);
      }
    }
  } catch (e) {
    console.error(e);
  }

  // The first row is the header, so it does not count as a book.
  const bookCount = bookList.length - 1;
  if (bookCount === 0) {
    console.log("没有获取到书籍");
    return;
  }

  const range = `${years[0]}-${years[years.length - 1]}`;
  console.log(`获取完成,${range} A-Z,获取到书籍 ${bookCount} 条`);

  // Save to Excel.
  const buffer = xlsx.build([{ name: "Sheet1", data: bookList }]);
  fs.writeFileSync(`./书单 ${range} A-Z.xlsx`, buffer);
}
|
|
|
// Script entry point: install the file logger, run the crawl, and close the
// log stream once main() has settled, whether it resolved or rejected.
initLogger();

main().finally(function closeLogFile() {
  logFile.close();
});
|