import xlsx from "node-xlsx";
|
import * as fs from 'fs';
|
import * as path from 'path';
|
|
/**
 * Splits the first worksheet of `./书单200万/books-0-100w.xlsx` into several
 * smaller workbooks.
 *
 * The first row of the sheet is removed before chunking and is not repeated
 * in the outputs. Each output workbook holds at most 100000 rows, keeps only
 * the first cell of every row, and is written to `./split-excel/<index>.xlsx`
 * where `<index>` starts at 0.
 *
 * @throws {Error} Whatever `xlsx.parse`, `xlsx.build` or the `fs` calls throw.
 */
function split() {
    const outDir = './split-excel/';
    const size = 100000;

    const workSheets = xlsx.parse('./书单200万/books-0-100w.xlsx');
    const sheet = workSheets[0];

    // Drop the header row so that only data rows are chunked.
    sheet.data.shift();

    // writeFileSync does not create missing parent directories.
    fs.mkdirSync(outDir, { recursive: true });

    const chunkCount = Math.ceil(sheet.data.length / size);
    for (let i = 0; i < chunkCount; i++) {
        const datas = sheet.data
            .slice(i * size, i * size + size)
            .map((row) => [row[0]]);
        const buffer = xlsx.build([{ name: "Sheet1", data: datas }]);
        // The synchronous API takes no callback; failures are thrown instead.
        fs.writeFileSync(`${outDir}${i}.xlsx`, buffer);
    }
}
|
// split();
|
|
/* const files = fs.readFileSync('D:/out.txt', 'utf8').split('\n');
|
|
const workSheets = xlsx.parse('C:/Users/lyg/Documents/WeChat Files/wxid_e8swvjxjvxz321/FileStorage/File/2024-07/【反馈客户】7月批次书单 - 已撞库.xlsx');
|
const datas = workSheets[0].data.slice(1);
|
const isbnList = [];
|
const idMap = {};
|
for (const file of files) {
|
const [id,ext]=file.split('.');
|
idMap[id] = file;
|
}
|
const result = [workSheets[0].data[0]];
|
for (const row of datas) {
|
const id = `${row[0]}`;
|
if (idMap[id]) {
|
result.push(row);
|
row[12]=idMap[id];
|
}
|
row[1] = `${row[1]}`;
|
isbnList.push(row[1]);
|
}
|
const buffer = xlsx.build([{ name: 'Sheet1', data: result }]);
|
fs.writeFileSync('./【反馈客户】7月批次书单 - 已撞库 - 已下载书单2.xlsx', buffer, (err) => { });
|
fs.writeFileSync('./isbn.txt', isbnList.join('\n'), (err) => { }); */
|
|
/*
|
import axios from "axios";
|
import { HttpsProxyAgent } from "https-proxy-agent";
|
async function retry(func, maxTry = 3) {
|
try {
|
return await func();
|
} catch (e) {
|
if (maxTry > 0) {
|
return await retry(func, maxTry - 1);
|
} else {
|
throw e;
|
}
|
}
|
}
|
const httpsAgent = new HttpsProxyAgent(`http://127.0.0.1:10809`);
|
const myAxios = axios.create({
|
proxy: false,
|
httpsAgent,
|
});
|
const text = fs.readFileSync("./urls.txt", 'utf8');
|
const urls = text.split('\n').slice(0,400);
|
const urlGroup = [];
|
for (let i = 0; i < urls.length; i += 10) {
|
urlGroup.push(urls.slice(i, i + 10));
|
}
|
const failedUrls = [];
|
for (const group of urlGroup) {
|
|
console.log();
|
await Promise.all(group.map((url) => retry(async () => {
|
const resp = await myAxios.get(url, { responseType: 'arraybuffer', timeout: 10000 });
|
const buffer = Buffer.from(resp.data, 'binary');
|
const filename = path.basename(url);
|
fs.writeFileSync(`./torrents/${filename}`, buffer, (err) => { });
|
}, 3).catch((e) => {
|
console.error(e);
|
failedUrls.push(url);
|
})
|
));
|
}
|
fs.writeFileSync('./failed-urls.txt', failedUrls.join('\n'), (err) => { }); */
|
|
|
/**
 * Writes to `./nofile-books2.xlsx` every data row of the first input workbook
 * whose first-cell value (compared as a string) does not occur in the first
 * column of the second input workbook.
 *
 * The first row of each input sheet is skipped, and the output sheet has no
 * header row.
 *
 * @throws {Error} Whatever `xlsx.parse`, `xlsx.build` or `fs.writeFileSync` throw.
 */
function excelResolve() {
    const sheet1 = xlsx.parse("D:\\书单\\7月 - 已撞库.notmatch.xlsx.result.xlsx")[0];
    const sheet2 = xlsx.parse("D:\\书单\\booklist\\已下载图书-1722496661396.xlsx")[0];

    sheet1.data.shift();
    sheet2.data.shift();

    // A Set (rather than a plain object) cannot produce false hits on keys
    // inherited from Object.prototype such as "constructor".
    const knownIds = new Set();
    for (const row of sheet2.data) {
        knownIds.add(String(row[0]));
    }

    const data = [];
    for (const row of sheet1.data) {
        if (!knownIds.has(String(row[0]))) {
            data.push(row);
        }
    }

    const buffer = xlsx.build([{ name: "Sheet1", data: data }]);
    // The synchronous API takes no callback; failures are thrown instead.
    fs.writeFileSync('./nofile-books2.xlsx', buffer);
}
|
|
// excelResolve();
|
/**
 * Writes to `D:\书单\ids4.txt` the distinct lines of `D:\书单\ids.txt` that do
 * not occur in `D:\书单\ids2.txt`, one per line, in first-seen order.
 *
 * Carriage returns are removed from both inputs before they are split on
 * line feeds.
 *
 * @throws {Error} Whatever the `fs` read/write calls throw.
 */
function test() {
    const readLines = (file) => fs.readFileSync(file, 'utf8').replace(/\r/g, '').split('\n');

    const ids1 = new Set(readLines('D:\\书单\\ids.txt'));
    // Set membership avoids false hits on keys inherited from
    // Object.prototype (e.g. a line reading "constructor").
    const ids2 = new Set(readLines('D:\\书单\\ids2.txt'));

    const result = [...ids1].filter((id) => !ids2.has(id));

    // The synchronous API takes no callback; failures are thrown instead.
    fs.writeFileSync('D:\\书单\\ids4.txt', result.join('\n'));
}
|
|
// test();
|
|
// [id, fileName] pairs appended by listFile() for every entry it moves.
const result = [];

/**
 * Moves entries out of the sub-directories of `E:/books` into `f:/book3`,
 * giving each one a new name built from the next unused id in `ids3.txt`.
 *
 * Top-level entries of `E:/books` that `fs.statSync` reports as files are
 * skipped; every entry listed inside the remaining ones is moved to
 * `f:/book3/<id><ext>`, where `<ext>` is the entry's own extension (including
 * the dot, or empty when it has none). Processing stops once the ids are used
 * up. Each move appends `[id, fileName]` to the module-level `result` array.
 *
 * @throws {Error} Whatever the `fs` calls throw.
 */
function listFile() {
    const dstPath = 'f:/book3';
    const dir = 'E:/books';

    // Strip CRs (as the other line readers in this file do) and skip blank
    // lines so that no target name ends in "\r" or has an empty id.
    const ids = fs.readFileSync("ids3.txt", 'utf8')
        .replace(/\r/g, '')
        .split("\n")
        .filter((id) => id !== '');

    // NOTE(review): source and destination are on different drive letters;
    // rename is expected to fail with EXDEV across volumes, hence the
    // copy + unlink fallback. Confirm on the target machine.
    const move = (from, to) => {
        try {
            fs.renameSync(from, to);
        } catch (err) {
            if (err.code !== 'EXDEV') {
                throw err;
            }
            fs.copyFileSync(from, to);
            fs.unlinkSync(from);
        }
    };

    // Index cursor instead of ids.shift(), which is O(n) per call.
    let next = 0;

    for (const file of fs.readdirSync(dir)) {
        if (next >= ids.length) {
            break;
        }
        const filePath = path.join(dir, file);
        if (fs.statSync(filePath).isFile()) {
            continue;
        }
        for (const file2 of fs.readdirSync(filePath)) {
            if (next >= ids.length) {
                break;
            }
            const id = ids[next];
            next += 1;
            // path.extname handles multi-dot and dot-less names correctly.
            const fileName = `${id}${path.extname(file2)}`;
            move(path.join(filePath, file2), path.join(dstPath, fileName));
            result.push([id, fileName]);
        }
    }
}
|
// listFile();
|
// const buf = xlsx.build([{ name: "Sheet1", data: result }]);
|
// fs.writeFileSync("result.xlsx", buf);
|
|
/**
 * Builds `D:\书单\7月已下载书单4.xlsx` from the first sheet of
 * `D:\书单\booklist\7月书单已下载4.xlsx`, enriching its rows with values from
 * two other workbooks and with file names listed in `7filenames4.txt`.
 *
 * Rows are matched on their first cell, compared as a string. Rows of the two
 * lookup workbooks (first row of each skipped) are queued per id, and the file
 * names are queued per id taken from the text before the first dot. For every
 * data row of the target sheet that still has a queued lookup row:
 *   - cells 1, 2 and 3 are overwritten with cells 2, 4 and 1 of that lookup row;
 *   - cell 4 is overwritten with the next queued file name for the id, if any;
 *   - the row is emitted only when cell 4 ends up truthy.
 * Target rows without a queued lookup row are dropped. The target sheet's
 * first row is copied through as the output header.
 *
 * @throws {Error} Whatever `xlsx.parse`, `xlsx.build` or the `fs` calls throw.
 */
function mergeExcel() {
    const sheet0 = xlsx.parse("D:\\书单\\【反馈客户】7月批次书单 - 已撞库.xlsx")[0];
    const sheet1 = xlsx.parse("D:\\书单\\清单第二批0723-已撞库.xlsx")[0];
    const sheet2 = xlsx.parse("D:\\书单\\booklist\\7月书单已下载4.xlsx")[0];
    const fileNames = fs.readFileSync("7filenames4.txt", 'utf8').replace(/\r/g, '').split('\n');

    // Maps (not plain objects) so that ids such as "constructor" cannot
    // resolve to members inherited from Object.prototype.
    const filesById = new Map();
    for (const name of fileNames) {
        const id = name.split('.')[0];
        if (!filesById.has(id)) {
            filesById.set(id, []);
        }
        filesById.get(id).push(name);
    }

    sheet0.data.shift();
    sheet1.data.shift();

    // Rows of the second lookup workbook are queued ahead of the first one's.
    const rowsById = new Map();
    for (const row of sheet1.data.concat(sheet0.data)) {
        const id = String(row[0]);
        if (!rowsById.has(id)) {
            rowsById.set(id, []);
        }
        rowsById.get(id).push(row);
    }

    const header = sheet2.data.shift();
    const data = [header];

    for (const row of sheet2.data) {
        const id = String(row[0]);

        const candidates = rowsById.get(id);
        if (!candidates?.length) {
            continue;
        }
        // Each lookup row is consumed at most once.
        const row2 = candidates.shift();
        row[1] = row2[2];
        row[2] = row2[4];
        row[3] = row2[1];

        const files = filesById.get(id);
        if (files?.length) {
            row[4] = files.shift();
        }

        if (row[4]) {
            data.push(row);
        }
    }

    const buffer = xlsx.build([{ name: "Sheet1", data }]);
    fs.writeFileSync("D:\\书单\\7月已下载书单4.xlsx", buffer);
}
|
|
// mergeExcel();
|
|
|
/**
 * Writes to `D:\书单\7月未下载书单.xlsx` the first row of the first sheet of
 * `D:\书单\7月.xlsx` followed by every later row whose first cell (compared
 * as a string) equals one of the lines of `D:/书单/ids4.txt`.
 *
 * Carriage returns are removed from the id file before it is split on line
 * feeds.
 *
 * @throws {Error} Whatever `xlsx.parse`, `xlsx.build` or the `fs` calls throw.
 */
function genExcel() {
    const sheet1 = xlsx.parse("D:\\书单\\7月.xlsx")[0];

    // Set membership avoids false hits on keys inherited from
    // Object.prototype, which a plain-object lookup would report as present.
    const ids = new Set(
        fs.readFileSync('D:/书单/ids4.txt', 'utf8').replace(/\r/g, '').split('\n'),
    );

    const header = sheet1.data.shift();
    const data = [header];
    for (const row of sheet1.data) {
        if (ids.has(String(row[0]))) {
            data.push(row);
        }
    }

    const buffer = xlsx.build([{ name: "Sheet1", data }]);
    fs.writeFileSync("D:\\书单\\7月未下载书单.xlsx", buffer);
}
|
|
// genExcel();
|
|
/**
 * Copies a filtered version of the first sheet of workbook `src` to `dst`.
 *
 * The sheet's first row is always kept. A later row is kept only when the
 * cell at index `matchColIdx`, converted to a string, equals one of the lines
 * of the text file `filenames` (carriage returns removed, split on line feeds).
 *
 * @param {string} src - Path of the workbook to read.
 * @param {string} dst - Path of the workbook to write.
 * @param {string} filenames - Path of the UTF-8 text file holding one accepted value per line.
 * @param {number} matchColIdx - Index of the cell compared against those lines.
 */
function genDownloadExcel(src, dst, filenames, matchColIdx) {
    const firstSheet = xlsx.parse(src)[0];

    const listing = fs.readFileSync(filenames, 'utf8');
    const accepted = new Set(listing.replace(/\r/g, '').split('\n'));

    const rows = firstSheet.data;
    // Row 0 is passed through unconditionally; filtering starts at row 1.
    const data = [rows[0]];
    for (let i = 1; i < rows.length; i += 1) {
        const candidate = rows[i];
        const key = candidate[matchColIdx] + '';
        if (accepted.has(key)) {
            data.push(candidate);
        }
    }

    const workbook = xlsx.build([{ name: "Sheet1", data }]);
    fs.writeFileSync(dst, workbook);
}
|
|
// genDownloadExcel(
|
// "D:/书单/8月/书单1-2【889200】.已匹配.xlsx",
|
// "D:/书单/8月/书单1-2【889200】.已匹配.已下载.xlsx",
|
// "D:/书单/8月/8月书单1和2已下载文件.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/8月/书单3【306455】.已匹配.xlsx",
|
// "D:/书单/8月/书单3【306455】.已匹配.已下载.xlsx",
|
// "D:/书单/8月/书单3【306455】.已匹配.已下载.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/1清单第二批0723-已撞库-zlib.result.xlsx",
|
// "D:/书单/1清单第二批0723-已撞库-zlib.已下载.xlsx",
|
// "D:/书单/7月已下载文件.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/1【反馈客户】7月批次书单 - 已撞库-zlib.result.xlsx",
|
// "D:/书单/1【反馈客户】7月批次书单 - 已撞库-zlib.已下载.xlsx",
|
// "D:/书单/7月已下载文件.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/8月/书单1-2.xlsx.result.xlsx.result.xlsx",
|
// "D:/书单/8月/书单1-2【889200】.已匹配.已下载5.xlsx",
|
// "D:/书单/8月/8月书单1和2已下载文件5.txt",
|
// 4);
|
|
|
// genDownloadExcel(
|
// "D:/书单/8月/书单1-2.xlsx.result.xlsx.result.xlsx",
|
// "D:/书单/8月/书单1-2【889200】.已匹配.已下载6.xlsx",
|
// "D:/书单/8月/8月书单1和2已下载文件6.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/8月/书单1-2.xlsx.result.xlsx.result.xlsx",
|
// "D:/书单/8月/书单1-2【889200】.已匹配.已下载7.xlsx",
|
// "D:/书单/8月/8月书单1和2已下载文件7.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/8月/书单1-2.xlsx.result.xlsx.result.xlsx",
|
// "D:/书单/8月/书单1-2【889200】.已匹配.已下载8.xlsx",
|
// "D:/书单/8月/8月书单1和2已下载文件8.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/8月/书单1-2.xlsx.result.xlsx.result.xlsx",
|
// "D:/书单/8月/书单1-2【889200】.已匹配.已下载9.xlsx",
|
// "D:/书单/8月/8月书单1和2已下载文件9.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/8月/书单1-2.xlsx.result.xlsx.result.xlsx",
|
// "D:/书单/8月/书单1-2【889200】.已匹配.已下载10.xlsx",
|
// "D:/书单/8月/8月书单1和2已下载文件10.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/8月/书单1-2.xlsx.result.xlsx.result.xlsx",
|
// "D:/书单/8月/书单1-2【889200】.已匹配.已下载11.xlsx",
|
// "D:/书单/8月/8月书单1和2已下载文件11.txt",
|
// 4);
|
// genDownloadExcel(
|
// "D:/书单/8月/书单1-2.xlsx.result.xlsx.result.xlsx",
|
// "D:/书单/8月/书单1-2【889200】.已匹配.已下载【35766】12.xlsx",
|
// "D:/书单/8月/8月书单1和2已下载文件【35766】12.txt",
|
// 4);
|
// Currently active run: filter the workbook below by the names listed in the
// accompanying .txt file, matching on the cell at index 4 of each row.
{
    const src = "D:/书单/8月/书单1-2.xlsx.result.xlsx.result.xlsx";
    const dst = "D:/书单/8月/书单1-2【889200】.已匹配.已下载【37093】13.xlsx";
    const filenames = "D:/书单/8月/书单1-2【889200】.已匹配.已下载【37093】13.txt";
    const matchColIdx = 4;

    genDownloadExcel(src, dst, filenames, matchColIdx);
}
|
// genDownloadExcel(
|
// "D:/书单/8月/书单-929648本.xlsx",
|
// "D:/书单/8月/书单-929648本.已匹配.已下载【491122】.xlsx",
|
// "D:/书单/8月/书单-929648本.已下载【491122】.txt",
|
// 4);
|
|
// genDownloadExcel(
|
// "D:/书单/8月/书单-929648本.xlsx",
|
// "D:/书单/8月/书单-929648本.已匹配.已下载【359949】.xlsx",
|
// "D:/书单/8月/书单-929648本.已下载【359949】.txt",
|
// 4);
|