insurance-spider/geely.js

238 lines
7.1 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
// import cheerio from "cheerio";
// import { messageQueue } from "./msgManager.js";
/**
 * Crawler for tender notices published on the Geely procurement portal
 * (glzb.geely.com).
 *
 * Constructing an instance starts crawling immediately. If the queue already
 * holds announcements for the "吉利" spider, only incremental polling runs;
 * otherwise a full crawl runs first and incremental polling follows.
 *
 * The network helpers (`getList`, `getNoticeUrl`) never reject: they resolve
 * to an `[error, value]` pair where exactly one side is non-null.
 */
class GEELY {
  /**
   * Configures the endpoint and the message queue, then starts crawling.
   */
  constructor() {
    this.url = "https://glzb.geely.com/gpmp/notice/listnotice";
    // Page size sent to the list endpoint; also used to derive the page count,
    // so the two can never drift apart.
    this.pageSize = 20;
    this.info = [];
    console.log("GEELY 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    // Deliberately not awaited (constructors cannot be async); start() catches
    // its own errors, so no rejection can escape.
    this.start();
  }

  /**
   * Entry point: runs init() and logs any failure instead of rejecting.
   * @returns {Promise<void>}
   */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /**
   * Chooses the crawl mode based on whether announcements were stored before.
   * @returns {Promise<void>}
   */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider("吉利");
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: walks the list pages, stores every matching notice, then
   * hands over to incremental polling.
   *
   * NOTE(review): assumes loopCall invokes `readyForNext` between pages and
   * `complete` once `stopWhen` returns true — confirm against utils.js.
   * @returns {Promise<void>}
   */
  async fullFetch() {
    console.log("开始全量爬取...");
    // Start from a clean slate so a repeated full crawl cannot duplicate entries.
    this.info = [];
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        // Stop on the last page reported by the API or at the configured cap.
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          this.info.push(...result.info);
          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
          try {
            this.queue.saveAnnouncements("吉利", this.info);
            this.queue.addMessage("吉利", this.info);
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
        },
      });
    } catch (error) {
      console.error("全量爬取失败:", error);
    }
    console.log("开始增量爬取...");
    // Awaited (as in init()) so the promise is not left floating.
    await this.increment();
  }

  /**
   * Incremental polling: repeatedly fetches list pages and forwards only the
   * notices the queue has not seen yet.
   *
   * The value returned from `readyForNext` is the page requested next:
   * the following page while a page consists entirely of new notices,
   * otherwise page 1 again.
   * @returns {Promise<void>}
   */
  async increment() {
    console.log("开始增量爬取模式每5分钟检查一次新数据...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        readyForNext: (pagenumber, result) => {
          try {
            const newInfo = this.queue.filterNewAnnouncements(
              "吉利",
              result.info
            );
            if (newInfo.length === 0) {
              console.log("没有发现新数据,继续监控...");
              return 1;
            }
            console.log(`发现 ${newInfo.length} 条新数据`);
            this.queue.saveAnnouncements("吉利", newInfo);
            this.queue.addMessage("吉利", newInfo);
            // A page made up only of new notices means older pages may hold
            // more unseen data; any overlap means we have caught up.
            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
          } catch (error) {
            console.error("数据库操作失败:", error);
            // Always hand back a valid page number; returning undefined here
            // would corrupt the page counter on the next iteration.
            return 1;
          }
        },
      });
    } catch (error) {
      console.error("增量爬取失败:", error);
    }
  }

  /**
   * Fetches one list page and resolves the attachment URLs of every notice
   * that is still open (end time today or later) and was published within
   * the last 30 days.
   *
   * @param {number} [pagenumber=1] 1-based page index.
   * @returns {Promise<{pages: number, info: Array<object>}>} Total page count
   *   and the matching notices; `{ pages: 0, info: [] }` when the list
   *   request fails.
   */
  async getInfo(pagenumber = 1) {
    const today = new Date().setHours(0, 0, 0, 0);
    const earliestPublish = today - 30 * 24 * 60 * 60 * 1000;
    console.log(`正在获取第 ${pagenumber} 页数据...`);

    const [listError, listResult] = await this.getList(pagenumber);
    if (listError) {
      console.error("获取页面数据失败:", listError);
      return { pages: 0, info: [] };
    }

    // Tolerate a payload without `data`, `items` or `total` instead of throwing.
    const data = listResult.data ?? {};
    const items = Array.isArray(data.items) ? data.items : [];
    const pages = Math.ceil((Number(data.total) || 0) / this.pageSize);

    const info = [];
    // Detail requests stay sequential, as in the original implementation.
    for (const item of items) {
      const isOpen = item.endtime >= today;
      const isRecent = item.publishtime >= earliestPublish;
      if (!isOpen || !isRecent) {
        continue;
      }
      console.log("处理项目:", item.pjtnoticeid, item.pjtnoticename);
      const [noticeError, urls] = await this.getNoticeUrl(item.pjtnoticeid);
      if (noticeError) {
        // Skip this notice but keep processing the rest of the page.
        console.error("获取公告详情失败:", noticeError);
        continue;
      }
      info.push({
        id: item.pjtnoticeid,
        name: item.pjtnoticename,
        publishTime: timestampToDate(item.publishtime),
        endTime: timestampToDate(item.endtime),
        urls,
      });
    }
    return { pages, info };
  }

  /**
   * Requests one page of the notice list.
   *
   * @param {number} pagenumber 1-based page index.
   * @returns {Promise<[Error|null, object|null]>} `[null, body]` when the API
   *   answers with code "success", otherwise `[error, null]`. Never rejects.
   */
  async getList(pagenumber) {
    try {
      const res = await axios({
        url: this.url,
        method: "get",
        params: {
          pagesize: this.pageSize,
          pagenumber,
          publishstatus: 2,
          bidcategoryid: 1442,
          iflongpro: 0,
          _: Date.now(), // cache buster
        },
      });
      const result = res.data;
      if (result?.code !== "success") {
        return [
          new Error(`listnotice request failed with code "${result?.code}"`),
          null,
        ];
      }
      return [null, result];
    } catch (err) {
      return [err, null];
    }
  }

  /**
   * Loads a notice and resolves the preview URL of each of its attachments.
   * Attachments whose preview lookup fails are logged and skipped; a notice
   * without attachments yields an empty list.
   *
   * @param {number|string} id Notice id (`pjtnoticeid`).
   * @returns {Promise<[Error|null, Array|null]>} `[null, urls]` on success,
   *   otherwise `[error, null]`. Never rejects.
   */
  async getNoticeUrl(id) {
    try {
      const res = await axios({
        url: "https://glzb.geely.com/gpmp/notice/query",
        method: "get",
        // Let axios encode the query string instead of interpolating it.
        params: { _: Date.now(), pjtnoticeid: id },
      });
      const result = res.data;
      if (result?.code !== "success") {
        return [
          new Error(`notice query failed with code "${result?.code}"`),
          null,
        ];
      }

      const attachments = Array.isArray(result.data?.attachs)
        ? result.data.attachs
        : [];
      // Preview lookups are independent, so run them in parallel and keep
      // whatever succeeds.
      const outcomes = await Promise.allSettled(
        attachments.map((item) =>
          axios({
            url: "https://glzb.geely.com/pub/file/info/preview",
            method: "get",
            params: {
              name: item.attachname,
              downloadUrl: item.downloadUrl,
              previewUrl: item.previewUrl,
              attachname: item.attachname,
              _: Date.now(),
            },
          })
        )
      );

      const urls = [];
      for (const outcome of outcomes) {
        if (
          outcome.status === "fulfilled" &&
          outcome.value.data?.code === "success"
        ) {
          urls.push(outcome.value.data.data);
        } else {
          const reason =
            outcome.status === "rejected"
              ? outcome.reason
              : outcome.value.data?.code;
          console.error("获取附件预览失败:", reason);
        }
      }
      return [null, urls];
    } catch (err) {
      console.error("err:", err);
      return [err, null];
    }
  }
}
// Start the crawler when this module is loaded: the GEELY constructor itself
// calls start(), so creating the instance is all that is needed.
new GEELY();