feat(urtrust): 添加众诚保险爬虫功能

This commit is contained in:
huzhengrong 2025-10-28 10:28:41 +08:00
parent e4679caf0b
commit 4754ef3d49
2 changed files with 206 additions and 0 deletions

View File

@ -81,5 +81,21 @@ module.exports = {
log_file: "./logs/cic-combined.log", log_file: "./logs/cic-combined.log",
time: true, time: true,
}, },
{// Urtrust Insurance (众诚保险) spider process entry
// NOTE(review): the option names below look like PM2 app settings — confirm
// against the process manager that actually loads this file.
name: "urtrust-spider",
// Entry script started for this process.
script: "./service/urtrust.js",
instances: 1,
autorestart: true,
watch: false,
max_memory_restart: "300M",
// Environment variables passed to the spider process.
env: {
NODE_ENV: "production",
SPIDER_NAME: "urtrust",
},
// Separate error, stdout and combined log files for this spider.
error_file: "./logs/urtrust-error.log",
out_file: "./logs/urtrust-out.log",
log_file: "./logs/urtrust-combined.log",
time: true,
},
], ],
}; };

190
service/urtrust.js Normal file
View File

@ -0,0 +1,190 @@
// 众诚保险
import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall, keywordsInclude } from "../utils.js";
import config from "../config.js";
import { SQLiteMessageQueue } from "../sqlite.js";
// Label under which this spider's announcements are stored and queued.
const SPIDER_NAME = "众诚保险";
// Page size requested from the list API; also used to derive the page count.
const PAGE_SIZE = 10;

/**
 * Spider for Urtrust Insurance (众诚保险) announcements on www.urtrust.com.cn.
 *
 * On construction it decides between a one-off full crawl (nothing stored yet
 * for this spider) and the periodic incremental crawl, then keeps polling.
 *
 * NOTE(review): the class is still named PICC although it crawls urtrust —
 * presumably copied from another spider. Rename it together with the
 * `new PICC()` call at the bottom of the file.
 */
class PICC {
  constructor() {
    // Accumulates matched announcements during the full crawl.
    this.info = [];
    console.log(`众诚保险 爬虫启动...`);
    this.queue = new SQLiteMessageQueue();
    // Not awaited on purpose: start() catches and logs its own errors.
    this.start();
  }

  /** Runs init() and logs any startup failure instead of letting it escape. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error(`启动失败:`, err);
    }
  }

  /**
   * Chooses the crawl mode: incremental when announcements already exist for
   * this spider, otherwise a full crawl first.
   */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider(SPIDER_NAME);
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: walks the list page by page, collects every matching item
   * into this.info, persists and queues them, then switches to increment().
   */
  async fullFetch() {
    console.log(`开始全量爬取...`);
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        // Stop at the last reported page or at the configured page cap.
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        // NOTE(review): presumably called with the final page's result only —
        // confirm against loopCall, otherwise that page is pushed twice.
        complete: (result) => {
          this.info.push(...result.info);
          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
          try {
            if (this.info.length > 0) {
              this.queue.saveAnnouncements(SPIDER_NAME, this.info);
              this.queue.addMessage(SPIDER_NAME, this.info);
            }
          } catch (error) {
            console.error(`数据库操作失败:`, error);
          }
        },
      });
    } catch (error) {
      console.error(`全量爬取失败:`, error);
    }
    console.log(`开始增量爬取...`);
    // Awaited, as in init(), so the promise is not left floating.
    await this.increment();
  }

  /**
   * Incremental crawl: repeatedly fetches pages, stores and queues items not
   * seen before, and decides which page to request next.
   */
  async increment() {
    console.log(`-开始增量爬取模式每5分钟检查一次新数据...`);
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        readyForNext: (pagenumber, result) => {
          try {
            const newInfo = this.queue.filterNewAnnouncements(
              SPIDER_NAME,
              result.info
            );
            if (newInfo.length === 0) {
              console.log(`没有发现新数据,继续监控...`);
              return 1;
            }
            console.log(`发现 ${newInfo.length} 条新数据`);
            this.queue.saveAnnouncements(SPIDER_NAME, newInfo);
            this.queue.addMessage(SPIDER_NAME, newInfo);
            // A page made only of new items means older pages may hold more;
            // any overlap with stored items means we caught up, so restart.
            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
          } catch (error) {
            console.error(`数据库操作失败:`, error);
            // Always hand back a valid page number so the loop keeps polling
            // from the first page after a database failure.
            return 1;
          }
        },
      });
    } catch (error) {
      console.error(`增量爬取失败:`, error);
    }
  }

  /**
   * Fetches one list page and keeps only the items whose title matches the
   * configured keywords.
   *
   * @param {number} [pagenumber=1] 1-based page to request.
   * @returns {Promise<{pages: number, info: Array<object>}>} Total page count
   *   and the matched items; `{ pages: 0, info: [] }` when the request failed
   *   or the response has an unexpected shape.
   */
  async getInfo(pagenumber = 1) {
    console.log(`正在获取第 ${pagenumber} 页数据...`);
    const [error, response] = await this.getList(pagenumber);
    if (error) {
      console.error(`获取页面数据失败:`, error);
      return { pages: 0, info: [] };
    }

    const payload = response && response.data;
    if (!payload || !Array.isArray(payload.pageList)) {
      // Treat a malformed body like a failed request instead of throwing.
      console.error(
        `获取页面数据失败:`,
        new Error("响应缺少 data.pageList 字段")
      );
      return { pages: 0, info: [] };
    }

    // A missing or non-numeric total counts as 0 so `pages` is never NaN.
    const total = Number(payload.totalCount) || 0;
    const pages = Math.ceil(total / PAGE_SIZE);

    const info = [];
    for (const item of payload.pageList) {
      if (!keywordsInclude(item.articleTitle)) {
        continue;
      }
      console.log(`处理项目:`, item.id, item.articleTitle);
      info.push({
        id: item.id,
        name: item.articleTitle,
        publishTime: timestampToDate(
          new Date(item.releaseTime).getTime(),
          true
        ),
        // The list API exposes no deadline that this spider reads.
        endTime: null,
        urls: `https://www.urtrust.com.cn${item.menuUrl}/#${item.url}`,
      });
    }
    return { pages, info };
  }

  /**
   * Requests one page of the article list.
   *
   * @param {number} pagenumber 1-based page to request.
   * @returns {Promise<[Error|null, object|null]>} `[null, body]` on success,
   *   `[error, null]` on a transport failure or a non-success response code.
   */
  getList(pagenumber) {
    return axios({
      // NOTE(review): the query-string values look like tokens captured from
      // a browser session; presumably they expire — confirm how to refresh.
      url: "https://www.urtrust.com.cn/ow-cust-api/column/findArticleList?t=9-SJsUVh-LZotQ0Vy9i6U&5PDyYRnw=0vlUyYqlqWDHq5Uz6G99vC9P9iVbOuWOzV.RT70H49C3AGVwJNk_7LfhYRCLm_CJzBFZX00Hj7lcJNrCC4NeHlhvIAZjk8050ZMyfSw9d2tLT01bZauKxLkc7psII4VPq4UvSRsxbjYPq8pKK2oGxvUKEEziDo6ff",
      data: {
        menuCode: "010606",
        currentPage: pagenumber,
        pageSize: PAGE_SIZE,
      },
      method: "post",
    })
      .then((res) => {
        const result = res.data || {};
        if (result.responseMsg === "成功" && result.responseCode === "0000") {
          return [null, result];
        }
        // Keep the server's code and message so the caller's log is useful.
        return [
          new Error(
            `接口返回异常: responseCode=${result.responseCode}, responseMsg=${result.responseMsg}`
          ),
          null,
        ];
      })
      .catch((err) => [err, null]);
  }
}
// Start the spider at module load: the constructor logs a banner, creates the
// message queue and calls start(), so the instance does not need to be kept.
new PICC();