🐛 fix(ecosystem): 移除中国人寿保险爬虫配置和相关脚本文件

This commit is contained in:
huzhengrong 2025-10-28 17:04:16 +08:00
parent 41fc312702
commit 103c023a4f
2 changed files with 0 additions and 213 deletions

View File

@ -49,22 +49,6 @@ module.exports = {
log_file: "./logs/cpic-combined.log", log_file: "./logs/cpic-combined.log",
time: true, time: true,
}, },
// {//中国人寿保险爬虫
// name: "china-life-spider",
// script: "./service/china-life.js",
// instances: 1,
// autorestart: true,
// watch: false,
// max_memory_restart: "300M",
// env: {
// NODE_ENV: "production",
// SPIDER_NAME: "china-life",
// },
// error_file: "./logs/china-life-error.log",
// out_file: "./logs/china-life-out.log",
// log_file: "./logs/china-life-combined.log",
// time: true,
// },
{//中华保险爬虫 {//中华保险爬虫
name: "cic-spider", name: "cic-spider",
script: "./service/cic.js", script: "./service/cic.js",

View File

@ -1,197 +0,0 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall, keywordsInclude } from "../utils.js";
import config from "../config.js";
import { SQLiteMessageQueue } from "../sqlite.js";
import puppeteer from 'puppeteer';
/**
 * Spider for the China Life Insurance (中国人寿保险) procurement announcement page.
 *
 * Constructing an instance immediately starts crawling: for every entry in
 * `jsonMap` it runs either a full crawl (no announcements stored yet for that
 * spider name) or an incremental crawl (announcements already stored).
 * Page content is rendered with puppeteer; results are persisted and
 * published through the SQLite-backed queue.
 */
class DF {
  constructor() {
    // One entry per target site: `info` accumulates scraped records during a
    // full crawl, `options` is handed to getInfo/getList via loopCall.
    this.jsonMap = [
      {
        name: "中国人寿保险",
        info: [],
        options: {
          name: "中国人寿保险",
          url: "https://cpmsx.e-chinalife.com/xycms/#/procurementAnncmnt",
          homeIndex: "",
        },
      },
    ];
    console.log("中国人寿保险 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    // start() handles its own errors, so the promise is intentionally not awaited.
    this.start();
  }

  /**
   * Entry point: runs init() and logs (rather than propagates) any failure.
   * @returns {Promise<void>}
   */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /**
   * Chooses the crawl mode for every configured site: incremental when the
   * queue already holds announcements for the spider name, full otherwise.
   * @returns {Promise<void>}
   */
  async init() {
    for (const item of this.jsonMap) {
      const announcements = this.queue.getAnnouncementsBySpider(item.name);
      if (announcements.length > 0) {
        this.loopFetchIncrement(item);
      } else {
        this.loopFetchFull(item);
      }
    }
  }

  /**
   * Full crawl: pages through the list via loopCall, collecting every record
   * into `props.info`; on completion saves and publishes them, then switches
   * to incremental crawling.
   * @param {{name: string, info: object[], options: object}} props - jsonMap entry.
   */
  loopFetchFull(props) {
    try {
      loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        additional: props.options,
        // Stop at the last reported page or at the configured page limit.
        stopWhen: (pagenumber, result) => {
          return (
            pagenumber >= result.pages || pagenumber >= config.pageNumberLimit
          );
        },
        readyForNext: (pagenumber, result) => {
          props.info.push(...result.info);
          return pagenumber + 1;
        },
        // NOTE(review): both readyForNext and complete append result.info;
        // whether the final page can be appended twice depends on loopCall's
        // contract, which is not visible here — confirm.
        complete: (result) => {
          props.info.push(...result.info);
          console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`);
          try {
            if (props.info.length > 0) {
              this.queue.saveAnnouncements(props.name, props.info);
              this.queue.addMessage(props.name, props.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
          this.loopFetchIncrement(props);
        },
      });
    } catch (error) {
      console.error(`${props.options.name}全量爬取失败:`, error);
    }
  }

  /**
   * Incremental crawl: repeatedly fetches the list and stores/publishes only
   * the records the queue reports as new.
   * @param {{name: string, info: object[], options: object}} props - jsonMap entry.
   */
  loopFetchIncrement(props) {
    try {
      loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        additional: props.options,
        readyForNext: (pagenumber, result) => {
          try {
            const newInfo = this.queue.filterNewAnnouncements(
              props.name,
              result.info
            );
            if (newInfo.length === 0) {
              console.log("没有发现新数据,继续监控...");
              return 1; // nothing new: restart from the first page
            }
            console.log(`发现 ${newInfo.length} 条新数据`);
            this.queue.saveAnnouncements(props.name, newInfo);
            this.queue.addMessage(props.name, newInfo);
            // Every record on this page was new, so the next page may hold
            // more; otherwise older records were reached and we restart at 1.
            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
          } catch (error) {
            console.error("数据库操作失败:", error);
            // Always hand back a valid page number; previously this path
            // returned undefined as the next page.
            return 1;
          }
        },
      });
    } catch (error) {
      console.error(`${props.options.name}增量爬取失败:`, error);
    }
  }

  /**
   * Fetches one list page and extracts its announcement records.
   *
   * @param {number} [pagenumber=1] - Page to fetch (see the note in getList).
   * @param {{name: string, url: string}} config - Site options (shadows the
   *   module-level config inside this method).
   * @returns {Promise<{pages: number, info: object[]}>} `pages` is 0 when the
   *   page could not be loaded; `info` is empty when extraction fails.
   */
  async getInfo(pagenumber = 1, config) {
    console.log(`${config.name}--获取第 ${pagenumber} 页数据...`);
    const [error, page, browser] = await this.getList(pagenumber, config);
    if (error) {
      // Errors thrown while loading a page usually carry a message rather
      // than an HTTP-style status, so fall back to it.
      console.error("获取页面数据失败:", error.status ?? error.message ?? error);
      return { pages: 0, info: [] };
    }

    // The list has no end-time field; default to 5 pages.
    const pages = 5;
    let info = [];
    try {
      // The callback is serialized and executed inside the page, so it cannot
      // touch variables from this scope; it must RETURN plain data instead.
      info = await page.$$eval(".cardPadding", (items) =>
        items.map((item) => {
          const link = item.querySelector("a");
          const title = link.innerHTML;
          return {
            id: title, // the anchor text doubles as the record id
            name: title,
            publishTime: item.querySelector(".releaseDate").innerHTML,
            endTime: null,
            urls: link.getAttribute("href"),
          };
        })
      );
    } catch (err) {
      console.log("getInfo失败:", err);
    } finally {
      // Release the browser whether or not extraction succeeded.
      await browser.close();
    }
    return { pages, info };
  }

  /**
   * Launches a browser and opens the configured list URL.
   *
   * NOTE(review): `pagenumber` is currently unused — every call loads the
   * same URL. An earlier draft jumped to a page by filling the
   * '.el-pagination__jump input' field and pressing Enter; pagination still
   * needs to be implemented.
   *
   * @param {number} pagenumber - Requested page (not yet honoured).
   * @param {{url: string}} config - Site options.
   * @returns {Promise<Array>} `[null, page, browser]` on success (the caller
   *   must close `browser`), `[error, null]` on failure.
   */
  async getList(pagenumber, config) {
    let browser = null;
    try {
      browser = await puppeteer.launch();
      const page = await browser.newPage();
      await page.goto(config.url);
      console.log("页面加载完成-page", page);
      return [null, page, browser];
    } catch (error) {
      console.log("getList失败:", error);
      if (browser) {
        // Best-effort cleanup so a failed navigation does not leak the
        // launched browser process.
        await browser.close().catch((closeError) => {
          console.error("关闭浏览器失败:", closeError);
        });
      }
      return [error, null];
    }
  }
}
// Instantiate the spider at module load; the DF constructor itself kicks off crawling via this.start().
new DF();