From 4754ef3d4920642e7436945e1cef69b487da0e78 Mon Sep 17 00:00:00 2001
From: huzhengrong
Date: Tue, 28 Oct 2025 10:28:41 +0800
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat(urtrust):=20=E6=B7=BB=E5=8A=A0?=
 =?UTF-8?q?=E4=BC=97=E8=AF=9A=E4=BF=9D=E9=99=A9=E7=88=AC=E8=99=AB=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ecosystem.config.cjs |  16 ++++
 service/urtrust.js   | 212 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 228 insertions(+)
 create mode 100644 service/urtrust.js

diff --git a/ecosystem.config.cjs b/ecosystem.config.cjs
index de4a37f..03d401f 100644
--- a/ecosystem.config.cjs
+++ b/ecosystem.config.cjs
@@ -81,5 +81,21 @@ module.exports = {
       log_file: "./logs/cic-combined.log",
       time: true,
     },
+    { // Urtrust Insurance (众诚保险) spider
+      name: "urtrust-spider",
+      script: "./service/urtrust.js",
+      instances: 1,
+      autorestart: true,
+      watch: false,
+      max_memory_restart: "300M",
+      env: {
+        NODE_ENV: "production",
+        SPIDER_NAME: "urtrust",
+      },
+      error_file: "./logs/urtrust-error.log",
+      out_file: "./logs/urtrust-out.log",
+      log_file: "./logs/urtrust-combined.log",
+      time: true,
+    },
   ],
 };
diff --git a/service/urtrust.js b/service/urtrust.js
new file mode 100644
index 0000000..120718c
--- /dev/null
+++ b/service/urtrust.js
@@ -0,0 +1,212 @@
+// Urtrust Insurance (众诚保险) announcement spider.
+import axios from "axios";
+import fs from "fs";
+import path from "path";
+import { timestampToDate, loopCall, keywordsInclude } from "../utils.js";
+import config from "../config.js";
+import { SQLiteMessageQueue } from "../sqlite.js";
+
+// Spider key passed to every queue/database call.
+const SPIDER_NAME = "众诚保险";
+// Page size sent to the list API and used to derive the page count.
+const PAGE_SIZE = 10;
+// Fail a hung request instead of letting it stall the polling loop.
+const REQUEST_TIMEOUT_MS = 30000;
+// NOTE(review): the query string looks like tokens captured from a browser
+// session; presumably they can expire -- confirm before relying on them.
+const LIST_URL =
+  "https://www.urtrust.com.cn/ow-cust-api/column/findArticleList?t=9-SJsUVh-LZotQ0Vy9i6U&5PDyYRnw=0vlUyYqlqWDHq5Uz6G99vC9P9iVbOuWOzV.RT70H49C3AGVwJNk_7LfhYRCLm_CJzBFZX00Hj7lcJNrCC4NeHlhvIAZjk8050ZMyfSw9d2tLT01bZauKxLkc7psII4VPq4UvSRsxbjYPq8pKK2oGxvUKEEziDo6ff";
+
+/**
+ * Crawls the Urtrust announcement list and queues the items whose title
+ * matches the configured keywords.
+ * NOTE(review): the class name looks inherited from another spider; it is
+ * kept unchanged here.
+ */
+class PICC {
+  constructor() {
+    this.info = [];
+    console.log(`众诚保险 爬虫启动...`);
+    this.queue = new SQLiteMessageQueue();
+    // Cannot be awaited in a constructor; start() catches and logs its own errors.
+    this.start();
+  }
+
+  /** Runs init() and logs a startup failure instead of throwing. */
+  async start() {
+    try {
+      await this.init();
+    } catch (err) {
+      console.error(`启动失败:`, err);
+    }
+  }
+
+  /**
+   * Picks the crawl mode: incremental when announcements for this spider
+   * are already stored, a full fetch otherwise.
+   */
+  async init() {
+    const announcements = this.queue.getAnnouncementsBySpider(SPIDER_NAME);
+    if (announcements.length > 0) {
+      await this.increment();
+    } else {
+      await this.fullFetch();
+    }
+  }
+
+  /**
+   * Full crawl: walks the list pages, stores every matching item, then
+   * switches to incremental mode.
+   */
+  async fullFetch() {
+    console.log(`开始全量爬取...`);
+    try {
+      await loopCall(this.getInfo.bind(this), {
+        time: config.fullFetchTime,
+        pagenumber: 1,
+        // Stop on the last page or at the configured page limit.
+        stopWhen: (pagenumber, result) => {
+          return (
+            pagenumber >= result.pages || pagenumber >= config.pageNumberLimit
+          );
+        },
+        readyForNext: (pagenumber, result) => {
+          this.info.push(...result.info);
+          return pagenumber + 1;
+        },
+        // NOTE(review): assumes loopCall hands the final page to `complete`
+        // only (not to `readyForNext` as well) -- confirm in utils.js.
+        complete: (result) => {
+          this.info.push(...result.info);
+          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
+          try {
+            if (this.info.length > 0) {
+              this.queue.saveAnnouncements(SPIDER_NAME, this.info);
+              this.queue.addMessage(SPIDER_NAME, this.info);
+            }
+          } catch (error) {
+            console.error(`数据库操作失败:`, error);
+          }
+        },
+      });
+    } catch (error) {
+      console.error(`全量爬取失败:`, error);
+    }
+    console.log(`开始增量爬取...`);
+    // Awaited so this promise also tracks the incremental loop.
+    await this.increment();
+  }
+
+  /**
+   * Incremental crawl: polls from page 1 and stores only the items the
+   * queue reports as new.
+   */
+  async increment() {
+    console.log(`-开始增量爬取模式,每5分钟检查一次新数据...`);
+    try {
+      await loopCall(this.getInfo.bind(this), {
+        time: config.incrementFetchTime,
+        pagenumber: 1,
+        readyForNext: (pagenumber, result) => {
+          try {
+            const newInfo = this.queue.filterNewAnnouncements(
+              SPIDER_NAME,
+              result.info
+            );
+            if (newInfo.length === 0) {
+              console.log(`没有发现新数据,继续监控...`);
+              return 1;
+            }
+            console.log(`发现 ${newInfo.length} 条新数据`);
+            this.queue.saveAnnouncements(SPIDER_NAME, newInfo);
+            this.queue.addMessage(SPIDER_NAME, newInfo);
+            // A page made only of new items: keep paging. Any known item
+            // on the page: restart from page 1.
+            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
+          } catch (error) {
+            console.error(`数据库操作失败:`, error);
+            // Always give loopCall an explicit page: restart from page 1.
+            return 1;
+          }
+        },
+      });
+    } catch (error) {
+      console.error(`增量爬取失败:`, error);
+    }
+  }
+
+  /**
+   * Fetches one list page and keeps the items whose title matches the keywords.
+   * @param {number} [pagenumber=1] Page to request; callers start at 1.
+   * @returns {Promise<{pages: number, info: Array<object>}>} Page count and
+   *   matching items; `{ pages: 0, info: [] }` when the request failed.
+   */
+  async getInfo(pagenumber = 1) {
+    console.log(`正在获取第 ${pagenumber} 页数据...`);
+    const [error, payload] = await this.getList(pagenumber);
+    if (error) {
+      console.error(`获取页面数据失败:`, error);
+      return { pages: 0, info: [] };
+    }
+    // Tolerate a success envelope whose data section is missing or partial.
+    const data = (payload && payload.data) || {};
+    const total = Number(data.totalCount) || 0;
+    const pages = Math.ceil(total / PAGE_SIZE);
+    const list = Array.isArray(data.pageList) ? data.pageList : [];
+
+    const info = [];
+    for (const item of list) {
+      if (!keywordsInclude(item.articleTitle)) {
+        continue;
+      }
+      console.log(`处理项目:`, item.id, item.articleTitle);
+      info.push({
+        id: item.id,
+        name: item.articleTitle,
+        publishTime: timestampToDate(
+          new Date(item.releaseTime).getTime(),
+          true
+        ),
+        // This spider never fills in an end time.
+        endTime: null,
+        urls: `https://www.urtrust.com.cn${item.menuUrl}/#${item.url}`,
+      });
+    }
+    return { pages, info };
+  }
+
+  /**
+   * Requests one page of the announcement list.
+   * @param {number} pagenumber Page to request; callers start at 1.
+   * @returns {Promise<[Error, null] | [null, object]>} `[error, null]` on
+   *   failure, `[null, body]` on success; the promise never rejects.
+   */
+  getList(pagenumber) {
+    return axios({
+      url: LIST_URL,
+      method: "post",
+      timeout: REQUEST_TIMEOUT_MS,
+      data: {
+        menuCode: "010606",
+        currentPage: pagenumber,
+        pageSize: PAGE_SIZE,
+      },
+    })
+      .then((res) => {
+        const result = res.data;
+        if (result.responseMsg === "成功" && result.responseCode === "0000") {
+          return [null, result];
+        }
+        // Keep the API's own code/message so the failure can be diagnosed.
+        return [
+          new Error(
+            `unexpected response: code=${result.responseCode}, msg=${result.responseMsg}`
+          ),
+          null,
+        ];
+      })
+      .catch((err) => [err, null]);
+  }
+}
+
+new PICC();