import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall, keywordsInclude } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";

// Spider name passed to every queue/database call made by this crawler.
const SPIDER_NAME = "中国人民保险";
// Origin shared by the list API, the connectivity probe and the detail links.
const SITE_ORIGIN = "https://ec.picc.com";
// Page size requested from the list API; also used to derive the page count.
const PAGE_SIZE = 10;
// Interval between two scheduled incremental runs.
const DAY_MS = 24 * 60 * 60 * 1000;
// Hour of day (local time) at which the first incremental run is scheduled.
const DAILY_RUN_HOUR = 9;
// Human-readable hints logged for well-known network error codes.
const NETWORK_ERROR_HINTS = new Map([
  ["ENOTFOUND", "DNS解析失败,请检查网络连接或域名是否正确"],
  ["ECONNREFUSED", "连接被拒绝,请检查服务器是否正常运行"],
  ["ECONNRESET", "连接被重置,请稍后重试"],
  ["ETIMEDOUT", "请求超时,请检查网络连接"],
]);

/**
 * Crawler for the announcement list served under https://ec.picc.com.
 *
 * On construction it either performs a full crawl (when the queue holds no
 * announcements for this spider yet) or goes straight to the incremental mode,
 * which re-checks the first list page once a day.
 */
class PICC {
  /**
   * @param {string} key - Label used as the prefix of every log line.
   * @param {string} categoryId - Value sent as `dto.categoryId` to the list API.
   */
  constructor(key, categoryId) {
    this.key = key;
    this.categoryId = categoryId;
    this.info = [];
    // Handle of the daily interval; null until the interval has been set up.
    this.dailyTimer = null;
    console.log(`中国人民保险-${key} 爬虫启动...`);
    this.queue = new SQLiteMessageQueue();
    this.start();
  }

  /**
   * Entry point invoked by the constructor; logs start-up failures instead of
   * letting them escape as an unhandled rejection.
   */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error(`启动失败:-${this.key}`, err);
    }
  }

  /**
   * Chooses the crawl mode: incremental when the queue already holds
   * announcements for this spider, full crawl otherwise.
   */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider(SPIDER_NAME);
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: pages through the list via `loopCall`, accumulates matching
   * items in `this.info`, persists them, then switches to incremental mode.
   */
  async fullFetch() {
    console.log(`${this.key}-开始全量爬取...`);
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          // NOTE(review): both readyForNext and complete append result.info.
          // Whether that can double-count a page depends on how loopCall
          // invokes the two callbacks, which is not visible here - confirm.
          this.info.push(...result.info);
          console.log(`${this.key}-爬取完成,共获取 ${this.info.length} 条有效数据`);
          try {
            if (this.info.length > 0) {
              this.queue.saveAnnouncements(SPIDER_NAME, this.info);
              this.queue.addMessage(SPIDER_NAME, this.info);
            }
          } catch (error) {
            console.error(`${this.key}-数据库操作失败:`, error);
          }
          // Once the full crawl is done, switch to the incremental mode. The
          // promise is handled explicitly so a failure cannot go unnoticed.
          this.increment().catch((error) => {
            console.error(`${this.key}-增量爬取失败:`, error.message);
          });
        },
      });
    } catch (error) {
      console.error(`${this.key}-全量爬取失败:`, error);
    }
  }

  /**
   * Incremental mode: waits until the next 09:00 (local time), runs one
   * incremental check, then hands over to the 24-hour interval.
   */
  async increment() {
    console.log(`${this.key}-开始增量爬取模式,每天9点检查一次新数据...`);

    const now = new Date();
    const nextRun = new Date();
    nextRun.setHours(DAILY_RUN_HOUR, 0, 0, 0);
    // If today's 09:00 has already passed, aim for tomorrow's.
    if (now > nextRun) {
      nextRun.setDate(nextRun.getDate() + 1);
    }
    const timeUntilNextRun = nextRun - now;
    console.log(`${this.key}-下次执行时间: ${nextRun.toString()}`);

    setTimeout(async () => {
      try {
        console.log("setTimeout-增量执行启动");
        await this.testNetworkConnection();
        await this.executeIncrement();
      } catch (error) {
        console.error(`${this.key}-增量爬取失败:`, error.message);
      } finally {
        // Keep the daily schedule alive whether or not the first run succeeded.
        this.scheduleDailyIncrement();
      }
    }, timeUntilNextRun);
  }

  /**
   * Probes the site with a HEAD request that is aborted after 5 seconds.
   *
   * @returns {Promise<boolean>} true when the request completed, false on any
   *   failure. A failure is only logged; callers proceed regardless.
   */
  async testNetworkConnection() {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 5000);
    try {
      await fetch(SITE_ORIGIN, {
        method: "HEAD",
        signal: controller.signal,
        mode: "no-cors", // The response is never inspected; this is only a reachability probe.
      });
      console.log(`${this.key}-网络连接测试通过`);
      return true;
    } catch (error) {
      console.warn(`${this.key}-网络连接测试失败:`, error.message);
      return false;
    } finally {
      // Always release the abort timer, including when the request failed early.
      clearTimeout(timeoutId);
    }
  }

  /**
   * One incremental pass: fetches the first list page, keeps the items the
   * queue reports as new, and stores/enqueues them.
   */
  async executeIncrement() {
    console.log(`${this.key}-开始执行增量爬取...`);

    let result;
    try {
      result = await this.getInfo(1);
    } catch (error) {
      console.error(`${this.key}-获取数据失败:`, error.message);
      this.logNetworkHint(error);
      return;
    }

    // getInfo reports a failed request through its `error` field.
    if (!result || result.error) {
      console.log(`${this.key}-获取数据失败:`, result ? result.error : "未知错误");
      return;
    }

    try {
      const newInfo = this.queue.filterNewAnnouncements(SPIDER_NAME, result.info);
      if (newInfo.length > 0) {
        console.log(`${this.key}-发现 ${newInfo.length} 条新数据`);
        this.queue.saveAnnouncements(SPIDER_NAME, newInfo);
        this.queue.addMessage(SPIDER_NAME, newInfo);
      } else {
        console.log(`${this.key}-没有发现新数据,继续监控...`);
      }
    } catch (error) {
      console.error(`${this.key}-数据库操作失败:`, error);
    }
  }

  /**
   * Starts the 24-hour interval that repeats the incremental pass. Calling it
   * again while an interval is already active is a no-op, so runs never stack.
   */
  scheduleDailyIncrement() {
    if (this.dailyTimer !== null) {
      return;
    }
    this.dailyTimer = setInterval(async () => {
      try {
        await this.executeIncrement();
      } catch (error) {
        console.error(`${this.key}-定时增量爬取失败:`, error);
      }
    }, DAY_MS);
    console.log(`${this.key}-已设置每天9点执行增量爬取`);
  }

  /**
   * Logs a hint for well-known network error codes; other errors are ignored.
   *
   * @param {{ code?: string } | null | undefined} err - Error to describe.
   */
  logNetworkHint(err) {
    const hint = NETWORK_ERROR_HINTS.get(err?.code);
    if (hint !== undefined) {
      console.log(`${this.key}-${hint}`);
    }
  }

  /**
   * Fetches one list page and keeps the rows whose title matches the keyword
   * filter and whose sale end time has not passed yet.
   *
   * @param {number} [pagenumber=1] - 1-based page index.
   * @returns {Promise<{pages: number, info: object[], error?: unknown}>}
   *   `pages` is the page count derived from the reported total, `info` the
   *   matching items. `error` is present only when the request failed, in
   *   which case `pages` is 0 and `info` is empty.
   */
  async getInfo(pagenumber = 1) {
    console.log(`${this.key}-正在获取第 ${pagenumber} 页数据...`);
    const [error, payload] = await this.getList(pagenumber);
    if (error) {
      console.error(`${this.key}-获取页面数据失败:`, error);
      return { pages: 0, info: [], error };
    }

    const { total, rows } = payload.res;
    const totalCount = Number(total);
    // A missing or non-numeric total must not yield NaN pages, otherwise the
    // `pagenumber >= result.pages` stop condition could never become true.
    const pages = Number.isFinite(totalCount) ? Math.ceil(totalCount / PAGE_SIZE) : 0;

    const info = [];
    for (const item of rows) {
      const endTime = timestampToDate(
        new Date(item.tenderFileSaleEndTime).getTime(),
        true
      );
      const isOpen = Boolean(endTime) && new Date(endTime).getTime() >= Date.now();
      if (keywordsInclude(item.title) && isOpen) {
        info.push({
          id: item.url,
          name: item.title,
          publishTime: timestampToDate(
            new Date(item.tenderFileSaleBeginTime).getTime(),
            true
          ),
          endTime,
          urls: `${SITE_ORIGIN}/cms/default/webfile${item.url}`,
        });
      }
    }
    return { pages, info };
  }

  /**
   * Requests one page of the announcement list.
   *
   * @param {number} pagenumber - 1-based page index.
   * @returns {Promise<[unknown, null] | [null, object]>} `[error, null]` on
   *   failure, `[null, responseBody]` on success. Never rejects.
   */
  async getList(pagenumber) {
    try {
      const res = await axios({
        url: `${SITE_ORIGIN}/cms/api/dynamicData/queryContentPage`,
        data: {
          dto: {
            categoryId: this.categoryId,
            city: "",
            county: "",
            purchaseMode: "",
            siteId: "725",
          },
          pageNo: pagenumber,
          pageSize: PAGE_SIZE,
        },
        method: "post",
        headers: {
          'Accept': 'application/json, text/javascript, */*; q=0.01',
          'Accept-Encoding': 'gzip, deflate, br, zstd',
          'Accept-Language': 'zh-CN,zh;q=0.9',
          'Connection': 'keep-alive',
          'Content-Type': 'application/json; charset=UTF-8',
          'Cookie': 'G_rbec_47_11_8080=22685.52745.19855.0000',
          'Origin': SITE_ORIGIN,
          'Referer': `${SITE_ORIGIN}/cms/default/webfile/ywgg1/index.html`,
          'Sec-Fetch-Dest': 'empty',
          'Sec-Fetch-Mode': 'cors',
          'Sec-Fetch-Site': 'same-origin',
          'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
          'X-Requested-With': 'XMLHttpRequest',
          'Sec-Ch-Ua': '"Google Chrome";v="141", "Not?A_Brand";v="8", "Chromium";v="141"',
          'Sec-Ch-Ua-Mobile': '?0',
          'Sec-Ch-Ua-Platform': "macOS",
        },
        timeout: 10000,
      });

      const result = res.data;
      // Validate the shape before touching rows so a malformed payload becomes
      // an explicit error instead of an incidental TypeError.
      const rows = Array.isArray(result?.res?.rows) ? result.res.rows : null;
      console.log(
        `${this.key}-then`,
        JSON.stringify((rows ?? []).map((item) => item.title), null, 2)
      );

      if (rows !== null && result.msg === "操作成功" && result.code === 0) {
        return [null, result];
      }
      return [
        new Error(`接口返回异常: code=${result?.code}, msg=${result?.msg}`),
        null,
      ];
    } catch (err) {
      console.log(`${this.key}-catch`, err.message);
      this.logNetworkHint(err);
      return [err, null];
    }
  }
}

new PICC("集中采购","211,213,214,215,216,217");
new PICC("分散采购","251,253,254,255,256,257");