From 55db56968004c60f3b28f0d649561d64069c1f15 Mon Sep 17 00:00:00 2001 From: huzhengrong Date: Thu, 23 Oct 2025 14:32:07 +0800 Subject: [PATCH] Refactor: Remove unused files and classes related to various crawlers (NIO, PICC, stats, Third, YiQi, YouZhiCai) to streamline the codebase. Update logging in PICC for better debugging. --- byd.js | 173 ---------------------- changan.js | 188 ------------------------ chery.js | 251 ------------------------------- df.js | 187 ------------------------ geely.js | 237 ------------------------------ greatWall.js | 234 ----------------------------- leapMotor.js | 193 ------------------------ nio.js | 170 --------------------- picc.js | 2 +- stats.js | 80 ---------- third.js | 309 --------------------------------------- yiqi.js | 199 ------------------------- youzhicai.js | 406 --------------------------------------------------- 13 files changed, 1 insertion(+), 2628 deletions(-) delete mode 100644 byd.js delete mode 100644 changan.js delete mode 100644 chery.js delete mode 100644 df.js delete mode 100644 geely.js delete mode 100644 greatWall.js delete mode 100644 leapMotor.js delete mode 100644 nio.js delete mode 100644 stats.js delete mode 100644 third.js delete mode 100644 yiqi.js delete mode 100644 youzhicai.js diff --git a/byd.js b/byd.js deleted file mode 100644 index 3ef15ee..0000000 --- a/byd.js +++ /dev/null @@ -1,173 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; - -class BYD { - constructor() { - this.info = []; - console.log("比亚迪 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("比亚迪"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - if (this.info.length > 0) { - this.queue.saveAnnouncements("比亚迪", this.info); - // this.writeFile(this.info); - this.queue.addMessage("比亚迪", this.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "比亚迪", - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // this.info.push(...newInfo); - this.queue.saveAnnouncements("比亚迪", newInfo); - // this.writeFile(this.info); - this.queue.addMessage("比亚迪", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - let total = result[1].data.total; - let pages = Math.ceil(total / 10); - let arr = result[1].data.records; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - let endTime = timestampToDate( - new Date(item.signUpEndTime).getTime(), - true - ); - // 命中关键词 - if ( - keywordsInclude(item.title) && - endTime && - +new Date(endTime) >= Date.now() - ) { - // console.log("处理项目:", item.sourcingId, item.title); - info.push({ - id: item.sourcingId, - name: item.title, - publishTime: timestampToDate( - new Date(item.tenderNoticePublishTime).getTime(), - true - ), - endTime: endTime, - urls: `https://spcn.byd.com/#/tender-detail?sourcingId=${item.sourcingId}`, - }); - } - } - return { pages, info }; - } - } - // 分页获取数据 - getList(pagenumber) { - return axios({ - url: "https://spcn.byd.com/api/srm-sou-sp/supplier/supplier/getTenderAnnouncementInfo", - data: { - pageNo: pagenumber, - pageSize: 10, - }, - method: "post", - }) - .then((res) => { - let result = res.data; - if (result.msg === "成功" && result.code === "000000") { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } -} - -new BYD(); diff --git a/changan.js b/changan.js deleted file mode 100644 index c47b911..0000000 --- a/changan.js +++ /dev/null @@ -1,188 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { - timestampToDate, - loopCall, - keywordsInclude, - // addToMessageQueue, -} from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import { messageQueue } from "./msgManager.js"; -// import cheerio from "cheerio"; - -class ChangAn { - constructor() { - // this.filepath = path.resolve("changan.json"); - this.info = []; - console.log("长安 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("长安"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - - // if (fs.existsSync(this.filepath)) { - // let data = fs.readFileSync(this.filepath, "utf-8"); - // this.info = data ? JSON.parse(data) : []; - // if (this.info.length > 0) { - // await this.increment(); - // } else { - // await this.fullFetch(); - // } - // } else { - // console.log("历史文件不存在,开始全量爬取"); - // await this.fullFetch(); - // } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements("长安", this.info); - // this.writeFile(this.info); - this.queue.addMessage("长安", this.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "长安", - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // this.info.push(...newInfo); - this.queue.saveAnnouncements("长安", newInfo); - // this.writeFile(this.info); - this.queue.addMessage("长安", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - // let total = result[1].result.total; - let pages = result[1].result.pages; - let arr = result[1].result.records; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - // 命中关键词 - if (keywordsInclude(item.projectName)) { - console.log("处理项目:", item.id, item.projectName); - info.push({ - id: item.id, - name: item.projectName, - publishTime: item.startTime, - endTime: item.endTime, - urls: `https://portal.changan.com.cn/noProdNoticeInfo?_t=${Date.now()}&id=${ - item.id - }`, - }); - } - } - return { pages, info }; - } - } - // 分页获取数据 - getList(pagenumber) { - return axios({ - url: "https://portal.changan.com.cn/backend_8086/changan_platform/api/nonPdcSourceNoticeCt/listSourceNoticePageBySupplier", - params: { - _t: Date.now(), - pageNo: pagenumber, - pageSize: 20, - }, - method: "get", - }) - .then((res) => { - let result = res.data; - if (result.success) { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - // writeFile(info) { - // fs.writeFileSync(this.filepath, JSON.stringify(info), "utf-8"); - // } -} - -new ChangAn(); diff --git a/chery.js b/chery.js deleted file mode 100644 index bad96ed..0000000 --- a/chery.js +++ /dev/null @@ -1,251 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { - timestampToDate, - loopCall, - keywordsInclude, - // addToMessageQueue, -} from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import { messageQueue } from "./msgManager.js"; -// import cheerio from "cheerio"; - -class Chery { - constructor() { - this.jsonMap = [ - { - name: "奇瑞采购公告", - // filepath: path.resolve("chery_cg.json"), - info: [], - options: { - name: "采购公告", - url: "https://ebd.mychery.com/cms/api/dynamicData/queryContentPage", - categoryId: "5035", - siteId: "747", - }, - }, - { - name: "奇瑞寻源预告", - // filepath: path.resolve("chery_xy.json"), - info: [], - options: { - name: "寻源预告", - url: "https://ebd.mychery.com/cms/api/dynamicData/queryContentPage", - categoryId: "965901485789413376", - siteId: "747", - }, - }, - { - name: "奇瑞变更公告", - // filepath: path.resolve("chery_bg.json"), - info: [], - options: { - name: "变更公告", - url: "https://ebd.mychery.com/cms/api/dynamicData/queryContentPage", - categoryId: "5032", - siteId: "747", - }, - }, - ]; - console.log("奇瑞 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - // if (fs.existsSync(item.filepath)) { - // let data = fs.readFileSync(item.filepath, "utf-8"); - // item.info = data ? JSON.parse(data) : []; - // if (item.info.length > 0) { - // // await this.increment(item); - // console.log(`${item.name} 历史文件存在,开始增量爬取`); - // this.loopFetchIncrement(item); - // } else { - // this.loopFetchFull(item); - // } - // } else { - // console.log(`${item.name}历史文件不存在,开始全量爬取`); - // this.loopFetchFull(item); - // } - } - } - // 全量爬取 - loopFetchFull(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - additional: props.options, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements(props.name, props.info); - // this.writeFile(props); - this.queue.addMessage(props.name, props.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`奇瑞${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`奇瑞${props.options.name}增量爬取失败:`, error); - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 30, info: [] }; - } else { - let pages = 30; - let arr = result[1].res.rows; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - let endTime, publishTime; - if (config.categoryId === "965901485789413376") { - publishTime = item.publishDate.replace("T", " ").split(".")[0]; - endTime = this.extractDeadlineTime(item.text); - } else { - endTime = item.signUpEndTime.replace("T", " ").split(".")[0]; - publishTime = item.signUpBeginTime.replace("T", " ").split(".")[0]; - } - // 命中关键词 - if ( - endTime && - keywordsInclude(item.title) && - +new Date(endTime) >= Date.now() - ) { - // console.log("处理项目:", item.id, item.projectName); - info.push({ - id: item.url, - name: item.title, - publishTime: publishTime, - endTime: endTime, - urls: `https://ebd.mychery.com/cms` + item.url, - }); - } - } - return { pages, info }; - } - } - // 分页获取数据 - getList(pagenumber, config) { - return axios({ - url: config.url, - data: { - dto: { - bidType: "", - categoryId: config.categoryId, - city: "", - county: "", - province: "", - purchaseMode: "", - secondCompanyId: "", - siteId: config.siteId, - }, - pageNo: pagenumber, - pageSize: "10", - }, - method: "post", - }) - .then((res) => { - let result = res.data; - if (result.code === 0) { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - // writeFile(props) { - // fs.writeFileSync(props.filepath, JSON.stringify(props.info), "utf-8"); - // } - - extractDeadlineTime(html) { - // 匹配"预告报名截止时间:"后面的时间格式 - const regex = /预告报名截止时间:(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/; - const match = html.match(regex); - - if (match) { - return match[1]; - } - - return null; - } -} - -new Chery(); diff --git a/df.js b/df.js deleted file mode 100644 index 7104a44..0000000 --- a/df.js +++ /dev/null @@ -1,187 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -import * as cheerio from "cheerio"; - -class DF { - constructor() { - this.jsonMap = [ - { - name: "东风【招标采购】", - info: [], - options: { - name: "东风【招标采购】", - url: "https://etp.dfmc.com.cn/jyxx/004001/", - homeIndex: "trade_info_new.html", - }, - }, - { - name: "东风【非招标采购】", - info: [], - options: { - name: "东风【非招标采购】", - url: "https://etp.dfmc.com.cn/jyxx/004002/", - homeIndex: "trade_info_newf.html", - }, - }, - ]; - console.log("东风 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - } - } - // 全量爬取 - loopFetchFull(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - additional: props.options, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - if (props.info.length > 0) { - this.queue.saveAnnouncements(props.name, props.info); - // this.writeFile(props); - this.queue.addMessage(props.name, props.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`${props.options.name}增量爬取失败:`, error); - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0].status); - return { pages: 0, info: [] }; - } else { - // 第六页开始就要验证码了 - let pages = 5; - let html = result[1]; - const $ = cheerio.load(html); - $(".public-table tbody tr").each((index, element) => { - let id = $(element).find("td:nth-child(3)").text(); - let name = $(element).find("a").text(); - let publishTime = $(element).find("td:nth-child(6)").text(); - let endTime = $(element).find("td:nth-child(5)").text(); - let urls = - "https://etp.dfmc.com.cn" + $(element).find("a").attr("href"); - if ( - endTime && - +new Date(endTime) >= Date.now() && - keywordsInclude(name) - ) { - console.log("处理项目:", id, name); - info.push({ - id: id, - name: name, - publishTime: publishTime, - endTime: endTime, - urls: urls, - }); - } - }); - return { pages, info }; - } - } - // 分页获取数据 - getList(pagenumber, config) { - let url = config.url; - if (pagenumber === 1) { - url += config.homeIndex; - } else { - url += `${pagenumber}.html`; - } - return axios({ - url: url, - method: "get", - }) - .then((res) => { - let result = res.data; - return [null, result]; - }) - .catch((err) => { - return [err, null]; - }); - } -} - -new DF(); diff --git a/geely.js b/geely.js deleted file mode 100644 index f71479e..0000000 --- a/geely.js +++ /dev/null @@ -1,237 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import cheerio from "cheerio"; -// import { messageQueue } from "./msgManager.js"; - -class GEELY { - constructor() { - this.url = "https://glzb.geely.com/gpmp/notice/listnotice"; - // this.filepath = path.resolve("geely.json"); - this.info = []; - console.log("GEELY 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("吉利"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - // if (fs.existsSync(this.filepath)) { - // let data = fs.readFileSync(this.filepath, "utf-8"); - // this.info = data ? JSON.parse(data) : []; - // if (this.info.length > 0) { - // await this.increment(); - // } else { - // await this.fullFetch(); - // } - // } else { - // console.log("历史文件不存在,开始全量爬取"); - // await this.fullFetch(); - // } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); // 限制最多2页用于测试 - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements("吉利", this.info); - // this.writeFile(this.info); - this.queue.addMessage("吉利", this.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "吉利", - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - this.queue.saveAnnouncements("吉利", newInfo); - this.queue.addMessage("吉利", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - // 传入页码获取数据 - async getInfo(pagenumber = 1) { - let today = new Date().setHours(0, 0, 0, 0); - let beforeOneMonth = today - 30 * 24 * 60 * 60 * 1000; - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - let total = result[1].data.total; - let pages = Math.ceil(total / 20); - let arr = result[1].data.items; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - if (item.endtime >= today && item.publishtime >= beforeOneMonth) { - console.log("处理项目:", item.pjtnoticeid, item.pjtnoticename); - let noticeRes = await this.getNoticeUrl(item.pjtnoticeid); - if (noticeRes[0]) { - // 获取招标公告内容报错 - console.error("获取公告详情失败:", noticeRes[0]); - } else { - info.push({ - id: item.pjtnoticeid, - name: item.pjtnoticename, - publishTime: timestampToDate(item.publishtime), - endTime: timestampToDate(item.endtime), - urls: noticeRes[1], - }); - } - } - } - return { pages, info }; - } - } - getList(pagenumber) { - return axios({ - url: this.url, - params: { - pagesize: 20, - pagenumber: pagenumber, - publishstatus: 2, - bidcategoryid: 1442, - iflongpro: 0, - _: Date.now(), - }, - method: "get", - }) - .then((res) => { - let result = res.data; - if (result.code === "success") { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - getNoticeUrl(id) { - let timestamp = Date.now(); - return axios({ - url: `https://glzb.geely.com/gpmp/notice/query?_=${timestamp}&pjtnoticeid=${id}`, - method: "get", - }) - .then((res) => { - let result = res.data; - if (result.code === "success") { - let promises = []; - for (let item of result.data.attachs) { - let params = { - name: item.attachname, - downloadUrl: item.downloadUrl, - previewUrl: item.previewUrl, - attachname: item.attachname, - _: Date.now(), - }; - promises.push( - axios({ - url: `https://glzb.geely.com/pub/file/info/preview`, - method: "get", - params, - }) - ); - } - return Promise.allSettled(promises).then((results) => { - let urls = []; - results.forEach((result) => { - if ( - result.status === "fulfilled" && - result.value.data.code === "success" - ) { - urls.push(result.value.data.data); - } - }); - return [null, urls]; - }); - } else { - return ["err", null]; - } - }) - .catch((err) => { - console.log("err:", err); - return [err, null]; - }); - } - - // writeFile(info) { - // fs.writeFileSync(this.filepath, JSON.stringify(info), "utf-8"); - // } -} - -new GEELY(); diff --git a/greatWall.js b/greatWall.js deleted file mode 100644 index eb86488..0000000 --- a/greatWall.js +++ /dev/null @@ -1,234 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; - -class GreatWall { - constructor() { - this.jsonMap = [ - { - name: "长城公开寻源", - info: [], - options: { - name: "长城公开寻源", - url: "https://srm.gwm.cn/cloud-srm/api-sou/sou-firstPage/souReqlistPage", - }, - }, - { - name: "长城招募公示大厅", - info: [], - options: { - name: "长城招募公示大厅", - url: "https://srm.gwm.cn/cloud-srm/api-sou/api-ql/Recruit/visitList", - data: { - type: "Recruit", - lang: "zh-cn", - query: { "*": {} }, - payload: { - filter: {}, - page: { sort: "lastUpdateDate desc", pageNum: 1, pageSize: 8 }, - }, - action: "visitList", - tree: true, - }, - }, - }, - ]; - console.log("长城 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - } - } - // 全量爬取 - loopFetchFull(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - additional: props.options, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - if (props.info.length > 0) { - this.queue.saveAnnouncements(props.name, props.info); - // this.writeFile(props); - this.queue.addMessage(props.name, props.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`${props.options.name}增量爬取失败:`, error); - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - if (config.data) { - // 招募公示大厅 - let arr = result[1].data.records; - let pages = result[1].data.pageCount; - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - let endTime, publishTime; - endTime = item.deadlineTime; - publishTime = item.publishTime; - // 命中关键词 - if (keywordsInclude(item.title)) { - info.push({ - id: item.recruitId, - name: item.title, - publishTime: publishTime, - endTime: endTime, - urls: `https://srm.gwm.cn/#/portalBidding/vendorBiddingDetail?id=${item.recruitId}`, - }); - } - } - return { pages, info }; - } else { - // 公开寻源 - let arr = result[1].data.list; - let pages = result[1].data.pages; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - let endTime, publishTime; - endTime = item.publicEndTime; - publishTime = item.releaseDate; - // 命中关键词 - if (keywordsInclude(item.projectName)) { - info.push({ - id: item.reqHeadId, - name: item.projectName, - publishTime: publishTime, - endTime: endTime, - urls: `https://srm.gwm.cn/#/portal?id=${item.reqHeadId}`, - }); - } - } - return { pages, info }; - } - } - } - // 分页获取数据 - getList(pagenumber, config) { - let data = {}; - if (config.data) { - data = config.data; - data.payload.page.pageNum = pagenumber; - } else { - data = { pageNum: pagenumber, pageSize: 8 }; - } - return axios({ - url: config.url, - data: data, - method: "post", - }) - .then((res) => { - let result = res.data; - if (result.code == "0") { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - // writeFile(props) { - // fs.writeFileSync(props.filepath, JSON.stringify(props.info), "utf-8"); - // } - - // extractDeadlineTime(html) { - // // 匹配"预告报名截止时间:"后面的时间格式 - // const regex = /预告报名截止时间:(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/; - // const match = html.match(regex); - - // if (match) { - // return match[1]; - // } - - // return null; - // } -} - -new GreatWall(); diff --git a/leapMotor.js b/leapMotor.js deleted file mode 100644 index d99e73e..0000000 --- a/leapMotor.js +++ /dev/null @@ -1,193 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import cheerio from "cheerio"; - -class LeapMotor { - constructor() { - this.url = - "https://lpsrm.leapmotor.com/cloud-srm/api-inq/inq-anon/reqhead/listPage"; - this.info = []; - console.log("零跑 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("零跑"); - if (announcements.length > 0) { - // console.log(announcements); - await this.increment(); - } else { - await this.fullFetch(); - } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements("零跑", this.info); - this.queue.addMessage("零跑", this.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - // 判断数据是否存在 - try { - let newInfo = this.queue.filterNewAnnouncements( - "零跑", - result.info - ); - // 有新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - - this.queue.saveAnnouncements("零跑", newInfo); - this.queue.addMessage("零跑", newInfo); - - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - // 传入页码获取数据 - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - // let total = result[1].data.total; - let pages = result[1].data.pages; - let arr = result[1].data.list; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - // 命中关键词 - if (keywordsInclude(item.souReqTitile)) { - console.log("处理项目:", item.reqHeadId, item.souReqTitile); - let noticeRes = await this.getNoticeUrl(item.reqHeadId); - if (noticeRes[0]) { - // 获取招标公告内容报错 - console.error("获取公告链接失败:", noticeRes[0]); - } else { - info.push({ - id: item.reqHeadId, - name: item.souReqTitile, - publishTime: item.publishTime, - endTime: item.expirationTime, - urls: noticeRes[1], - }); - } - } - } - return { pages, info }; - } - } - getList(pagenumber) { - return axios({ - url: this.url, - data: { - pageNum: pagenumber, - pageSize: 8, - }, - method: "post", - }) - .then((res) => { - let result = res.data; - if (result.code === "0") { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - getNoticeUrl(id) { - return axios({ - url: `https://lpsrm.leapmotor.com/cloud-srm/api-inq/inq-anon/pj/reqhead/get?id=${id}`, - method: "get", - }) - .then((res) => { - let result = res.data; - if (result.code === "0") { - return [null, result.data.extNoticeLink]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - console.log("err:", err); - return [err, null]; - }); - } - - // writeFile(info) { - // fs.writeFileSync(this.filepath, JSON.stringify(info), "utf-8"); - // } -} - -new LeapMotor(); diff --git a/nio.js b/nio.js deleted file mode 100644 index c6d43f7..0000000 --- a/nio.js +++ /dev/null @@ -1,170 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { - timestampToDate, - loopCall, - keywordsInclude, - getYiqiNoticeUrl, - parseToGgDetailsParams, -} from "./utils.js"; -import config from "./config.js"; -import * as cheerio from "cheerio"; -import { SQLiteMessageQueue } from "./sqlite.js"; - -class NIO { - constructor() { - // this.filepath = path.resolve("yiqi.json"); - this.info = []; - console.log("蔚来 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("蔚来"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - if (this.info.length > 0) { - this.queue.saveAnnouncements("蔚来", this.info); - // this.writeFile(this.info); - this.queue.addMessage("蔚来", this.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "蔚来", - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // this.info.push(...newInfo); - this.queue.saveAnnouncements("蔚来", newInfo); - // this.writeFile(this.info); - this.queue.addMessage("蔚来", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getHtml(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - let pages = 1; - let html = result[1]; - const $ = cheerio.load(html); - let jsonStr = $("#__NEXT_DATA__").text(); - let data = JSON.parse(jsonStr).props.pageProps.tenderNotices; - // console.log(data); - data.forEach((item) => { - let id = item.id; - let name = item.title; - let publishTime = item.publishDate; - let endTime = item.dueTime; - let urls = item.documents[0].url; - if ( - endTime && - +new Date(endTime) >= Date.now() && - keywordsInclude(name) - ) { - info.push({ - id, - name, - publishTime, - endTime, - urls, - }); - } - }); - return { pages, info }; - } - } - // 分页获取数据 - getHtml(pagenumber) { - return axios({ - url: "https://www.nio.cn/partnership/tender-notices", - method: "get", - }) - .then((res) => { - let result = res.data; - return [null, result]; - }) - .catch((err) => { - return [err, null]; - }); - } -} - -new NIO(); diff --git a/picc.js b/picc.js index 4e36fea..ddbb4c5 100644 --- a/picc.js +++ b/picc.js @@ -184,7 +184,7 @@ class PICC { }) .then((res) => { let result = res.data; - console.log("then",result) + console.log("then",JSON.stringify(result.res.rows, null, 2)) if (result.msg === "操作成功" && result.code === 0) { return [null, result]; } else { diff --git a/stats.js b/stats.js deleted file mode 100644 index ade7443..0000000 --- a/stats.js +++ /dev/null @@ -1,80 +0,0 @@ -import { SQLiteMessageQueue } from "./sqlite.js"; -import path from "path"; -import { md5 } from "./utils.js"; -import axios from "axios"; - -const queue = new SQLiteMessageQueue(); - -const stats = queue.getStats(); - -// function merge() { -// let files = [ -// { name: "长安", path: "changan.json" }, -// { name: "奇瑞变更公告", path: "chery_bg.json" }, -// { name: "奇瑞采购公告", path: "chery_cg.json" }, -// { name: "奇瑞寻源预告", path: "chery_xy.json" }, -// { name: "零跑", path: "leapMotor.json" }, -// { name: "吉利", path: "geely.json" }, -// { name: "一汽", path: "yiqi.json" }, -// ]; -// files.forEach((file) => { -// queue.migrateFromJsonFile(file.name, path.resolve(file.path)); -// }); -// } -// merge(); -// 把message中的数据状态改成pending -// queue.getFailedMessages() -// .forEach((message) => { -// queue.updateMessageStatus(message.id, "pending"); -// }); -// function getSign(timestamp) { -// let secret = "cpwyyds"; -// let uri = "/common/message/push"; -// const url = uri + timestamp + secret; -// console.log(url); -// const myCalc = md5(url); -// let sign = -// myCalc.substring(5, 13) + -// myCalc.substring(29, 31) + -// myCalc.substring(18, 27); -// //sign 转大写 -// sign = sign.toUpperCase(); -// return sign; -// } -// let time = new Date().getTime(); -// let data = { -// timestamp: time, -// sign: getSign(time), -// templateNo: "A002", -// url: "https://www.baidu.com/", -// paramList: [ -// { -// key: "thing8", -// value: "网站name", -// }, -// { -// key: "thing2", -// value: "项目name", -// }, -// { -// key: "time14", -// value: "2025-11-2", -// }, -// { -// key: "time17", -// value: "2025-11-3 00:00:00", -// }, -// ], -// }; -// axios({ -// url: "https://testadvert.shenlintech.com/platform/common/message/push", -// method: "post", -// data, -// }) -// .then((res) => { -// console.log(res.data); -// }) -// .catch((err) => { -// console.log(err); -// }); -console.log(stats); diff --git a/third.js b/third.js deleted file mode 100644 index ca9afd3..0000000 --- a/third.js +++ /dev/null @@ -1,309 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import JSON5 from "json5"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -import * as cheerio from "cheerio"; - -class Third { - constructor(jsonMap) { - this.axiosInstance = axios.create({ timeout: 30000, maxRedirects: 5 }); - this.axiosInstance.interceptors.request.use((config) => { - // 添加cookie到请求头 - const cookieString = Array.from(this.cookiePair.entries()) - .map(([name, value]) => `${name}=${value}`) - .join("; "); - config.headers.Cookie = cookieString; - // console.log(config); - return config; - }); - this.axiosInstance.interceptors.response.use( - (response) => { - // 更新cookie到请求头 - let cookieArr = response.headers["set-cookie"] || []; - this.extractCookie(cookieArr); - return response; - }, - (error) => { - return Promise.reject(error); - } - ); - this.cookiePair = new Map(); - // this.csrfToken = ""; - this.jsonMap = jsonMap; - console.log("三方平台 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - } - } - async initializeCookie() { - try { - let headers = { - headers: { - Accept: "text/plain, */*; q=0.01", - "Accept-Language": "zh-CN,zh;q=0.9", - "Cache-Control": "no-cache", - "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", - Origin: "https://www.chinabidding.com", - Pragma: "no-cache", - Priority: "u=1, i", - Referer: "https://www.chinabidding.com/search/proj.htm", - "Sec-Ch-Ua": - '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', - "Sec-Ch-Ua-Mobile": "?0", - "Sec-Ch-Ua-Platform": '"macOS"', - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "same-origin", - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36", - "X-Requested-With": "XMLHttpRequest", - }, - }; - const homeResponse = await this.axiosInstance.get( - "https://www.chinabidding.com/search/proj.htm", - headers - ); - } catch (err) { - console.log("err", err); - throw err; - } - } - extractCookie(cookieArr) { - for (let cookie of cookieArr) { - let [key, value] = cookie.split(";")[0].split("="); - this.cookiePair.set(key, value); - } - // console.log(this.cookiePair); - } - // 全量爬取 - loopFetchFull(props) { - console.log("开始全量爬取"); - try { - loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - additional: props.options, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - if (props.info.length > 0) { - this.queue.saveAnnouncements(props.name, props.info); - this.queue.addMessage(props.name, props.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - console.log("开始增量爬取"); - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`${props.options.name}增量爬取失败:`, error); - } - } - - async getNoticeDetail(url) { - try { - let result = await axios.get(url); - return result.data; - } catch (err) { - return "err"; - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败: ", result[0]); - return { pages: 0, info: [] }; - } else { - let pages = 3; - let html = result[1]; - const $ = cheerio.load(html); - $(".as-pager-body li").each((index, element) => { - let idmatch = $(element) - .find(".as-pager-item") - .attr("href") - .match(/\/bidDetail\/(\d+)\.html/); - let id = idmatch ? idmatch[1] : ""; - let name = $(element).find(".txt").attr("title"); - - let url = $(element).find(".as-pager-item").attr("href"); - if (keywordsInclude(name)) { - console.log("处理项目:", name); - info.push({ - id: id, - name: name, - urls: url, - publishTime: "--", - endTime: "--", - }); - } - }); - return { pages, info }; - } - } - async getList(pagenumber, config) { - let data = config.data; - data.currentPage = pagenumber; - let headers = { - Accept: "text/plain, */*; q=0.01", - "Accept-Language": "zh-CN,zh;q=0.9", - "Cache-Control": "no-cache", - "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", - Origin: "https://www.chinabidding.com", - Pragma: "no-cache", - Priority: "u=1, i", - Referer: "https://www.chinabidding.com/search/proj.htm", - "Sec-Ch-Ua": - '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', - "Sec-Ch-Ua-Mobile": "?0", - "Sec-Ch-Ua-Platform": '"macOS"', - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "same-origin", - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36", - "X-Requested-With": "XMLHttpRequest", - }; - try { - const response = await this.axiosInstance({ - url: config.url, - data, - method: "post", - headers, - }); - let result = response.data; - return [null, result]; - } catch (err) { - console.log("cookie不对"); - try { - await this.initializeCookie(); - const retryResponse = await this.axiosInstance({ - url: config.url, - data, - method: "post", - headers, - }); - let result = retryResponse.data; - return [null, result]; - } catch (retryErr) { - return [retryErr, null]; - } - } - } -} - -new Third([ - { - name: "机电项目招投标【招标公告】", - info: [], - options: { - name: "机电项目招投标【招标公告】", - url: "https://www.chinabidding.com/search/proj.htm", - data: { - fullText: "", - pubDate: "", - infoClassCodes: "(0105 0103)", - normIndustry: "", - zoneCode: "", - fundSourceCodes: "", - poClass: "BidNotice", - rangeType: "", - currentPage: 1, - }, - }, - }, -]); -new Third([ - { - name: "机电项目招投标【招标变更公告】", - info: [], - options: { - name: "机电项目招投标【招标变更公告】", - url: "https://www.chinabidding.com/search/proj.htm", - data: { - fullText: "", - pubDate: "", - infoClassCodes: "(0106 0104)", - normIndustry: "", - zoneCode: "", - fundSourceCodes: "", - poClass: "BidNotice", - rangeType: "", - currentPage: 1, - }, - }, - }, -]); diff --git a/yiqi.js b/yiqi.js deleted file mode 100644 index 37f895d..0000000 --- a/yiqi.js +++ /dev/null @@ -1,199 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { - timestampToDate, - loopCall, - keywordsInclude, - getYiqiNoticeUrl, - parseToGgDetailsParams, - // addToMessageQueue, -} from "./utils.js"; -import config from "./config.js"; -import * as cheerio from "cheerio"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import { messageQueue } from "./msgManager.js"; - -class YiQi { - constructor() { - // this.filepath = path.resolve("yiqi.json"); - this.info = []; - console.log("一汽 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("一汽"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - // if (fs.existsSync(this.filepath)) { - // let data = fs.readFileSync(this.filepath, "utf-8"); - // this.info = data ? JSON.parse(data) : []; - // if (this.info.length > 0) { - // await this.increment(); - // } else { - // await this.fullFetch(); - // } - // } else { - // console.log("历史文件不存在,开始全量爬取"); - // await this.fullFetch(); - // } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements("一汽", this.info); - // this.writeFile(this.info); - this.queue.addMessage("一汽", this.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "一汽", - result.info - ); - // let newInfo = result.info.filter( - // (item) => !this.info.some((info) => info.id === item.id) - // ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // this.info.push(...newInfo); - this.queue.saveAnnouncements("一汽", newInfo); - // this.writeFile(this.info); - this.queue.addMessage("一汽", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getHtml(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 30, info: [] }; - } else { - let pages = 30; - let html = result[1]; - const $ = cheerio.load(html); - let noticeEl = $(".zl-list-main .zl-col-6"); - noticeEl.each((index, element) => { - let id = $(element).find(".zl-desc-item:contains('项目编号')").text(); - let name = $(element).find(".title").text(); - let publishTime = $(element) - .find(".zl-desc-item:contains('发布时间')") - .text(); - let endTime = $(element).find(".daojishi").attr("data-time"); - // 获取生产链接的参数 - let funcStr = $(element).find(".jump").attr("onclick"); - - let funcArgs = parseToGgDetailsParams(funcStr); - // 公告未过期 && 命中关键词 - if (endTime && keywordsInclude(name)) { - let noticeUrl = getYiqiNoticeUrl(...funcArgs); - info.push({ - id: id.replace("项目编号:", ""), - name: name.trim(), - publishTime: publishTime.replace("发布时间:", "").trim(), - endTime: timestampToDate(Number(endTime)), - urls: noticeUrl, - }); - } - }); - - return { pages, info }; - } - } - // 分页获取数据 - getHtml(pagenumber) { - return axios({ - url: "https://etp.faw.cn/gg/allJYTypeGGList?hangYeType=-1&xmLeiXing=&ggStartTimeEnd=&gongGaoType=5&isNew=1", - data: { - searchType: "", - searchText: "", - currentPage: pagenumber, - }, - headers: { - "Content-Type": "application/x-www-form-urlencoded", - }, - method: "post", - }) - .then((res) => { - let result = res.data; - return [null, result]; - }) - .catch((err) => { - return [err, null]; - }); - } - - // writeFile(info) { - // fs.writeFileSync(this.filepath, JSON.stringify(info), "utf-8"); - // } -} - -new YiQi(); diff --git a/youzhicai.js b/youzhicai.js deleted file mode 100644 index ca6f15e..0000000 --- a/youzhicai.js +++ /dev/null @@ -1,406 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import JSON5 from "json5"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -import * as cheerio from "cheerio"; - -class YouZhiCai { - constructor(jsonMap) { - this.axiosInstance = axios.create({ timeout: 30000, maxRedirects: 5 }); - this.axiosInstance.interceptors.request.use((config) => { - // 添加cookie到请求头 - const cookieString = Array.from(this.cookiePair.entries()) - .map(([name, value]) => `${name}=${value}`) - .join("; "); - config.headers.Cookie = cookieString; - return config; - }); - this.axiosInstance.interceptors.response.use( - (response) => { - // 更新cookie到请求头 - let cookieArr = response.headers["set-cookie"] || []; - this.extractCookie(cookieArr); - return response; - }, - (error) => { - return Promise.reject(error); - } - ); - this.cookiePair = new Map(); - // this.csrfToken = ""; - this.jsonMap = jsonMap; - console.log("优质采 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - } - } - async initializeCookie() { - try { - let headers = { - headers: { - Accept: "text/plain, */*; q=0.01", - "Accept-Language": "zh-CN,zh;q=0.9", - "Cache-Control": "no-cache", - "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", - Origin: "https://www.youzhicai.com", - Pragma: "no-cache", - Priority: "u=1, i", - Referer: "https://www.youzhicai.com/s/1_1_0_0_.html", - "Sec-Ch-Ua": - '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', - "Sec-Ch-Ua-Mobile": "?0", - "Sec-Ch-Ua-Platform": '"macOS"', - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "same-origin", - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36", - "X-Requested-With": "XMLHttpRequest", - }, - }; - const homeResponse = await this.axiosInstance.get( - "https://www.youzhicai.com/s/1_1_0_0_.html", - headers - ); - // // 提取csrf-token - // let tokenMatch = homeResponse.data.match( - // / { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - if (props.info.length > 0) { - this.queue.saveAnnouncements(props.name, props.info); - this.queue.addMessage(props.name, props.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - console.log("开始增量爬取"); - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`${props.options.name}增量爬取失败:`, error); - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败: ", result[0]); - return { pages: 0, info: [] }; - } else { - // 后面的都要验证码 - - // let pages = 2; - let html = result[1]; - const $ = cheerio.load(html); - let total = $("#recommendMsg .info-num-value").text(); - let pages = Math.ceil(total / 15); - if (pages > 2) { - pages = 2; - } - $(".project-li").each((index, element) => { - let id = $(element).find(".project-name0").attr("href"); - let name = $(element).find(".project-name0").attr("title"); - let publishTime = $(element).find(".pub-value0").text(); - let leftDay = $(element).find(".left-day .emOrange:eq(0)").text(); - let endTime = new Date( - +new Date(publishTime) + leftDay * 24 * 60 * 60 * 1000 - ).toLocaleDateString(); - // console.log(endTime); - let urls = "https://www.youzhicai.com" + id; - if (keywordsInclude(name)) { - console.log("处理项目:", name, publishTime, endTime); - info.push({ - id: id, - name: name, - publishTime: publishTime, - endTime: endTime, - urls: urls, - }); - } - }); - return { pages, info }; - } - } - async getList(pagenumber, config) { - let data = config.data; - data.PageIndex = pagenumber; - if (this.cookiePair.get("__RequestVerificationToken")) { - data.__RequestVerificationToken = this.cookiePair.get( - "__RequestVerificationToken" - ); - } - let headers = { - Accept: "text/plain, */*; q=0.01", - "Accept-Language": "zh-CN,zh;q=0.9", - "Cache-Control": "no-cache", - "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", - Origin: "https://www.youzhicai.com", - Pragma: "no-cache", - Priority: "u=1, i", - Referer: "https://www.youzhicai.com/s/1_1_0_0_.html", - "Sec-Ch-Ua": - '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', - "Sec-Ch-Ua-Mobile": "?0", - "Sec-Ch-Ua-Platform": '"macOS"', - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "same-origin", - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36", - "X-Requested-With": "XMLHttpRequest", - }; - try { - const response = await this.axiosInstance({ - url: config.url, - data, - method: "post", - headers, - }); - let result = response.data; - return [null, result]; - } catch (err) { - console.log("cookie不对"); - try { - await this.initializeCookie(); - data.__RequestVerificationToken = this.cookiePair.get( - "__RequestVerificationToken" - ); - const retryResponse = await this.axiosInstance({ - url: config.url, - data, - method: "post", - headers, - }); - // console.log(retryResponse.data); - let result = retryResponse.data; - return [null, result]; - } catch (retryErr) { - return [retryErr, null]; - } - } - } -} - -new YouZhiCai([ - { - name: "优质采【招标公告】", - info: [], - options: { - name: "优质采【招标公告】", - url: "https://www.youzhicai.com/s/1_1_0_0_.html", - data: { - MsProvince: "", - MsCity: "", - MsStartDate: "", - MsEndDate: "", - AutoOr: 0, - BackOr: 0, - NoticeTitle: "", - searchAccuracy: "precise", - matchType: "precise", - TenderType: "", - MsBidderType: 1, - MsNoticeType: 1, - MsPublishType: 0, - MsSingUpType: 1, - MsSort: 2, - MsProvince: "", - PageIndex: 1, - PageSize: 15, - AgencyId: "", - SecondSearch: "", - SecondSearchType: "", - TotalSize: 10000, - SearchRange: 3, - year: "", - key1: "", - key2: "", - key3: "", - }, - }, - }, -]); -new YouZhiCai([ - { - name: "优质采【澄清/变更公告】", - info: [], - options: { - name: "优质采【澄清/变更公告】", - url: "https://www.youzhicai.com/s/1_1_0_0_.html", - data: { - MsProvince: "", - MsCity: "", - MsStartDate: "", - MsEndDate: "", - AutoOr: 0, - BackOr: 0, - NoticeTitle: "", - searchAccuracy: "precise", - matchType: "precise", - TenderType: "", - MsBidderType: 1, - MsNoticeType: 5, - MsPublishType: 0, - MsSingUpType: 1, - MsSort: 2, - MsProvince: "", - PageIndex: 1, - PageSize: 15, - AgencyId: "", - SecondSearch: "", - SecondSearchType: "", - TotalSize: 10000, - SearchRange: 3, - year: "", - key1: "", - key2: "", - key3: "", - }, - }, - }, -]); -new YouZhiCai([ - { - name: "优质采【招标项目计划】", - info: [], - options: { - name: "优质采【招标项目计划】", - url: "https://www.youzhicai.com/s/1_1_0_0_.html", - data: { - MsProvince: "", - MsCity: "", - MsStartDate: "", - MsEndDate: "", - AutoOr: 0, - BackOr: 0, - NoticeTitle: "", - searchAccuracy: "precise", - matchType: "precise", - TenderType: "", - MsBidderType: 1, - MsNoticeType: 7, - MsPublishType: 0, - MsSingUpType: 1, - MsSort: 2, - MsProvince: "", - PageIndex: 1, - PageSize: 15, - AgencyId: "", - SecondSearch: "", - SecondSearchType: "", - TotalSize: 10000, - SearchRange: 3, - year: "", - key1: "", - key2: "", - key3: "", - }, - }, - }, -]);