diff --git a/byd.js b/byd.js deleted file mode 100644 index 3ef15ee..0000000 --- a/byd.js +++ /dev/null @@ -1,173 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; - -class BYD { - constructor() { - this.info = []; - console.log("比亚迪 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("比亚迪"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - if (this.info.length > 0) { - this.queue.saveAnnouncements("比亚迪", this.info); - // this.writeFile(this.info); - this.queue.addMessage("比亚迪", this.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "比亚迪", - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // this.info.push(...newInfo); - this.queue.saveAnnouncements("比亚迪", newInfo); - // this.writeFile(this.info); - this.queue.addMessage("比亚迪", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - let total = result[1].data.total; - let pages = Math.ceil(total / 10); - let arr = result[1].data.records; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - let endTime = timestampToDate( - new Date(item.signUpEndTime).getTime(), - true - ); - // 命中关键词 - if ( - keywordsInclude(item.title) && - endTime && - +new Date(endTime) >= Date.now() - ) { - // console.log("处理项目:", item.sourcingId, item.title); - info.push({ - id: item.sourcingId, - name: item.title, - publishTime: timestampToDate( - new Date(item.tenderNoticePublishTime).getTime(), - true - ), - endTime: endTime, - urls: `https://spcn.byd.com/#/tender-detail?sourcingId=${item.sourcingId}`, - }); - } - } - return { pages, info }; - } - } - // 分页获取数据 - getList(pagenumber) { - return axios({ - url: "https://spcn.byd.com/api/srm-sou-sp/supplier/supplier/getTenderAnnouncementInfo", - data: { - pageNo: pagenumber, - pageSize: 10, - }, - method: "post", - }) - .then((res) => { - let result = res.data; - if (result.msg === "成功" && result.code === "000000") { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } -} - -new BYD(); diff --git a/changan.js b/changan.js deleted file mode 100644 index c47b911..0000000 --- a/changan.js +++ /dev/null @@ -1,188 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { - timestampToDate, - loopCall, - keywordsInclude, - // addToMessageQueue, -} from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import { messageQueue } from "./msgManager.js"; -// import cheerio from "cheerio"; - -class ChangAn { - constructor() { - // this.filepath = path.resolve("changan.json"); - this.info = []; - console.log("长安 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("长安"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - - // if (fs.existsSync(this.filepath)) { - // let data = fs.readFileSync(this.filepath, "utf-8"); - // this.info = data ? JSON.parse(data) : []; - // if (this.info.length > 0) { - // await this.increment(); - // } else { - // await this.fullFetch(); - // } - // } else { - // console.log("历史文件不存在,开始全量爬取"); - // await this.fullFetch(); - // } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements("长安", this.info); - // this.writeFile(this.info); - this.queue.addMessage("长安", this.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "长安", - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // this.info.push(...newInfo); - this.queue.saveAnnouncements("长安", newInfo); - // this.writeFile(this.info); - this.queue.addMessage("长安", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - // let total = result[1].result.total; - let pages = result[1].result.pages; - let arr = result[1].result.records; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - // 命中关键词 - if (keywordsInclude(item.projectName)) { - console.log("处理项目:", item.id, item.projectName); - info.push({ - id: item.id, - name: item.projectName, - publishTime: item.startTime, - endTime: item.endTime, - urls: `https://portal.changan.com.cn/noProdNoticeInfo?_t=${Date.now()}&id=${ - item.id - }`, - }); - } - } - return { pages, info }; - } - } - // 分页获取数据 - getList(pagenumber) { - return axios({ - url: "https://portal.changan.com.cn/backend_8086/changan_platform/api/nonPdcSourceNoticeCt/listSourceNoticePageBySupplier", - params: { - _t: Date.now(), - pageNo: pagenumber, - pageSize: 20, - }, - method: "get", - }) - .then((res) => { - let result = res.data; - if (result.success) { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - // writeFile(info) { - // fs.writeFileSync(this.filepath, JSON.stringify(info), "utf-8"); - // } -} - -new ChangAn(); diff --git a/chery.js b/chery.js deleted file mode 100644 index bad96ed..0000000 --- a/chery.js +++ /dev/null @@ -1,251 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { - timestampToDate, - loopCall, - keywordsInclude, - // addToMessageQueue, -} from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import { messageQueue } from "./msgManager.js"; -// import cheerio from "cheerio"; - -class Chery { - constructor() { - this.jsonMap = [ - { - name: "奇瑞采购公告", - // filepath: path.resolve("chery_cg.json"), - info: [], - options: { - name: "采购公告", - url: "https://ebd.mychery.com/cms/api/dynamicData/queryContentPage", - categoryId: "5035", - siteId: "747", - }, - }, - { - name: "奇瑞寻源预告", - // filepath: path.resolve("chery_xy.json"), - info: [], - options: { - name: "寻源预告", - url: "https://ebd.mychery.com/cms/api/dynamicData/queryContentPage", - categoryId: "965901485789413376", - siteId: "747", - }, - }, - { - name: "奇瑞变更公告", - // filepath: path.resolve("chery_bg.json"), - info: [], - options: { - name: "变更公告", - url: "https://ebd.mychery.com/cms/api/dynamicData/queryContentPage", - categoryId: "5032", - siteId: "747", - }, - }, - ]; - console.log("奇瑞 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - // if (fs.existsSync(item.filepath)) { - // let data = fs.readFileSync(item.filepath, "utf-8"); - // item.info = data ? JSON.parse(data) : []; - // if (item.info.length > 0) { - // // await this.increment(item); - // console.log(`${item.name} 历史文件存在,开始增量爬取`); - // this.loopFetchIncrement(item); - // } else { - // this.loopFetchFull(item); - // } - // } else { - // console.log(`${item.name}历史文件不存在,开始全量爬取`); - // this.loopFetchFull(item); - // } - } - } - // 全量爬取 - loopFetchFull(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - additional: props.options, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements(props.name, props.info); - // this.writeFile(props); - this.queue.addMessage(props.name, props.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`奇瑞${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`奇瑞${props.options.name}增量爬取失败:`, error); - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 30, info: [] }; - } else { - let pages = 30; - let arr = result[1].res.rows; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - let endTime, publishTime; - if (config.categoryId === "965901485789413376") { - publishTime = item.publishDate.replace("T", " ").split(".")[0]; - endTime = this.extractDeadlineTime(item.text); - } else { - endTime = item.signUpEndTime.replace("T", " ").split(".")[0]; - publishTime = item.signUpBeginTime.replace("T", " ").split(".")[0]; - } - // 命中关键词 - if ( - endTime && - keywordsInclude(item.title) && - +new Date(endTime) >= Date.now() - ) { - // console.log("处理项目:", item.id, item.projectName); - info.push({ - id: item.url, - name: item.title, - publishTime: publishTime, - endTime: endTime, - urls: `https://ebd.mychery.com/cms` + item.url, - }); - } - } - return { pages, info }; - } - } - // 分页获取数据 - getList(pagenumber, config) { - return axios({ - url: config.url, - data: { - dto: { - bidType: "", - categoryId: config.categoryId, - city: "", - county: "", - province: "", - purchaseMode: "", - secondCompanyId: "", - siteId: config.siteId, - }, - pageNo: pagenumber, - pageSize: "10", - }, - method: "post", - }) - .then((res) => { - let result = res.data; - if (result.code === 0) { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - // writeFile(props) { - // fs.writeFileSync(props.filepath, JSON.stringify(props.info), "utf-8"); - // } - - extractDeadlineTime(html) { - // 匹配"预告报名截止时间:"后面的时间格式 - const regex = /预告报名截止时间:(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/; - const match = html.match(regex); - - if (match) { - return match[1]; - } - - return null; - } -} - -new Chery(); diff --git a/df.js b/df.js deleted file mode 100644 index 7104a44..0000000 --- a/df.js +++ /dev/null @@ -1,187 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -import * as cheerio from "cheerio"; - -class DF { - constructor() { - this.jsonMap = [ - { - name: "东风【招标采购】", - info: [], - options: { - name: "东风【招标采购】", - url: "https://etp.dfmc.com.cn/jyxx/004001/", - homeIndex: "trade_info_new.html", - }, - }, - { - name: "东风【非招标采购】", - info: [], - options: { - name: "东风【非招标采购】", - url: "https://etp.dfmc.com.cn/jyxx/004002/", - homeIndex: "trade_info_newf.html", - }, - }, - ]; - console.log("东风 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - } - } - // 全量爬取 - loopFetchFull(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - additional: props.options, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - if (props.info.length > 0) { - this.queue.saveAnnouncements(props.name, props.info); - // this.writeFile(props); - this.queue.addMessage(props.name, props.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`${props.options.name}增量爬取失败:`, error); - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0].status); - return { pages: 0, info: [] }; - } else { - // 第六页开始就要验证码了 - let pages = 5; - let html = result[1]; - const $ = cheerio.load(html); - $(".public-table tbody tr").each((index, element) => { - let id = $(element).find("td:nth-child(3)").text(); - let name = $(element).find("a").text(); - let publishTime = $(element).find("td:nth-child(6)").text(); - let endTime = $(element).find("td:nth-child(5)").text(); - let urls = - "https://etp.dfmc.com.cn" + $(element).find("a").attr("href"); - if ( - endTime && - +new Date(endTime) >= Date.now() && - keywordsInclude(name) - ) { - console.log("处理项目:", id, name); - info.push({ - id: id, - name: name, - publishTime: publishTime, - endTime: endTime, - urls: urls, - }); - } - }); - return { pages, info }; - } - } - // 分页获取数据 - getList(pagenumber, config) { - let url = config.url; - if (pagenumber === 1) { - url += config.homeIndex; - } else { - url += `${pagenumber}.html`; - } - return axios({ - url: url, - method: "get", - }) - .then((res) => { - let result = res.data; - return [null, result]; - }) - .catch((err) => { - return [err, null]; - }); - } -} - -new DF(); diff --git a/geely.js b/geely.js deleted file mode 100644 index f71479e..0000000 --- a/geely.js +++ /dev/null @@ -1,237 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import cheerio from "cheerio"; -// import { messageQueue } from "./msgManager.js"; - -class GEELY { - constructor() { - this.url = "https://glzb.geely.com/gpmp/notice/listnotice"; - // this.filepath = path.resolve("geely.json"); - this.info = []; - console.log("GEELY 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("吉利"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - // if (fs.existsSync(this.filepath)) { - // let data = fs.readFileSync(this.filepath, "utf-8"); - // this.info = data ? JSON.parse(data) : []; - // if (this.info.length > 0) { - // await this.increment(); - // } else { - // await this.fullFetch(); - // } - // } else { - // console.log("历史文件不存在,开始全量爬取"); - // await this.fullFetch(); - // } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); // 限制最多2页用于测试 - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements("吉利", this.info); - // this.writeFile(this.info); - this.queue.addMessage("吉利", this.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "吉利", - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - this.queue.saveAnnouncements("吉利", newInfo); - this.queue.addMessage("吉利", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - // 传入页码获取数据 - async getInfo(pagenumber = 1) { - let today = new Date().setHours(0, 0, 0, 0); - let beforeOneMonth = today - 30 * 24 * 60 * 60 * 1000; - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - let total = result[1].data.total; - let pages = Math.ceil(total / 20); - let arr = result[1].data.items; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - if (item.endtime >= today && item.publishtime >= beforeOneMonth) { - console.log("处理项目:", item.pjtnoticeid, item.pjtnoticename); - let noticeRes = await this.getNoticeUrl(item.pjtnoticeid); - if (noticeRes[0]) { - // 获取招标公告内容报错 - console.error("获取公告详情失败:", noticeRes[0]); - } else { - info.push({ - id: item.pjtnoticeid, - name: item.pjtnoticename, - publishTime: timestampToDate(item.publishtime), - endTime: timestampToDate(item.endtime), - urls: noticeRes[1], - }); - } - } - } - return { pages, info }; - } - } - getList(pagenumber) { - return axios({ - url: this.url, - params: { - pagesize: 20, - pagenumber: pagenumber, - publishstatus: 2, - bidcategoryid: 1442, - iflongpro: 0, - _: Date.now(), - }, - method: "get", - }) - .then((res) => { - let result = res.data; - if (result.code === "success") { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - getNoticeUrl(id) { - let timestamp = Date.now(); - return axios({ - url: `https://glzb.geely.com/gpmp/notice/query?_=${timestamp}&pjtnoticeid=${id}`, - method: "get", - }) - .then((res) => { - let result = res.data; - if (result.code === "success") { - let promises = []; - for (let item of result.data.attachs) { - let params = { - name: item.attachname, - downloadUrl: item.downloadUrl, - previewUrl: item.previewUrl, - attachname: item.attachname, - _: Date.now(), - }; - promises.push( - axios({ - url: `https://glzb.geely.com/pub/file/info/preview`, - method: "get", - params, - }) - ); - } - return Promise.allSettled(promises).then((results) => { - let urls = []; - results.forEach((result) => { - if ( - result.status === "fulfilled" && - result.value.data.code === "success" - ) { - urls.push(result.value.data.data); - } - }); - return [null, urls]; - }); - } else { - return ["err", null]; - } - }) - .catch((err) => { - console.log("err:", err); - return [err, null]; - }); - } - - // writeFile(info) { - // fs.writeFileSync(this.filepath, JSON.stringify(info), "utf-8"); - // } -} - -new GEELY(); diff --git a/greatWall.js b/greatWall.js deleted file mode 100644 index eb86488..0000000 --- a/greatWall.js +++ /dev/null @@ -1,234 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; - -class GreatWall { - constructor() { - this.jsonMap = [ - { - name: "长城公开寻源", - info: [], - options: { - name: "长城公开寻源", - url: "https://srm.gwm.cn/cloud-srm/api-sou/sou-firstPage/souReqlistPage", - }, - }, - { - name: "长城招募公示大厅", - info: [], - options: { - name: "长城招募公示大厅", - url: "https://srm.gwm.cn/cloud-srm/api-sou/api-ql/Recruit/visitList", - data: { - type: "Recruit", - lang: "zh-cn", - query: { "*": {} }, - payload: { - filter: {}, - page: { sort: "lastUpdateDate desc", pageNum: 1, pageSize: 8 }, - }, - action: "visitList", - tree: true, - }, - }, - }, - ]; - console.log("长城 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - } - } - // 全量爬取 - loopFetchFull(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - additional: props.options, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - if (props.info.length > 0) { - this.queue.saveAnnouncements(props.name, props.info); - // this.writeFile(props); - this.queue.addMessage(props.name, props.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`${props.options.name}增量爬取失败:`, error); - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - if (config.data) { - // 招募公示大厅 - let arr = result[1].data.records; - let pages = result[1].data.pageCount; - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - let endTime, publishTime; - endTime = item.deadlineTime; - publishTime = item.publishTime; - // 命中关键词 - if (keywordsInclude(item.title)) { - info.push({ - id: item.recruitId, - name: item.title, - publishTime: publishTime, - endTime: endTime, - urls: `https://srm.gwm.cn/#/portalBidding/vendorBiddingDetail?id=${item.recruitId}`, - }); - } - } - return { pages, info }; - } else { - // 公开寻源 - let arr = result[1].data.list; - let pages = result[1].data.pages; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - let endTime, publishTime; - endTime = item.publicEndTime; - publishTime = item.releaseDate; - // 命中关键词 - if (keywordsInclude(item.projectName)) { - info.push({ - id: item.reqHeadId, - name: item.projectName, - publishTime: publishTime, - endTime: endTime, - urls: `https://srm.gwm.cn/#/portal?id=${item.reqHeadId}`, - }); - } - } - return { pages, info }; - } - } - } - // 分页获取数据 - getList(pagenumber, config) { - let data = {}; - if (config.data) { - data = config.data; - data.payload.page.pageNum = pagenumber; - } else { - data = { pageNum: pagenumber, pageSize: 8 }; - } - return axios({ - url: config.url, - data: data, - method: "post", - }) - .then((res) => { - let result = res.data; - if (result.code == "0") { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - // writeFile(props) { - // fs.writeFileSync(props.filepath, JSON.stringify(props.info), "utf-8"); - // } - - // extractDeadlineTime(html) { - // // 匹配"预告报名截止时间:"后面的时间格式 - // const regex = /预告报名截止时间:(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/; - // const match = html.match(regex); - - // if (match) { - // return match[1]; - // } - - // return null; - // } -} - -new GreatWall(); diff --git a/leapMotor.js b/leapMotor.js deleted file mode 100644 index d99e73e..0000000 --- a/leapMotor.js +++ /dev/null @@ -1,193 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import cheerio from "cheerio"; - -class LeapMotor { - constructor() { - this.url = - "https://lpsrm.leapmotor.com/cloud-srm/api-inq/inq-anon/reqhead/listPage"; - this.info = []; - console.log("零跑 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("零跑"); - if (announcements.length > 0) { - // console.log(announcements); - await this.increment(); - } else { - await this.fullFetch(); - } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements("零跑", this.info); - this.queue.addMessage("零跑", this.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - // 判断数据是否存在 - try { - let newInfo = this.queue.filterNewAnnouncements( - "零跑", - result.info - ); - // 有新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - - this.queue.saveAnnouncements("零跑", newInfo); - this.queue.addMessage("零跑", newInfo); - - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - // 传入页码获取数据 - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - // let total = result[1].data.total; - let pages = result[1].data.pages; - let arr = result[1].data.list; - - for (let i = 0; i < arr.length; i++) { - let item = arr[i]; - // 命中关键词 - if (keywordsInclude(item.souReqTitile)) { - console.log("处理项目:", item.reqHeadId, item.souReqTitile); - let noticeRes = await this.getNoticeUrl(item.reqHeadId); - if (noticeRes[0]) { - // 获取招标公告内容报错 - console.error("获取公告链接失败:", noticeRes[0]); - } else { - info.push({ - id: item.reqHeadId, - name: item.souReqTitile, - publishTime: item.publishTime, - endTime: item.expirationTime, - urls: noticeRes[1], - }); - } - } - } - return { pages, info }; - } - } - getList(pagenumber) { - return axios({ - url: this.url, - data: { - pageNum: pagenumber, - pageSize: 8, - }, - method: "post", - }) - .then((res) => { - let result = res.data; - if (result.code === "0") { - return [null, result]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - return [err, null]; - }); - } - - getNoticeUrl(id) { - return axios({ - url: `https://lpsrm.leapmotor.com/cloud-srm/api-inq/inq-anon/pj/reqhead/get?id=${id}`, - method: "get", - }) - .then((res) => { - let result = res.data; - if (result.code === "0") { - return [null, result.data.extNoticeLink]; - } else { - return ["err", null]; - } - }) - .catch((err) => { - console.log("err:", err); - return [err, null]; - }); - } - - // writeFile(info) { - // fs.writeFileSync(this.filepath, JSON.stringify(info), "utf-8"); - // } -} - -new LeapMotor(); diff --git a/nio.js b/nio.js deleted file mode 100644 index c6d43f7..0000000 --- a/nio.js +++ /dev/null @@ -1,170 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { - timestampToDate, - loopCall, - keywordsInclude, - getYiqiNoticeUrl, - parseToGgDetailsParams, -} from "./utils.js"; -import config from "./config.js"; -import * as cheerio from "cheerio"; -import { SQLiteMessageQueue } from "./sqlite.js"; - -class NIO { - constructor() { - // this.filepath = path.resolve("yiqi.json"); - this.info = []; - console.log("蔚来 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("蔚来"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - if (this.info.length > 0) { - this.queue.saveAnnouncements("蔚来", this.info); - // this.writeFile(this.info); - this.queue.addMessage("蔚来", this.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "蔚来", - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // this.info.push(...newInfo); - this.queue.saveAnnouncements("蔚来", newInfo); - // this.writeFile(this.info); - this.queue.addMessage("蔚来", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getHtml(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 0, info: [] }; - } else { - let pages = 1; - let html = result[1]; - const $ = cheerio.load(html); - let jsonStr = $("#__NEXT_DATA__").text(); - let data = JSON.parse(jsonStr).props.pageProps.tenderNotices; - // console.log(data); - data.forEach((item) => { - let id = item.id; - let name = item.title; - let publishTime = item.publishDate; - let endTime = item.dueTime; - let urls = item.documents[0].url; - if ( - endTime && - +new Date(endTime) >= Date.now() && - keywordsInclude(name) - ) { - info.push({ - id, - name, - publishTime, - endTime, - urls, - }); - } - }); - return { pages, info }; - } - } - // 分页获取数据 - getHtml(pagenumber) { - return axios({ - url: "https://www.nio.cn/partnership/tender-notices", - method: "get", - }) - .then((res) => { - let result = res.data; - return [null, result]; - }) - .catch((err) => { - return [err, null]; - }); - } -} - -new NIO(); diff --git a/picc.js b/picc.js index 4e36fea..ddbb4c5 100644 --- a/picc.js +++ b/picc.js @@ -184,7 +184,7 @@ class PICC { }) .then((res) => { let result = res.data; - console.log("then",result) + console.log("then",JSON.stringify(result.res.rows, null, 2)) if (result.msg === "操作成功" && result.code === 0) { return [null, result]; } else { diff --git a/stats.js b/stats.js deleted file mode 100644 index ade7443..0000000 --- a/stats.js +++ /dev/null @@ -1,80 +0,0 @@ -import { SQLiteMessageQueue } from "./sqlite.js"; -import path from "path"; -import { md5 } from "./utils.js"; -import axios from "axios"; - -const queue = new SQLiteMessageQueue(); - -const stats = queue.getStats(); - -// function merge() { -// let files = [ -// { name: "长安", path: "changan.json" }, -// { name: "奇瑞变更公告", path: "chery_bg.json" }, -// { name: "奇瑞采购公告", path: "chery_cg.json" }, -// { name: "奇瑞寻源预告", path: "chery_xy.json" }, -// { name: "零跑", path: "leapMotor.json" }, -// { name: "吉利", path: "geely.json" }, -// { name: "一汽", path: "yiqi.json" }, -// ]; -// files.forEach((file) => { -// queue.migrateFromJsonFile(file.name, path.resolve(file.path)); -// }); -// } -// merge(); -// 把message中的数据状态改成pending -// queue.getFailedMessages() -// .forEach((message) => { -// queue.updateMessageStatus(message.id, "pending"); -// }); -// function getSign(timestamp) { -// let secret = "cpwyyds"; -// let uri = "/common/message/push"; -// const url = uri + timestamp + secret; -// console.log(url); -// const myCalc = md5(url); -// let sign = -// myCalc.substring(5, 13) + -// myCalc.substring(29, 31) + -// myCalc.substring(18, 27); -// //sign 转大写 -// sign = sign.toUpperCase(); -// return sign; -// } -// let time = new Date().getTime(); -// let data = { -// timestamp: time, -// sign: getSign(time), -// templateNo: "A002", -// url: "https://www.baidu.com/", -// paramList: [ -// { -// key: "thing8", -// value: "网站name", -// }, -// { -// key: "thing2", -// value: "项目name", -// }, -// { -// key: "time14", -// value: "2025-11-2", -// }, -// { -// key: "time17", -// value: "2025-11-3 00:00:00", -// }, -// ], -// }; -// axios({ -// url: "https://testadvert.shenlintech.com/platform/common/message/push", -// method: "post", -// data, -// }) -// .then((res) => { -// console.log(res.data); -// }) -// .catch((err) => { -// console.log(err); -// }); -console.log(stats); diff --git a/third.js b/third.js deleted file mode 100644 index ca9afd3..0000000 --- a/third.js +++ /dev/null @@ -1,309 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import JSON5 from "json5"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -import * as cheerio from "cheerio"; - -class Third { - constructor(jsonMap) { - this.axiosInstance = axios.create({ timeout: 30000, maxRedirects: 5 }); - this.axiosInstance.interceptors.request.use((config) => { - // 添加cookie到请求头 - const cookieString = Array.from(this.cookiePair.entries()) - .map(([name, value]) => `${name}=${value}`) - .join("; "); - config.headers.Cookie = cookieString; - // console.log(config); - return config; - }); - this.axiosInstance.interceptors.response.use( - (response) => { - // 更新cookie到请求头 - let cookieArr = response.headers["set-cookie"] || []; - this.extractCookie(cookieArr); - return response; - }, - (error) => { - return Promise.reject(error); - } - ); - this.cookiePair = new Map(); - // this.csrfToken = ""; - this.jsonMap = jsonMap; - console.log("三方平台 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - } - } - async initializeCookie() { - try { - let headers = { - headers: { - Accept: "text/plain, */*; q=0.01", - "Accept-Language": "zh-CN,zh;q=0.9", - "Cache-Control": "no-cache", - "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", - Origin: "https://www.chinabidding.com", - Pragma: "no-cache", - Priority: "u=1, i", - Referer: "https://www.chinabidding.com/search/proj.htm", - "Sec-Ch-Ua": - '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', - "Sec-Ch-Ua-Mobile": "?0", - "Sec-Ch-Ua-Platform": '"macOS"', - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "same-origin", - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36", - "X-Requested-With": "XMLHttpRequest", - }, - }; - const homeResponse = await this.axiosInstance.get( - "https://www.chinabidding.com/search/proj.htm", - headers - ); - } catch (err) { - console.log("err", err); - throw err; - } - } - extractCookie(cookieArr) { - for (let cookie of cookieArr) { - let [key, value] = cookie.split(";")[0].split("="); - this.cookiePair.set(key, value); - } - // console.log(this.cookiePair); - } - // 全量爬取 - loopFetchFull(props) { - console.log("开始全量爬取"); - try { - loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - additional: props.options, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - if (props.info.length > 0) { - this.queue.saveAnnouncements(props.name, props.info); - this.queue.addMessage(props.name, props.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - console.log("开始增量爬取"); - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`${props.options.name}增量爬取失败:`, error); - } - } - - async getNoticeDetail(url) { - try { - let result = await axios.get(url); - return result.data; - } catch (err) { - return "err"; - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败: ", result[0]); - return { pages: 0, info: [] }; - } else { - let pages = 3; - let html = result[1]; - const $ = cheerio.load(html); - $(".as-pager-body li").each((index, element) => { - let idmatch = $(element) - .find(".as-pager-item") - .attr("href") - .match(/\/bidDetail\/(\d+)\.html/); - let id = idmatch ? idmatch[1] : ""; - let name = $(element).find(".txt").attr("title"); - - let url = $(element).find(".as-pager-item").attr("href"); - if (keywordsInclude(name)) { - console.log("处理项目:", name); - info.push({ - id: id, - name: name, - urls: url, - publishTime: "--", - endTime: "--", - }); - } - }); - return { pages, info }; - } - } - async getList(pagenumber, config) { - let data = config.data; - data.currentPage = pagenumber; - let headers = { - Accept: "text/plain, */*; q=0.01", - "Accept-Language": "zh-CN,zh;q=0.9", - "Cache-Control": "no-cache", - "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", - Origin: "https://www.chinabidding.com", - Pragma: "no-cache", - Priority: "u=1, i", - Referer: "https://www.chinabidding.com/search/proj.htm", - "Sec-Ch-Ua": - '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', - "Sec-Ch-Ua-Mobile": "?0", - "Sec-Ch-Ua-Platform": '"macOS"', - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "same-origin", - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36", - "X-Requested-With": "XMLHttpRequest", - }; - try { - const response = await this.axiosInstance({ - url: config.url, - data, - method: "post", - headers, - }); - let result = response.data; - return [null, result]; - } catch (err) { - console.log("cookie不对"); - try { - await this.initializeCookie(); - const retryResponse = await this.axiosInstance({ - url: config.url, - data, - method: "post", - headers, - }); - let result = retryResponse.data; - return [null, result]; - } catch (retryErr) { - return [retryErr, null]; - } - } - } -} - -new Third([ - { - name: "机电项目招投标【招标公告】", - info: [], - options: { - name: "机电项目招投标【招标公告】", - url: "https://www.chinabidding.com/search/proj.htm", - data: { - fullText: "", - pubDate: "", - infoClassCodes: "(0105 0103)", - normIndustry: "", - zoneCode: "", - fundSourceCodes: "", - poClass: "BidNotice", - rangeType: "", - currentPage: 1, - }, - }, - }, -]); -new Third([ - { - name: "机电项目招投标【招标变更公告】", - info: [], - options: { - name: "机电项目招投标【招标变更公告】", - url: "https://www.chinabidding.com/search/proj.htm", - data: { - fullText: "", - pubDate: "", - infoClassCodes: "(0106 0104)", - normIndustry: "", - zoneCode: "", - fundSourceCodes: "", - poClass: "BidNotice", - rangeType: "", - currentPage: 1, - }, - }, - }, -]); diff --git a/yiqi.js b/yiqi.js deleted file mode 100644 index 37f895d..0000000 --- a/yiqi.js +++ /dev/null @@ -1,199 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import { - timestampToDate, - loopCall, - keywordsInclude, - getYiqiNoticeUrl, - parseToGgDetailsParams, - // addToMessageQueue, -} from "./utils.js"; -import config from "./config.js"; -import * as cheerio from "cheerio"; -import { SQLiteMessageQueue } from "./sqlite.js"; -// import { messageQueue } from "./msgManager.js"; - -class YiQi { - constructor() { - // this.filepath = path.resolve("yiqi.json"); - this.info = []; - console.log("一汽 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - let announcements = this.queue.getAnnouncementsBySpider("一汽"); - if (announcements.length > 0) { - await this.increment(); - } else { - await this.fullFetch(); - } - // if (fs.existsSync(this.filepath)) { - // let data = fs.readFileSync(this.filepath, "utf-8"); - // this.info = data ? JSON.parse(data) : []; - // if (this.info.length > 0) { - // await this.increment(); - // } else { - // await this.fullFetch(); - // } - // } else { - // console.log("历史文件不存在,开始全量爬取"); - // await this.fullFetch(); - // } - } - // 全量爬取 - async fullFetch() { - console.log("开始全量爬取..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.fullFetchTime, - pagenumber: 1, - stopWhen: (pagenumber, result) => { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - this.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - this.info.push(...result.info); - console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`); - try { - this.queue.saveAnnouncements("一汽", this.info); - // this.writeFile(this.info); - this.queue.addMessage("一汽", this.info); - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("全量爬取失败:", error); - } - console.log("开始增量爬取..."); - this.increment(); - } - - // 增量爬取 - async increment() { - console.log("开始增量爬取模式,每5分钟检查一次新数据..."); - try { - await loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - "一汽", - result.info - ); - // let newInfo = result.info.filter( - // (item) => !this.info.some((info) => info.id === item.id) - // ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // this.info.push(...newInfo); - this.queue.saveAnnouncements("一汽", newInfo); - // this.writeFile(this.info); - this.queue.addMessage("一汽", newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error("增量爬取失败:", error); - } - } - async getInfo(pagenumber = 1) { - let info = []; - console.log(`正在获取第 ${pagenumber} 页数据...`); - let result = await this.getHtml(pagenumber); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败:", result[0]); - return { pages: 30, info: [] }; - } else { - let pages = 30; - let html = result[1]; - const $ = cheerio.load(html); - let noticeEl = $(".zl-list-main .zl-col-6"); - noticeEl.each((index, element) => { - let id = $(element).find(".zl-desc-item:contains('项目编号')").text(); - let name = $(element).find(".title").text(); - let publishTime = $(element) - .find(".zl-desc-item:contains('发布时间')") - .text(); - let endTime = $(element).find(".daojishi").attr("data-time"); - // 获取生产链接的参数 - let funcStr = $(element).find(".jump").attr("onclick"); - - let funcArgs = parseToGgDetailsParams(funcStr); - // 公告未过期 && 命中关键词 - if (endTime && keywordsInclude(name)) { - let noticeUrl = getYiqiNoticeUrl(...funcArgs); - info.push({ - id: id.replace("项目编号:", ""), - name: name.trim(), - publishTime: publishTime.replace("发布时间:", "").trim(), - endTime: timestampToDate(Number(endTime)), - urls: noticeUrl, - }); - } - }); - - return { pages, info }; - } - } - // 分页获取数据 - getHtml(pagenumber) { - return axios({ - url: "https://etp.faw.cn/gg/allJYTypeGGList?hangYeType=-1&xmLeiXing=&ggStartTimeEnd=&gongGaoType=5&isNew=1", - data: { - searchType: "", - searchText: "", - currentPage: pagenumber, - }, - headers: { - "Content-Type": "application/x-www-form-urlencoded", - }, - method: "post", - }) - .then((res) => { - let result = res.data; - return [null, result]; - }) - .catch((err) => { - return [err, null]; - }); - } - - // writeFile(info) { - // fs.writeFileSync(this.filepath, JSON.stringify(info), "utf-8"); - // } -} - -new YiQi(); diff --git a/youzhicai.js b/youzhicai.js deleted file mode 100644 index ca6f15e..0000000 --- a/youzhicai.js +++ /dev/null @@ -1,406 +0,0 @@ -import axios from "axios"; -import fs from "fs"; -import path from "path"; -import JSON5 from "json5"; -import { timestampToDate, loopCall, keywordsInclude } from "./utils.js"; -import config from "./config.js"; -import { SQLiteMessageQueue } from "./sqlite.js"; -import * as cheerio from "cheerio"; - -class YouZhiCai { - constructor(jsonMap) { - this.axiosInstance = axios.create({ timeout: 30000, maxRedirects: 5 }); - this.axiosInstance.interceptors.request.use((config) => { - // 添加cookie到请求头 - const cookieString = Array.from(this.cookiePair.entries()) - .map(([name, value]) => `${name}=${value}`) - .join("; "); - config.headers.Cookie = cookieString; - return config; - }); - this.axiosInstance.interceptors.response.use( - (response) => { - // 更新cookie到请求头 - let cookieArr = response.headers["set-cookie"] || []; - this.extractCookie(cookieArr); - return response; - }, - (error) => { - return Promise.reject(error); - } - ); - this.cookiePair = new Map(); - // this.csrfToken = ""; - this.jsonMap = jsonMap; - console.log("优质采 爬虫启动..."); - this.queue = new SQLiteMessageQueue(); - this.start(); - } - - async start() { - try { - await this.init(); - } catch (err) { - console.error("启动失败:", err); - } - } - async init() { - for (let item of this.jsonMap) { - let announcements = this.queue.getAnnouncementsBySpider(item.name); - if (announcements.length > 0) { - this.loopFetchIncrement(item); - } else { - this.loopFetchFull(item); - } - } - } - async initializeCookie() { - try { - let headers = { - headers: { - Accept: "text/plain, */*; q=0.01", - "Accept-Language": "zh-CN,zh;q=0.9", - "Cache-Control": "no-cache", - "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", - Origin: "https://www.youzhicai.com", - Pragma: "no-cache", - Priority: "u=1, i", - Referer: "https://www.youzhicai.com/s/1_1_0_0_.html", - "Sec-Ch-Ua": - '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', - "Sec-Ch-Ua-Mobile": "?0", - "Sec-Ch-Ua-Platform": '"macOS"', - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "same-origin", - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36", - "X-Requested-With": "XMLHttpRequest", - }, - }; - const homeResponse = await this.axiosInstance.get( - "https://www.youzhicai.com/s/1_1_0_0_.html", - headers - ); - // // 提取csrf-token - // let tokenMatch = homeResponse.data.match( - // / { - return ( - pagenumber >= result.pages || pagenumber >= config.pageNumberLimit - ); - }, - readyForNext: (pagenumber, result) => { - props.info.push(...result.info); - return pagenumber + 1; - }, - complete: (result) => { - props.info.push(...result.info); - console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`); - try { - if (props.info.length > 0) { - this.queue.saveAnnouncements(props.name, props.info); - this.queue.addMessage(props.name, props.info); - } - } catch (error) { - console.error("数据库操作失败:", error); - } - this.loopFetchIncrement(props); - }, - }); - } catch (error) { - console.error(`${props.options.name}全量爬取失败:`, error); - } - } - loopFetchIncrement(props) { - console.log("开始增量爬取"); - try { - loopCall(this.getInfo.bind(this), { - time: config.incrementFetchTime, // 5分钟间隔 - pagenumber: 1, - additional: props.options, - readyForNext: (pagenumber, result) => { - try { - let newInfo = this.queue.filterNewAnnouncements( - props.name, - result.info - ); - // 存在新数据 - if (newInfo.length > 0) { - console.log(`发现 ${newInfo.length} 条新数据`); - // props.info.push(...newInfo); - this.queue.saveAnnouncements(props.name, newInfo); - // this.writeFile(props); - this.queue.addMessage(props.name, newInfo); - // 全是新数据,继续下一页 - if (newInfo.length === result.info.length) { - return pagenumber + 1; - } else { - // 有部分重复数据,重新从第一页开始 - return 1; - } - } else { - console.log("没有发现新数据,继续监控..."); - return 1; // 重新从第一页开始 - } - } catch (error) { - console.error("数据库操作失败:", error); - } - }, - }); - } catch (error) { - console.error(`${props.options.name}增量爬取失败:`, error); - } - } - async getInfo(pagenumber = 1, config) { - let info = []; - console.log(`${config.name}--获取第 ${pagenumber} 页数据...`); - let result = await this.getList(pagenumber, config); - if (result[0]) { - // 出错, 记录错误日志 - console.error("获取页面数据失败: ", result[0]); - return { pages: 0, info: [] }; - } else { - // 后面的都要验证码 - - // let pages = 2; - let html = result[1]; - const $ = cheerio.load(html); - let total = $("#recommendMsg .info-num-value").text(); - let pages = Math.ceil(total / 15); - if (pages > 2) { - pages = 2; - } - $(".project-li").each((index, element) => { - let id = $(element).find(".project-name0").attr("href"); - let name = $(element).find(".project-name0").attr("title"); - let publishTime = $(element).find(".pub-value0").text(); - let leftDay = $(element).find(".left-day .emOrange:eq(0)").text(); - let endTime = new Date( - +new Date(publishTime) + leftDay * 24 * 60 * 60 * 1000 - ).toLocaleDateString(); - // console.log(endTime); - let urls = "https://www.youzhicai.com" + id; - if (keywordsInclude(name)) { - console.log("处理项目:", name, publishTime, endTime); - info.push({ - id: id, - name: name, - publishTime: publishTime, - endTime: endTime, - urls: urls, - }); - } - }); - return { pages, info }; - } - } - async getList(pagenumber, config) { - let data = config.data; - data.PageIndex = pagenumber; - if (this.cookiePair.get("__RequestVerificationToken")) { - data.__RequestVerificationToken = this.cookiePair.get( - "__RequestVerificationToken" - ); - } - let headers = { - Accept: "text/plain, */*; q=0.01", - "Accept-Language": "zh-CN,zh;q=0.9", - "Cache-Control": "no-cache", - "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", - Origin: "https://www.youzhicai.com", - Pragma: "no-cache", - Priority: "u=1, i", - Referer: "https://www.youzhicai.com/s/1_1_0_0_.html", - "Sec-Ch-Ua": - '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', - "Sec-Ch-Ua-Mobile": "?0", - "Sec-Ch-Ua-Platform": '"macOS"', - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "same-origin", - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36", - "X-Requested-With": "XMLHttpRequest", - }; - try { - const response = await this.axiosInstance({ - url: config.url, - data, - method: "post", - headers, - }); - let result = response.data; - return [null, result]; - } catch (err) { - console.log("cookie不对"); - try { - await this.initializeCookie(); - data.__RequestVerificationToken = this.cookiePair.get( - "__RequestVerificationToken" - ); - const retryResponse = await this.axiosInstance({ - url: config.url, - data, - method: "post", - headers, - }); - // console.log(retryResponse.data); - let result = retryResponse.data; - return [null, result]; - } catch (retryErr) { - return [retryErr, null]; - } - } - } -} - -new YouZhiCai([ - { - name: "优质采【招标公告】", - info: [], - options: { - name: "优质采【招标公告】", - url: "https://www.youzhicai.com/s/1_1_0_0_.html", - data: { - MsProvince: "", - MsCity: "", - MsStartDate: "", - MsEndDate: "", - AutoOr: 0, - BackOr: 0, - NoticeTitle: "", - searchAccuracy: "precise", - matchType: "precise", - TenderType: "", - MsBidderType: 1, - MsNoticeType: 1, - MsPublishType: 0, - MsSingUpType: 1, - MsSort: 2, - MsProvince: "", - PageIndex: 1, - PageSize: 15, - AgencyId: "", - SecondSearch: "", - SecondSearchType: "", - TotalSize: 10000, - SearchRange: 3, - year: "", - key1: "", - key2: "", - key3: "", - }, - }, - }, -]); -new YouZhiCai([ - { - name: "优质采【澄清/变更公告】", - info: [], - options: { - name: "优质采【澄清/变更公告】", - url: "https://www.youzhicai.com/s/1_1_0_0_.html", - data: { - MsProvince: "", - MsCity: "", - MsStartDate: "", - MsEndDate: "", - AutoOr: 0, - BackOr: 0, - NoticeTitle: "", - searchAccuracy: "precise", - matchType: "precise", - TenderType: "", - MsBidderType: 1, - MsNoticeType: 5, - MsPublishType: 0, - MsSingUpType: 1, - MsSort: 2, - MsProvince: "", - PageIndex: 1, - PageSize: 15, - AgencyId: "", - SecondSearch: "", - SecondSearchType: "", - TotalSize: 10000, - SearchRange: 3, - year: "", - key1: "", - key2: "", - key3: "", - }, - }, - }, -]); -new YouZhiCai([ - { - name: "优质采【招标项目计划】", - info: [], - options: { - name: "优质采【招标项目计划】", - url: "https://www.youzhicai.com/s/1_1_0_0_.html", - data: { - MsProvince: "", - MsCity: "", - MsStartDate: "", - MsEndDate: "", - AutoOr: 0, - BackOr: 0, - NoticeTitle: "", - searchAccuracy: "precise", - matchType: "precise", - TenderType: "", - MsBidderType: 1, - MsNoticeType: 7, - MsPublishType: 0, - MsSingUpType: 1, - MsSort: 2, - MsProvince: "", - PageIndex: 1, - PageSize: 15, - AgencyId: "", - SecondSearch: "", - SecondSearchType: "", - TotalSize: 10000, - SearchRange: 3, - year: "", - key1: "", - key2: "", - key3: "", - }, - }, - }, -]);