初始化

This commit is contained in:
huzhengrong 2025-10-23 10:39:32 +08:00
commit 12ee63b814
23 changed files with 4351 additions and 0 deletions

109
.gitignore vendored Normal file
View File

@ -0,0 +1,109 @@
# Node.js
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
package-lock.json
yarn.lock
pnpm-lock.yaml
# Logs
logs
*.log
*.log.*
log/
pids
*.pid
*.seed
*.pid.lock
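# OS-generated files (note: the *.db pattern below ignores every file ending in .db, not only Thumbs.db)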
.DS_Store
Thumbs.db
*.db
# dotenv environment variables
.env
.env.*
!.env.example
# Editor directories and files
.idea/
.vscode/
*.sublime-workspace
*.sublime-project
# Build output
dist/
build/
out/
coverage/
.nyc_output/
# Optional npm cache directory
.npm/
# Optional eslint cache
.eslintcache
# Optional REPL history
.node_repl_history
# Mac system files
.AppleDouble
.LSOverride
# Test coverage
coverage/
# TypeScript cache
*.tsbuildinfo
# Optional: local data
*.local
# Optional: debug
debug.log
# Optional: next.js
.next/
# Optional: Nuxt.js
.nuxt/
# Optional: SvelteKit
.svelte-kit/
# Optional: vuepress
.vuepress/dist
# Optional: Storybook
.storybook-out/
# Optional: Parcel
.cache/
# Optional: output of 'npm pack'
*.tgz
# Optional: PM2 logs and pids
pids/
*.pid
*.seed
*.pid.lock
pm2.log
# Optional: dotenv
.env.local
.env.development.local
.env.test.local
.env.production.local
# Optional: jest (note: these entries ignore the Jest config files themselves, so they will not be committed)
jest.config.js
jest.config.ts
# Optional: cypress
cypress/videos/
cypress/screenshots/

173
byd.js Normal file
View File

@ -0,0 +1,173 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall, keywordsInclude } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
/**
 * Crawler for BYD tender announcements (spcn.byd.com).
 *
 * On construction it decides between a one-off full crawl (no announcements
 * stored yet for "比亚迪") and the incremental polling loop. Paging itself is
 * driven by `loopCall` from ./utils.js, which is not visible in this file; the
 * callbacks below only rely on it passing `(pagenumber, result)` to
 * `stopWhen` / `readyForNext` and `(result)` to `complete`.
 */
class BYD {
  constructor() {
    // Matching announcements accumulated during a full crawl.
    this.info = [];
    console.log("比亚迪 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    // Fire-and-forget: start() catches its own errors.
    this.start();
  }

  /** Entry point; never rejects, startup failures are only logged. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /** Chooses incremental mode when history exists, otherwise a full crawl. */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider("比亚迪");
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: walks pages until the last page or `config.pageNumberLimit`,
   * persists the collected announcements, then switches to incremental mode.
   */
  async fullFetch() {
    console.log("开始全量爬取...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          this.info.push(...result.info);
          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
          try {
            // Nothing matched: do not write an empty batch or queue an empty message.
            if (this.info.length > 0) {
              this.queue.saveAnnouncements("比亚迪", this.info);
              this.queue.addMessage("比亚迪", this.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
        },
      });
    } catch (error) {
      console.error("全量爬取失败:", error);
    }
    console.log("开始增量爬取...");
    // Awaited so the promise is not left floating.
    await this.increment();
  }

  /** Incremental mode: polls at `config.incrementFetchTime` and stores unseen items. */
  async increment() {
    console.log("开始增量爬取模式每5分钟检查一次新数据...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        readyForNext: (pagenumber, result) =>
          this.nextIncrementPage(pagenumber, result),
      });
    } catch (error) {
      console.error("增量爬取失败:", error);
    }
  }

  /**
   * Stores the unseen announcements of one page and picks the next page.
   *
   * @param {number} pagenumber - page that was just fetched
   * @param {{info: object[]}} result - value returned by getInfo
   * @returns {number} `pagenumber + 1` when every item on the page was new,
   *   otherwise 1 (restart from the first page). A database error also yields 1
   *   so the loop always receives a defined page number.
   */
  nextIncrementPage(pagenumber, result) {
    try {
      const newInfo = this.queue.filterNewAnnouncements("比亚迪", result.info);
      if (newInfo.length === 0) {
        console.log("没有发现新数据,继续监控...");
        return 1;
      }
      console.log(`发现 ${newInfo.length} 条新数据`);
      this.queue.saveAnnouncements("比亚迪", newInfo);
      this.queue.addMessage("比亚迪", newInfo);
      return newInfo.length === result.info.length ? pagenumber + 1 : 1;
    } catch (error) {
      console.error("数据库操作失败:", error);
      return 1;
    }
  }

  /**
   * Fetches one page and keeps the items whose title matches the keywords and
   * whose sign-up deadline has not passed.
   *
   * @param {number} [pagenumber=1]
   * @returns {Promise<{pages: number, info: object[]}>} `pages` is 0 on a failed request
   */
  async getInfo(pagenumber = 1) {
    const info = [];
    console.log(`正在获取第 ${pagenumber} 页数据...`);
    const [error, payload] = await this.getList(pagenumber);
    if (error) {
      console.error("获取页面数据失败:", error);
      return { pages: 0, info: [] };
    }
    // Tolerate a response without data/records instead of throwing.
    const total = payload.data?.total ?? 0;
    const records = payload.data?.records ?? [];
    // 10 must stay in sync with the pageSize sent by getList.
    const pages = Math.ceil(total / 10);
    for (const item of records) {
      const endTime = timestampToDate(
        new Date(item.signUpEndTime).getTime(),
        true
      );
      const stillOpen = endTime && +new Date(endTime) >= Date.now();
      if (keywordsInclude(item.title) && stillOpen) {
        info.push({
          id: item.sourcingId,
          name: item.title,
          publishTime: timestampToDate(
            new Date(item.tenderNoticePublishTime).getTime(),
            true
          ),
          endTime,
          urls: `https://spcn.byd.com/#/tender-detail?sourcingId=${item.sourcingId}`,
        });
      }
    }
    return { pages, info };
  }

  /**
   * Requests one page of announcements.
   *
   * @param {number} pagenumber
   * @returns {Promise<[any, object|null]>} `[null, body]` on success,
   *   `["err", null]` when the API reports failure, `[error, null]` when the
   *   request itself fails.
   */
  async getList(pagenumber) {
    try {
      const res = await axios({
        url: "https://spcn.byd.com/api/srm-sou-sp/supplier/supplier/getTenderAnnouncementInfo",
        data: {
          pageNo: pagenumber,
          pageSize: 10,
        },
        method: "post",
      });
      const result = res.data;
      if (result.msg === "成功" && result.code === "000000") {
        return [null, result];
      }
      return ["err", null];
    } catch (err) {
      return [err, null];
    }
  }
}
// Start the crawler: the BYD constructor calls start() as a side effect.
new BYD();

188
changan.js Normal file
View File

@ -0,0 +1,188 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import {
timestampToDate,
loopCall,
keywordsInclude,
// addToMessageQueue,
} from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
// import { messageQueue } from "./msgManager.js";
// import cheerio from "cheerio";
/**
 * Crawler for Changan non-production sourcing notices (portal.changan.com.cn).
 *
 * Runs a full crawl when no announcements are stored for "长安", otherwise the
 * incremental polling loop. Paging is driven by `loopCall` from ./utils.js,
 * which is not visible in this file.
 */
class ChangAn {
  constructor() {
    // Matching announcements accumulated during a full crawl.
    this.info = [];
    console.log("长安 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    // Fire-and-forget: start() catches its own errors.
    this.start();
  }

  /** Entry point; never rejects, startup failures are only logged. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /** Chooses incremental mode when history exists, otherwise a full crawl. */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider("长安");
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: walks pages until the last page or `config.pageNumberLimit`,
   * persists the collected announcements, then switches to incremental mode.
   */
  async fullFetch() {
    console.log("开始全量爬取...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          this.info.push(...result.info);
          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
          try {
            // Same guard as the other spiders: skip the write and the
            // notification when nothing matched.
            if (this.info.length > 0) {
              this.queue.saveAnnouncements("长安", this.info);
              this.queue.addMessage("长安", this.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
        },
      });
    } catch (error) {
      console.error("全量爬取失败:", error);
    }
    console.log("开始增量爬取...");
    // Awaited so the promise is not left floating.
    await this.increment();
  }

  /** Incremental mode: polls at `config.incrementFetchTime` and stores unseen items. */
  async increment() {
    console.log("开始增量爬取模式每5分钟检查一次新数据...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        readyForNext: (pagenumber, result) =>
          this.nextIncrementPage(pagenumber, result),
      });
    } catch (error) {
      console.error("增量爬取失败:", error);
    }
  }

  /**
   * Stores the unseen announcements of one page and picks the next page.
   *
   * @param {number} pagenumber - page that was just fetched
   * @param {{info: object[]}} result - value returned by getInfo
   * @returns {number} `pagenumber + 1` when every item on the page was new,
   *   otherwise 1. A database error also yields 1 so the loop always receives
   *   a defined page number.
   */
  nextIncrementPage(pagenumber, result) {
    try {
      const newInfo = this.queue.filterNewAnnouncements("长安", result.info);
      if (newInfo.length === 0) {
        console.log("没有发现新数据,继续监控...");
        return 1;
      }
      console.log(`发现 ${newInfo.length} 条新数据`);
      this.queue.saveAnnouncements("长安", newInfo);
      this.queue.addMessage("长安", newInfo);
      return newInfo.length === result.info.length ? pagenumber + 1 : 1;
    } catch (error) {
      console.error("数据库操作失败:", error);
      return 1;
    }
  }

  /**
   * Fetches one page and keeps the items whose project name matches the keywords.
   *
   * @param {number} [pagenumber=1]
   * @returns {Promise<{pages: number, info: object[]}>} `pages` is 0 on a failed request
   */
  async getInfo(pagenumber = 1) {
    const info = [];
    console.log(`正在获取第 ${pagenumber} 页数据...`);
    const [error, payload] = await this.getList(pagenumber);
    if (error) {
      console.error("获取页面数据失败:", error);
      return { pages: 0, info: [] };
    }
    // Tolerate a response without result/records instead of throwing.
    const pages = payload.result?.pages ?? 0;
    const records = payload.result?.records ?? [];
    for (const item of records) {
      if (keywordsInclude(item.projectName)) {
        console.log("处理项目:", item.id, item.projectName);
        info.push({
          id: item.id,
          name: item.projectName,
          publishTime: item.startTime,
          endTime: item.endTime,
          urls: `https://portal.changan.com.cn/noProdNoticeInfo?_t=${Date.now()}&id=${item.id}`,
        });
      }
    }
    return { pages, info };
  }

  /**
   * Requests one page of notices.
   *
   * @param {number} pagenumber
   * @returns {Promise<[any, object|null]>} `[null, body]` on success,
   *   `["err", null]` when `success` is falsy, `[error, null]` when the request fails.
   */
  async getList(pagenumber) {
    try {
      const res = await axios({
        url: "https://portal.changan.com.cn/backend_8086/changan_platform/api/nonPdcSourceNoticeCt/listSourceNoticePageBySupplier",
        params: {
          _t: Date.now(),
          pageNo: pagenumber,
          pageSize: 20,
        },
        method: "get",
      });
      const result = res.data;
      return result.success ? [null, result] : ["err", null];
    } catch (err) {
      return [err, null];
    }
  }
}
// Start the crawler: the ChangAn constructor calls start() as a side effect.
new ChangAn();

251
chery.js Normal file
View File

@ -0,0 +1,251 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import {
timestampToDate,
loopCall,
keywordsInclude,
// addToMessageQueue,
} from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
// import { messageQueue } from "./msgManager.js";
// import cheerio from "cheerio";
/**
 * Crawler for Chery announcements (ebd.mychery.com). Three CMS categories are
 * crawled independently, each with its own full/incremental loop.
 *
 * Paging is driven by `loopCall` from ./utils.js, which is not visible here.
 */
class Chery {
  constructor() {
    // One entry per category: `name` is the spider key used in the database,
    // `info` accumulates full-crawl matches, `options` is passed to getInfo.
    this.jsonMap = [
      {
        name: "奇瑞采购公告",
        info: [],
        options: {
          name: "采购公告",
          url: "https://ebd.mychery.com/cms/api/dynamicData/queryContentPage",
          categoryId: "5035",
          siteId: "747",
        },
      },
      {
        name: "奇瑞寻源预告",
        info: [],
        options: {
          name: "寻源预告",
          url: "https://ebd.mychery.com/cms/api/dynamicData/queryContentPage",
          categoryId: "965901485789413376",
          siteId: "747",
        },
      },
      {
        name: "奇瑞变更公告",
        info: [],
        options: {
          name: "变更公告",
          url: "https://ebd.mychery.com/cms/api/dynamicData/queryContentPage",
          categoryId: "5032",
          siteId: "747",
        },
      },
    ];
    console.log("奇瑞 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    // Fire-and-forget: start() catches its own errors.
    this.start();
  }

  /** Entry point; never rejects, startup failures are only logged. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /**
   * Starts one loop per category. The loops are intentionally not awaited so
   * the categories run side by side; each loop handles its own errors.
   */
  async init() {
    for (const item of this.jsonMap) {
      const announcements = this.queue.getAnnouncementsBySpider(item.name);
      if (announcements.length > 0) {
        this.loopFetchIncrement(item);
      } else {
        this.loopFetchFull(item);
      }
    }
  }

  /**
   * Full crawl of one category, followed by the incremental loop.
   * Async so that a rejection from loopCall is actually caught here.
   *
   * @param {{name: string, info: object[], options: object}} props
   * @returns {Promise<void>} never rejects
   */
  async loopFetchFull(props) {
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        additional: props.options,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          props.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          props.info.push(...result.info);
          console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`);
          try {
            // Skip the write and the notification when nothing matched.
            if (props.info.length > 0) {
              this.queue.saveAnnouncements(props.name, props.info);
              this.queue.addMessage(props.name, props.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
          // Not awaited on purpose; loopFetchIncrement never rejects.
          this.loopFetchIncrement(props);
        },
      });
    } catch (error) {
      console.error(`奇瑞${props.options.name}全量爬取失败:`, error);
    }
  }

  /**
   * Incremental polling loop of one category.
   *
   * @param {{name: string, info: object[], options: object}} props
   * @returns {Promise<void>} never rejects
   */
  async loopFetchIncrement(props) {
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        additional: props.options,
        readyForNext: (pagenumber, result) =>
          this.nextIncrementPage(props, pagenumber, result),
      });
    } catch (error) {
      console.error(`奇瑞${props.options.name}增量爬取失败:`, error);
    }
  }

  /**
   * Stores the unseen announcements of one page and picks the next page.
   *
   * @returns {number} `pagenumber + 1` when every item on the page was new,
   *   otherwise 1. A database error also yields 1 so the loop always receives
   *   a defined page number.
   */
  nextIncrementPage(props, pagenumber, result) {
    try {
      const newInfo = this.queue.filterNewAnnouncements(props.name, result.info);
      if (newInfo.length === 0) {
        console.log("没有发现新数据,继续监控...");
        return 1;
      }
      console.log(`发现 ${newInfo.length} 条新数据`);
      this.queue.saveAnnouncements(props.name, newInfo);
      this.queue.addMessage(props.name, newInfo);
      return newInfo.length === result.info.length ? pagenumber + 1 : 1;
    } catch (error) {
      console.error("数据库操作失败:", error);
      return 1;
    }
  }

  /**
   * Fetches one page of a category and keeps the rows that match the keywords
   * and whose deadline is still in the future.
   *
   * @param {number} [pagenumber=1]
   * @param {{name: string, url: string, categoryId: string, siteId: string}} options
   *   per-category settings (named `options` to avoid shadowing the imported config)
   * @returns {Promise<{pages: number, info: object[]}>}
   */
  async getInfo(pagenumber = 1, options) {
    const info = [];
    console.log(`${options.name}--获取第 ${pagenumber} 页数据...`);
    const [error, payload] = await this.getList(pagenumber, options);
    // The page count is a fixed 30, including on failure — kept as in the
    // original code; NOTE(review): presumably intentional, confirm.
    const pages = 30;
    if (error) {
      console.error("获取页面数据失败:", error);
      return { pages, info: [] };
    }
    const rows = payload.res?.rows ?? [];
    for (const item of rows) {
      let endTime;
      let publishTime;
      if (options.categoryId === "965901485789413376") {
        // Sourcing forecasts carry the deadline inside the HTML body text.
        publishTime = this.normalizeTime(item.publishDate);
        endTime = this.extractDeadlineTime(item.text);
      } else {
        endTime = this.normalizeTime(item.signUpEndTime);
        publishTime = this.normalizeTime(item.signUpBeginTime);
      }
      if (
        endTime &&
        keywordsInclude(item.title) &&
        +new Date(endTime) >= Date.now()
      ) {
        info.push({
          id: item.url,
          name: item.title,
          publishTime,
          endTime,
          urls: `https://ebd.mychery.com/cms${item.url}`,
        });
      }
    }
    return { pages, info };
  }

  /**
   * Turns "2025-01-02T03:04:05.000" into "2025-01-02 03:04:05".
   * Returns null for a missing/non-string value so that one incomplete row
   * is skipped instead of aborting the whole page.
   */
  normalizeTime(value) {
    if (typeof value !== "string") {
      return null;
    }
    return value.replace("T", " ").split(".")[0];
  }

  /**
   * Requests one page of a category.
   *
   * @returns {Promise<[any, object|null]>} `[null, body]` when `code === 0`,
   *   `["err", null]` otherwise, `[error, null]` when the request fails.
   */
  async getList(pagenumber, options) {
    try {
      const res = await axios({
        url: options.url,
        data: {
          dto: {
            bidType: "",
            categoryId: options.categoryId,
            city: "",
            county: "",
            province: "",
            purchaseMode: "",
            secondCompanyId: "",
            siteId: options.siteId,
          },
          pageNo: pagenumber,
          pageSize: "10",
        },
        method: "post",
      });
      const result = res.data;
      return result.code === 0 ? [null, result] : ["err", null];
    } catch (err) {
      return [err, null];
    }
  }

  /**
   * Extracts the "yyyy-MM-dd HH:mm:ss" timestamp that follows the
   * forecast sign-up deadline label in an announcement body.
   *
   * @param {string} html
   * @returns {string|null} the timestamp, or null when absent or when `html` is not a string
   */
  extractDeadlineTime(html) {
    if (typeof html !== "string") {
      return null;
    }
    const regex = /预告报名截止时间:(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/;
    const match = html.match(regex);
    return match ? match[1] : null;
  }
}
// Start the crawler: the Chery constructor calls start() as a side effect.
new Chery();

6
config.js Normal file
View File

@ -0,0 +1,6 @@
// Shared crawler settings; the spider modules in this commit import this object as `config`.
export default {
// Page limit: the full-crawl stopWhen checks stop once pagenumber reaches this value.
pageNumberLimit: 3,
// Passed as `time` to loopCall during full crawls (presumably a delay in milliseconds between pages — confirm in utils.js).
fullFetchTime: 2000,
// Passed as `time` to loopCall during incremental crawls: 5 * 60 * 1000 = 300000.
incrementFetchTime: 5 * 60 * 1000,
};

187
df.js Normal file
View File

@ -0,0 +1,187 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall, keywordsInclude } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
import * as cheerio from "cheerio";
/**
 * Crawler for Dongfeng trade announcements (etp.dfmc.com.cn). The site serves
 * static HTML list pages, which are parsed with cheerio. Two sections are
 * crawled independently, each with its own full/incremental loop.
 *
 * Paging is driven by `loopCall` from ./utils.js, which is not visible here.
 */
class DF {
  constructor() {
    // One entry per section: `name` is the spider key used in the database,
    // `info` accumulates full-crawl matches, `options` is passed to getInfo.
    this.jsonMap = [
      {
        name: "东风【招标采购】",
        info: [],
        options: {
          name: "东风【招标采购】",
          url: "https://etp.dfmc.com.cn/jyxx/004001/",
          homeIndex: "trade_info_new.html",
        },
      },
      {
        name: "东风【非招标采购】",
        info: [],
        options: {
          name: "东风【非招标采购】",
          url: "https://etp.dfmc.com.cn/jyxx/004002/",
          homeIndex: "trade_info_newf.html",
        },
      },
    ];
    console.log("东风 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    // Fire-and-forget: start() catches its own errors.
    this.start();
  }

  /** Entry point; never rejects, startup failures are only logged. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /**
   * Starts one loop per section. The loops are intentionally not awaited so
   * the sections run side by side; each loop handles its own errors.
   */
  async init() {
    for (const item of this.jsonMap) {
      const announcements = this.queue.getAnnouncementsBySpider(item.name);
      if (announcements.length > 0) {
        this.loopFetchIncrement(item);
      } else {
        this.loopFetchFull(item);
      }
    }
  }

  /**
   * Full crawl of one section, followed by the incremental loop.
   * Async so that a rejection from loopCall is actually caught here.
   *
   * @param {{name: string, info: object[], options: object}} props
   * @returns {Promise<void>} never rejects
   */
  async loopFetchFull(props) {
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        additional: props.options,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          props.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          props.info.push(...result.info);
          console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`);
          try {
            if (props.info.length > 0) {
              this.queue.saveAnnouncements(props.name, props.info);
              this.queue.addMessage(props.name, props.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
          // Not awaited on purpose; loopFetchIncrement never rejects.
          this.loopFetchIncrement(props);
        },
      });
    } catch (error) {
      console.error(`${props.options.name}全量爬取失败:`, error);
    }
  }

  /**
   * Incremental polling loop of one section.
   *
   * @param {{name: string, info: object[], options: object}} props
   * @returns {Promise<void>} never rejects
   */
  async loopFetchIncrement(props) {
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        additional: props.options,
        readyForNext: (pagenumber, result) =>
          this.nextIncrementPage(props, pagenumber, result),
      });
    } catch (error) {
      console.error(`${props.options.name}增量爬取失败:`, error);
    }
  }

  /**
   * Stores the unseen announcements of one page and picks the next page.
   *
   * @returns {number} `pagenumber + 1` when every item on the page was new,
   *   otherwise 1. A database error also yields 1 so the loop always receives
   *   a defined page number.
   */
  nextIncrementPage(props, pagenumber, result) {
    try {
      const newInfo = this.queue.filterNewAnnouncements(props.name, result.info);
      if (newInfo.length === 0) {
        console.log("没有发现新数据,继续监控...");
        return 1;
      }
      console.log(`发现 ${newInfo.length} 条新数据`);
      this.queue.saveAnnouncements(props.name, newInfo);
      this.queue.addMessage(props.name, newInfo);
      return newInfo.length === result.info.length ? pagenumber + 1 : 1;
    } catch (error) {
      console.error("数据库操作失败:", error);
      return 1;
    }
  }

  /**
   * Downloads one list page and extracts the table rows that match the
   * keywords and whose end time is still in the future.
   *
   * @param {number} [pagenumber=1]
   * @param {{name: string, url: string, homeIndex: string}} options
   *   per-section settings (named `options` to avoid shadowing the imported config)
   * @returns {Promise<{pages: number, info: object[]}>} `pages` is 0 on a failed request
   */
  async getInfo(pagenumber = 1, options) {
    const info = [];
    console.log(`${options.name}--获取第 ${pagenumber} 页数据...`);
    const [error, html] = await this.getList(pagenumber, options);
    if (error) {
      // The HTTP status lives on error.response; fall back to the message so
      // network-level failures are not logged as "undefined".
      console.error(
        "获取页面数据失败:",
        error.response?.status ?? error.status ?? error.message ?? error
      );
      return { pages: 0, info: [] };
    }
    // From page six on the site asks for a captcha, so only five pages are crawled.
    const pages = 5;
    const $ = cheerio.load(html);
    $(".public-table tbody tr").each((index, element) => {
      const row = $(element);
      const link = row.find("a");
      const id = row.find("td:nth-child(3)").text();
      const name = link.text();
      const publishTime = row.find("td:nth-child(6)").text();
      const endTime = row.find("td:nth-child(5)").text();
      // A row without a link must not produce a ".../undefined" URL.
      const urls = `https://etp.dfmc.com.cn${link.attr("href") ?? ""}`;
      if (endTime && +new Date(endTime) >= Date.now() && keywordsInclude(name)) {
        console.log("处理项目:", id, name);
        info.push({ id, name, publishTime, endTime, urls });
      }
    });
    return { pages, info };
  }

  /**
   * Downloads the HTML of one list page. Page 1 is `options.homeIndex`,
   * later pages are `<n>.html` under the same directory.
   *
   * @returns {Promise<[any, string|null]>} `[null, html]` or `[error, null]`
   */
  async getList(pagenumber, options) {
    const page = pagenumber === 1 ? options.homeIndex : `${pagenumber}.html`;
    try {
      const res = await axios({
        url: options.url + page,
        method: "get",
      });
      return [null, res.data];
    } catch (err) {
      return [err, null];
    }
  }
}
// Start the crawler: the DF constructor calls start() as a side effect.
new DF();

37
ecosystem.config.cjs Normal file
View File

@ -0,0 +1,37 @@
// PM2 process definitions. Only the message-queue manager and the picc spider are registered here.
module.exports = {
apps: [
// Message-queue manager (started first)
{
name: "msg-manager",
script: "msgManager.js",
instances: 1,
autorestart: true,
watch: false,
max_memory_restart: "200M",
env: {
NODE_ENV: "production",
SERVICE_NAME: "msg-manager",
},
error_file: "./logs/msg-manager-error.log",
out_file: "./logs/msg-manager-out.log",
log_file: "./logs/msg-manager-combined.log",
time: true,
},
// picc spider process; runs picc.js with a 300M memory-restart threshold.
{
name: "picc-spider",
script: "picc.js",
instances: 1,
autorestart: true,
watch: false,
max_memory_restart: "300M",
env: {
NODE_ENV: "production",
SPIDER_NAME: "picc",
},
error_file: "./logs/picc-error.log",
out_file: "./logs/picc-out.log",
log_file: "./logs/picc-combined.log",
time: true,
},
],
};

237
geely.js Normal file
View File

@ -0,0 +1,237 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
// import cheerio from "cheerio";
// import { messageQueue } from "./msgManager.js";
/**
 * Crawler for Geely tender notices (glzb.geely.com). For every notice that is
 * still open and was published within the last 30 days, the preview URLs of
 * its attachments are resolved and stored as `urls`.
 *
 * Paging is driven by `loopCall` from ./utils.js, which is not visible here.
 */
class GEELY {
  constructor() {
    this.url = "https://glzb.geely.com/gpmp/notice/listnotice";
    // Matching announcements accumulated during a full crawl.
    this.info = [];
    console.log("GEELY 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    // Fire-and-forget: start() catches its own errors.
    this.start();
  }

  /** Entry point; never rejects, startup failures are only logged. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /** Chooses incremental mode when history exists, otherwise a full crawl. */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider("吉利");
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: walks pages until the last page or `config.pageNumberLimit`,
   * persists the collected announcements, then switches to incremental mode.
   */
  async fullFetch() {
    console.log("开始全量爬取...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          this.info.push(...result.info);
          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
          try {
            // Same guard as the other spiders: skip the write and the
            // notification when nothing matched.
            if (this.info.length > 0) {
              this.queue.saveAnnouncements("吉利", this.info);
              this.queue.addMessage("吉利", this.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
        },
      });
    } catch (error) {
      console.error("全量爬取失败:", error);
    }
    console.log("开始增量爬取...");
    // Awaited so the promise is not left floating.
    await this.increment();
  }

  /** Incremental mode: polls at `config.incrementFetchTime` and stores unseen items. */
  async increment() {
    console.log("开始增量爬取模式每5分钟检查一次新数据...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        readyForNext: (pagenumber, result) =>
          this.nextIncrementPage(pagenumber, result),
      });
    } catch (error) {
      console.error("增量爬取失败:", error);
    }
  }

  /**
   * Stores the unseen announcements of one page and picks the next page.
   *
   * @returns {number} `pagenumber + 1` when every item on the page was new,
   *   otherwise 1. A database error also yields 1 so the loop always receives
   *   a defined page number.
   */
  nextIncrementPage(pagenumber, result) {
    try {
      const newInfo = this.queue.filterNewAnnouncements("吉利", result.info);
      if (newInfo.length === 0) {
        console.log("没有发现新数据,继续监控...");
        return 1;
      }
      console.log(`发现 ${newInfo.length} 条新数据`);
      this.queue.saveAnnouncements("吉利", newInfo);
      this.queue.addMessage("吉利", newInfo);
      return newInfo.length === result.info.length ? pagenumber + 1 : 1;
    } catch (error) {
      console.error("数据库操作失败:", error);
      return 1;
    }
  }

  /**
   * Fetches one page of notices and resolves attachment URLs for the ones that
   * end today or later and were published within the last 30 days.
   * Notices whose detail request fails are logged and skipped.
   *
   * @param {number} [pagenumber=1]
   * @returns {Promise<{pages: number, info: object[]}>} `pages` is 0 on a failed request
   */
  async getInfo(pagenumber = 1) {
    const today = new Date().setHours(0, 0, 0, 0);
    const beforeOneMonth = today - 30 * 24 * 60 * 60 * 1000;
    const info = [];
    console.log(`正在获取第 ${pagenumber} 页数据...`);
    const [error, payload] = await this.getList(pagenumber);
    if (error) {
      console.error("获取页面数据失败:", error);
      return { pages: 0, info: [] };
    }
    // Tolerate a response without data/items instead of throwing.
    const total = payload.data?.total ?? 0;
    const items = payload.data?.items ?? [];
    // 20 must stay in sync with the pagesize sent by getList.
    const pages = Math.ceil(total / 20);
    // Detail requests are issued one after another, as in the original flow.
    for (const item of items) {
      if (item.endtime < today || item.publishtime < beforeOneMonth) {
        continue;
      }
      console.log("处理项目:", item.pjtnoticeid, item.pjtnoticename);
      const [noticeError, urls] = await this.getNoticeUrl(item.pjtnoticeid);
      if (noticeError) {
        console.error("获取公告详情失败:", noticeError);
        continue;
      }
      info.push({
        id: item.pjtnoticeid,
        name: item.pjtnoticename,
        publishTime: timestampToDate(item.publishtime),
        endTime: timestampToDate(item.endtime),
        urls,
      });
    }
    return { pages, info };
  }

  /**
   * Requests one page of notices.
   *
   * @returns {Promise<[any, object|null]>} `[null, body]` when `code === "success"`,
   *   `["err", null]` otherwise, `[error, null]` when the request fails.
   */
  async getList(pagenumber) {
    try {
      const res = await axios({
        url: this.url,
        params: {
          pagesize: 20,
          pagenumber: pagenumber,
          publishstatus: 2,
          bidcategoryid: 1442,
          iflongpro: 0,
          _: Date.now(),
        },
        method: "get",
      });
      const result = res.data;
      return result.code === "success" ? [null, result] : ["err", null];
    } catch (err) {
      return [err, null];
    }
  }

  /**
   * Loads a notice and resolves the preview URL of each attachment.
   * Attachments whose preview request fails or is not "success" are dropped.
   *
   * @param {string|number} id - pjtnoticeid of the notice
   * @returns {Promise<[any, string[]|null]>} `[null, urls]`, `["err", null]`
   *   when the notice query is not "success", or `[error, null]`
   */
  async getNoticeUrl(id) {
    try {
      const res = await axios({
        url: `https://glzb.geely.com/gpmp/notice/query?_=${Date.now()}&pjtnoticeid=${id}`,
        method: "get",
      });
      const detail = res.data;
      if (detail.code !== "success") {
        return ["err", null];
      }
      // A notice without attachments yields an empty list rather than an error.
      const previews = (detail.data.attachs ?? []).map((attach) =>
        axios({
          url: `https://glzb.geely.com/pub/file/info/preview`,
          method: "get",
          params: {
            name: attach.attachname,
            downloadUrl: attach.downloadUrl,
            previewUrl: attach.previewUrl,
            attachname: attach.attachname,
            _: Date.now(),
          },
        })
      );
      const outcomes = await Promise.allSettled(previews);
      const urls = outcomes
        .filter(
          (outcome) =>
            outcome.status === "fulfilled" &&
            outcome.value.data.code === "success"
        )
        .map((outcome) => outcome.value.data.data);
      return [null, urls];
    } catch (err) {
      console.error("err:", err);
      return [err, null];
    }
  }
}
// Start the crawler: the GEELY constructor calls start() as a side effect.
new GEELY();

234
greatWall.js Normal file
View File

@ -0,0 +1,234 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall, keywordsInclude } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
/**
 * Crawler for Great Wall Motor SRM announcements (srm.gwm.cn). Two endpoints
 * are crawled independently: public sourcing (plain paging body) and the
 * recruit hall (query-style body described by `options.data`).
 *
 * Paging is driven by `loopCall` from ./utils.js, which is not visible here.
 */
class GreatWall {
  constructor() {
    // One entry per endpoint: `name` is the spider key used in the database,
    // `info` accumulates full-crawl matches, `options` is passed to getInfo.
    this.jsonMap = [
      {
        name: "长城公开寻源",
        info: [],
        options: {
          name: "长城公开寻源",
          url: "https://srm.gwm.cn/cloud-srm/api-sou/sou-firstPage/souReqlistPage",
        },
      },
      {
        name: "长城招募公示大厅",
        info: [],
        options: {
          name: "长城招募公示大厅",
          url: "https://srm.gwm.cn/cloud-srm/api-sou/api-ql/Recruit/visitList",
          // Request-body template; getList copies it and overrides pageNum.
          data: {
            type: "Recruit",
            lang: "zh-cn",
            query: { "*": {} },
            payload: {
              filter: {},
              page: { sort: "lastUpdateDate desc", pageNum: 1, pageSize: 8 },
            },
            action: "visitList",
            tree: true,
          },
        },
      },
    ];
    console.log("长城 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    // Fire-and-forget: start() catches its own errors.
    this.start();
  }

  /** Entry point; never rejects, startup failures are only logged. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /**
   * Starts one loop per endpoint. The loops are intentionally not awaited so
   * the endpoints run side by side; each loop handles its own errors.
   */
  async init() {
    for (const item of this.jsonMap) {
      const announcements = this.queue.getAnnouncementsBySpider(item.name);
      if (announcements.length > 0) {
        this.loopFetchIncrement(item);
      } else {
        this.loopFetchFull(item);
      }
    }
  }

  /**
   * Full crawl of one endpoint, followed by the incremental loop.
   * Async so that a rejection from loopCall is actually caught here.
   *
   * @param {{name: string, info: object[], options: object}} props
   * @returns {Promise<void>} never rejects
   */
  async loopFetchFull(props) {
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        additional: props.options,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          props.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          props.info.push(...result.info);
          console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`);
          try {
            if (props.info.length > 0) {
              this.queue.saveAnnouncements(props.name, props.info);
              this.queue.addMessage(props.name, props.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
          // Not awaited on purpose; loopFetchIncrement never rejects.
          this.loopFetchIncrement(props);
        },
      });
    } catch (error) {
      console.error(`${props.options.name}全量爬取失败:`, error);
    }
  }

  /**
   * Incremental polling loop of one endpoint.
   *
   * @param {{name: string, info: object[], options: object}} props
   * @returns {Promise<void>} never rejects
   */
  async loopFetchIncrement(props) {
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        additional: props.options,
        readyForNext: (pagenumber, result) =>
          this.nextIncrementPage(props, pagenumber, result),
      });
    } catch (error) {
      console.error(`${props.options.name}增量爬取失败:`, error);
    }
  }

  /**
   * Stores the unseen announcements of one page and picks the next page.
   *
   * @returns {number} `pagenumber + 1` when every item on the page was new,
   *   otherwise 1. A database error also yields 1 so the loop always receives
   *   a defined page number.
   */
  nextIncrementPage(props, pagenumber, result) {
    try {
      const newInfo = this.queue.filterNewAnnouncements(props.name, result.info);
      if (newInfo.length === 0) {
        console.log("没有发现新数据,继续监控...");
        return 1;
      }
      console.log(`发现 ${newInfo.length} 条新数据`);
      this.queue.saveAnnouncements(props.name, newInfo);
      this.queue.addMessage(props.name, newInfo);
      return newInfo.length === result.info.length ? pagenumber + 1 : 1;
    } catch (error) {
      console.error("数据库操作失败:", error);
      return 1;
    }
  }

  /**
   * Fetches one page and keeps the entries whose title/project name matches
   * the keywords. The response shape depends on the endpoint: the recruit
   * hall (`options.data` present) returns `records`/`pageCount`, public
   * sourcing returns `list`/`pages`.
   *
   * @param {number} [pagenumber=1]
   * @param {{name: string, url: string, data?: object}} options
   *   per-endpoint settings (named `options` to avoid shadowing the imported config)
   * @returns {Promise<{pages: number, info: object[]}>} `pages` is 0 on a failed request
   */
  async getInfo(pagenumber = 1, options) {
    const info = [];
    console.log(`${options.name}--获取第 ${pagenumber} 页数据...`);
    const [error, payload] = await this.getList(pagenumber, options);
    if (error) {
      console.error("获取页面数据失败:", error);
      return { pages: 0, info: [] };
    }
    if (options.data) {
      // Recruit hall
      const pages = payload.data.pageCount;
      for (const item of payload.data.records ?? []) {
        if (keywordsInclude(item.title)) {
          info.push({
            id: item.recruitId,
            name: item.title,
            publishTime: item.publishTime,
            endTime: item.deadlineTime,
            urls: `https://srm.gwm.cn/#/portalBidding/vendorBiddingDetail?id=${item.recruitId}`,
          });
        }
      }
      return { pages, info };
    }
    // Public sourcing
    const pages = payload.data.pages;
    for (const item of payload.data.list ?? []) {
      if (keywordsInclude(item.projectName)) {
        info.push({
          id: item.reqHeadId,
          name: item.projectName,
          publishTime: item.releaseDate,
          endTime: item.publicEndTime,
          urls: `https://srm.gwm.cn/#/portal?id=${item.reqHeadId}`,
        });
      }
    }
    return { pages, info };
  }

  /**
   * Requests one page. The body is built from a copy of `options.data`, so the
   * shared template in `jsonMap` is never mutated.
   *
   * @returns {Promise<[any, object|null]>} `[null, body]` when the response
   *   code is 0 / "0", `["err", null]` otherwise, `[error, null]` when the request fails.
   */
  async getList(pagenumber, options) {
    const template = options.data;
    const data = template
      ? {
          ...template,
          payload: {
            ...template.payload,
            page: { ...template.payload.page, pageNum: pagenumber },
          },
        }
      : { pageNum: pagenumber, pageSize: 8 };
    try {
      const res = await axios({
        url: options.url,
        data,
        method: "post",
      });
      const result = res.data;
      // The code is compared as text so both 0 and "0" count as success.
      return String(result.code) === "0" ? [null, result] : ["err", null];
    } catch (err) {
      return [err, null];
    }
  }
}
// Start the crawler: the GreatWall constructor calls start() as a side effect.
new GreatWall();

385
jianghuai.js Normal file
View File

@ -0,0 +1,385 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import JSON5 from "json5";
import { timestampToDate, loopCall, keywordsInclude } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
class JiangHuai {
constructor(jsonMap) {
this.axiosInstance = axios.create({ timeout: 30000, maxRedirects: 5 });
this.axiosInstance.interceptors.request.use((config) => {
// 添加cookie到请求头
const cookieString = Array.from(this.cookiePair.entries())
.map(([name, value]) => `${name}=${value}`)
.join("; ");
config.headers.Cookie = cookieString;
return config;
});
this.axiosInstance.interceptors.response.use(
(response) => {
// 更新cookie到请求头
let cookieArr = response.headers["set-cookie"];
this.extractCookie(cookieArr);
return response;
},
(error) => {
return Promise.reject(error);
}
);
this.cookiePair = new Map();
this.csrfToken = "";
this.jsonMap = jsonMap;
// [
// {
// name: "江淮【招标公告】",
// info: [],
// options: {
// name: "江淮【招标公告】",
// url: "https://ahjhqc.youzhicai.com/domain/data-list-new",
// data: {
// pageIndex: 1,
// type: 1,
// companyId: "",
// title: "",
// ntype: 1,
// start_time: "",
// end_time: "",
// child: "",
// tenderType: 3,
// },
// },
// },
// {
// name: "江淮【变更/澄清公告】",
// info: [],
// options: {
// name: "江淮【变更/澄清公告】",
// url: "https://ahjhqc.youzhicai.com/domain/data-list-new",
// data: {
// pageIndex: 1,
// type: 1,
// companyId: "",
// title: "",
// ntype: "4,6",
// start_time: "",
// end_time: "",
// child: "",
// tenderType: 3,
// },
// },
// },
// ];
console.log("江淮 爬虫启动...");
this.queue = new SQLiteMessageQueue();
this.start();
}
async start() {
try {
await this.init();
} catch (err) {
console.error("启动失败:", err);
}
}
async init() {
for (let item of this.jsonMap) {
let announcements = this.queue.getAnnouncementsBySpider(item.name);
if (announcements.length > 0) {
this.loopFetchIncrement(item);
} else {
this.loopFetchFull(item);
}
}
}
async initializeCookie() {
try {
let headers = {
headers: {
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
Accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "no-cache",
Pragma: "no-cache",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Upgrade-Insecure-Requests": "1",
},
};
const homeResponse = await this.axiosInstance.get(
"https://ahjhqc.youzhicai.com/homeindex/noticeListNew.html?type=1",
headers
);
// 提取csrf-token
let tokenMatch = homeResponse.data.match(
/<meta name="csrf-token" content="([^"]+)"/
);
// console.log(tokenMatch);
if (tokenMatch) {
let csrfToken = tokenMatch[1];
this.csrfToken = csrfToken;
}
console.log(this.csrfToken);
headers.headers["X-Csrf-Token"] = this.csrfToken;
const cacheResponse = await this.axiosInstance.get(
"https://ahjhqc.youzhicai.com/?cache=1",
headers
);
} catch (err) {
console.log("err", err);
throw err;
}
}
extractCookie(cookieArr) {
for (let cookie of cookieArr) {
let [key, value] = cookie.split(";")[0].split("=");
this.cookiePair.set(key, value);
}
// console.log(this.cookiePair);
}
// 全量爬取
loopFetchFull(props) {
console.log("开始全量爬取");
try {
loopCall(this.getInfo.bind(this), {
time: config.fullFetchTime,
pagenumber: 1,
additional: props.options,
stopWhen: (pagenumber, result) => {
return (
pagenumber >= result.pages || pagenumber >= config.pageNumberLimit
);
},
readyForNext: (pagenumber, result) => {
props.info.push(...result.info);
return pagenumber + 1;
},
complete: (result) => {
props.info.push(...result.info);
console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`);
try {
if (props.info.length > 0) {
this.queue.saveAnnouncements(props.name, props.info);
this.queue.addMessage(props.name, props.info);
}
} catch (error) {
console.error("数据库操作失败:", error);
}
this.loopFetchIncrement(props);
},
});
} catch (error) {
console.error(`${props.options.name}全量爬取失败:`, error);
}
}
loopFetchIncrement(props) {
console.log("开始增量爬取");
try {
loopCall(this.getInfo.bind(this), {
time: config.incrementFetchTime, // 5分钟间隔
pagenumber: 1,
additional: props.options,
readyForNext: (pagenumber, result) => {
try {
let newInfo = this.queue.filterNewAnnouncements(
props.name,
result.info
);
// 存在新数据
if (newInfo.length > 0) {
console.log(`发现 ${newInfo.length} 条新数据`);
// props.info.push(...newInfo);
this.queue.saveAnnouncements(props.name, newInfo);
// this.writeFile(props);
this.queue.addMessage(props.name, newInfo);
// 全是新数据,继续下一页
if (newInfo.length === result.info.length) {
return pagenumber + 1;
} else {
// 有部分重复数据,重新从第一页开始
return 1;
}
} else {
console.log("没有发现新数据,继续监控...");
return 1; // 重新从第一页开始
}
} catch (error) {
console.error("数据库操作失败:", error);
}
},
});
} catch (error) {
console.error(`${props.options.name}增量爬取失败:`, error);
}
}
async getInfo(pagenumber = 1, config) {
let info = [];
console.log(`${config.name}--获取第 ${pagenumber} 页数据...`);
let result = await this.getList(pagenumber, config);
if (result[0]) {
// 出错, 记录错误日志
console.error("获取页面数据失败: ", result[0]);
return { pages: 0, info: [] };
} else {
// 公开寻源
let arr = result[1].list;
let total = result[1].total;
let pages = Math.ceil(total / 10);
for (let i = 0; i < arr.length; i++) {
let item = arr[i];
let endTime, publishTime;
publishTime = new Date(item.startTime).toLocaleDateString();
endTime = new Date(item.endTime).toLocaleDateString();
// 命中关键词
if (
keywordsInclude(item.noticeTitle) &&
item.endTime &&
+new Date(item.endTime) >= Date.now()
) {
console.log("处理项目:", item.noticeTitle);
info.push({
id: item.bulletinSID,
name: item.noticeTitle,
publishTime: publishTime,
endTime: endTime,
urls: `https://ahjhqc.youzhicai.com/${item.Url}`,
});
}
}
return { pages, info };
}
}
async getList(pagenumber, config) {
let data = config.data;
data.pageIndex = pagenumber;
let headers = {
Accept: "text/plain, */*; q=0.01",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "no-cache",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
Origin: "https://ahjhqc.youzhicai.com",
Pragma: "no-cache",
Priority: "u=1, i",
Referer:
"https://ahjhqc.youzhicai.com/homeindex/noticeListNew.html?type=1",
"Sec-Ch-Ua":
'"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
"Sec-Ch-Ua-Mobile": "?0",
"Sec-Ch-Ua-Platform": '"macOS"',
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
"X-Requested-With": "XMLHttpRequest",
"X-Csrf-Token": this.csrfToken,
};
try {
const response = await this.axiosInstance({
url: config.url,
data,
method: "post",
headers,
});
let result = JSON5.parse(response.data);
if (result.list && result.list.length > 0) {
return [null, result];
} else {
return ["err", null];
}
} catch (err) {
console.log("cookie不对");
try {
await this.initializeCookie();
headers["X-Csrf-Token"] = this.csrfToken;
const retryResponse = await this.axiosInstance({
url: config.url,
data,
method: "post",
headers,
});
// console.log(retryResponse.data);
let result = JSON5.parse(retryResponse.data);
if (result.list && result.list.length > 0) {
return [null, result];
} else {
return ["err", null];
}
} catch (retryErr) {
return [retryErr, null];
}
}
}
// 分页获取数据
// getList(pagenumber, config) {
// let data = config.data;
// data.pageIndex = pagenumber;
// return axios({
// url: config.url,
// data: data,
// method: "post",
// headers: {
// "Content-Type": "application/x-www-form-urlencoded",
// },
// })
// .then((res) => {
// let result = res.data;
// if (result.list && result.list.length > 0) {
// return [null, result];
// } else {
// return ["err", null];
// }
// })
// .catch((err) => {
// return [err, null];
// });
// }
}
/**
 * Builds the descriptor for one JiangHuai announcement category.
 *
 * @param {string} name Display name, also used as the spider name.
 * @param {number|string} ntype Value sent as the `ntype` form field.
 * @returns {{name: string, info: object[], options: object}}
 */
const buildJiangHuaiSource = (name, ntype) => ({
  name,
  info: [],
  options: {
    name,
    url: "https://ahjhqc.youzhicai.com/domain/data-list-new",
    data: {
      pageIndex: 1,
      type: 1,
      companyId: "",
      title: "",
      ntype,
      start_time: "",
      end_time: "",
      child: "",
      tenderType: 3,
    },
  },
});

// One crawler instance per category, created in the same order as before.
new JiangHuai([buildJiangHuaiSource("江淮【招标公告】", 1)]);
new JiangHuai([buildJiangHuaiSource("江淮【变更/澄清公告】", "4,6")]);

193
leapMotor.js Normal file
View File

@ -0,0 +1,193 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall, keywordsInclude } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
// import cheerio from "cheerio";
/**
 * Crawler for Leapmotor (零跑) sourcing announcements.
 *
 * Constructing an instance opens the SQLite-backed queue and starts crawling
 * right away: a full crawl when no "零跑" announcements are stored yet,
 * otherwise the incremental monitoring loop.
 */
class LeapMotor {
  constructor() {
    this.url =
      "https://lpsrm.leapmotor.com/cloud-srm/api-inq/inq-anon/reqhead/listPage";
    // Matches collected while a full crawl is running.
    this.info = [];
    console.log("零跑 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    this.start();
  }

  /** Runs init() and logs (instead of propagating) any startup failure. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /** Picks incremental mode when announcements are already stored, else a full crawl. */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider("零跑");
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: pages through the listing until the last page or
   * config.pageNumberLimit, stores and queues the matches, then switches to
   * incremental monitoring.
   *
   * Nothing is saved or queued when the crawl produced no matches, so no
   * empty notification message is created.
   */
  async fullFetch() {
    console.log("开始全量爬取...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          this.info.push(...result.info);
          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
          if (this.info.length === 0) {
            return;
          }
          try {
            this.queue.saveAnnouncements("零跑", this.info);
            this.queue.addMessage("零跑", this.info);
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
        },
      });
    } catch (error) {
      console.error("全量爬取失败:", error);
    }
    console.log("开始增量爬取...");
    // Not awaited on purpose: increment() handles its own errors.
    this.increment();
  }

  /**
   * Incremental monitoring: each poll stores and queues entries that are not
   * in the database yet. The callback returns the page to fetch next — the
   * following page when every entry was new, otherwise page 1. A database
   * error also restarts from page 1 instead of returning undefined.
   */
  async increment() {
    console.log("开始增量爬取模式,每5分钟检查一次新数据...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime, // polling interval comes from config
        pagenumber: 1,
        readyForNext: (pagenumber, result) => {
          try {
            const newInfo = this.queue.filterNewAnnouncements(
              "零跑",
              result.info
            );
            if (newInfo.length === 0) {
              console.log("没有发现新数据,继续监控...");
              return 1;
            }
            console.log(`发现 ${newInfo.length} 条新数据`);
            this.queue.saveAnnouncements("零跑", newInfo);
            this.queue.addMessage("零跑", newInfo);
            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
          } catch (error) {
            console.error("数据库操作失败:", error);
            return 1;
          }
        },
      });
    } catch (error) {
      console.error("增量爬取失败:", error);
    }
  }

  /**
   * Fetches one listing page and resolves the notice link of every entry
   * whose title passes keywordsInclude().
   *
   * @param {number} [pagenumber=1] Page to fetch.
   * @returns {Promise<{pages: number, info: object[]}>} `{pages: 0, info: []}`
   *   when the list request failed.
   */
  async getInfo(pagenumber = 1) {
    const info = [];
    console.log(`正在获取第 ${pagenumber} 页数据...`);
    const [listError, payload] = await this.getList(pagenumber);
    if (listError) {
      console.error("获取页面数据失败:", listError);
      return { pages: 0, info: [] };
    }
    const pages = payload.data.pages;
    // A response without a list is treated as an empty page.
    const rows = payload.data.list ?? [];
    for (const item of rows) {
      if (!keywordsInclude(item.souReqTitile)) {
        continue;
      }
      console.log("处理项目:", item.reqHeadId, item.souReqTitile);
      // Detail requests run one after another, as before.
      const [noticeError, noticeUrl] = await this.getNoticeUrl(item.reqHeadId);
      if (noticeError) {
        console.error("获取公告链接失败:", noticeError);
        continue;
      }
      info.push({
        id: item.reqHeadId,
        name: item.souReqTitile,
        publishTime: item.publishTime,
        endTime: item.expirationTime,
        urls: noticeUrl,
      });
    }
    return { pages, info };
  }

  /**
   * Requests one page (8 entries) of the listing.
   *
   * @param {number} pagenumber Page to fetch.
   * @returns {Promise<[*, object|null]>} `[null, body]` when the body's code
   *   is "0", `["err", null]` for any other code, `[error, null]` on a
   *   request failure.
   */
  getList(pagenumber) {
    return axios({
      url: this.url,
      data: {
        pageNum: pagenumber,
        pageSize: 8,
      },
      method: "post",
    })
      .then((res) => {
        const result = res.data;
        return result.code === "0" ? [null, result] : ["err", null];
      })
      .catch((err) => [err, null]);
  }

  /**
   * Looks up the external notice link of one sourcing request.
   *
   * @param {string|number} id Request head id.
   * @returns {Promise<[*, string|null]>} `[null, link]` on success,
   *   `["err", null]` or `[error, null]` otherwise.
   */
  getNoticeUrl(id) {
    return axios({
      url: `https://lpsrm.leapmotor.com/cloud-srm/api-inq/inq-anon/pj/reqhead/get?id=${id}`,
      method: "get",
    })
      .then((res) => {
        const result = res.data;
        return result.code === "0"
          ? [null, result.data.extNoticeLink]
          : ["err", null];
      })
      .catch((err) => {
        console.log("err:", err);
        return [err, null];
      });
  }
}
// Loading this module starts the Leapmotor crawler: the constructor calls start().
new LeapMotor();

100
mailer.js Normal file
View File

@ -0,0 +1,100 @@
import nodemailer from "nodemailer";
import path from "path";
/**
 * Thin wrapper around a nodemailer transport.
 */
class EmailSender {
  /**
   * @param {object} config Transport options passed to
   *   nodemailer.createTransport(). `config.auth.user`, when present, becomes
   *   the default "from" address; transports without `auth` are accepted.
   */
  constructor(config) {
    this.transporter = nodemailer.createTransport(config);
    this.defaultFrom = config?.auth?.user;
  }

  /**
   * Sends a single message.
   *
   * @param {object} options from/to/cc/bcc/subject/text/html/attachments.
   * @returns {Promise<{success: true, messageId: string}>}
   * @throws Re-throws the transport error after logging it.
   */
  async sendEmail(options) {
    try {
      const mailOptions = {
        from: options.from || this.defaultFrom,
        to: options.to,
        cc: options.cc,
        bcc: options.bcc,
        subject: options.subject,
        text: options.text,
        html: options.html,
        attachments: options.attachments || [],
      };
      const info = await this.transporter.sendMail(mailOptions);
      console.log(`邮件发送成功: ${options.to} - ${info.messageId}`);
      return { success: true, messageId: info.messageId };
    } catch (error) {
      console.error(`邮件发送失败: ${options.to} -`, error.message);
      throw error;
    }
  }

  /** Sends an HTML message without attachments. */
  async sendBasicEmail(to, subject, content) {
    return this.sendEmail({ to, subject, html: content });
  }

  /**
   * Sends an HTML message with at most one file attached.
   *
   * @param {string} [attachmentPath] File to attach; its base name is used
   *   as the attachment file name. Omit it to send without attachment.
   */
  async sendEmailWithAttachments(to, subject, content, attachmentPath) {
    const attachments = attachmentPath
      ? [{ filename: path.basename(attachmentPath), path: attachmentPath }]
      : [];
    return this.sendEmail({ to, subject, html: content, attachments });
  }

  /**
   * Sends the same message to every recipient, one at a time.
   *
   * A failed delivery does not abort the run; it is recorded in the result.
   * The pause is applied between messages only, not after the last one.
   *
   * @param {string[]} recipients Addresses to mail individually.
   * @param {string} subject
   * @param {string} content HTML body.
   * @param {number} [delayMs=1000] Pause between two consecutive messages.
   * @returns {Promise<object[]>} One `{recipient, success, result|error}`
   *   entry per recipient, in input order.
   */
  async sendBulkEmail(recipients, subject, content, delayMs = 1000) {
    const results = [];
    const lastIndex = recipients.length - 1;
    for (const [index, recipient] of recipients.entries()) {
      try {
        const result = await this.sendEmail({
          to: recipient,
          subject,
          html: content,
        });
        results.push({ recipient, success: true, result });
      } catch (error) {
        results.push({ recipient, success: false, error: error.message });
      }
      if (index < lastIndex && delayMs > 0) {
        await new Promise((resolve) => setTimeout(resolve, delayMs));
      }
    }
    return results;
  }

  /**
   * Verifies the transport configuration.
   *
   * @returns {Promise<boolean>} true when transporter.verify() succeeds.
   */
  async testConnection() {
    try {
      await this.transporter.verify();
      console.log("邮件服务器连接成功");
      return true;
    } catch (error) {
      console.error("邮件服务器连接失败:", error);
      return false;
    }
  }
}
// async function example() {
// let emailSender = new EmailSender({
// host: "smtp.exmail.qq.com",
// port: 465,
// secure: true,
// auth: {
// user: "jiqiren@axbbaoxian.com",
// pass: "Am13579q",
// },
// });
// const isConnected = await emailSender.testConnection();
// if (!isConnected) {
// console.log("邮件服务器连接失败");
// return;
// }
// emailSender.sendBasicEmail(
// "cpw@axbbaoxian.com",
// "测试邮件",
// "这是测试邮件内容"
// );
// }
// example().catch((err) => {
// console.error("程序错误:", err);
// });
export { EmailSender };

212
msgManager.js Normal file
View File

@ -0,0 +1,212 @@
// msgQueue.js - 基于事件的消息队列
import { EventEmitter } from "events";
import fs from "fs";
import path from "path";
import { EmailSender } from "./mailer.js";
import { SQLiteMessageQueue } from "./sqlite.js";
import { md5 } from "./utils.js";
import axios from "axios";
/**
 * Hourly notifier: collects the pending messages from the SQLite queue,
 * renders them as one HTML e-mail grouped by spider, and mails it to the
 * configured recipients.
 */
class MessageQueue extends EventEmitter {
  constructor() {
    super();
    this.queue = new SQLiteMessageQueue();
    // True while processQueue() is running; prevents overlapping runs.
    this.processing = false;
    // NOTE(security): the SMTP credentials are still hard-coded as fallback
    // values. Set SMTP_USER / SMTP_PASS in the environment and remove the
    // literals (and rotate the password) as soon as deployment allows.
    this.emailSender = new EmailSender({
      host: "smtp.exmail.qq.com",
      port: 465,
      secure: true,
      auth: {
        user: process.env.SMTP_USER ?? "jiqiren@axbbaoxian.com",
        pass: process.env.SMTP_PASS ?? "Am13579q",
      },
    });
    this.recipients = [
      "huzhengrong@axbbaoxian.com",
    ];
    this.startProcessor();
  }

  /** Escapes the five HTML-significant characters; null/undefined become "". */
  static escapeHtml(value) {
    const replacements = {
      "&": "&amp;",
      "<": "&lt;",
      ">": "&gt;",
      '"': "&quot;",
      "'": "&#39;",
    };
    return String(value ?? "").replace(/[&<>"']/g, (ch) => replacements[ch]);
  }

  /**
   * Starts the timer that checks the queue once per hour. A tick is skipped
   * while a previous run is still in progress.
   */
  async startProcessor() {
    setInterval(async () => {
      console.log("开始处理队列");
      try {
        const pendingMessages = this.queue.getPendingMessages();
        if (!this.processing && pendingMessages.length > 0) {
          await this.processQueue(pendingMessages);
        }
      } catch (error) {
        console.error(`❌ 获取待处理消息失败:`, error);
      }
    }, 60 * 60 * 1000);
  }

  /**
   * Sends one notification e-mail for the given messages and records the
   * outcome.
   *
   * Messages are marked "sent" only after the e-mail went out to at least
   * one recipient. When grouping a message fails, or no recipient could be
   * reached, the message is marked "failed" together with the error text.
   * The `processing` flag is always cleared, even on an unexpected error.
   *
   * @param {object[]} pendingMessages Rows from getPendingMessages(), with
   *   `data` already parsed into an array.
   * @returns {Promise<void>}
   */
  async processQueue(pendingMessages) {
    this.processing = true;
    try {
      const grouped = new Map();
      const deliverable = [];
      for (const message of pendingMessages) {
        try {
          console.log(`📧 处理消息: ${message.spider_name}`);
          const bucket = grouped.get(message.spider_name) ?? [];
          // Copy into the bucket so message.data itself is never modified.
          bucket.push(...message.data);
          grouped.set(message.spider_name, bucket);
          deliverable.push(message);
        } catch (error) {
          console.error(`❌ 消息处理失败: ${message.id}`, error);
          message.status = "failed";
          message.error_message = error.message;
          this.queue.updateMessageStatus(
            message.id,
            message.status,
            null,
            message.error_message
          );
        }
      }
      if (deliverable.length === 0) {
        return;
      }

      let html = "";
      for (const [spiderName, rows] of grouped) {
        html += this.generateTable(spiderName, rows);
      }

      let sendError = null;
      try {
        const results = await this.emailSender.sendBulkEmail(
          this.recipients,
          "招标项目最新公告",
          html
        );
        const nobodyReached =
          Array.isArray(results) &&
          results.length > 0 &&
          results.every((entry) => !entry.success);
        if (nobodyReached) {
          sendError = new Error(
            results.map((entry) => entry.error).join("; ")
          );
        }
      } catch (error) {
        sendError = error;
      }
      if (sendError) {
        console.error(`❌ 通知发送失败: ${sendError}`);
      }

      for (const message of deliverable) {
        if (sendError) {
          message.status = "failed";
          message.error_message = sendError.message;
          this.queue.updateMessageStatus(
            message.id,
            message.status,
            null,
            message.error_message
          );
        } else {
          message.status = "sent";
          message.sent_at = new Date().toISOString();
          this.queue.updateMessageStatus(
            message.id,
            message.status,
            message.sent_at
          );
        }
      }
    } finally {
      this.processing = false;
    }
  }

  /**
   * Renders the announcements of one spider as an HTML table. All scraped
   * text is HTML-escaped before it is inserted.
   *
   * @param {string} spiderName Heading of the table.
   * @param {object[]} data Announcements (name, publishTime, endTime, urls).
   * @returns {string} HTML fragment.
   */
  generateTable(spiderName, data) {
    const escape = MessageQueue.escapeHtml;
    let tableHtml = `
<div style="margin: 30px 0; font-family: Arial, sans-serif;">
<h2 style="color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; margin-bottom: 20px;">
🕷 ${escape(spiderName)} (${data.length} 条新增)
</h2>
<div style="overflow-x: auto; box-shadow: 0 2px 8px rgba(0,0,0,0.1); border-radius: 8px; margin-bottom: 20px;">
<table style="width: 100%; border-collapse: collapse; background: white; min-width: 800px;">
<thead>
<tr style="background: linear-gradient(135deg, #3498db 0%, #2980b9 100%); color: white;">
<th style="border: 1px solid #ddd; padding: 12px 8px; text-align: left; font-weight: bold; width: 50px;">序号</th>
<th style="border: 1px solid #ddd; padding: 12px 8px; text-align: left; font-weight: bold; min-width: 300px;">项目名称</th>
<th style="border: 1px solid #ddd; padding: 12px 8px; text-align: left; font-weight: bold; width: 140px;">发布时间</th>
<th style="border: 1px solid #ddd; padding: 12px 8px; text-align: left; font-weight: bold; width: 140px;">截止时间</th>
<th style="border: 1px solid #ddd; padding: 12px 8px; text-align: left; font-weight: bold; width: 100px;">查看详情</th>
</tr>
</thead>
<tbody>
`;
    data.forEach((item, index) => {
      const rowColor = index % 2 === 0 ? "#f8f9fa" : "white";
      const urls = this.formatUrls(item.urls);
      tableHtml += `
<tr style="background-color: ${rowColor}; border-bottom: 1px solid #eee;">
<td style="border: 1px solid #ddd; padding: 10px 8px; text-align: center; font-weight: bold; color: #666;">
${index + 1}
</td>
<td style="border: 1px solid #ddd; padding: 10px 8px; line-height: 1.4;">
<div style="font-weight: 500; color: #2c3e50; margin-bottom: 4px;">
${escape(item.name)}
</div>
</td>
<td style="border: 1px solid #ddd; padding: 10px 8px; color: #495057;">
${escape(item.publishTime)}
</td>
<td style="border: 1px solid #ddd; padding: 10px 8px; color: #495057;">
<div>${escape(item.endTime)}</div>
</td>
<td style="border: 1px solid #ddd; padding: 10px 8px; text-align: center;">
${urls}
</td>
</tr>
`;
    });
    tableHtml += `
</tbody>
</table>
</div>
</div>
`;
    return tableHtml;
  }

  /**
   * Builds the request signature from the timestamp: MD5 of
   * "/common/message/push" + timestamp + secret, re-assembled from three
   * substrings of the digest and upper-cased.
   *
   * @param {string|number} timestamp
   * @returns {string}
   */
  getSign(timestamp) {
    const secret = "cpwyyds";
    const uri = "/common/message/push";
    const digest = md5(uri + timestamp + secret);
    const sign =
      digest.substring(5, 13) +
      digest.substring(29, 31) +
      digest.substring(18, 27);
    return sign.toUpperCase();
  }

  /**
   * Renders the link cell: one "查看" link for a string or single-element
   * array, numbered links for several URLs, a placeholder when there is no
   * link. URLs are HTML-escaped before being placed in the href attribute.
   *
   * @param {string|string[]|null|undefined} urls
   * @returns {string} HTML fragment.
   */
  formatUrls(urls) {
    const escape = MessageQueue.escapeHtml;
    const noLink = '<span style="color: #6c757d;">无链接</span>';
    const singleLink = (url) =>
      `<a href="${escape(url)}" target="_blank" style="color: #007bff; text-decoration: none; padding: 6px 12px; background-color: #e3f2fd; border-radius: 4px; font-size: 12px; border: 1px solid #90caf9; display: inline-block;">📄 查看</a>`;

    if (!urls) {
      return noLink;
    }
    if (Array.isArray(urls)) {
      if (urls.length === 0) {
        return noLink;
      }
      if (urls.length === 1) {
        return singleLink(urls[0]);
      }
      const links = urls
        .map(
          (url, index) =>
            `<a href="${escape(url)}" target="_blank" style="color: #007bff; text-decoration: none; padding: 4px 8px; background-color: #e3f2fd; border-radius: 3px; font-size: 11px; margin: 2px; display: inline-block; border: 1px solid #90caf9;">📄 链接${
              index + 1
            }</a>`
        )
        .join("");
      return `<div style="line-height: 1.6;">${links}</div>`;
    }
    if (typeof urls === "string") {
      return singleLink(urls);
    }
    return '<span style="color: #6c757d;">链接格式错误</span>';
  }
}
// Module-level instance: importing this module constructs the queue, whose
// constructor starts the hourly processor right away.
const messageQueue = new MessageQueue();
export { messageQueue };
// export default MessageQueue;

170
nio.js Normal file
View File

@ -0,0 +1,170 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import {
timestampToDate,
loopCall,
keywordsInclude,
getYiqiNoticeUrl,
parseToGgDetailsParams,
} from "./utils.js";
import config from "./config.js";
import * as cheerio from "cheerio";
import { SQLiteMessageQueue } from "./sqlite.js";
/**
 * Crawler for NIO (蔚来) tender notices.
 *
 * The notices are read from the JSON embedded in the page's `__NEXT_DATA__`
 * element. Constructing an instance starts crawling right away.
 */
class NIO {
  constructor() {
    // Matches collected while a full crawl is running.
    this.info = [];
    console.log("蔚来 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    this.start();
  }

  /** Runs init() and logs (instead of propagating) any startup failure. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /** Picks incremental mode when announcements are already stored, else a full crawl. */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider("蔚来");
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: collects all matches, stores and queues them when there are
   * any, then switches to incremental monitoring.
   */
  async fullFetch() {
    console.log("开始全量爬取...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          this.info.push(...result.info);
          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
          try {
            if (this.info.length > 0) {
              this.queue.saveAnnouncements("蔚来", this.info);
              this.queue.addMessage("蔚来", this.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
        },
      });
    } catch (error) {
      console.error("全量爬取失败:", error);
    }
    console.log("开始增量爬取...");
    // Not awaited on purpose: increment() handles its own errors.
    this.increment();
  }

  /**
   * Incremental monitoring: each poll stores and queues entries that are not
   * in the database yet. The callback returns the page to fetch next — the
   * following page when every entry was new, otherwise page 1. A database
   * error also restarts from page 1 instead of returning undefined.
   */
  async increment() {
    console.log("开始增量爬取模式,每5分钟检查一次新数据...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime, // polling interval comes from config
        pagenumber: 1,
        readyForNext: (pagenumber, result) => {
          try {
            const newInfo = this.queue.filterNewAnnouncements(
              "蔚来",
              result.info
            );
            if (newInfo.length === 0) {
              console.log("没有发现新数据,继续监控...");
              return 1;
            }
            console.log(`发现 ${newInfo.length} 条新数据`);
            this.queue.saveAnnouncements("蔚来", newInfo);
            this.queue.addMessage("蔚来", newInfo);
            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
          } catch (error) {
            console.error("数据库操作失败:", error);
            return 1;
          }
        },
      });
    } catch (error) {
      console.error("增量爬取失败:", error);
    }
  }

  /**
   * Loads the notice page and returns the notices that are still open and
   * whose title passes keywordsInclude().
   *
   * A page without parsable embedded data is handled like a failed request.
   * A notice without documents is still reported, with `urls` set to null.
   *
   * @param {number} [pagenumber=1] Only used for logging; the site serves a
   *   single page.
   * @returns {Promise<{pages: number, info: object[]}>} `pages` is 1 on
   *   success and 0 on failure.
   */
  async getInfo(pagenumber = 1) {
    const info = [];
    console.log(`正在获取第 ${pagenumber} 页数据...`);
    const [fetchError, html] = await this.getHtml(pagenumber);
    if (fetchError) {
      console.error("获取页面数据失败:", fetchError);
      return { pages: 0, info: [] };
    }

    let notices;
    try {
      const $ = cheerio.load(html);
      const jsonStr = $("#__NEXT_DATA__").text();
      notices = JSON.parse(jsonStr).props.pageProps.tenderNotices ?? [];
    } catch (parseError) {
      console.error("获取页面数据失败:", parseError);
      return { pages: 0, info: [] };
    }

    for (const item of notices) {
      const endTime = item.dueTime;
      const isOpen = endTime && +new Date(endTime) >= Date.now();
      if (!isOpen || !keywordsInclude(item.title)) {
        continue;
      }
      info.push({
        id: item.id,
        name: item.title,
        publishTime: item.publishDate,
        endTime,
        // First attached document, when there is one.
        urls: item.documents?.[0]?.url ?? null,
      });
    }
    return { pages: 1, info };
  }

  /**
   * Downloads the tender-notices page.
   *
   * @param {number} pagenumber Unused; kept for interface symmetry.
   * @returns {Promise<[*, string|null]>} `[null, html]` or `[error, null]`.
   */
  getHtml(pagenumber) {
    return axios({
      url: "https://www.nio.cn/partnership/tender-notices",
      method: "get",
    })
      .then((res) => [null, res.data])
      .catch((err) => [err, null]);
  }
}
// Loading this module starts the NIO crawler: the constructor calls start().
new NIO();

23
package.json Normal file
View File

@ -0,0 +1,23 @@
{
"name": "net-spider",
"version": "1.0.0",
"description": "",
"main": "index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "pm2 start ecosystem.config.cjs",
"stop": "pm2 stop all",
"stats": "node stats.js",
"restart": "pm2 restart all"
},
"author": "",
"license": "ISC",
"dependencies": {
"axios": "^1.12.2",
"better-sqlite3": "^12.4.1",
"cheerio": "^1.1.2",
"json5": "^2.2.3",
"nodemailer": "^7.0.6"
}
}

214
picc.js Normal file
View File

@ -0,0 +1,214 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import { timestampToDate, loopCall } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
/**
 * Crawler for PICC (中国人民保险) procurement announcements.
 *
 * Constructing an instance opens the SQLite-backed queue and starts
 * crawling right away.
 */
class PICC {
  // Title keywords that make an announcement relevant.
  static KEYWORDS = Object.freeze([
    "保险",
    "车险",
    "非车险",
    "科技",
    "大模型",
    "承保",
    "第三方平台",
  ]);

  constructor() {
    // Matches collected while a full crawl is running.
    this.info = [];
    console.log("中国人民保险 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    this.start();
  }

  /** Runs init() and logs (instead of propagating) any startup failure. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /** Picks incremental mode when announcements are already stored, else a full crawl. */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider("中国人民保险");
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: pages through the listing until the last page or
   * config.pageNumberLimit, stores and queues the matches when there are
   * any, then switches to incremental monitoring.
   */
  async fullFetch() {
    console.log("开始全量爬取...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          this.info.push(...result.info);
          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
          try {
            if (this.info.length > 0) {
              this.queue.saveAnnouncements("中国人民保险", this.info);
              this.queue.addMessage("中国人民保险", this.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
        },
      });
    } catch (error) {
      console.error("全量爬取失败:", error);
    }
    console.log("开始增量爬取...");
    // Not awaited on purpose: increment() handles its own errors.
    this.increment();
  }

  /**
   * Incremental monitoring: each poll stores and queues entries that are not
   * in the database yet. The callback returns the page to fetch next — the
   * following page when every entry was new, otherwise page 1. A database
   * error also restarts from page 1 instead of returning undefined.
   */
  async increment() {
    console.log("开始增量爬取模式,每5分钟检查一次新数据...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime, // polling interval comes from config
        pagenumber: 1,
        readyForNext: (pagenumber, result) => {
          try {
            const newInfo = this.queue.filterNewAnnouncements(
              "中国人民保险",
              result.info
            );
            if (newInfo.length === 0) {
              console.log("没有发现新数据,继续监控...");
              return 1;
            }
            console.log(`发现 ${newInfo.length} 条新数据`);
            this.queue.saveAnnouncements("中国人民保险", newInfo);
            this.queue.addMessage("中国人民保险", newInfo);
            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
          } catch (error) {
            console.error("数据库操作失败:", error);
            return 1;
          }
        },
      });
    } catch (error) {
      console.error("增量爬取失败:", error);
    }
  }

  /**
   * Fetches one listing page and returns the entries whose title matches a
   * keyword and whose sale end time is set and not yet in the past.
   *
   * @param {number} [pagenumber=1] Page to fetch.
   * @returns {Promise<{pages: number, info: object[]}>} Page count (total /
   *   10, rounded up) and the matches; `{pages: 0, info: []}` when the
   *   request failed.
   */
  async getInfo(pagenumber = 1) {
    const info = [];
    console.log(`正在获取第 ${pagenumber} 页数据...`);
    const [listError, payload] = await this.getList(pagenumber);
    if (listError) {
      console.error("获取页面数据失败:", listError);
      return { pages: 0, info: [] };
    }
    const pages = Math.ceil(payload.res.total / 10);
    // A response without rows is treated as an empty page.
    const rows = payload.res.rows ?? [];
    for (const item of rows) {
      const endTime = timestampToDate(
        new Date(item.tenderFileSaleEndTime).getTime(),
        true
      );
      const isOpen = endTime && +new Date(endTime) >= Date.now();
      if (!this.keywordsInclude(item.title) || !isOpen) {
        continue;
      }
      info.push({
        id: item.sourcingId,
        name: item.title,
        publishTime: timestampToDate(
          new Date(item.tenderFileSaleBeginTime).getTime(),
          true
        ),
        endTime,
        urls: `https://ec.picc.com/cms/default/webfile${item.url}`,
      });
    }
    return { pages, info };
  }

  /**
   * Requests one page (10 entries) of the announcement list.
   *
   * @param {number} pagenumber Page to fetch.
   * @returns {Promise<[*, object|null]>} `[null, body]` when the body reports
   *   success, `["err", null]` otherwise, `[error, null]` on a request
   *   failure.
   */
  getList(pagenumber) {
    return axios({
      url: "https://ec.picc.com/cms/api/dynamicData/queryContentPage",
      data: {
        dto: {
          categoryId: "211,213,214,215,216,217",
          city: "",
          county: "",
          purchaseMode: "",
          siteId: "725",
        },
        pageNo: pagenumber,
        pageSize: 10,
      },
      method: "post",
      headers: {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json; charset=UTF-8',
        // NOTE(review): fixed cookie value captured from a browser session —
        // confirm it does not expire.
        'Cookie': 'G_rbec_47_11_8080=22685.52745.19855.0000',
        'Host': 'ec.picc.com',
        'Origin': 'https://ec.picc.com',
        'Referer': 'https://ec.picc.com/cms/default/webfile/ywgg1/index.html',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'Sec-Ch-Ua': '"Google Chrome";v="141", "Not?A_Brand";v="8", "Chromium";v="141"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': "macOS",
      },
    })
      .then((res) => {
        // The full response body is no longer dumped to the log on each poll.
        const result = res.data;
        if (result.msg === "操作成功" && result.code === 0) {
          return [null, result];
        }
        return ["err", null];
      })
      .catch((err) => {
        console.log('catch', err);
        return [err, null];
      });
  }

  /**
   * Tells whether a title contains at least one of the keywords.
   *
   * @param {string|null|undefined} name Announcement title; a non-string
   *   value never matches.
   * @returns {boolean}
   */
  keywordsInclude(name) {
    if (typeof name !== "string") {
      return false;
    }
    return PICC.KEYWORDS.some((keyword) => name.includes(keyword));
  }
}
// Loading this module starts the PICC crawler: the constructor calls start().
new PICC();

47
readme.md Normal file
View File

@ -0,0 +1,47 @@
# 查看指定爬虫详细信息
pm2 show chery-spider
# 查看指定爬虫状态
pm2 list | grep chery-spider
# 实时监控指定爬虫
pm2 monit chery-spider
# 停止指定爬虫(不删除)
pm2 stop chery-spider
# 彻底删除爬虫进程
pm2 delete chery-spider
# 停止并删除
pm2 stop chery-spider && pm2 delete chery-spider
# 查看指定爬虫的实时日志
pm2 logs chery-spider
# 查看最近 100 行日志
pm2 logs chery-spider --lines 100
# 只查看标准输出日志
pm2 logs chery-spider --out
# 只查看错误日志
pm2 logs chery-spider --err
# 查看带时间戳前缀的日志
pm2 logs chery-spider --timestamp
# 清空日志
pm2 flush chery-spider

320
sqlite.js Normal file
View File

@ -0,0 +1,320 @@
import Database from "better-sqlite3";
import fs from "fs";
// import { wechatPush } from "./utils.js";
/**
 * SQLite-backed store shared by the crawlers and the notifier.
 *
 * Two tables are kept in spider_data.db: `announcements` (everything a
 * crawler has already seen, keyed by announcement id) and `messages`
 * (batches waiting to be mailed, with a pending/sent/failed status).
 */
class SQLiteMessageQueue {
  constructor() {
    this.db = new Database("spider_data.db");
    this.init();
    this.setupGracefulShutdown();
  }

  /**
   * Converts a value into something that can be bound to a statement:
   * undefined/null become NULL, Dates become ISO strings, other objects
   * (for example an array of URLs) are stored as JSON text.
   * NOTE(review): better-sqlite3 is understood to reject undefined and plain
   * objects/arrays as bound parameters — confirm against its API docs.
   */
  static toBindable(value) {
    if (value === undefined || value === null) {
      return null;
    }
    if (value instanceof Date) {
      return value.toISOString();
    }
    if (typeof value === "object" && !Buffer.isBuffer(value)) {
      return JSON.stringify(value);
    }
    return value;
  }

  /** Creates tables and indexes when missing and prepares the statements. */
  init() {
    this.db.exec(`
CREATE TABLE IF NOT EXISTS announcements (
id TEXT PRIMARY KEY,
spider_name TEXT NOT NULL,
name TEXT NOT NULL,
publish_time TEXT,
end_time TEXT,
urls TEXT,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT
)
`);
    this.db.exec(`
CREATE TABLE IF NOT EXISTS messages (
id TEXT PRIMARY KEY,
spider_name TEXT NOT NULL,
data TEXT NOT NULL,
timestamp TEXT NOT NULL,
status TEXT DEFAULT 'pending',
sent_at TEXT,
error_message TEXT
)
`);
    this.db.exec(`
CREATE INDEX IF NOT EXISTS idx_announcements_spider ON announcements(spider_name);
CREATE INDEX IF NOT EXISTS idx_announcements_time ON announcements(publish_time);
CREATE INDEX IF NOT EXISTS idx_announcements_created ON announcements(created_at);
CREATE INDEX IF NOT EXISTS idx_status ON messages(status);
CREATE INDEX IF NOT EXISTS idx_spider_status ON messages(spider_name, status);
CREATE INDEX IF NOT EXISTS idx_timestamp ON messages(timestamp);
`);
    this.insertAnnouncementStmt = this.db.prepare(`
INSERT OR REPLACE INTO announcements
(id, spider_name, name, publish_time, end_time, urls, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
`);
    this.getAnnouncementStmt = this.db.prepare(`
SELECT * FROM announcements WHERE id = ?
`);
    this.getAnnouncementsBySpiderStmt = this.db.prepare(`
SELECT * FROM announcements WHERE spider_name = ?
ORDER BY created_at DESC
`);
    this.checkAnnouncementExistsStmt = this.db.prepare(`
SELECT COUNT(*) as count FROM announcements WHERE id = ?
`);
    this.insertStmt = this.db.prepare(`
INSERT INTO messages (id, spider_name, data, timestamp, status)
VALUES (?, ?, ?, ?, ?)
`);
    this.getPendingStmt = this.db.prepare(`
SELECT * FROM messages WHERE status = 'pending'
ORDER BY timestamp ASC
`);
    this.getFailedStmt = this.db.prepare(`
SELECT * FROM messages WHERE status = 'failed'
ORDER BY timestamp ASC
`);
    this.updateStatusStmt = this.db.prepare(`
UPDATE messages
SET status = ?, sent_at = ?, error_message = ?
WHERE id = ?
`);
  }

  /**
   * Inserts or replaces one announcement. The creation time of an existing
   * row is preserved; `updated_at` is always set to now.
   *
   * @param {string} spiderName
   * @param {{id: *, name: string, publishTime?: *, endTime?: *, urls?: *}} announcement
   * @returns {boolean} true when the id was not stored before.
   */
  saveAnnouncement(spiderName, announcement) {
    const now = new Date().toISOString();
    // A single lookup answers both "is it new?" and "when was it created?".
    const existing = this.getAnnouncement(announcement.id);
    const isNew = existing === undefined || existing === null;
    const { toBindable } = SQLiteMessageQueue;
    this.insertAnnouncementStmt.run(
      announcement.id,
      spiderName,
      announcement.name,
      toBindable(announcement.publishTime),
      toBindable(announcement.endTime),
      toBindable(announcement.urls),
      isNew ? now : existing.created_at || now,
      now
    );
    return isNew;
  }

  /**
   * Saves a batch of announcements inside one transaction.
   *
   * @param {string} spiderName
   * @param {object[]} announcements
   * @returns {object[]} The announcements that were not stored before.
   */
  saveAnnouncements(spiderName, announcements) {
    const newAnnouncements = [];
    const saveMany = this.db.transaction((batch) => {
      for (const announcement of batch) {
        if (this.saveAnnouncement(spiderName, announcement)) {
          newAnnouncements.push(announcement);
        }
      }
    });
    saveMany(announcements);
    console.log(`💾 ${spiderName}: 保存 ${announcements.length} 条公告`);
    return newAnnouncements;
  }

  /** @returns {boolean} true when an announcement with this id is stored. */
  isAnnouncementExists(announcementId) {
    const result = this.checkAnnouncementExistsStmt.get(announcementId);
    return result.count > 0;
  }

  /** @returns {object|undefined} The stored row for this id, if any. */
  getAnnouncement(id) {
    return this.getAnnouncementStmt.get(id);
  }

  /** @returns {object[]} All rows of one spider, newest first. */
  getAnnouncementsBySpider(spiderName) {
    return this.getAnnouncementsBySpiderStmt.all(spiderName);
  }

  /**
   * Deletes every announcement of one spider.
   *
   * @returns {number} Number of deleted rows.
   */
  deleteAnnouncementsBySpider(spiderName) {
    const stmt = this.db.prepare(
      `DELETE FROM announcements WHERE spider_name = ?`
    );
    const info = stmt.run(spiderName);
    console.log(`🗑️ 删除 ${spiderName} 的公告,共删除 ${info.changes} 条`);
    return info.changes;
  }

  /**
   * Keeps only the announcements whose id is not stored yet.
   *
   * @param {string} spiderName Unused; ids are looked up across all spiders.
   * @param {object[]} announcements
   * @returns {object[]}
   */
  filterNewAnnouncements(spiderName, announcements) {
    return announcements.filter(
      (announcement) => !this.isAnnouncementExists(announcement.id)
    );
  }

  /**
   * Queues a batch of announcements for notification.
   *
   * @param {string} spiderName
   * @param {object[]} data Stored as JSON text.
   * @returns {string} Id of the new message.
   */
  addMessage(spiderName, data) {
    const message = {
      id: `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
      spider_name: spiderName,
      data: JSON.stringify(data),
      timestamp: new Date().toISOString(),
      status: "pending",
    };
    this.insertStmt.run(
      message.id,
      message.spider_name,
      message.data,
      message.timestamp,
      message.status
    );
    console.log(`📤 添加消息到队列: ${spiderName} - ${data.length} 条数据`);
    return message.id;
  }

  /** @returns {object[]} Pending messages, oldest first, with `data` parsed. */
  getPendingMessages() {
    return this.getPendingStmt
      .all()
      .map((row) => ({ ...row, data: JSON.parse(row.data) }));
  }

  /** @returns {object[]} Failed messages, oldest first, with `data` parsed. */
  getFailedMessages() {
    return this.getFailedStmt
      .all()
      .map((row) => ({ ...row, data: JSON.parse(row.data) }));
  }

  /** Updates status, sent time and error text of one message. */
  updateMessageStatus(id, status, sentAt = null, errorMessage = null) {
    this.updateStatusStmt.run(status, sentAt, errorMessage, id);
  }

  /**
   * Imports announcements from a legacy JSON file (an array of
   * announcements) in one transaction.
   *
   * @returns {number} Number of imported entries; 0 when the file is
   *   missing, empty, or could not be imported.
   */
  migrateFromJsonFile(spiderName, jsonFilePath) {
    try {
      if (!fs.existsSync(jsonFilePath)) {
        console.log(`📁 ${jsonFilePath} 不存在,跳过迁移`);
        return 0;
      }
      const data = JSON.parse(fs.readFileSync(jsonFilePath, "utf-8"));
      if (!Array.isArray(data) || data.length === 0) {
        console.log(`📁 ${jsonFilePath} 数据为空,跳过迁移`);
        return 0;
      }
      const migrateMany = this.db.transaction((announcements) => {
        for (const announcement of announcements) {
          this.saveAnnouncement(spiderName, announcement);
        }
      });
      migrateMany(data);
      console.log(`🔄 成功迁移 ${data.length} 条 ${spiderName} 数据到数据库`);
      return data.length;
    } catch (error) {
      console.error(`❌ 迁移 ${jsonFilePath} 失败:`, error);
      return 0;
    }
  }

  /** Deletes messages that were sent more than `daysBefore` days ago. */
  cleanOldMessages(daysBefore = 30) {
    const cutoffDate = new Date();
    cutoffDate.setDate(cutoffDate.getDate() - daysBefore);
    const stmt = this.db.prepare(`
DELETE FROM messages
WHERE status = 'sent' AND sent_at < ?
`);
    const result = stmt.run(cutoffDate.toISOString());
    console.log(`🧹 清理了 ${result.changes} 条旧消息`);
  }

  /**
   * @returns {{announcements: object[], messages: object[]}} Announcement
   *   count per spider and the messages that are still pending.
   */
  getStats() {
    const announcements = this.db
      .prepare(
        `
SELECT spider_name, COUNT(*) as count
FROM announcements
GROUP BY spider_name
`
      )
      .all();
    const messages = this.db
      .prepare(
        `
SELECT status, data, sent_at
FROM messages WHERE status = 'pending'
`
      )
      .all();
    return { announcements, messages };
  }

  /**
   * Closes the database before the process exits: on SIGINT/SIGTERM (exit
   * code 0) and on uncaught exceptions or unhandled rejections (exit code 1).
   */
  setupGracefulShutdown() {
    process.on("SIGINT", () => {
      console.log("收到 SIGINT 信号,正在关闭数据库...");
      this.close();
      process.exit(0);
    });
    process.on("SIGTERM", () => {
      console.log("收到 SIGTERM 信号,正在关闭数据库...");
      this.close();
      process.exit(0);
    });
    process.on("uncaughtException", (error) => {
      console.error("未捕获异常:", error);
      this.close();
      process.exit(1);
    });
    process.on("unhandledRejection", (reason, promise) => {
      console.error("未处理的Promise拒绝:", reason);
      this.close();
      process.exit(1);
    });
  }

  /** Closes the database connection. */
  close() {
    this.db.close();
  }
}
export { SQLiteMessageQueue };

80
stats.js Normal file
View File

@ -0,0 +1,80 @@
import { SQLiteMessageQueue } from "./sqlite.js";
import path from "path";
import { md5 } from "./utils.js";
import axios from "axios";
// Open the SQLite-backed queue and take a snapshot of its statistics
// (announcement counts per spider and the pending messages).
const queue = new SQLiteMessageQueue();
const stats = queue.getStats();
// function merge() {
// let files = [
// { name: "长安", path: "changan.json" },
// { name: "奇瑞变更公告", path: "chery_bg.json" },
// { name: "奇瑞采购公告", path: "chery_cg.json" },
// { name: "奇瑞寻源预告", path: "chery_xy.json" },
// { name: "零跑", path: "leapMotor.json" },
// { name: "吉利", path: "geely.json" },
// { name: "一汽", path: "yiqi.json" },
// ];
// files.forEach((file) => {
// queue.migrateFromJsonFile(file.name, path.resolve(file.path));
// });
// }
// merge();
// 把message中的数据状态改成pending
// queue.getFailedMessages()
// .forEach((message) => {
// queue.updateMessageStatus(message.id, "pending");
// });
// function getSign(timestamp) {
// let secret = "cpwyyds";
// let uri = "/common/message/push";
// const url = uri + timestamp + secret;
// console.log(url);
// const myCalc = md5(url);
// let sign =
// myCalc.substring(5, 13) +
// myCalc.substring(29, 31) +
// myCalc.substring(18, 27);
// //sign 转大写
// sign = sign.toUpperCase();
// return sign;
// }
// let time = new Date().getTime();
// let data = {
// timestamp: time,
// sign: getSign(time),
// templateNo: "A002",
// url: "https://www.baidu.com/",
// paramList: [
// {
// key: "thing8",
// value: "网站name",
// },
// {
// key: "thing2",
// value: "项目name",
// },
// {
// key: "time14",
// value: "2025-11-2",
// },
// {
// key: "time17",
// value: "2025-11-3 00:00:00",
// },
// ],
// };
// axios({
// url: "https://testadvert.shenlintech.com/platform/common/message/push",
// method: "post",
// data,
// })
// .then((res) => {
// console.log(res.data);
// })
// .catch((err) => {
// console.log(err);
// });
// Print the statistics snapshot taken at the top of this script.
console.log(stats);

309
third.js Normal file
View File

@ -0,0 +1,309 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import JSON5 from "json5";
import { timestampToDate, loopCall, keywordsInclude } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
import * as cheerio from "cheerio";
/**
 * Crawler for the chinabidding.com project search ("三方平台").
 *
 * Every entry of `jsonMap` describes one search to monitor. On start-up an
 * entry that already has stored announcements goes straight to incremental
 * polling; otherwise a full crawl runs first and then hands over to
 * incremental polling. Cookies returned by the site are collected in
 * `cookiePair` and replayed on every request sent through `axiosInstance`.
 */
class Third {
  /**
   * @param {Array<{name: string, info: Array, options: {name: string, url: string, data: Object}}>} jsonMap
   *   Searches to monitor. `info` accumulates the results of the full crawl.
   */
  constructor(jsonMap) {
    // Cookie jar (name -> value) shared by the interceptors below.
    this.cookiePair = new Map();
    this.jsonMap = jsonMap;
    this.axiosInstance = axios.create({ timeout: 30000, maxRedirects: 5 });
    // Attach the collected cookies to every outgoing request.
    this.axiosInstance.interceptors.request.use((requestConfig) => {
      requestConfig.headers.Cookie = Array.from(this.cookiePair.entries())
        .map(([name, value]) => `${name}=${value}`)
        .join("; ");
      return requestConfig;
    });
    // Remember every cookie the server sets on a successful response.
    this.axiosInstance.interceptors.response.use(
      (response) => {
        this.extractCookie(response.headers["set-cookie"] || []);
        return response;
      },
      (error) => Promise.reject(error)
    );
    console.log("三方平台 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    this.start();
  }

  /** Run `init` and log (rather than propagate) any start-up failure. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /**
   * Pick the crawl mode for every configured search: incremental when
   * announcements are already stored for it, full crawl otherwise.
   */
  async init() {
    for (const item of this.jsonMap) {
      const announcements = this.queue.getAnnouncementsBySpider(item.name);
      if (announcements.length > 0) {
        this.loopFetchIncrement(item);
      } else {
        this.loopFetchFull(item);
      }
    }
  }

  /**
   * Browser-like headers sent with every request to chinabidding.com.
   *
   * @returns {Object} A fresh header object.
   */
  buildHeaders() {
    return {
      Accept: "text/plain, */*; q=0.01",
      "Accept-Language": "zh-CN,zh;q=0.9",
      "Cache-Control": "no-cache",
      "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
      Origin: "https://www.chinabidding.com",
      Pragma: "no-cache",
      Priority: "u=1, i",
      Referer: "https://www.chinabidding.com/search/proj.htm",
      "Sec-Ch-Ua":
        '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
      "Sec-Ch-Ua-Mobile": "?0",
      "Sec-Ch-Ua-Platform": '"macOS"',
      "Sec-Fetch-Dest": "empty",
      "Sec-Fetch-Mode": "cors",
      "Sec-Fetch-Site": "same-origin",
      "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
      "X-Requested-With": "XMLHttpRequest",
    };
  }

  /**
   * Load the search page once so the response interceptor can pick up the
   * cookies the site sets.
   *
   * @throws Re-throws the request error after logging it.
   */
  async initializeCookie() {
    try {
      await this.axiosInstance.get(
        "https://www.chinabidding.com/search/proj.htm",
        { headers: this.buildHeaders() }
      );
    } catch (err) {
      console.log("err", err);
      throw err;
    }
  }

  /**
   * Store the name/value pair of every `Set-Cookie` header.
   *
   * Only the first `=` separates name from value, so values that themselves
   * contain `=` are kept intact. Entries without a name are ignored.
   *
   * @param {string[]} cookieArr - Raw `Set-Cookie` header values.
   */
  extractCookie(cookieArr) {
    for (const cookie of cookieArr) {
      const pair = cookie.split(";")[0];
      const separator = pair.indexOf("=");
      if (separator <= 0) {
        continue;
      }
      const key = pair.slice(0, separator).trim();
      const value = pair.slice(separator + 1).trim();
      this.cookiePair.set(key, value);
    }
  }

  /**
   * Full crawl: walk the result pages until `stopWhen` fires, then persist
   * everything that was collected and switch to incremental polling.
   *
   * `loopCall` logs its own errors and is deliberately not awaited, so `init`
   * can move on to the next configured search.
   *
   * @param {{name: string, info: Array, options: Object}} props - One `jsonMap` entry.
   */
  loopFetchFull(props) {
    console.log("开始全量爬取");
    try {
      loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        additional: props.options,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          props.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          // The last page is delivered here, not through readyForNext.
          props.info.push(...result.info);
          console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`);
          try {
            if (props.info.length > 0) {
              this.queue.saveAnnouncements(props.name, props.info);
              this.queue.addMessage(props.name, props.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
          this.loopFetchIncrement(props);
        },
      });
    } catch (error) {
      console.error(`${props.options.name}全量爬取失败:`, error);
    }
  }

  /**
   * Incremental polling: fetch a page, store and enqueue the announcements
   * that are not stored yet, and decide which page to fetch next.
   *
   * @param {{name: string, info: Array, options: Object}} props - One `jsonMap` entry.
   */
  loopFetchIncrement(props) {
    console.log("开始增量爬取");
    try {
      loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        additional: props.options,
        readyForNext: (pagenumber, result) => {
          try {
            const newInfo = this.queue.filterNewAnnouncements(
              props.name,
              result.info
            );
            if (newInfo.length === 0) {
              console.log("没有发现新数据,继续监控...");
              return 1;
            }
            console.log(`发现 ${newInfo.length} 条新数据`);
            this.queue.saveAnnouncements(props.name, newInfo);
            this.queue.addMessage(props.name, newInfo);
            // A page made up only of new items means older pages may hold
            // more; any already-known item means we caught up.
            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
          } catch (error) {
            console.error("数据库操作失败:", error);
            // Start over from the first page on the next tick.
            return 1;
          }
        },
      });
    } catch (error) {
      console.error(`${props.options.name}增量爬取失败:`, error);
    }
  }

  /**
   * Download a notice page with a plain (cookie-less) axios request.
   *
   * @param {string} url - Page to download.
   * @returns {Promise<*>} The response body, or the string "err" on failure.
   */
  async getNoticeDetail(url) {
    try {
      const result = await axios.get(url);
      return result.data;
    } catch (err) {
      return "err";
    }
  }

  /**
   * Fetch one result page and extract the announcements whose title matches
   * the keyword filter.
   *
   * @param {number} [pagenumber=1] - Page to fetch.
   * @param {{name: string, url: string, data: Object}} config - Search options
   *   (shadows the imported module configuration inside this method).
   * @returns {Promise<{pages: number, info: Array<Object>}>} `pages` is 0 when
   *   the request failed, otherwise the fixed value 3.
   */
  async getInfo(pagenumber = 1, config) {
    const info = [];
    console.log(`${config.name}--获取第 ${pagenumber} 页数据...`);
    const [error, html] = await this.getList(pagenumber, config);
    if (error) {
      console.error("获取页面数据失败: ", error);
      return { pages: 0, info: [] };
    }
    // The page count is hard-coded rather than read from the response.
    const pages = 3;
    const $ = cheerio.load(html);
    $(".as-pager-body li").each((index, element) => {
      const url = $(element).find(".as-pager-item").attr("href");
      const name = $(element).find(".txt").attr("title");
      // Entries without a link or title cannot be processed; skipping them
      // keeps one malformed entry from aborting the whole crawl loop.
      if (!url || !name) {
        return;
      }
      const idMatch = url.match(/\/bidDetail\/(\d+)\.html/);
      const id = idMatch ? idMatch[1] : "";
      if (keywordsInclude(name)) {
        console.log("处理项目:", name);
        info.push({
          id,
          name,
          urls: url,
          publishTime: "--",
          endTime: "--",
        });
      }
    });
    return { pages, info };
  }

  /**
   * POST the search form for one page. On failure the cookies are refreshed
   * through `initializeCookie` and the request is retried once.
   *
   * @param {number} pagenumber - Sent as `currentPage`.
   * @param {{url: string, data: Object}} config - Search options; `data` is
   *   copied, not modified.
   * @returns {Promise<Array>} `[null, body]` on success, `[error, null]` when
   *   the retry failed as well.
   */
  async getList(pagenumber, config) {
    const data = { ...config.data, currentPage: pagenumber };
    const headers = this.buildHeaders();
    const send = () =>
      this.axiosInstance({
        url: config.url,
        data,
        method: "post",
        headers,
      });
    try {
      const response = await send();
      return [null, response.data];
    } catch (err) {
      console.log("cookie不对");
      try {
        await this.initializeCookie();
        const retryResponse = await send();
        return [null, retryResponse.data];
      } catch (retryErr) {
        return [retryErr, null];
      }
    }
  }
}
/**
 * Build the single-entry task list handed to one `Third` crawler.
 *
 * @param {string} name - Spider name, also used as the options name.
 * @param {string} infoClassCodes - Value of the `infoClassCodes` form field.
 * @returns {Array<Object>} Task list with one search definition.
 */
const createThirdTasks = (name, infoClassCodes) => [
  {
    name,
    info: [],
    options: {
      name,
      url: "https://www.chinabidding.com/search/proj.htm",
      data: {
        fullText: "",
        pubDate: "",
        infoClassCodes,
        normIndustry: "",
        zoneCode: "",
        fundSourceCodes: "",
        poClass: "BidNotice",
        rangeType: "",
        currentPage: 1,
      },
    },
  },
];

// One crawler instance per search; each starts itself from its constructor.
new Third(createThirdTasks("机电项目招投标【招标公告】", "(0105 0103)"));
new Third(createThirdTasks("机电项目招投标【招标变更公告】", "(0106 0104)"));

271
utils.js Normal file
View File

@ -0,0 +1,271 @@
import crypto from "crypto";
import fs from "fs";
import axios from "axios";
/**
 * Format a millisecond timestamp in local time.
 *
 * @param {number} timestamp - Milliseconds since the epoch.
 * @param {*} [mode] - When truthy the time of day is appended.
 * @returns {string} `yyyy-mm-dd`, or `yyyy-mm-dd hh:mm:ss` when `mode` is truthy.
 */
function timestampToDate(timestamp, mode) {
  // Left-pad a date component to two digits.
  const pad = (component) => String(component).padStart(2, "0");
  const date = new Date(timestamp);
  const datePart = [
    date.getFullYear(),
    pad(date.getMonth() + 1),
    pad(date.getDate()),
  ].join("-");
  if (!mode) {
    return datePart;
  }
  const timePart = [date.getHours(), date.getMinutes(), date.getSeconds()]
    .map((component) => pad(component))
    .join(":");
  return `${datePart} ${timePart}`;
}
/**
 * Compute the MD5 digest of a text.
 *
 * @param {string} text - Input to hash.
 * @param {string} [inputEncoding="utf8"] - Encoding of `text`.
 * @param {string} [outputEncoding="hex"] - Encoding of the returned digest.
 * @returns {string} The digest in the requested encoding.
 */
function md5(text, inputEncoding = "utf8", outputEncoding = "hex") {
  const hash = crypto.createHash("md5");
  hash.update(text, inputEncoding);
  return hash.digest(outputEncoding);
}
/**
 * Build the request signature for the message-push endpoint.
 *
 * The MD5 hex digest of `uri + timestamp + secret` is computed, three slices
 * of it (characters 5-12, 29-30 and 18-26) are concatenated in that order and
 * the result is upper-cased.
 *
 * NOTE(review): the signing secret is hard-coded in source.
 *
 * @param {number|string} timestamp - Timestamp sent along with the signature.
 * @returns {string} The 19-character upper-case signature.
 */
function getSign(timestamp) {
  const digest = md5("/common/message/push" + timestamp + "cpwyyds");
  const pieces = [
    digest.substring(5, 13),
    digest.substring(29, 31),
    digest.substring(18, 27),
  ];
  return pieces.join("").toUpperCase();
}
// 微信推送
// function wechatPush(spiderName, arr) {
// for (let item of arr) {
// let timestamp = new Date().getTime();
// let sign = getSign(timestamp);
// let url = "";
// if (typeof item.urls === "string") {
// url = item.urls;
// } else {
// url = item.urls[0];
// }
// let data = {
// timestamp,
// sign,
// templateNo: "A002",
// url,
// paramList: [
// {
// key: "thing8",
// value: spiderName,
// },
// {
// key: "thing2",
// value:
// item.name.length > 20
// ? item.name.substring(0, 16) + "..."
// : item.name,
// },
// {
// key: "time14",
// value: item.publishTime,
// },
// {
// key: "time17",
// value: item.endTime,
// },
// ],
// };
// axios({
// url: "https://advert.shenlintech.com/platform/common/message/push",
// method: "post",
// data,
// });
// }
// }
/**
 * Append a message to the JSON-file queue `message_queue.json` in the current
 * working directory.
 *
 * Deprecated: kept for the legacy file-based queue.
 *
 * @param {string} spiderName - Name of the spider that produced the data.
 * @param {Array} data - Payload; its `length` is logged.
 */
function addToMessageQueue(spiderName, data) {
  const message = {
    id: `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
    spiderName,
    data,
    timestamp: new Date().toISOString(),
    status: "pending",
  };
  const queueFile = "message_queue.json";
  let queue = [];
  if (fs.existsSync(queueFile)) {
    const raw = fs.readFileSync(queueFile, "utf-8");
    // An empty file is treated as an empty queue instead of a parse error.
    if (raw.trim() !== "") {
      queue = JSON.parse(raw);
    }
  }
  // Append the new message and rewrite the whole file.
  queue.push(message);
  fs.writeFileSync(queueFile, JSON.stringify(queue, null, 2));
  console.log(`📤 添加消息到队列: ${spiderName} - ${data.length} 条数据`);
}
/**
 * Call `fn` page after page until `stopWhen` says stop or an error occurs.
 *
 * @param {Function} fn - Called as `fn(pagenumber, additional)`; may be async.
 * @param {Object} [options]
 * @param {number} [options.time] - Delay in milliseconds between two calls.
 * @param {number} [options.pagenumber] - First page to request.
 * @param {Function} [options.stopWhen] - `(pagenumber, result) => boolean`; a
 *   truthy result ends the loop after calling `complete`.
 * @param {Function} [options.readyForNext] - `(pagenumber, result) => nextPage`.
 * @param {Function} [options.complete] - `(result) => void`, called once when
 *   `stopWhen` fires.
 * @param {*} [options.additional] - Passed through to `fn` unchanged.
 * @returns {Promise<void>} Resolves when the loop has ended; errors are
 *   logged and end the loop instead of rejecting.
 */
async function loopCall(fn, options = {}) {
  const { time, stopWhen, readyForNext, complete, additional } = options;
  let currentPage = options.pagenumber;
  for (;;) {
    try {
      const result = await fn(currentPage, additional);
      // Stop condition reached: report the final result and leave.
      if (stopWhen && stopWhen(currentPage, result)) {
        if (complete) {
          complete(result);
        }
        return;
      }
      currentPage = readyForNext(currentPage, result);
      await new Promise((resolve) => {
        setTimeout(resolve, time);
      });
    } catch (err) {
      console.error("loopCall 出错:", err);
      return;
    }
  }
}
/**
 * Tell whether an announcement title contains at least one keyword of interest.
 *
 * @param {*} name - Announcement title. Anything that is not a string (for
 *   example a missing HTML attribute) is treated as "no match".
 * @returns {boolean} True when `name` contains one of the keywords.
 */
function keywordsInclude(name) {
  if (typeof name !== "string") {
    return false;
  }
  const keywords = [
    "海外",
    "国际",
    "内容",
    "营销",
    "运营",
    "直播",
    "品牌",
    "事件",
    "策略",
    "传播",
    "执行",
    "社媒",
    "视频",
    "制作",
    "拍摄",
    "效果",
  ];
  return keywords.some((keyword) => name.includes(keyword));
}
/**
 * Build the detail-page link of a FAW (一汽) announcement.
 *
 * Announcements of type 7 (candidate publicity) use encrypted parameters that
 * cannot be turned into a plain link, so a fixed hint text is returned for
 * them instead.
 *
 * @param {number|string} gongGaoType - Announcement type.
 * @param {string} guid - Announcement identifier.
 * @param {number|string} version - Announcement version.
 * @param {*} [origin] - Accepted for call compatibility; not used.
 * @returns {string} The detail URL, or the hint text for type 7.
 */
function getYiqiNoticeUrl(gongGaoType, guid, version, origin) {
  // No trailing slash: the path below supplies the separator, which avoids
  // the "//gg/..." double slash.
  const baseUrl = "https://etp.faw.cn";
  const isSecret = Number(gongGaoType) === 7;
  if (isSecret) {
    return "加密链接,请直接上对应网站查看";
  }
  return (
    baseUrl +
    "/gg/toGongGaoDetail?guid=" +
    guid +
    "&gongGaoType=" +
    gongGaoType +
    "&version=" +
    version +
    "&isNew=1"
  );
}
/**
 * Extract the argument list of a `toGgDetails(...)` call found in an onclick
 * attribute.
 *
 * Example: "toGgDetails('6','642e...:2','2','1','')" yields
 * ["6", "642e...:2", "2", "1", ""].
 *
 * @param {*} funcStr - Attribute text. Non-string values (for example a
 *   missing attribute) yield null.
 * @returns {?string[]} The arguments with quotes and surrounding whitespace
 *   removed, or null when no `toGgDetails(...)` call is found.
 */
function parseToGgDetailsParams(funcStr) {
  if (typeof funcStr !== "string") {
    return null;
  }
  const match = funcStr.match(/toGgDetails\(([^)]+)\)/);
  if (!match) {
    return null;
  }
  // Simple split: arguments are assumed not to contain commas themselves.
  return match[1]
    .split(",")
    .map((param) => param.trim().replace(/['"]/g, ""));
}
/**
 * Encode a string with a base64-style scheme.
 *
 * Character codes are consumed three at a time and emitted as four symbols
 * of the alphabet below (index 64, "=", is the padding symbol). Afterwards
 * every "=" is replaced by "r1e2p3l4" and "+*+" is appended.
 *
 * @param {string} input - Text to encode.
 * @returns {string} The encoded text.
 */
function encodeSixF(input) {
  const alphabet =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
  const padIndex = 64;
  let encoded = "";
  let position = 0;
  // do/while on purpose: an empty input still produces one padded group.
  do {
    const first = input.charCodeAt(position++);
    const second = input.charCodeAt(position++);
    const third = input.charCodeAt(position++);
    const group = [
      first >> 2,
      ((first & 3) << 4) | (second >> 4),
      ((second & 15) << 2) | (third >> 6),
      third & 63,
    ];
    // charCodeAt yields NaN past the end of the input: pad the missing symbols.
    if (Number.isNaN(second)) {
      group[2] = padIndex;
      group[3] = padIndex;
    } else if (Number.isNaN(third)) {
      group[3] = padIndex;
    }
    for (const index of group) {
      encoded += alphabet.charAt(index);
    }
  } while (position < input.length);
  if (encoded.indexOf("=") !== -1) {
    encoded = encoded.split("=").join("r1e2p3l4");
  }
  return encoded + "+*+";
}
/**
 * Normalize "missing" values to an empty string.
 *
 * @param {*} value - Any value.
 * @returns {*} "" when `value` is undefined, null, or loosely equal to the
 *   text "null" or "undefined"; otherwise `value` unchanged.
 */
function dealNullAndUndefined(value) {
  const isMissing =
    typeof value == "undefined" ||
    value == null ||
    value == "null" ||
    value == "undefined";
  return isMissing ? "" : value;
}
export {
timestampToDate,
loopCall,
keywordsInclude,
getYiqiNoticeUrl,
parseToGgDetailsParams,
addToMessageQueue,
md5,
// wechatPush
};

199
yiqi.js Normal file
View File

@ -0,0 +1,199 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import {
timestampToDate,
loopCall,
keywordsInclude,
getYiqiNoticeUrl,
parseToGgDetailsParams,
// addToMessageQueue,
} from "./utils.js";
import config from "./config.js";
import * as cheerio from "cheerio";
import { SQLiteMessageQueue } from "./sqlite.js";
// import { messageQueue } from "./msgManager.js";
/**
 * Crawler for the FAW (一汽) procurement announcement list at etp.faw.cn.
 *
 * When announcements are already stored for this spider the crawler polls
 * incrementally; otherwise it first performs a full crawl and then switches
 * to incremental polling.
 */
class YiQi {
  constructor() {
    // Announcements collected during the full crawl.
    this.info = [];
    console.log("一汽 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    this.start();
  }

  /** Run `init` and log (rather than propagate) any start-up failure. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /** Choose incremental polling or a full crawl based on the stored data. */
  async init() {
    const announcements = this.queue.getAnnouncementsBySpider("一汽");
    if (announcements.length > 0) {
      await this.increment();
    } else {
      await this.fullFetch();
    }
  }

  /**
   * Full crawl: walk the pages until the page limit is reached, persist what
   * was found, then continue with incremental polling.
   */
  async fullFetch() {
    console.log("开始全量爬取...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          this.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          // The last page is delivered here, not through readyForNext.
          this.info.push(...result.info);
          console.log(`爬取完成,共获取 ${this.info.length} 条有效数据`);
          try {
            // Nothing found: do not store or enqueue an empty batch.
            if (this.info.length > 0) {
              this.queue.saveAnnouncements("一汽", this.info);
              this.queue.addMessage("一汽", this.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
        },
      });
    } catch (error) {
      console.error("全量爬取失败:", error);
    }
    console.log("开始增量爬取...");
    await this.increment();
  }

  /**
   * Incremental polling: fetch a page, store and enqueue the announcements
   * that are not stored yet, and decide which page to fetch next.
   */
  async increment() {
    console.log("开始增量爬取模式每5分钟检查一次新数据...");
    try {
      await loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        readyForNext: (pagenumber, result) => {
          try {
            const newInfo = this.queue.filterNewAnnouncements(
              "一汽",
              result.info
            );
            if (newInfo.length === 0) {
              console.log("没有发现新数据,继续监控...");
              return 1;
            }
            console.log(`发现 ${newInfo.length} 条新数据`);
            this.queue.saveAnnouncements("一汽", newInfo);
            this.queue.addMessage("一汽", newInfo);
            // A page made up only of new items means older pages may hold
            // more; any already-known item means we caught up.
            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
          } catch (error) {
            console.error("数据库操作失败:", error);
            // Start over from the first page on the next tick.
            return 1;
          }
        },
      });
    } catch (error) {
      console.error("增量爬取失败:", error);
    }
  }

  /**
   * Fetch one list page and extract the announcements that carry a deadline
   * and whose title matches the keyword filter.
   *
   * @param {number} [pagenumber=1] - Page to fetch.
   * @returns {Promise<{pages: number, info: Array<Object>}>} `pages` is the
   *   fixed value 30; `info` is empty when the request failed.
   */
  async getInfo(pagenumber = 1) {
    const info = [];
    console.log(`正在获取第 ${pagenumber} 页数据...`);
    const [error, html] = await this.getHtml(pagenumber);
    if (error) {
      console.error("获取页面数据失败:", error);
      return { pages: 30, info: [] };
    }
    // The page count is hard-coded rather than read from the response.
    const pages = 30;
    const $ = cheerio.load(html);
    $(".zl-list-main .zl-col-6").each((index, element) => {
      const item = $(element);
      const id = item.find(".zl-desc-item:contains('项目编号')").text();
      const name = item.find(".title").text();
      const publishTime = item
        .find(".zl-desc-item:contains('发布时间')")
        .text();
      const endTime = item.find(".daojishi").attr("data-time");
      // Keep only announcements with a deadline attribute and a keyword hit.
      if (!endTime || !keywordsInclude(name)) {
        return;
      }
      // The detail link is derived from the arguments of the onclick handler.
      // A missing or unparsable handler must not abort the whole crawl, so
      // such an announcement is kept with an empty link.
      const funcStr = item.find(".jump").attr("onclick");
      const funcArgs = funcStr ? parseToGgDetailsParams(funcStr) : null;
      info.push({
        id: id.replace("项目编号:", ""),
        name: name.trim(),
        publishTime: publishTime.replace("发布时间:", "").trim(),
        endTime: timestampToDate(Number(endTime)),
        urls: funcArgs ? getYiqiNoticeUrl(...funcArgs) : "",
      });
    });
    return { pages, info };
  }

  /**
   * Request one page of the announcement list.
   *
   * @param {number} pagenumber - Sent as `currentPage`.
   * @returns {Promise<Array>} `[null, body]` on success, `[error, null]` on failure.
   */
  async getHtml(pagenumber) {
    try {
      const res = await axios({
        url: "https://etp.faw.cn/gg/allJYTypeGGList?hangYeType=-1&xmLeiXing=&ggStartTimeEnd=&gongGaoType=5&isNew=1",
        data: {
          searchType: "",
          searchText: "",
          currentPage: pagenumber,
        },
        headers: {
          "Content-Type": "application/x-www-form-urlencoded",
        },
        method: "post",
      });
      return [null, res.data];
    } catch (err) {
      return [err, null];
    }
  }
}
// Start the crawler as soon as this module is loaded; the constructor calls start().
new YiQi();

406
youzhicai.js Normal file
View File

@ -0,0 +1,406 @@
import axios from "axios";
import fs from "fs";
import path from "path";
import JSON5 from "json5";
import { timestampToDate, loopCall, keywordsInclude } from "./utils.js";
import config from "./config.js";
import { SQLiteMessageQueue } from "./sqlite.js";
import * as cheerio from "cheerio";
/**
 * Crawler for the youzhicai.com (优质采) notice search.
 *
 * Every entry of `jsonMap` describes one search to monitor. On start-up an
 * entry that already has stored announcements goes straight to incremental
 * polling; otherwise a full crawl runs first and then hands over to
 * incremental polling. Cookies returned by the site are collected in
 * `cookiePair` and replayed on every request sent through `axiosInstance`;
 * the `__RequestVerificationToken` cookie is additionally sent as a form field.
 */
class YouZhiCai {
  /**
   * @param {Array<{name: string, info: Array, options: {name: string, url: string, data: Object}}>} jsonMap
   *   Searches to monitor. `info` accumulates the results of the full crawl.
   */
  constructor(jsonMap) {
    // Cookie jar (name -> value) shared by the interceptors below.
    this.cookiePair = new Map();
    this.jsonMap = jsonMap;
    this.axiosInstance = axios.create({ timeout: 30000, maxRedirects: 5 });
    // Attach the collected cookies to every outgoing request.
    this.axiosInstance.interceptors.request.use((requestConfig) => {
      requestConfig.headers.Cookie = Array.from(this.cookiePair.entries())
        .map(([name, value]) => `${name}=${value}`)
        .join("; ");
      return requestConfig;
    });
    // Remember every cookie the server sets on a successful response.
    this.axiosInstance.interceptors.response.use(
      (response) => {
        this.extractCookie(response.headers["set-cookie"] || []);
        return response;
      },
      (error) => Promise.reject(error)
    );
    console.log("优质采 爬虫启动...");
    this.queue = new SQLiteMessageQueue();
    this.start();
  }

  /** Run `init` and log (rather than propagate) any start-up failure. */
  async start() {
    try {
      await this.init();
    } catch (err) {
      console.error("启动失败:", err);
    }
  }

  /**
   * Pick the crawl mode for every configured search: incremental when
   * announcements are already stored for it, full crawl otherwise.
   */
  async init() {
    for (const item of this.jsonMap) {
      const announcements = this.queue.getAnnouncementsBySpider(item.name);
      if (announcements.length > 0) {
        this.loopFetchIncrement(item);
      } else {
        this.loopFetchFull(item);
      }
    }
  }

  /**
   * Browser-like headers sent with every request to youzhicai.com.
   *
   * @returns {Object} A fresh header object.
   */
  buildHeaders() {
    return {
      Accept: "text/plain, */*; q=0.01",
      "Accept-Language": "zh-CN,zh;q=0.9",
      "Cache-Control": "no-cache",
      "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
      Origin: "https://www.youzhicai.com",
      Pragma: "no-cache",
      Priority: "u=1, i",
      Referer: "https://www.youzhicai.com/s/1_1_0_0_.html",
      "Sec-Ch-Ua":
        '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
      "Sec-Ch-Ua-Mobile": "?0",
      "Sec-Ch-Ua-Platform": '"macOS"',
      "Sec-Fetch-Dest": "empty",
      "Sec-Fetch-Mode": "cors",
      "Sec-Fetch-Site": "same-origin",
      "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
      "X-Requested-With": "XMLHttpRequest",
    };
  }

  /**
   * Load the search page once so the response interceptor can pick up the
   * cookies the site sets.
   *
   * @throws Re-throws the request error after logging it.
   */
  async initializeCookie() {
    try {
      await this.axiosInstance.get(
        "https://www.youzhicai.com/s/1_1_0_0_.html",
        { headers: this.buildHeaders() }
      );
    } catch (err) {
      console.log("err", err);
      throw err;
    }
  }

  /**
   * Store the name/value pair of every `Set-Cookie` header.
   *
   * Only the first `=` separates name from value, so values that themselves
   * contain `=` are kept intact. Entries without a name are ignored.
   *
   * @param {string[]} cookieArr - Raw `Set-Cookie` header values.
   */
  extractCookie(cookieArr) {
    for (const cookie of cookieArr) {
      const pair = cookie.split(";")[0];
      const separator = pair.indexOf("=");
      if (separator <= 0) {
        continue;
      }
      const key = pair.slice(0, separator).trim();
      const value = pair.slice(separator + 1).trim();
      this.cookiePair.set(key, value);
    }
  }

  /**
   * Full crawl: walk the result pages until `stopWhen` fires, then persist
   * everything that was collected and switch to incremental polling.
   *
   * `loopCall` logs its own errors and is deliberately not awaited, so `init`
   * can move on to the next configured search.
   *
   * @param {{name: string, info: Array, options: Object}} props - One `jsonMap` entry.
   */
  loopFetchFull(props) {
    console.log("开始全量爬取");
    try {
      loopCall(this.getInfo.bind(this), {
        time: config.fullFetchTime,
        pagenumber: 1,
        additional: props.options,
        stopWhen: (pagenumber, result) =>
          pagenumber >= result.pages || pagenumber >= config.pageNumberLimit,
        readyForNext: (pagenumber, result) => {
          props.info.push(...result.info);
          return pagenumber + 1;
        },
        complete: (result) => {
          // The last page is delivered here, not through readyForNext.
          props.info.push(...result.info);
          console.log(`爬取完成,共获取 ${props.info.length} 条有效数据`);
          try {
            if (props.info.length > 0) {
              this.queue.saveAnnouncements(props.name, props.info);
              this.queue.addMessage(props.name, props.info);
            }
          } catch (error) {
            console.error("数据库操作失败:", error);
          }
          this.loopFetchIncrement(props);
        },
      });
    } catch (error) {
      console.error(`${props.options.name}全量爬取失败:`, error);
    }
  }

  /**
   * Incremental polling: fetch a page, store and enqueue the announcements
   * that are not stored yet, and decide which page to fetch next.
   *
   * @param {{name: string, info: Array, options: Object}} props - One `jsonMap` entry.
   */
  loopFetchIncrement(props) {
    console.log("开始增量爬取");
    try {
      loopCall(this.getInfo.bind(this), {
        time: config.incrementFetchTime,
        pagenumber: 1,
        additional: props.options,
        readyForNext: (pagenumber, result) => {
          try {
            const newInfo = this.queue.filterNewAnnouncements(
              props.name,
              result.info
            );
            if (newInfo.length === 0) {
              console.log("没有发现新数据,继续监控...");
              return 1;
            }
            console.log(`发现 ${newInfo.length} 条新数据`);
            this.queue.saveAnnouncements(props.name, newInfo);
            this.queue.addMessage(props.name, newInfo);
            // A page made up only of new items means older pages may hold
            // more; any already-known item means we caught up.
            return newInfo.length === result.info.length ? pagenumber + 1 : 1;
          } catch (error) {
            console.error("数据库操作失败:", error);
            // Start over from the first page on the next tick.
            return 1;
          }
        },
      });
    } catch (error) {
      console.error(`${props.options.name}增量爬取失败:`, error);
    }
  }

  /**
   * Fetch one result page and extract the notices whose title matches the
   * keyword filter.
   *
   * @param {number} [pagenumber=1] - Page to fetch.
   * @param {{name: string, url: string, data: Object}} config - Search options
   *   (shadows the imported module configuration inside this method).
   * @returns {Promise<{pages: number, info: Array<Object>}>} `pages` is 0 when
   *   the request failed or no total could be read, and never more than 2.
   */
  async getInfo(pagenumber = 1, config) {
    const info = [];
    console.log(`${config.name}--获取第 ${pagenumber} 页数据...`);
    const [error, html] = await this.getList(pagenumber, config);
    if (error) {
      console.error("获取页面数据失败: ", error);
      return { pages: 0, info: [] };
    }
    const $ = cheerio.load(html);
    // Parse the total defensively: a non-numeric text would otherwise turn
    // `pages` into NaN and disable the two-page cap below.
    const totalText = $("#recommendMsg .info-num-value").text();
    const parsedTotal = Number.parseInt(totalText.replace(/,/g, "").trim(), 10);
    const total = Number.isNaN(parsedTotal) ? 0 : parsedTotal;
    const pageSize = Number(config.data && config.data.PageSize) || 15;
    // Pages after the second one require a captcha, so never go beyond 2.
    const pages = Math.min(Math.ceil(total / pageSize), 2);
    $(".project-li").each((index, element) => {
      const link = $(element).find(".project-name0");
      const id = link.attr("href");
      const name = link.attr("title");
      // Entries without a link or title cannot be identified; skip them.
      if (!id || !name) {
        return;
      }
      const publishTime = $(element).find(".pub-value0").text();
      const leftDay = $(element).find(".left-day .emOrange:eq(0)").text();
      // Deadline = publish date plus the remaining days shown in the list.
      const endTime = new Date(
        +new Date(publishTime) + leftDay * 24 * 60 * 60 * 1000
      ).toLocaleDateString();
      const urls = "https://www.youzhicai.com" + id;
      if (keywordsInclude(name)) {
        console.log("处理项目:", name, publishTime, endTime);
        info.push({
          id,
          name,
          publishTime,
          endTime,
          urls,
        });
      }
    });
    return { pages, info };
  }

  /**
   * POST the search form for one page. On failure the cookies are refreshed
   * through `initializeCookie` and the request is retried once.
   *
   * @param {number} pagenumber - Sent as `PageIndex`.
   * @param {{url: string, data: Object}} config - Search options; `data` is
   *   copied, not modified.
   * @returns {Promise<Array>} `[null, body]` on success, `[error, null]` when
   *   the retry failed as well.
   */
  async getList(pagenumber, config) {
    const headers = this.buildHeaders();
    // Build the form from the configured fields plus the current page and,
    // when the cookie is known, the verification token.
    const buildPayload = () => {
      const payload = { ...config.data, PageIndex: pagenumber };
      const token = this.cookiePair.get("__RequestVerificationToken");
      if (token) {
        payload.__RequestVerificationToken = token;
      }
      return payload;
    };
    const send = () =>
      this.axiosInstance({
        url: config.url,
        data: buildPayload(),
        method: "post",
        headers,
      });
    try {
      const response = await send();
      return [null, response.data];
    } catch (err) {
      console.log("cookie不对");
      try {
        await this.initializeCookie();
        // The payload is rebuilt so the refreshed token is picked up.
        const retryResponse = await send();
        return [null, retryResponse.data];
      } catch (retryErr) {
        return [retryErr, null];
      }
    }
  }
}
/**
 * Build the single-entry task list handed to one `YouZhiCai` crawler.
 *
 * All searches share the same form fields and differ only in the spider name
 * and the `MsNoticeType` value.
 *
 * @param {string} name - Spider name, also used as the options name.
 * @param {number} noticeType - Value of the `MsNoticeType` form field.
 * @returns {Array<Object>} Task list with one search definition.
 */
function createYouZhiCaiTasks(name, noticeType) {
  return [
    {
      name,
      info: [],
      options: {
        name,
        url: "https://www.youzhicai.com/s/1_1_0_0_.html",
        data: {
          MsProvince: "",
          MsCity: "",
          MsStartDate: "",
          MsEndDate: "",
          AutoOr: 0,
          BackOr: 0,
          NoticeTitle: "",
          searchAccuracy: "precise",
          matchType: "precise",
          TenderType: "",
          MsBidderType: 1,
          MsNoticeType: noticeType,
          MsPublishType: 0,
          MsSingUpType: 1,
          MsSort: 2,
          PageIndex: 1,
          PageSize: 15,
          AgencyId: "",
          SecondSearch: "",
          SecondSearchType: "",
          TotalSize: 10000,
          SearchRange: 3,
          year: "",
          key1: "",
          key2: "",
          key3: "",
        },
      },
    },
  ];
}

// One crawler instance per notice type; each starts itself from its constructor.
new YouZhiCai(createYouZhiCaiTasks("优质采【招标公告】", 1));
new YouZhiCai(createYouZhiCaiTasks("优质采【澄清/变更公告】", 5));
new YouZhiCai(createYouZhiCaiTasks("优质采【招标项目计划】", 7));