-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawlEbay.js
121 lines (100 loc) · 3.06 KB
/
crawlEbay.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
const schedule = require("node-schedule");
require("dotenv").config();
const TelegramBot = require("node-telegram-bot-api");
const originalFetch = require("node-fetch");
const fetch = require("fetch-retry")(originalFetch);
const cheerio = require("cheerio");
// --- Module-level state shared by every fetchApartments() call ---

// True until the first top-level run has completed; while it is true,
// listings are only recorded and no Telegram notification is sent.
let isInitialRun = true;
// Number of listings processed during the current run (reset after each top-level run).
let appartmentsCount = 0;
// Number of fetchApartments() invocations during the current run (reset after each top-level run).
let reqCount = 0;
// Search URL read from the environment; fetchApartments derives follow-up
// page URLs by replacing the literal "seite:1" in it.
const searchLink = process.env.KLEINANZEIGEN_SEARCH_URL;
// Telegram bot token and target chat, both read from the environment.
const token = process.env.TELEGRAM_TOKEN;
const chatId = process.env.CHAT_ID;
// Bot client constructed with the token only (no extra options passed here).
const bot = new TelegramBot(token);
// Listings already seen, keyed by listing link; used to detect new listings.
const ids = {};
/**
 * Collects the page numbers that follow the first result page.
 *
 * @param {Function} $ - Cheerio root function loaded with the first result page.
 * @returns {string[]} Page numbers (as strings) that still have to be fetched.
 */
function extractFollowUpPages($) {
  return (
    $(".pagination-pages")
      .text()
      .trim()
      .split("\n")
      .map((entry) => entry.trim())
      // The first entry is the page that has just been fetched.
      .slice(1)
      // Blank lines or non-numeric labels would produce a broken "seite:" URL.
      .filter((entry) => /^\d+$/.test(entry))
  );
}

/**
 * Extracts every usable listing from a loaded result page.
 *
 * @param {Function} $ - Cheerio root function loaded with a result page.
 * @returns {Array<{title: string, link: string, description: string,
 *   price: string, additionalData: string[], address: string}>}
 */
function scrapeListings($) {
  const listings = [];
  $(".ad-listitem").each((_index, el) => {
    const item = $(el);
    const heading = item.find(".text-module-begin");
    const title = heading.text().trim();
    const href = heading.find("a").attr("href");
    // Rows without a title are not ads; rows without a link cannot be
    // de-duplicated (they would all share the key ".../undefined").
    if (!title || !href) {
      return;
    }
    listings.push({
      title,
      link: `https://www.ebay-kleinanzeigen.de${href}`,
      description: item.find(".aditem-main--middle--description").text().trim(),
      price: item.find(".aditem-main--middle--price").text().trim(),
      additionalData: item
        .find(".text-module-end")
        .text()
        .trim()
        .split("\n")
        .map((entry) => entry.trim()),
      address: item.find(".aditem-main--top--left").text().trim(),
    });
  });
  return listings;
}

/**
 * Builds the Telegram notification text for one listing.
 *
 * @param {{title: string, link: string, description: string, price: string,
 *   additionalData: string[], address: string}} listing
 * @returns {string} Multi-line message ending with a newline.
 */
function buildNotification({ title, price, additionalData, description, address, link }) {
  // The address is a query *value*, so it needs encodeURIComponent:
  // encodeURI would leave "&", "#" or "+" untouched and corrupt the query.
  const mapsUrl = `https://www.google.com/maps/search/?api=1&query=${encodeURIComponent(
    address
  )}`;
  return [
    `${title} 🏠 ${price} 💰 - ${additionalData}`,
    description,
    `🗺 address: ${mapsUrl}`,
    `🔍 ${link}`,
    "",
  ].join("\n");
}

/**
 * Scrapes one search result page, remembers every listing it finds and sends
 * a Telegram notification for each listing that was not seen before.
 *
 * Called without an argument it performs a full run: it fetches the
 * configured search URL, then every follow-up page listed in the pagination
 * widget, logs a summary and resets the per-run counters. The very first
 * successful full run only records listings and sends no notifications.
 *
 * Errors are logged and never rethrown, so the returned promise always
 * resolves.
 *
 * @param {string} [url] - Specific result page to scrape; omit for a full run.
 * @returns {Promise<void>}
 */
const fetchApartments = async (url) => {
  const isTopLevelRun = !url;
  reqCount++;
  try {
    const response = await fetch(url || searchLink, {
      retries: 3,
      retryDelay: 10000,
    });
    // An error page would parse as "no listings" and, on the initial run,
    // make every listing look new on the next successful run.
    if (!response.ok) {
      throw new Error(`Unexpected HTTP status ${response.status}`);
    }
    const $ = cheerio.load(await response.text());

    if (isTopLevelRun) {
      for (const pageNum of extractFollowUpPages($)) {
        await fetchApartments(
          searchLink.replace("seite:1", `seite:${pageNum}`)
        );
      }
    }

    for (const apartment of scrapeListings($)) {
      appartmentsCount++;
      if (ids[apartment.link]) {
        continue;
      }
      if (!isInitialRun) {
        // Sending stays non-blocking, but a failed send must not surface as
        // an unhandled promise rejection.
        bot
          .sendMessage(chatId, buildNotification(apartment))
          .catch((sendError) => {
            console.error(
              "EBAY - Error sending notification:",
              sendError,
              apartment.link
            );
          });
      }
      ids[apartment.link] = apartment;
    }

    if (isTopLevelRun) {
      console.info(
        `EBAY - Runned with result: reqCount=${reqCount} appartmentsCount=${appartmentsCount}`
      );
      isInitialRun = false;
    }
  } catch (error) {
    console.error("EBAY - Error processing response:", error, url);
  } finally {
    // Reset even after a failed run so the next summary is not inflated.
    if (isTopLevelRun) {
      appartmentsCount = 0;
      reqCount = 0;
    }
  }
};
// CommonJS export: the scraper function is this module's only export.
module.exports = fetchApartments;