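// tweetextractor.mjs
// Extracts up to MAX_TWEETS original tweets (retweets and replies skipped)
// from the target account via agent-twitter-client, checkpointing progress
// to tweet_scraped.json and writing a text-only copy to tweet_scraped_clean.json.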
import { Scraper } from "agent-twitter-client";
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
// Get the directory name properly in ES modules
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const OUTPUT_FILE = path.join(__dirname, "tweet_scraped.json");
const TARGET_USERNAME = "aixbt_agent";
const MAX_TWEETS = 3000;
// Credentials are read from environment variables to avoid committing secrets
const credentials = {
    username: process.env.TWITTER_USERNAME,
    password: process.env.TWITTER_PASSWORD,
    email: process.env.TWITTER_EMAIL
};
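// Assumed variable names for illustration; export them before running, e.g.:
//   TWITTER_USERNAME=... TWITTER_PASSWORD=... TWITTER_EMAIL=... node tweetextractor.mjs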
async function sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
}
async function scrapeTweets() {
    try {
        console.log(`Starting tweet extraction for @${TARGET_USERNAME}`);
        console.log(`Tweets will be saved to: ${OUTPUT_FILE}`);
        // Create a new instance of the Scraper
        const scraper = new Scraper();
        // Login to Twitter; the email helps satisfy verification challenges
        console.log("Attempting to login...");
        await scraper.login(credentials.username, credentials.password, credentials.email);
        if (!(await scraper.isLoggedIn())) {
            throw new Error("Login failed. Please check your credentials.");
        }
        console.log("Successfully logged in to Twitter");
        // Initialize tweet storage, resuming from a previous run if present
        let allTweets = [];
        if (fs.existsSync(OUTPUT_FILE)) {
            const existingContent = fs.readFileSync(OUTPUT_FILE, "utf-8");
            allTweets = JSON.parse(existingContent);
            console.log(`Loaded ${allTweets.length} existing tweets`);
        }
        // Track ids already stored so a resumed run does not save duplicates
        const seenIds = new Set(allTweets.map(t => t.id));
        // getTweets returns an async iterator over the account's timeline,
        // yielding up to MAX_TWEETS results
        const tweets = scraper.getTweets(TARGET_USERNAME, MAX_TWEETS);
        let count = 0;
        // Fetch and process tweets
        for await (const tweet of tweets) {
            count++;
            // Process tweet
            const processedTweet = {
                id: tweet.id,
                text: tweet.text,
                createdAt: tweet.createdAt,
                metrics: {
                    retweets: tweet.retweetCount,
                    likes: tweet.likeCount,
                    replies: tweet.replyCount,
                    quotes: tweet.quoteCount
                },
                isRetweet: tweet.isRetweet,
                isReply: tweet.isReply,
                hasMedia: tweet.hasMedia
            };
            // Skip retweets, replies, and already-saved tweets for cleaner content
            if (!processedTweet.isRetweet && !processedTweet.isReply && !seenIds.has(processedTweet.id)) {
                seenIds.add(processedTweet.id);
                allTweets.push(processedTweet);
                // Log progress
                console.log(`\n--- Tweet ${count} ---`);
                console.log(`Text: ${processedTweet.text.substring(0, 100)}...`);
                console.log(`Engagement: ${processedTweet.metrics.likes} likes, ${processedTweet.metrics.retweets} RTs`);
                // Save periodically to avoid losing progress
                if (count % 50 === 0) {
                    try {
                        fs.writeFileSync(OUTPUT_FILE, JSON.stringify(allTweets, null, 2));
                        console.log(`\nSaved ${allTweets.length} tweets to ${OUTPUT_FILE}`);
                    } catch (err) {
                        console.error("Error saving file:", err);
                    }
                    // Add a small delay to avoid rate limiting
                    await sleep(1000);
                }
            }
            if (count >= MAX_TWEETS) {
                break;
            }
        }
        // Final save
        try {
            fs.writeFileSync(OUTPUT_FILE, JSON.stringify(allTweets, null, 2));
            console.log(`\nCompleted! Total tweets saved: ${allTweets.length}`);
        } catch (err) {
            console.error("Error saving final file:", err);
        }
        // Create a cleaned version with just tweet texts
        const cleanedTweets = allTweets.map(tweet => tweet.text);
        const cleanFile = path.join(__dirname, "tweet_scraped_clean.json");
        try {
            fs.writeFileSync(cleanFile, JSON.stringify(cleanedTweets, null, 2));
            console.log("Created cleaned version in tweet_scraped_clean.json");
        } catch (err) {
            console.error("Error saving cleaned file:", err);
        }
        // Logout
        await scraper.logout();
        console.log("Successfully logged out from Twitter");
    } catch (error) {
        console.error("An error occurred:", error);
        process.exit(1);
    }
}
// Run the scraper
scrapeTweets();