Skip to content

Commit 738bdca

Browse files
committed
twitter scraper target
1 parent 57059fd commit 738bdca

9 files changed

+3593
-73
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,5 @@ cache/*
4848
log*.txt
4949
# Explicitly include plugin-node data files
5050
!packages/plugin-node/data/**
51+
52+
.turbo

README.md

+33
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,39 @@ docker-compose up -d --build
144144
- Docker Compose v2.0.0 or higher
145145

146146

147+
148+
### Twitter Scraper Target
149+
150+
This repository includes a script that scrapes tweets from a specified Twitter user. The script uses the `agent-twitter-client` package to perform the scraping.
151+
152+
#### Usage
153+
154+
1. Ensure you have the necessary environment variables set in your `.env` file:
155+
```plaintext
156+
TWITTER_USERNAME=your_twitter_username
157+
TWITTER_PASSWORD=your_twitter_password
158+
TWITTER_EMAIL=your_twitter_email
159+
```
160+
161+
2. Run the script with the target Twitter username as an argument:
162+
```bash
163+
pnpm scrape-twitter <username>
164+
```
165+
166+
Replace `<username>` with the Twitter handle of the user you want to scrape tweets from.
167+
168+
3. The scraped tweets will be saved as a JSON file in the `data/tweets` directory, named `<username>_<timestamp>.json`.
169+
170+
#### Example
171+
172+
To scrape tweets from the user `exampleuser`, you would run:
173+
```bash
174+
pnpm scrape-twitter exampleuser
175+
```
176+
177+
This creates a file containing the scraped tweets in the `data/tweets` directory, with a name such as `exampleuser_2023-09-15_12-34-56.json`.
178+
179+
147180
### Community & contact
148181
149182
- [GitHub Issues](https://github.com/ai16z/eliza/issues). Best for: bugs you encounter using Eliza, and feature proposals.

data/discord/rules/temple_page_1.csv

+1,104
Large diffs are not rendered by default.

data/discord/rules/temple_page_2.csv

+1,092
Large diffs are not rendered by default.

data/discord/rules/temple_page_3.csv

+1,101
Large diffs are not rendered by default.

data/discord/rules/temple_page_4.csv

+250
Large diffs are not rendered by default.

package.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
"docker:bash": "bash ./scripts/docker.sh bash",
2121
"docker:start": "bash ./scripts/docker.sh start",
2222
"docker": "pnpm docker:build && pnpm docker:run && pnpm docker:bash",
23-
"test": "pnpm --dir packages/core test"
23+
"test": "pnpm --dir packages/core test",
24+
"scrape-twitter": "ts-node scripts/twitter-scraper.ts"
2425
},
2526
"nodemonConfig": {
2627
"watch": [

scripts/extracttweets.js

-70
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,14 @@
1-
import { Scraper } from "agent-twitter-client";
2-
import dotenv from "dotenv";
31
import fs from "fs";
42
import path from "path";
53

6-
dotenv.config();
7-
84
// Initialize directories
95
const tweetsDir = path.join(process.cwd(), 'data', 'tweets');
106
if (!fs.existsSync(tweetsDir)) {
117
fs.mkdirSync(tweetsDir, { recursive: true });
128
}
139

14-
async function initTwitterScraper() {
15-
const scraper = new Scraper();
16-
let isAuthenticated = false;
17-
18-
// Check for existing cookies
19-
if (fs.existsSync('./cookies.json')) {
20-
try {
21-
const cookiesText = fs.readFileSync('./cookies.json', 'utf8');
22-
const cookiesArray = JSON.parse(cookiesText);
23-
24-
// Format cookies for setting
25-
const cookieStrings = cookiesArray.map(cookie =>
26-
`${cookie.key}=${cookie.value}; Domain=${cookie.domain}; Path=${cookie.path}; ` +
27-
`${cookie.secure ? 'Secure' : ''}; ${cookie.httpOnly ? 'HttpOnly' : ''}; ` +
28-
`SameSite=${cookie.sameSite || 'Lax'}`
29-
);
30-
31-
await scraper.setCookies(cookieStrings);
32-
isAuthenticated = await scraper.isLoggedIn();
33-
console.log('Loaded existing cookies:', isAuthenticated ? 'success' : 'failed');
34-
} catch (e) {
35-
console.error('Error loading cookies:', e);
36-
}
37-
}
38-
39-
// If no valid cookies, login with credentials
40-
if (!isAuthenticated) {
41-
if (!process.env.TWITTER_USERNAME || !process.env.TWITTER_PASSWORD) {
42-
throw new Error('Twitter credentials are required');
43-
}
44-
45-
try {
46-
await scraper.login(
47-
process.env.TWITTER_USERNAME,
48-
process.env.TWITTER_PASSWORD,
49-
process.env.TWITTER_EMAIL
50-
);
51-
52-
// Save cookies for future use
53-
const cookies = await scraper.getCookies();
54-
fs.writeFileSync('./cookies.json', JSON.stringify(cookies, null, 2));
55-
console.log('Logged in and saved new cookies');
56-
} catch (e) {
57-
console.error('Login failed:', e);
58-
throw e;
59-
}
60-
}
61-
62-
return scraper;
63-
}
64-
6510
async function processTwitterData() {
6611
try {
67-
const scraper = await initTwitterScraper();
68-
69-
if (!(await scraper.isLoggedIn())) {
70-
console.error('Failed to authenticate with Twitter');
71-
process.exit(1);
72-
}
73-
7412
// Get the most recent tweets file
7513
const files = fs.readdirSync(tweetsDir)
7614
.filter(f => f.endsWith('.json') && !f.endsWith('-export.json'))
@@ -113,16 +51,8 @@ async function processTwitterData() {
11351

11452
console.log(`Extracted ${tweetTexts.length} tweets and saved to ${outputFile}`);
11553

116-
// Cleanup
117-
await scraper.logout();
118-
console.log("Logged out successfully!");
119-
12054
} catch (error) {
12155
console.error("An error occurred:", error);
122-
if (error.response) {
123-
console.error("Response data:", error.response.data);
124-
console.error("Response status:", error.response.status);
125-
}
12656
process.exit(1);
12757
}
12858
}

scripts/gettweets.mjs

+9-2
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,20 @@ import path from "path";
55

66
dotenv.config();
77

8+
// Get username from command line argument
9+
const TARGET_USERNAME = process.argv[2];
10+
if (!TARGET_USERNAME) {
11+
console.error("Please provide a Twitter username as an argument");
12+
console.error("Usage: ts-node twitter-scraper.ts <username>");
13+
process.exit(1);
14+
}
15+
816
// Create necessary directories
917
const DATA_DIR = path.join(process.cwd(), 'data', 'tweets');
1018
if (!fs.existsSync(DATA_DIR)) {
1119
fs.mkdirSync(DATA_DIR, { recursive: true });
1220
}
1321

14-
const TARGET_USERNAME = "michaelmjfm"; // Change this to the user you want to scrape
15-
1622
// Create timestamp for filename
1723
const timestamp = new Date().toISOString()
1824
.replace(/[:.]/g, '-')
@@ -21,6 +27,7 @@ const timestamp = new Date().toISOString()
2127

2228
const TWEETS_FILE = path.join(DATA_DIR, `${TARGET_USERNAME}_${timestamp}.json`);
2329

30+
2431
async function initTwitterScraper() {
2532
const scraper = new Scraper();
2633
let isAuthenticated = false;

0 commit comments

Comments
 (0)