Skip to content

Commit

Permalink
fix: exclude known bad instances
Browse files Browse the repository at this point in the history
these aren't pds or labeler, please stop inserting them!
  • Loading branch information
mary-ext committed Nov 22, 2024
1 parent b7b3421 commit 047ccb2
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 20 deletions.
30 changes: 25 additions & 5 deletions scripts/export-dids.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
type SerializedState,
} from '../src/state';

import { DEFAULT_HEADERS, MAX_FAILURE_DAYS, PLC_URL, RELAY_URL } from '../src/constants';
import { DEFAULT_HEADERS, EXCLUSIONS_RE, MAX_FAILURE_DAYS, PLC_URL, RELAY_URL } from '../src/constants';
import { didDocument, type DidDocument } from '../src/utils/did';
import { PromiseQueue } from '../src/utils/pqueue';
import { LineBreakStream, TextDecoderStream } from '../src/utils/stream';
Expand Down Expand Up @@ -74,7 +74,12 @@ let firehoseCursor: string | undefined = state?.firehose.cursor;
const pds = getEndpoint(operation.services.atproto_pds?.endpoint);
const labeler = getEndpoint(operation.services.atproto_labeler?.endpoint);

if (pds) {
jump: if (pds) {
if (EXCLUSIONS_RE.test(pds)) {
console.log(` found excluded pds: ${pds}`);
break jump;
}

const info = pdses.get(pds);

if (info === undefined) {
Expand All @@ -87,7 +92,12 @@ let firehoseCursor: string | undefined = state?.firehose.cursor;
}
}

if (labeler) {
jump: if (labeler) {
if (EXCLUSIONS_RE.test(labeler)) {
console.log(` found excluded labeler: ${labeler}`);
break jump;
}

const info = labelers.get(labeler);

if (info === undefined) {
Expand Down Expand Up @@ -247,7 +257,12 @@ let firehoseCursor: string | undefined = state?.firehose.cursor;

console.log(` ${did}: pass (updated)`);

if (pds) {
jump: if (pds) {
if (EXCLUSIONS_RE.test(pds)) {
console.log(` found excluded pds: ${pds}`);
break jump;
}

const info = pdses.get(pds);

if (info === undefined) {
Expand All @@ -260,7 +275,12 @@ let firehoseCursor: string | undefined = state?.firehose.cursor;
}
}

if (labeler) {
jump: if (labeler) {
if (EXCLUSIONS_RE.test(labeler)) {
console.log(` found excluded labeler: ${labeler}`);
break jump;
}

const info = labelers.get(labeler);

if (info === undefined) {
Expand Down
4 changes: 4 additions & 0 deletions src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@ export const USER_AGENT = 'github:mary-ext/atproto-scraping';
/** Header map whose single entry, `user-agent`, is set to `USER_AGENT`. */
export const DEFAULT_HEADERS = { 'user-agent': USER_AGENT };

/**
 * Matches endpoint URLs that must never be recorded as a PDS or labeler.
 *
 * None of the hosts listed in the pattern are a personal data server or a
 * labeler instance:
 * - bsky.social is an entryway, not the actual PDS.
 *
 * The whole URL must match (scheme + host only, no path). A single trailing
 * slash is tolerated so that both `https://bsky.social` and the normalised
 * form `https://bsky.social/` (the shape used for keys in state.json) are
 * excluded.
 */
export const EXCLUSIONS_RE =
	/^https?:\/\/(?:bsky\.social|bsky\.network|jetstream\d+\.[a-z-]+\.bsky\.network)\/?$/;
15 changes: 0 additions & 15 deletions state.json
Original file line number Diff line number Diff line change
Expand Up @@ -401,14 +401,6 @@
"did": "did:plc:skibpmllbhxvbvwgtjxl3uao",
"version": "0.1.38"
},
"https://jetstream1.us-east.bsky.network/": {
"did": "did:plc:3hzvshto76gqe5up6gvs7fwq",
"errorAt": 1731892814312
},
"https://jetstream1.us-west.bsky.network/": {
"did": "did:plc:4j6e6lhmrjsihjtshsuh3chk",
"errorAt": 1731719983905
},
"https://jntestlabeler.jamienemeth.co.uk/": {
"did": "did:plc:x7d5qy5kmr34qd4w7ji4cbr4",
"version": null
Expand Down Expand Up @@ -3570,9 +3562,6 @@
"inviteCodeRequired": true,
"version": "0.4.67"
},
"https://bsky.network/": {
"errorAt": 1732246709198
},
"https://bsky.newhimazin.net/": {
"errorAt": 1731806551573
},
Expand Down Expand Up @@ -3860,10 +3849,6 @@
"inviteCodeRequired": true,
"version": "0.4.67"
},
"https://bsky.social/": {
"inviteCodeRequired": false,
"version": "git-7edd51a"
},
"https://bsky.soopy.moe/": {
"inviteCodeRequired": true,
"version": "0.4.67"
Expand Down

0 comments on commit 047ccb2

Please sign in to comment.