Skip to content

Commit 2bdca4e

Browse files
committed
feat: Get Proxy Cloudflare Checker Working
Not finished yet: proxy rotation still has issues.
1 parent 675c867 commit 2bdca4e

File tree

8 files changed

+370
-1
lines changed

8 files changed

+370
-1
lines changed

.gitignore

+6
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,9 @@
1313

1414
# Dependency directories (remove the comment below to include it)
1515
# vendor/
16+
17+
proxies.txt
18+
cloudflare-proxy-ban-checker
19+
bad.txt
20+
good.txt
21+
.idea

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2020 Aurélien SCHILTZ
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

+28-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,29 @@
11
# cloudflare-proxy-ban-checker
2-
Checks a set of proxies against a protected cloudflare website to check if the proxies are banned or not.
2+
3+
Checks a set of proxies against a Cloudflare-protected website to determine whether each proxy is banned.
4+
5+
# Installation
6+
7+
Very simple:
8+
9+
```
10+
git clone git@github.com:kangoo13/cloudflare-proxy-ban-checker.git
11+
cd cloudflare-proxy-ban-checker
12+
go build
13+
./cloudflare-proxy-ban-checker -h
14+
Usage: cloudflare-proxy-ban-checker [--goodproxiespath GOODPROXIESPATH] [--badproxiespath BADPROXIESPATH] [--timeoutproxy TIMEOUTPROXY] WEBSITE PROXYLIST
15+
16+
Positional arguments:
17+
WEBSITE website to test proxy against cloudflare
18+
PROXYLIST path to the proxyList
19+
20+
Options:
21+
--goodproxiespath GOODPROXIESPATH
22+
path to the good proxies identified [default: good.txt]
23+
--badproxiespath BADPROXIESPATH
24+
path to the bad proxies identified [default: bad.txt]
25+
--timeoutproxy TIMEOUTPROXY
26+
timeout proxy duration [default: 5]
27+
--help, -h display this help and exit
28+
```
29+

cfprxchecker/checker.go

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package cfprxchecker
2+
3+
import (
4+
"github.com/gocolly/colly"
5+
"github.com/gocolly/colly/extensions"
6+
"log"
7+
"net/http"
8+
"os"
9+
"strings"
10+
"sync"
11+
"time"
12+
)
13+
14+
// Args bundles the inputs and output sinks of a proxy check run.
//
// An Args value holds mutexes and must therefore be passed by pointer.
type Args struct {
	// WebsiteToCrawl is the URL that is requested through the proxies.
	WebsiteToCrawl string
	// ProxyList holds the proxy URLs to check, one entry per proxy.
	ProxyList []string
	// GoodProxiesOutputFile receives one line per proxy reported as good.
	// A nil file disables recording.
	GoodProxiesOutputFile *os.File
	// BadProxiesOutputFile receives one line per proxy reported as bad.
	// A nil file disables recording.
	BadProxiesOutputFile *os.File
	// TimeoutProxy is the time budget intended for a single proxy check.
	TimeoutProxy time.Duration

	// muBad serialises writes to BadProxiesOutputFile.
	muBad sync.Mutex
	// muGood serialises writes to GoodProxiesOutputFile.
	muGood sync.Mutex
}

// appendProxyLine writes proxy followed by a newline to out while holding mu.
// It is a no-op returning nil when proxy is empty or out is nil, so callers
// that did not configure an output file do not fail.
func appendProxyLine(mu *sync.Mutex, out *os.File, proxy string) error {
	if proxy == "" || out == nil {
		return nil
	}

	// Build the whole line first so it reaches the file in a single write.
	var b strings.Builder
	b.Grow(len(proxy) + 1)
	b.WriteString(proxy)
	b.WriteString("\n")

	mu.Lock()
	defer mu.Unlock()

	_, err := out.WriteString(b.String())
	return err
}

// writeGoodProxy appends proxy to GoodProxiesOutputFile. Empty proxies and a
// nil output file are ignored; a failed write terminates the program.
func (a *Args) writeGoodProxy(proxy string) {
	if err := appendProxyLine(&a.muGood, a.GoodProxiesOutputFile, proxy); err != nil {
		log.Fatalf("[WriteGoodProxy] error while appending to file %s", err)
	}
}

// writeBadProxy appends proxy to BadProxiesOutputFile. Empty proxies and a
// nil output file are ignored; a failed write terminates the program.
func (a *Args) writeBadProxy(proxy string) {
	if err := appendProxyLine(&a.muBad, a.BadProxiesOutputFile, proxy); err != nil {
		log.Fatalf("[WriteBadProxy] error while appending to file %s", err)
	}
}
56+
57+
func CheckProxiesAgainstCloudFlare(args *Args) {
58+
// Rotate the proxies
59+
rp, err := RoundRobinProxySwitcher(args.ProxyList...)
60+
if err != nil {
61+
log.Fatal(err)
62+
}
63+
64+
// Instantiate default collector
65+
c := colly.NewCollector(
66+
colly.Async(true),
67+
)
68+
69+
c.IgnoreRobotsTxt = true
70+
c.AllowURLRevisit = true
71+
c.CacheDir = ""
72+
73+
c.WithTransport(&http.Transport{
74+
Proxy: rp,
75+
DisableKeepAlives: true,
76+
MaxIdleConns: 100,
77+
MaxIdleConnsPerHost: 100,
78+
})
79+
80+
// Limit the maximum parallelism to 24
81+
err = c.Limit(&colly.LimitRule{DomainGlob: "*", Parallelism: 24})
82+
83+
if err != nil {
84+
log.Printf("error while doing c.Limit %s", err)
85+
}
86+
87+
extensions.RandomUserAgent(c)
88+
89+
if args.BadProxiesOutputFile != nil {
90+
c.OnError(func(response *colly.Response, err error) {
91+
log.Printf("[DEBUG] Bad proxy found error status %d [%s] [%s]", response.StatusCode, response.Request.ProxyURL, err)
92+
args.writeBadProxy(response.Request.ProxyURL)
93+
})
94+
}
95+
96+
c.OnResponse(func(response *colly.Response) {
97+
log.Printf("[DEBUG] Good proxy found [%s]", response.Request.ProxyURL)
98+
args.writeGoodProxy(response.Request.ProxyURL)
99+
})
100+
101+
for _, proxy := range args.ProxyList {
102+
log.Printf("[DEBUG] Doing %s", proxy)
103+
if err = c.Visit(args.WebsiteToCrawl); err != nil {
104+
log.Printf("Error happening doing Visit %s", err)
105+
}
106+
}
107+
108+
// Wait until threads are finished
109+
c.Wait()
110+
}
+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package cfprxchecker
2+
3+
import (
4+
"context"
5+
"github.com/gocolly/colly"
6+
"log"
7+
"net/http"
8+
"net/url"
9+
"sync/atomic"
10+
)
11+
12+
// roundRobinSwitcher hands out proxy URLs in rotation.
type roundRobinSwitcher struct {
	// proxyURLs is the ordered list the rotation walks through.
	proxyURLs []*url.URL
	// proxyURLsMap indexes the same URLs by their string form.
	proxyURLsMap map[string]*url.URL
	// index counts how many proxies have been handed out by rotation.
	index uint32
}
17+
18+
func (r *roundRobinSwitcher) GetProxy(pr *http.Request) (*url.URL, error) {
19+
var u *url.URL
20+
// Use Same Proxy when Redirect
21+
if pr.Response != nil && pr.Response.StatusCode == 301 {
22+
u = r.proxyURLsMap[pr.Response.Request.Context().Value(colly.ProxyURLKey).(string)]
23+
log.Printf("titi %s", u.String())
24+
} else if pr.Response != nil {
25+
log.Printf("tototototo")
26+
} else {
27+
u = r.proxyURLs[r.index%uint32(len(r.proxyURLs))]
28+
atomic.AddUint32(&r.index, 1)
29+
log.Printf("grosminet %s", u.String())
30+
}
31+
32+
ctx := context.WithValue(pr.Context(), colly.ProxyURLKey, u.String())
33+
*pr = *pr.WithContext(ctx)
34+
35+
return u, nil
36+
}
37+
38+
// RoundRobinProxySwitcher creates a proxy switcher function which rotates
39+
// ProxyURLs on every request.
40+
// The proxy type is determined by the URL scheme. "http", "https"
41+
// and "socks5" are supported. If the scheme is empty,
42+
// "http" is assumed.
43+
func RoundRobinProxySwitcher(ProxyURLs ...string) (colly.ProxyFunc, error) {
44+
urls := make([]*url.URL, len(ProxyURLs))
45+
urlsMap := make(map[string]*url.URL, len(ProxyURLs))
46+
for i, u := range ProxyURLs {
47+
parsedU, err := url.Parse(u)
48+
if err != nil {
49+
return nil, err
50+
}
51+
urls[i] = parsedU
52+
urlsMap[parsedU.String()] = parsedU
53+
}
54+
return (&roundRobinSwitcher{
55+
proxyURLs: urls,
56+
proxyURLsMap: urlsMap,
57+
index: 0,
58+
}).GetProxy, nil
59+
}

go.mod

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
module github.com/kangoo13/cloudflare-proxy-ban-checker
2+
3+
go 1.13
4+
5+
require (
6+
github.com/PuerkitoBio/goquery v1.5.1 // indirect
7+
github.com/alexflint/go-arg v1.3.0
8+
github.com/antchfx/htmlquery v1.2.2 // indirect
9+
github.com/antchfx/xmlquery v1.2.3 // indirect
10+
github.com/antchfx/xpath v1.1.4 // indirect
11+
github.com/gobwas/glob v0.2.3 // indirect
12+
github.com/gocolly/colly v1.2.0
13+
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
14+
github.com/golang/protobuf v1.3.4 // indirect
15+
github.com/jawher/mow.cli v1.1.0 // indirect
16+
github.com/kennygrant/sanitize v1.2.4 // indirect
17+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
18+
github.com/temoto/robotstxt v1.1.1 // indirect
19+
golang.org/x/net v0.0.0-20200301022130-244492dfa37a // indirect
20+
google.golang.org/appengine v1.6.5 // indirect
21+
)

go.sum

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
2+
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
3+
github.com/alexflint/go-arg v1.3.0 h1:UfldqSdFWeLtoOuVRosqofU4nmhI1pYEbT4ZFS34Bdo=
4+
github.com/alexflint/go-arg v1.3.0/go.mod h1:9iRbDxne7LcR/GSvEr7ma++GLpdIU1zrghf2y2768kM=
5+
github.com/alexflint/go-scalar v1.0.0 h1:NGupf1XV/Xb04wXskDFzS0KWOLH632W/EO4fAFi+A70=
6+
github.com/alexflint/go-scalar v1.0.0/go.mod h1:GpHzbCOZXEKMEcygYQ5n/aa4Aq84zbxjy3MxYW0gjYw=
7+
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
8+
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
9+
github.com/antchfx/htmlquery v1.2.2 h1:exe4hUStBqXdRZ+9nB7EYA+W2zfIHIq3rRFpChh+VSk=
10+
github.com/antchfx/htmlquery v1.2.2/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
11+
github.com/antchfx/xmlquery v1.2.3 h1:++irmxT+Pkn55FGtSTkUTHarZ6E0b1yyR+UiPZRA+eY=
12+
github.com/antchfx/xmlquery v1.2.3/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
13+
github.com/antchfx/xpath v1.1.4 h1:naPIpjBGeT3eX0Vw7E8iyHsY8FGt6EbGdkcd8EZCo+g=
14+
github.com/antchfx/xpath v1.1.4/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
15+
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
16+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
17+
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
18+
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
19+
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
20+
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
21+
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
22+
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
23+
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
24+
github.com/golang/protobuf v1.3.4 h1:87PNWwrRvUSnqS4dlcBU/ftvOIBep4sYuBLlh6rX2wk=
25+
github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
26+
github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg=
27+
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
28+
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
29+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
30+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
31+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
32+
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
33+
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
34+
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
35+
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
36+
github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
37+
github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
38+
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
39+
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
40+
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
41+
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
42+
golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
43+
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
44+
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
45+
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
46+
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
47+
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
48+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
49+
google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM=
50+
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=

main.go

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package main
2+
3+
import (
4+
"bufio"
5+
"github.com/alexflint/go-arg"
6+
"github.com/kangoo13/cloudflare-proxy-ban-checker/cfprxchecker"
7+
"log"
8+
"os"
9+
"strings"
10+
"time"
11+
)
12+
13+
func main() {
14+
var args cfprxchecker.Args
15+
var inputArgs struct {
16+
Website string `arg:"positional,required" help:"website to test proxy against cloudflare"`
17+
ProxyList string `arg:"positional,required" help:"path to the proxyList"`
18+
GoodProxiesPath string `default:"good.txt" help:"path to the good proxies identified"`
19+
BadProxiesPath string `default:"bad.txt" help:"path to the bad proxies identified"`
20+
TimeoutProxy int64 `default:"5" help:"timeout proxy duration"`
21+
}
22+
23+
arg.MustParse(&inputArgs)
24+
25+
if inputArgs.GoodProxiesPath != "" {
26+
args.GoodProxiesOutputFile = OpenFile(inputArgs.GoodProxiesPath)
27+
defer args.GoodProxiesOutputFile.Close()
28+
}
29+
30+
if inputArgs.BadProxiesPath != "" {
31+
args.BadProxiesOutputFile = OpenFile(inputArgs.BadProxiesPath)
32+
defer args.BadProxiesOutputFile.Close()
33+
}
34+
35+
args.WebsiteToCrawl = inputArgs.Website
36+
args.ProxyList = FileToStringSlice(inputArgs.ProxyList)
37+
args.TimeoutProxy = time.Duration(inputArgs.TimeoutProxy) * time.Second
38+
39+
cfprxchecker.CheckProxiesAgainstCloudFlare(&args)
40+
}
41+
42+
// OpenFile opens filePath write-only in append mode, creating the file when
// it does not exist, and returns the handle. Any failure to open the file
// terminates the program.
func OpenFile(filePath string) *os.File {
	const (
		openFlags = os.O_APPEND | os.O_WRONLY | os.O_CREATE
		fileMode  = os.ModeAppend | 0660
	)

	out, openErr := os.OpenFile(filePath, openFlags, fileMode)
	if openErr == nil {
		return out
	}

	log.Fatalf("[OpenFile] error while opening file %s", openErr)
	return nil // not reached: log.Fatalf exits the process
}
50+
51+
// FileToStringSlice reads filePath line by line and returns its non-blank
// lines with surrounding whitespace removed, in file order. Lines that are
// empty or contain only whitespace are skipped.
//
// A failure to open or to read the file terminates the program.
func FileToStringSlice(filePath string) []string {
	readFile, err := os.Open(filePath)
	if err != nil {
		log.Fatalf("failed to open file: %s", err)
	}
	defer readFile.Close()

	var fileTextLines []string

	fileScanner := bufio.NewScanner(readFile)
	fileScanner.Split(bufio.ScanLines)

	for fileScanner.Scan() {
		// Trim so stray spaces or tabs never end up inside an entry.
		if line := strings.TrimSpace(fileScanner.Text()); line != "" {
			fileTextLines = append(fileTextLines, line)
		}
	}

	// Scan stops silently on read errors and over-long lines; surface them
	// instead of returning a truncated list.
	if err := fileScanner.Err(); err != nil {
		log.Fatalf("failed to read file: %s", err)
	}

	return fileTextLines
}

0 commit comments

Comments
 (0)