Skip to content

Commit

Permalink
allow bots to bypass authorization and go directly to the default site
Browse files Browse the repository at this point in the history
  • Loading branch information
briskt committed Nov 26, 2024
1 parent 4453e2f commit a6d8661
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 16 deletions.
5 changes: 5 additions & 0 deletions local-example.env
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ COOKIE_NAME=_auth_proxy
# Host name is required. Port is optional. Path is ignored.
# For example: "one:server1.org,two:server2.net,three:server3.com:8080"
SITES=
# Site used for proxy when no token is available. Example: "server1.org"
DEFAULT_SITE=
# The URL for the management API. Do not include a path or query string
# For example: https://www.example.com
MANAGEMENT_API=
Expand All @@ -19,3 +21,6 @@ TOKEN_PATH=
TOKEN_SECRET=
# disable robots.txt and X-Robots-Tag handling. Default is "false".
ROBOTS_TXT_DISABLE=false
# List of trusted bots which are directly proxied to the site identified by DEFAULT_SITE. Use
# comma-separated user agent keywords. Example: "googlebot,duckduckgo"
TRUSTED_BOTS=
52 changes: 36 additions & 16 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,19 @@ type ProxyClaim struct {
}

type Proxy struct {
DefaultSite string `required:"true" split_words:"true"`
Host string `required:"true"`
TokenSecret string `required:"true" split_words:"true"`
Sites AuthSites `required:"true" split_words:"true"`
ManagementAPI string `required:"true" split_words:"true"`

// optional params
CookieName string `default:"_auth_proxy" split_words:"true"`
ReturnToParam string `default:"returnTo" split_words:"true"`
RobotsTxtDisable bool `default:"false" split_words:"true"`
TokenParam string `default:"token" split_words:"true"`
TokenPath string `default:"/auth/token" split_words:"true"`
CookieName string `split_words:"true" default:"_auth_proxy"`
ReturnToParam string `split_words:"true" default:"returnTo"`
RobotsTxtDisable bool `split_words:"true" default:"false"`
TokenParam string `split_words:"true" default:"token"`
TokenPath string `split_words:"true" default:"/auth/token"`
TrustedBots []string `split_words:"true" default:"googlebot"`

// Secret is the binary token secret. Must be exported to be valid after being passed back from Caddy.
Secret []byte `ignored:"true"`
Expand Down Expand Up @@ -124,6 +126,12 @@ func (p Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.
}

func (p Proxy) handleRequest(w http.ResponseWriter, r *http.Request) error {
if p.isTrustedBot(r) {
upstream := p.DefaultSite
p.setVar(r, CaddyVarUpstream, upstream)
return nil
}

queryToken := p.getTokenFromQueryString(r)
queryClaim := p.getClaimFromToken(queryToken)
cookieToken := p.getTokenFromCookie(r)
Expand Down Expand Up @@ -176,10 +184,7 @@ func (p Proxy) handleRequest(w http.ResponseWriter, r *http.Request) error {
return nil
}

upstream, err := p.getSite(claim.Level)
if err != nil {
return err
}
upstream := p.getSite(claim.Level)

p.setVar(r, CaddyVarUpstream, upstream)
return nil
Expand Down Expand Up @@ -215,6 +220,10 @@ func newProxy() (Proxy, error) {
if err != nil {
return p, fmt.Errorf("unable to decode Proxy TokenSecret: %w", err)
}

for i := range p.TrustedBots {
p.TrustedBots[i] = strings.ToLower(p.TrustedBots[i])
}
return p, nil
}

Expand All @@ -230,16 +239,12 @@ func (p Proxy) getTokenFromCookie(r *http.Request) string {
return cookie.Value
}

func (p Proxy) getSite(level string) (string, error) {
func (p Proxy) getSite(level string) string {
upstream, ok := p.Sites[level]
if !ok {
return "", &Error{
err: fmt.Errorf("auth level '%v' not in sites: %v", level, p.Sites),
Message: "error: unrecognized access level",
Status: http.StatusBadRequest,
}
return p.DefaultSite
}
return upstream, nil
return upstream
}

func (p Proxy) clearQueryToken(r *http.Request) {
Expand Down Expand Up @@ -320,6 +325,21 @@ func (p Proxy) getNewToken(_ http.ResponseWriter, r *http.Request) error {
return nil
}

// isTrustedBot reports whether the request's User-Agent header contains one of the keywords listed in
// p.TrustedBots. The header is lower-cased before matching, so keywords must already be lower case.
// A request without a User-Agent header is never treated as a trusted bot.
//
// Blank keywords (empty or whitespace only, e.g. produced by a trailing comma in the configuration) are
// skipped: strings.Contains reports true for an empty substring, so a blank entry would otherwise mark every
// request that carries a User-Agent as a trusted bot. Whitespace surrounding a keyword is ignored.
//
// NOTE(review): the User-Agent header is supplied by the client, so a match is not proof of identity.
func (p Proxy) isTrustedBot(r *http.Request) bool {
	userAgent := strings.ToLower(r.Header.Get("User-Agent"))
	if userAgent == "" {
		return false
	}
	for _, s := range p.TrustedBots {
		keyword := strings.TrimSpace(s)
		if keyword == "" {
			// An empty keyword is a substring of every string; never let it match.
			continue
		}
		if strings.Contains(userAgent, keyword) {
			return true
		}
	}
	return false
}

// claimsAreValidAndDifferent reports whether both claims are valid and were issued at different instants.
// The issue times are only inspected once both claims are known to be valid.
func claimsAreValidAndDifferent(a, b ProxyClaim) bool {
	if !a.IsValid || !b.IsValid {
		return false
	}
	sameIssueTime := a.IssuedAt.Time.Equal(b.IssuedAt.Time)
	return !sameIssueTime
}
45 changes: 45 additions & 0 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,48 @@ func makeTestJWT(secret []byte, level string, expires time.Time) string {

return tokenString
}

// TestProxy_isTrustedBot runs Proxy.isTrustedBot against a table of User-Agent values and trusted-keyword
// lists and checks the reported result for each combination.
func TestProxy_isTrustedBot(t *testing.T) {
	type testCase struct {
		name      string
		trusted   []string
		userAgent string
		want      bool
	}

	cases := []testCase{
		{name: "empty user agent", trusted: []string{"googlebot"}, userAgent: "", want: false},
		{name: "empty trusted list", trusted: nil, userAgent: "Googlebot/2.1 (+http://www.googlebot.com/bot.html)", want: false},
		{name: "not in trusted list", trusted: []string{"googlebot"}, userAgent: "duckduckgo", want: false},
		{name: "in trusted list", trusted: []string{"duckduckgo", "googlebot"}, userAgent: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", want: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Build a request that carries only the User-Agent under test.
			req := httptest.NewRequest(http.MethodGet, "/", nil)
			req.Header.Set("User-Agent", tc.userAgent)

			p := Proxy{TrustedBots: tc.trusted}
			got := p.isTrustedBot(req)

			assert.Equalf(t, tc.want, got, "user agent '%s', trusted %+v", req.Header.Get("User-Agent"), tc.trusted)
		})
	}
}

0 comments on commit a6d8661

Please sign in to comment.