Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle orm-rt cookie issue #152

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.vscode/
.venv/
.env/
venv

Books/

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
lxml>=4.1.1
requests>=2.20.0
requests>=2.22.0

105 changes: 43 additions & 62 deletions safaribooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
import argparse
import requests
import traceback
import re
from http.cookies import SimpleCookie
from lxml import html, etree
from html import escape
from random import random
from multiprocessing import Process, Queue, Value
from urllib.parse import urljoin, urlsplit, urlparse
from urllib.parse import urljoin, urlparse, parse_qs, quote_plus


PATH = os.path.dirname(os.path.realpath(__file__))
Expand All @@ -27,6 +29,7 @@
ORLY_BASE_URL = "https://www." + ORLY_BASE_HOST
SAFARI_BASE_URL = "https://" + SAFARI_BASE_HOST
API_ORIGIN_URL = "https://" + API_ORIGIN_HOST
PROFILE_URL = SAFARI_BASE_URL + "/profile/"


class Display:
Expand Down Expand Up @@ -216,20 +219,6 @@ class SafariBooks:

API_TEMPLATE = SAFARI_BASE_URL + "/api/v1/book/{0}/"

HEADERS = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
"accept-encoding": "gzip, deflate",
"accept-language": "it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7",
"cache-control": "no-cache",
"cookie": "",
"pragma": "no-cache",
"origin": SAFARI_BASE_URL,
"referer": LOGIN_ENTRY_URL,
"upgrade-insecure-requests": "1",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/60.0.3112.113 Safari/537.36"
}

BASE_01_HTML = "<!DOCTYPE html>\n" \
"<html lang=\"en\" xml:lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\"" \
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" \
Expand Down Expand Up @@ -299,26 +288,30 @@ class SafariBooks:
"<navMap>{4}</navMap>\n" \
"</ncx>"

COOKIE_FLOAT_MAX_AGE_PATTERN = re.compile(r'(max-age=\d*)\.\d*', re.IGNORECASE)

def __init__(self, args):
self.args = args
self.display = Display("info_%s.log" % escape(args.bookid))
self.display.intro()

self.cookies = {}
self.session = requests.Session()
self.jwt = {}

if not args.cred:
if not os.path.isfile(COOKIES_FILE):
self.display.exit("Login: unable to find cookies file.\n"
" Please use the `--cred` or `--login` options to perform the login.")

self.cookies = json.load(open(COOKIES_FILE))
self.session.cookies.update(json.load(open(COOKIES_FILE)))

else:
self.display.info("Logging into Safari Books Online...", state=True)
self.do_login(*args.cred)
if not args.no_cookies:
json.dump(self.cookies, open(COOKIES_FILE, "w"))
json.dump(self.session.cookies.get_dict(), open(COOKIES_FILE, 'w'))

self.check_login()

self.book_id = args.bookid
self.api_url = self.API_TEMPLATE.format(self.book_id)
Expand Down Expand Up @@ -386,44 +379,30 @@ def __init__(self, args):
self.create_epub()

if not args.no_cookies:
json.dump(self.cookies, open(COOKIES_FILE, "w"))
json.dump(self.session.cookies.get_dict(), open(COOKIES_FILE, "w"))

self.display.done(os.path.join(self.BOOK_PATH, self.book_id + ".epub"))
self.display.unregister()

if not self.display.in_error and not args.log:
os.remove(self.display.log_file)

def return_cookies(self):
    """Serialize ``self.cookies`` into a single ``name=value;`` string.

    Each cookie is rendered as ``name=value;`` and the entries are joined
    with a single space, in the iteration order of ``self.cookies``.
    """
    rendered_pairs = []
    for cookie_name, cookie_value in self.cookies.items():
        rendered_pairs.append("{0}={1};".format(cookie_name, cookie_value))
    return " ".join(rendered_pairs)
def update_cookie_jar_with_float_max_age_cookies(self, set_cookie_headers):
    """Re-add cookies whose ``max-age`` attribute carries a fractional part.

    For every raw ``Set-Cookie`` header value, the fractional part of
    ``max-age`` (as matched by ``COOKIE_FLOAT_MAX_AGE_PATTERN``) is stripped.
    Only headers that were actually rewritten are parsed with ``SimpleCookie``
    and stored in the session cookie jar; all other headers are left alone.

    NOTE(review): presumably the session's own cookie handling rejects
    non-integer ``max-age`` values, which is why these are re-parsed here
    -- confirm.

    :param set_cookie_headers: iterable of raw ``Set-Cookie`` header strings.
    """
    jar = self.session.cookies
    for raw_header in set_cookie_headers:
        # subn reports how many substitutions happened; every match contains
        # the "." that gets dropped, so a non-zero count means the text changed.
        normalized_header, replaced = self.COOKIE_FLOAT_MAX_AGE_PATTERN.subn(r'\1', raw_header)
        if not replaced:
            continue
        parsed_cookie = SimpleCookie(normalized_header)
        for name in parsed_cookie:
            jar.set(name, parsed_cookie[name])

def return_headers(self, url):
    """Return the shared ``HEADERS`` dict with its ``cookie`` entry set for *url*.

    The serialized cookies are attached only when ``ORLY_BASE_HOST`` occurs in
    the URL's network location; for any other host the ``cookie`` header is
    set to an empty string. The same ``HEADERS`` dict object is updated in
    place and returned.

    :param url: the URL about to be requested.
    :return: ``self.HEADERS`` after updating its ``cookie`` entry.
    """
    targets_orly_host = ORLY_BASE_HOST in urlsplit(url).netloc
    self.HEADERS["cookie"] = self.return_cookies() if targets_orly_host else ""
    return self.HEADERS

def update_cookies(self, jar):
    """Copy every cookie from *jar* into ``self.cookies``, except ``sessionid``.

    :param jar: iterable of cookie objects exposing ``name`` and ``value``.
    """
    for cookie in jar:
        if cookie.name == 'sessionid':  # TODO
            continue
        self.cookies[cookie.name] = cookie.value

def requests_provider(
self, url, post=False, data=None, perfom_redirect=True, update_cookies=True, update_referer=True, **kwargs
):
def requests_provider(self, url, is_post=False, data=None, perform_redirect=True, **kwargs):
try:
response = getattr(requests, "post" if post else "get")(
response = getattr(self.session, "post" if is_post else "get")(
url,
headers=self.return_headers(url),
data=data,
allow_redirects=False,
**kwargs
)
self.update_cookie_jar_with_float_max_age_cookies(response.raw.headers.getlist("Set-Cookie"))

self.display.last_request = (
url, data, kwargs, response.status_code, "\n".join(
Expand All @@ -435,16 +414,8 @@ def requests_provider(
self.display.error(str(request_exception))
return 0

if update_cookies:
self.update_cookies(response.cookies)

if update_referer:
# TODO Update Referer HTTP Header
# TODO How about Origin?
self.HEADERS["referer"] = response.request.url

if response.is_redirect and perfom_redirect:
return self.requests_provider(response.next.url, post, None, perfom_redirect, update_cookies, update_referer)
if response.is_redirect and perform_redirect:
return self.requests_provider(response.next.url, is_post, None, perform_redirect)
# TODO How about **kwargs?

return response
Expand All @@ -468,19 +439,18 @@ def do_login(self, email, password):
if response == 0:
self.display.exit("Login: unable to reach Safari Books Online. Try again...")

redirect_uri = response.request.path_url[response.request.path_url.index("redirect_uri"):] # TODO try...catch
redirect_uri = redirect_uri[:redirect_uri.index("&")]
redirect_uri = "https://api.oreilly.com%2Fapi%2Fv1%2Fauth%2Fopenid%2Fauthorize%3F" + redirect_uri
next_parameter = parse_qs(urlparse(response.request.url).query)['next'][0]
redirect_uri = f'{API_ORIGIN_URL}{quote_plus(next_parameter)}'

response = self.requests_provider(
self.LOGIN_URL,
post=True,
is_post=True,
json={
"email": email,
"password": password,
"redirect_uri": redirect_uri
},
perfom_redirect=False
perform_redirect=False
)

if response == 0:
Expand Down Expand Up @@ -509,6 +479,19 @@ def do_login(self, email, password):
if response == 0:
self.display.exit("Login: unable to reach Safari Books Online. Try again...")


def check_login(self):
    """Verify the current session by requesting the profile page.

    The request to ``PROFILE_URL`` is made without following redirects.
    ``self.display.exit`` is called when the request provider reports a
    failure (returns ``0``) or when the status code is anything other than
    200; otherwise a success message is shown.
    """
    profile_response = self.requests_provider(PROFILE_URL, perform_redirect=False)

    # The request provider signals a failed request by returning 0.
    if profile_response == 0:
        self.display.exit("Login: unable to reach Safari Books Online. Try again...")

    status = profile_response.status_code
    if status != 200:
        self.display.exit("Authentication issue: unable to access profile page.")

    self.display.info("Successfully authenticated.", state=True)


def get_book_info(self):
response = self.requests_provider(self.api_url)
if response == 0:
Expand Down Expand Up @@ -548,7 +531,7 @@ def get_book_chapters(self, page=1):
return result + (self.get_book_chapters(page + 1) if response["next"] else [])

def get_default_cover(self):
response = self.requests_provider(self.book_info["cover"], update_cookies=False, stream=True)
response = self.requests_provider(self.book_info["cover"], stream=True)
if response == 0:
self.display.error("Error trying to retrieve the cover: %s" % self.book_info["cover"])
return False
Expand Down Expand Up @@ -815,7 +798,7 @@ def _thread_download_css(self, url):
self.display.css_ad_info.value = 1

else:
response = self.requests_provider(url, update_cookies=False)
response = self.requests_provider(url)
if response == 0:
self.display.error("Error trying to retrieve this CSS: %s\n From: %s" % (css_file, url))

Expand All @@ -838,9 +821,7 @@ def _thread_download_images(self, url):
self.display.images_ad_info.value = 1

else:
response = self.requests_provider(urljoin(SAFARI_BASE_URL, url),
update_cookies=False,
stream=True)
response = self.requests_provider(urljoin(SAFARI_BASE_URL, url), stream=True)
if response == 0:
self.display.error("Error trying to retrieve this image: %s\n From: %s" % (image_name, url))

Expand Down