
Commit

update user-agent, change parser from 'html.parser' to 'lxml', and add a new dependency in setup.py

yoshikuniii committed Feb 24, 2023
1 parent 2e81342 commit 892627d
Showing 6 changed files with 15 additions and 8 deletions.
3 changes: 3 additions & 0 deletions pynimeapi/classes/datatype.py
@@ -3,6 +3,9 @@ def __init__(self, title: str, category_url: str):
self.title = title
self.category_url = category_url

+    def __str__(self):
+        return f"title: {self.title} | category_url: {self.category_url}"
+
class AnimeDetailsObj:
def __init__(
self, season: str, title: str,
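The added __str__ makes these result objects printable directly. A minimal, self-contained sketch of the new behaviour, reproducing the class as it looks after this commit (the class name SearchResultObj is an assumption based on the __init__ signature in the hunk header, not taken from the diff):

class SearchResultObj:  # name assumed for illustration; fields match the diff
    def __init__(self, title: str, category_url: str):
        self.title = title
        self.category_url = category_url

    def __str__(self):
        return f"title: {self.title} | category_url: {self.category_url}"

# print() now routes through the new __str__ instead of the default repr.
print(SearchResultObj("One Piece", "https://gogoanime.example/category/one-piece"))
# -> title: One Piece | category_url: https://gogoanime.example/category/one-piece
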
3 changes: 2 additions & 1 deletion pynimeapi/downloader/http_downloader.py
@@ -8,7 +8,8 @@ class HTTPDownloader:
def __init__(self):
self.chunksize = 16384
        self.headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0"
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0",
+            "Connection": "Keep-Alive",
}

def remove_forbiden_string(self, input_string):
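For context, a hedged sketch of how headers like these are typically passed to requests for a chunked download; the URL, file name, and streaming logic are illustrative assumptions, not code from this repository:

import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0",
    "Connection": "Keep-Alive",
}

# Stream the response and write it in chunks, mirroring the 16384-byte
# chunk size set in HTTPDownloader.__init__.
with requests.get("https://example.com/episode.mp4", headers=headers, stream=True) as resp:
    resp.raise_for_status()
    with open("episode.mp4", "wb") as out:
        for chunk in resp.iter_content(chunk_size=16384):
            out.write(chunk)
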
6 changes: 3 additions & 3 deletions pynimeapi/pynime.py
@@ -60,7 +60,7 @@ def get_anime_details(self, anime_category_url: str) -> AnimeDetailsObj:
'''
try:
detail_page = requests.get(anime_category_url)
soup = BeautifulSoup(detail_page.text, "html.parser")
soup = BeautifulSoup(detail_page.text, "lxml")
info_body = soup.find("div", {"class": "anime_info_body_bg"})
image_url = info_body.find("img")["src"]
other_info = info_body.find_all("p", {"class": "type"})
@@ -70,7 +70,7 @@ def get_anime_details(self, anime_category_url: str) -> AnimeDetailsObj:
synopsis = other_info[1].text.replace("\n", "")
genres = [
x["title"]
-                for x in BeautifulSoup(str(other_info[2]), "html.parser").find_all("a")
+                for x in BeautifulSoup(str(other_info[2]), "lxml").find_all("a")
]
released = other_info[3].text.replace("Released: ", "")
status = other_info[4].text.replace("\n", "").replace("Status: ", "")
@@ -104,7 +104,7 @@ def get_episode_urls(self, anime_category_url: str) -> list:
res = requests.get("https://ajax.gogo-load.com/ajax/load-list-episode",
params={"ep_start": 0, "ep_end": 9999, "id": anime_id}, )

soup = BeautifulSoup(res.content, "html.parser")
soup = BeautifulSoup(res.content, "lxml")
eps_urls = soup.find_all("a")

# Append found links to list
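The parser swap is the core of this commit: "lxml" is a faster, C-backed parser, but unlike "html.parser" it does not ship with the standard library, which is why setup.py below gains an lxml dependency. A minimal before/after illustration (the sample HTML is made up):

from bs4 import BeautifulSoup

html = '<div class="anime_info_body_bg"><img src="/cover.png"></div>'

soup = BeautifulSoup(html, "lxml")           # after this commit; requires the lxml package
# soup = BeautifulSoup(html, "html.parser")  # before this commit; stdlib-only
print(soup.find("div", {"class": "anime_info_body_bg"}).find("img")["src"])  # /cover.png
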
3 changes: 3 additions & 0 deletions pynimeapi/schedule.py
@@ -8,6 +8,9 @@
from pynimeapi.classes.datatype import *
from pynimeapi.classes.color import bcolors

+# This class is not part of the GoGoAnime website.
+# Instead, it uses the GraphQL API from AniList.
+
class GetSchedule:
def __init__(self):
# anilist API backend URL
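The new comment notes that GetSchedule queries AniList instead of scraping GoGoAnime. The exact query is not shown in this diff, so the following is only an illustrative sketch of an AniList GraphQL call; the airing-schedule query and field selection are assumptions, while https://graphql.anilist.co is AniList's public GraphQL endpoint:

import requests

# Hypothetical query: one page of upcoming airing times.
query = """
query ($page: Int) {
  Page(page: $page) {
    airingSchedules(notYetAired: true, sort: TIME) {
      airingAt
      episode
      media { title { romaji } }
    }
  }
}
"""

resp = requests.post("https://graphql.anilist.co",
                     json={"query": query, "variables": {"page": 1}})
resp.raise_for_status()
print(resp.json()["data"]["Page"]["airingSchedules"][:3])
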
4 changes: 2 additions & 2 deletions pynimeapi/streaming/url_handler.py
@@ -56,13 +56,13 @@ def aes_decrypt(self, data, key):

def get_data(self, embed_url):
r = self.session.get(embed_url)
soup = BeautifulSoup(r.content, "html.parser")
soup = BeautifulSoup(r.content, "lxml")
crypto = soup.find("script", {"data-name": "episode"})
return crypto["data-value"]

def stream_url(self):
r = self.session.get(self.episode_link)
soup = BeautifulSoup(r.content, "html.parser")
soup = BeautifulSoup(r.content, "lxml")
link = soup.find("a", {"class": "active", "rel": "1"})
embed_url = f'https:{link["data-video"]}'

4 changes: 2 additions & 2 deletions setup.py
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

-VERSION = '0.1.42'
+VERSION = '0.1.43'
DESCRIPTION = 'Yet simple API wrapper for GoGoAnime'
LONG_DESCRIPTION = 'PyNime is a (simple) straightforward Python3 script to scrape GoGoAnime using Python. The project is a work in progress, not finished yet. But, the code works well, feel free to take part of the code.'

@@ -15,7 +15,7 @@
long_description=LONG_DESCRIPTION,
long_description_content_type="text/markdown",
packages=find_packages(),
-    install_requires=['beautifulsoup4', 'requests', 'pycryptodome', 'm3u8'],
+    install_requires=['beautifulsoup4', 'requests', 'pycryptodome', 'm3u8', 'lxml'],

keywords=['python', 'downloader', 'anime', 'webscrapping', 'beautifulsoup4', 'gogoanime', 'gogoanime-scraper'],
classifiers= [
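With lxml now in install_requires, a fresh install of the package pulls the parser in automatically. A quick post-install sanity check (bs4 raises bs4.FeatureNotFound if the lxml parser is requested but not installed):

from bs4 import BeautifulSoup

# Raises bs4.FeatureNotFound when lxml is missing; succeeds otherwise.
BeautifulSoup("<p>ok</p>", "lxml")
print("lxml parser available")
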

