
Commit

update user-agent, change parser from 'html.parser' to 'lxml', and add a new dependency in setup.py

yoshikuniii committed Feb 24, 2023
1 parent 2e81342 commit 892627d
Showing 6 changed files with 15 additions and 8 deletions.
3 changes: 3 additions & 0 deletions pynimeapi/classes/datatype.py
@@ -3,6 +3,9 @@ def __init__(self, title: str, category_url: str):
self.title = title
self.category_url = category_url

+    def __str__(self):
+        return f"title: {self.title} | category_url: {self.category_url}"
+
class AnimeDetailsObj:
def __init__(
self, season: str, title: str,
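The added __str__ makes these result objects printable directly. A minimal, self-contained sketch of the new behaviour, reproducing the class as it looks after this commit (the class name SearchResultObj is an assumption based on the __init__ signature in the hunk header, not taken from the diff):

class SearchResultObj:  # name assumed for illustration; fields match the diff
    def __init__(self, title: str, category_url: str):
        self.title = title
        self.category_url = category_url

    def __str__(self):
        return f"title: {self.title} | category_url: {self.category_url}"

# print() now routes through the new __str__ instead of the default repr.
print(SearchResultObj("One Piece", "https://gogoanime.example/category/one-piece"))
# -> title: One Piece | category_url: https://gogoanime.example/category/one-piece
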
3 changes: 2 additions & 1 deletion pynimeapi/downloader/http_downloader.py
@@ -8,7 +8,8 @@ class HTTPDownloader:
def __init__(self):
self.chunksize = 16384
        self.headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0"
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0",
+            "Connection": "Keep-Alive",
}

def remove_forbiden_string(self, input_string):
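For context, a hedged sketch of how headers like these are typically passed to requests for a chunked download; the URL, file name, and streaming logic are illustrative assumptions, not code from this repository:

import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0",
    "Connection": "Keep-Alive",
}

# Stream the response and write it in chunks, mirroring the 16384-byte
# chunk size set in HTTPDownloader.__init__.
with requests.get("https://example.com/episode.mp4", headers=headers, stream=True) as resp:
    resp.raise_for_status()
    with open("episode.mp4", "wb") as out:
        for chunk in resp.iter_content(chunk_size=16384):
            out.write(chunk)
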
6 changes: 3 additions & 3 deletions pynimeapi/pynime.py
@@ -60,7 +60,7 @@ def get_anime_details(self, anime_category_url: str) -> AnimeDetailsObj:
'''
try:
detail_page = requests.get(anime_category_url)
soup = BeautifulSoup(detail_page.text, "html.parser")
soup = BeautifulSoup(detail_page.text, "lxml")
info_body = soup.find("div", {"class": "anime_info_body_bg"})
image_url = info_body.find("img")["src"]
other_info = info_body.find_all("p", {"class": "type"})
@@ -70,7 +70,7 @@ def get_anime_details(self, anime_category_url: str) -> AnimeDetailsObj:
synopsis = other_info[1].text.replace("\n", "")
genres = [
x["title"]
-                for x in BeautifulSoup(str(other_info[2]), "html.parser").find_all("a")
+                for x in BeautifulSoup(str(other_info[2]), "lxml").find_all("a")
]
released = other_info[3].text.replace("Released: ", "")
status = other_info[4].text.replace("\n", "").replace("Status: ", "")
@@ -104,7 +104,7 @@ def get_episode_urls(self, anime_category_url: str) -> list:
res = requests.get("https://ajax.gogo-load.com/ajax/load-list-episode",
params={"ep_start": 0, "ep_end": 9999, "id": anime_id}, )

soup = BeautifulSoup(res.content, "html.parser")
soup = BeautifulSoup(res.content, "lxml")
eps_urls = soup.find_all("a")

# Append found links to list
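The parser swap is the core of this commit: "lxml" is a faster, C-backed parser, but unlike "html.parser" it does not ship with the standard library, which is why setup.py below gains an lxml dependency. A minimal before/after illustration (the sample HTML is made up):

from bs4 import BeautifulSoup

html = '<div class="anime_info_body_bg"><img src="/cover.png"></div>'

soup = BeautifulSoup(html, "lxml")           # after this commit; requires the lxml package
# soup = BeautifulSoup(html, "html.parser")  # before this commit; stdlib-only
print(soup.find("div", {"class": "anime_info_body_bg"}).find("img")["src"])  # /cover.png
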
3 changes: 3 additions & 0 deletions pynimeapi/schedule.py
@@ -8,6 +8,9 @@
from pynimeapi.classes.datatype import *
from pynimeapi.classes.color import bcolors

+# This class is not part of the GoGoAnime website.
+# Instead, it uses the GraphQL API from AniList.
+
class GetSchedule:
def __init__(self):
# anilist API backend URL
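The new comment notes that GetSchedule queries AniList instead of scraping GoGoAnime. The exact query is not shown in this diff, so the following is only an illustrative sketch of an AniList GraphQL call; the airing-schedule query and field selection are assumptions, while https://graphql.anilist.co is AniList's public GraphQL endpoint:

import requests

# Hypothetical query: one page of upcoming airing times.
query = """
query ($page: Int) {
  Page(page: $page) {
    airingSchedules(notYetAired: true, sort: TIME) {
      airingAt
      episode
      media { title { romaji } }
    }
  }
}
"""

resp = requests.post("https://graphql.anilist.co",
                     json={"query": query, "variables": {"page": 1}})
resp.raise_for_status()
print(resp.json()["data"]["Page"]["airingSchedules"][:3])
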
4 changes: 2 additions & 2 deletions pynimeapi/streaming/url_handler.py
@@ -56,13 +56,13 @@ def aes_decrypt(self, data, key):

def get_data(self, embed_url):
r = self.session.get(embed_url)
soup = BeautifulSoup(r.content, "html.parser")
soup = BeautifulSoup(r.content, "lxml")
crypto = soup.find("script", {"data-name": "episode"})
return crypto["data-value"]

def stream_url(self):
r = self.session.get(self.episode_link)
soup = BeautifulSoup(r.content, "html.parser")
soup = BeautifulSoup(r.content, "lxml")
link = soup.find("a", {"class": "active", "rel": "1"})
embed_url = f'https:{link["data-video"]}'

4 changes: 2 additions & 2 deletions setup.py
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

-VERSION = '0.1.42'
+VERSION = '0.1.43'
DESCRIPTION = 'Yet simple API wrapper for GoGoAnime'
LONG_DESCRIPTION = 'PyNime is a (simple) straightforward Python3 script to scrape GoGoAnime using Python. The project is a work in progress, not finished yet. But, the code works well, feel free to take part of the code.'

@@ -15,7 +15,7 @@
long_description=LONG_DESCRIPTION,
long_description_content_type="text/markdown",
packages=find_packages(),
-    install_requires=['beautifulsoup4', 'requests', 'pycryptodome', 'm3u8'],
+    install_requires=['beautifulsoup4', 'requests', 'pycryptodome', 'm3u8', 'lxml'],

keywords=['python', 'downloader', 'anime', 'webscrapping', 'beautifulsoup4', 'gogoanime', 'gogoanime-scraper'],
classifiers= [
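With lxml now in install_requires, a fresh install of the package pulls the parser in automatically. A quick post-install sanity check (bs4 raises bs4.FeatureNotFound if the lxml parser is requested but not installed):

from bs4 import BeautifulSoup

# Raises bs4.FeatureNotFound when lxml is missing; succeeds otherwise.
BeautifulSoup("<p>ok</p>", "lxml")
print("lxml parser available")
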

