From c59051759f33c5934e96b7448d223116b16cfafe Mon Sep 17 00:00:00 2001
From: ddungiii <ggcc503@gmail.com>
Date: Tue, 13 Feb 2024 14:30:16 +0000
Subject: [PATCH 1/3] fix(crawler): add login fail handling

---
 apps/core/management/scripts/portal_crawler.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/apps/core/management/scripts/portal_crawler.py b/apps/core/management/scripts/portal_crawler.py
index dd527a1b..6a5f5059 100644
--- a/apps/core/management/scripts/portal_crawler.py
+++ b/apps/core/management/scripts/portal_crawler.py
@@ -208,11 +208,21 @@ def _get_board_today(page_num):
         linklist = []
         links = soup.select("table > tbody > tr > td > a")
         dates = soup.select("table > tbody > tr > td:nth-child(5)")
+        total = soup.select("div > ul > li > em")[0].get_text()
 
-        if links:
-            log.info("------- portal login success!")
-        else:
-            log.info("------- portal login failed!")
+        if not links:
+            log.error("------- portal login failed!")
+            raise RuntimeError("portal login failed!")
+
+        if int(total) < 10_000:
+            """
+            If the total number of response articles is small,
+            all responses are public. (LOGIN FAILED)
+            """
+            log.error("------- portal login cookie failed!")
+            raise RuntimeError(f"portal login cookie {COOKIES} failed!")
+
+        log.info("------- portal login success!")
 
         today_date = str(day).replace("-", ".")
         for link, date in zip(links, dates):

From 7da3204ea4a60c3db0a19a1920ee6dd3daca523e Mon Sep 17 00:00:00 2001
From: ddungiii <ggcc503@gmail.com>
Date: Tue, 13 Feb 2024 14:37:18 +0000
Subject: [PATCH 2/3] fix(crawler): fix bulk_create not retrieving ids

---
 apps/core/management/scripts/portal_crawler.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/apps/core/management/scripts/portal_crawler.py b/apps/core/management/scripts/portal_crawler.py
index 6a5f5059..23a3c608 100644
--- a/apps/core/management/scripts/portal_crawler.py
+++ b/apps/core/management/scripts/portal_crawler.py
@@ -326,7 +326,14 @@ def _get_board_today(page_num):
         last_portal_article_in_db.save()
         new_articles.pop()
 
-    created_articles = Article.objects.bulk_create(new_articles)
+    # @NOTE
+    # On MySQL, Django's bulk_create does not set the primary keys of the created objects. However, PortalViewCount requires the IDs of the created articles.
+    # Therefore, insert one article at a time and retrieve their IDs.
+    # Reference: https://docs.djangoproject.com/en/5.0/ref/models/querysets/#bulk-create
+    created_articles = []
+    for new_article in new_articles:
+        new_article.save()
+        created_articles.append(new_article)
 
     new_portal_view_counts = []
 

From 78700c5f550e1454d01ed85b01a9280925da6b7f Mon Sep 17 00:00:00 2001
From: ddungiii <ggcc503@gmail.com>
Date: Tue, 13 Feb 2024 15:29:34 +0000
Subject: [PATCH 3/3] fix(crawler): extract raw HTML content instead of
 prettified output

---
 apps/core/management/scripts/portal_crawler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/core/management/scripts/portal_crawler.py b/apps/core/management/scripts/portal_crawler.py
index 23a3c608..0a0c1646 100644
--- a/apps/core/management/scripts/portal_crawler.py
+++ b/apps/core/management/scripts/portal_crawler.py
@@ -163,13 +163,13 @@ def _save_portal_image(html, session):
 
     for tr in trs:
         if len(list(tr.children)) == 3:
-            html = tr.find("td").prettify()
+            html = str(tr.find("td"))
             break
 
     if html is None:
         for tr in trs:
             if len(list(tr.children)) == 2:
-                html = tr.find("td").prettify()
+                html = str(tr.find("td"))
                 break
 
     html = _save_portal_image(html, session)