From 4e3619ad134fa36ce34c053be00bcd63019f81e2 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Thu, 2 Jan 2025 00:00:25 -0800 Subject: [PATCH 1/6] Fix usage of deprecated URL constructors --- .../ripme/ripper/AbstractHTMLRipper.java | 55 +++++++++++-------- .../ripme/ripper/AbstractJSONRipper.java | 3 +- .../ripme/ripper/rippers/ChanRipper.java | 5 +- .../rippers/video/MotherlessVideoRipper.java | 8 ++- 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index e7b646e5a..3733fb153 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -28,7 +28,7 @@ * Simplified ripper, designed for ripping from sites by parsing HTML. */ public abstract class AbstractHTMLRipper extends AbstractRipper { - + private final Map itemsPending = Collections.synchronizedMap(new HashMap<>()); private final Map itemsCompleted = Collections.synchronizedMap(new HashMap<>()); private final Map itemsErrored = Collections.synchronizedMap(new HashMap<>()); @@ -60,11 +60,15 @@ protected Document getCachedFirstPage() throws IOException, URISyntaxException { public Document getNextPage(Document doc) throws IOException, URISyntaxException { return null; } - protected abstract List getURLsFromPage(Document page) throws UnsupportedEncodingException; + + protected abstract List getURLsFromPage(Document page) throws UnsupportedEncodingException, URISyntaxException; + protected List getDescriptionsFromPage(Document doc) throws IOException { throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function? } + protected abstract void downloadURL(URL url, int index); + protected DownloadThreadPool getThreadPool() { return null; } @@ -130,7 +134,7 @@ public void rip() throws IOException, URISyntaxException { List doclocation = new ArrayList<>(); LOGGER.info("Got doc location " + doc.location()); - + while (doc != null) { LOGGER.info("Processing a doc..."); @@ -167,7 +171,7 @@ public void rip() throws IOException, URISyntaxException { for (String imageURL : imageURLs) { index += 1; LOGGER.debug("Found image url #" + index + ": '" + imageURL + "'"); - downloadURL(new URL(imageURL), index); + downloadURL(new URI(imageURL).toURL(), index); if (isStopped() || isThisATest()) { break; } @@ -182,19 +186,26 @@ public void rip() throws IOException, URISyntaxException { if (isStopped() || isThisATest()) { break; } + textindex += 1; LOGGER.debug("Getting description from " + textURL); String[] tempDesc = getDescription(textURL,doc); + if (tempDesc != null) { - if (Utils.getConfigBoolean("file.overwrite", false) || !(new File( - workingDir.getCanonicalPath() - + "" - + File.separator - + getPrefix(index) - + (tempDesc.length > 1 ? tempDesc[1] : fileNameFromURL(new URL(textURL))) - + ".txt").exists())) { + URL url = new URI(textURL).toURL(); + String filename = fileNameFromURL(url); + + boolean fileExists = new File( + workingDir.getCanonicalPath() + + "" + + File.separator + + getPrefix(index) + + (tempDesc.length > 1 ? tempDesc[1] : filename) + + ".txt").exists(); + + if (Utils.getConfigBoolean("file.overwrite", false) || !fileExists) { LOGGER.debug("Got description from " + textURL); - saveText(new URL(textURL), "", tempDesc[0], textindex, (tempDesc.length > 1 ? 
tempDesc[1] : fileNameFromURL(new URL(textURL)))); + saveText(url, "", tempDesc[0], textindex, (tempDesc.length > 1 ? tempDesc[1] : filename)); sleep(descSleepTime()); } else { LOGGER.debug("Description from " + textURL + " already exists."); @@ -225,12 +236,12 @@ public void rip() throws IOException, URISyntaxException { } waitForThreads(); } - + /** * Gets the file name from the URL - * @param url + * @param url * URL that you want to get the filename from - * @return + * @return * Filename of the URL */ private String fileNameFromURL(URL url) { @@ -244,7 +255,7 @@ private String fileNameFromURL(URL url) { return saveAs; } /** - * + * * @param url * Target URL * @param subdirectory @@ -253,7 +264,7 @@ private String fileNameFromURL(URL url) { * Text you want to save * @param index * Index in something like an album - * @return + * @return * True if ripped successfully * False if failed */ @@ -295,12 +306,12 @@ private boolean saveText(URL url, String subdirectory, String text, int index, S } return true; } - + /** * Gets prefix based on where in the index it is - * @param index + * @param index * The index in question - * @return + * @return * Returns prefix for a file. (?) */ protected String getPrefix(int index) { @@ -313,9 +324,9 @@ protected String getPrefix(int index) { /* * ------ Methods copied from AlbumRipper. ------ - * This removes AlbumnRipper's usage from this class. + * This removes AlbumnRipper's usage from this class. */ - + protected boolean allowDuplicates() { return false; } diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java index 1d8e688a0..8b00cec37 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java @@ -8,6 +8,7 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; +import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.StandardCharsets; @@ -94,7 +95,7 @@ public void rip() throws IOException, URISyntaxException { index += 1; LOGGER.debug("Found image url #" + index+ ": " + imageURL); - downloadURL(new URL(imageURL), index); + downloadURL(new URI(imageURL).toURL(), index); } if (isStopped() || isThisATest()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java index f1d41426a..7551d198e 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java @@ -6,6 +6,7 @@ import com.rarchives.ripme.utils.RipUtils; import java.io.IOException; import java.net.MalformedURLException; +import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.util.ArrayList; @@ -208,7 +209,7 @@ private boolean isURLBlacklisted(String url) { return false; } @Override - public List getURLsFromPage(Document page) { + public List getURLsFromPage(Document page) throws URISyntaxException { List imageURLs = new ArrayList<>(); Pattern p; Matcher m; for (Element link : page.select("a")) { @@ -254,7 +255,7 @@ public List getURLsFromPage(Document page) { //Copied code from RedditRipper, getFilesFromURL should also implement stuff like flickr albums URL originalURL; try { - originalURL = new URL(href); + originalURL = new URI(href).toURL(); } catch (MalformedURLException e) { continue; } diff --git 
a/src/main/java/com/rarchives/ripme/ripper/rippers/video/MotherlessVideoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/MotherlessVideoRipper.java index 6af8840ba..035ab73ef 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/MotherlessVideoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/MotherlessVideoRipper.java @@ -2,6 +2,8 @@ import java.io.IOException; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.util.List; import java.util.regex.Matcher; @@ -51,7 +53,7 @@ public String getGID(URL url) throws MalformedURLException { } @Override - public void rip() throws IOException { + public void rip() throws IOException, URISyntaxException { LOGGER.info(" Retrieving " + this.url); String html = Http.url(this.url).get().toString(); if (html.contains("__fileurl = '")) { @@ -62,7 +64,7 @@ public void rip() throws IOException { throw new IOException("Could not find video URL at " + url); } String vidUrl = vidUrls.get(0); - addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url)); + addURLToDownload(new URI(vidUrl).toURL(), HOST + "_" + getGID(this.url)); waitForThreads(); } -} \ No newline at end of file +} From 42efc815df0ddcd38d272e6b1a94c340a2a4f6a7 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Thu, 2 Jan 2025 01:17:31 -0800 Subject: [PATCH 2/6] Fix issues in new LusciousRipper class --- .../com/rarchives/ripme/ripper/rippers/LusciousRipper.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java index 5637ed1b6..9a57b06f2 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java @@ -10,6 +10,8 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.net.URLEncoder; import java.util.ArrayList; @@ -27,10 +29,10 @@ public LusciousRipper(URL url) throws IOException { } @Override - public URL sanitizeURL(URL url) throws MalformedURLException { + public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException{ String URLToReturn = url.toExternalForm(); URLToReturn = URLToReturn.replaceAll("https?://(?:www\\.)?luscious\\.", "https://old.luscious."); - URL san_url = new URL(URLToReturn); + URL san_url = new URI(URLToReturn).toURL(); LOGGER.info("sanitized URL is " + san_url.toExternalForm()); return san_url; } From df975433494231d80971df44bb19adad5c5a5554 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Thu, 2 Jan 2025 01:28:57 -0800 Subject: [PATCH 3/6] README: Add note about --info so users can get the most out of gradle's test runs --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6334528cd..50885913c 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ the following combinations of tags: - testSlow runs tests with tag "slow". - tests can be run by test class, or single test. Use "testAll" so it does not matter if a test is tagged or not. 
+- tests can give the full stack of an assertion, exception, or error if you pass `--info` to the command ```bash ./gradlew test @@ -129,6 +130,7 @@ the following combinations of tags: ./gradlew testSlow ./gradlew testAll --tests XhamsterRipperTest ./gradlew testAll --tests XhamsterRipperTest.testXhamster2Album +./gradlew testAll --tests ChanRipperTest --info ``` Please note that some tests may fail as sites change and our rippers From 29d46491f9dbda8e8d9cab03662b3a904627f5a1 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Thu, 2 Jan 2025 01:30:13 -0800 Subject: [PATCH 4/6] Fix an issue with the XvideosRipper found by URISyntaxException after refactor --- .../java/com/rarchives/ripme/ripper/rippers/XvideosRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XvideosRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XvideosRipper.java index ea19d484b..6f591d18b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/XvideosRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XvideosRipper.java @@ -82,7 +82,7 @@ public List getURLsFromPage(Document doc) { String[] lines = e.html().split("\n"); for (String line : lines) { if (line.contains("html5player.setVideoUrlHigh")) { - String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", ""); + String videoURL = line.strip().replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", ""); results.add(videoURL); } } From b1e3771cc9f26b728b615f5ec6fd9e3300029080 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Thu, 2 Jan 2025 02:13:20 -0800 Subject: [PATCH 5/6] Change to originalURL parsing resulted in a different exception if it's malformed, so handle those and refuse to rip --- .../java/com/rarchives/ripme/ripper/rippers/ChanRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java index 7551d198e..c985f1612 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java @@ -256,7 +256,7 @@ public List getURLsFromPage(Document page) throws URISyntaxException { URL originalURL; try { originalURL = new URI(href).toURL(); - } catch (MalformedURLException e) { + } catch (MalformedURLException | URISyntaxException | IllegalArgumentException e) { continue; } From 692430cfcb2a5503c781d84acf2d532c079596eb Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Thu, 2 Jan 2025 02:29:08 -0800 Subject: [PATCH 6/6] Convert space to %20 before adding URL for later conversion --- .../java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java index ea145aad3..ae9faaedc 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java @@ -56,6 +56,7 @@ public List getURLsFromPage(Document doc) { for (Element thumb : thumbs) { String link = thumb.attr("src").replaceAll("thumbs/th_", ""); String imgSrc = "http://nude-gals.com/" + link; + imgSrc = imgSrc.replaceAll(" ", "%20"); imageURLs.add(imgSrc); }
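
The recurring change across patches 1, 2, 4, 5, and 6 is the migration away from the deprecated `new URL(String)` constructor to `new URI(String).toURL()`. The sketch below is not part of the patch series; it is a minimal, self-contained illustration of that pattern, assuming a hypothetical helper class (`UrlMigrationSketch`, `legacyParse`, `parse` are illustrative names, not code from RipMe). It also shows why the follow-up patches were needed: `URI` parsing is stricter than the old `URL(String)` constructor, so callers have to handle `URISyntaxException`/`IllegalArgumentException` (as in the ChanRipper change) and percent-encode characters such as spaces up front (as in the NudeGalsRipper change).

```java
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

public class UrlMigrationSketch {

    // Old style: new URL(String) is deprecated (since Java 20) and does almost
    // no validation of the spec string, so malformed input surfaces late.
    @SuppressWarnings("deprecation")
    static URL legacyParse(String spec) throws MalformedURLException {
        return new URL(spec);
    }

    // Replacement pattern: parse with URI first, then convert. new URI(String)
    // throws URISyntaxException for input the old constructor tolerated (for
    // example unencoded spaces), and URI.toURL() can additionally throw
    // MalformedURLException or IllegalArgumentException, so callers that used
    // to catch only MalformedURLException must widen their catch clauses.
    static URL parse(String spec) throws MalformedURLException, URISyntaxException {
        // Hypothetical pre-encoding step mirroring the space-to-%20 fix:
        // URI rejects raw spaces that URL(String) silently accepted.
        String encoded = spec.replace(" ", "%20");
        return new URI(encoded).toURL();
    }

    public static void main(String[] args) throws Exception {
        // A raw space would make new URI(...) throw without the encoding step.
        System.out.println(parse("http://example.com/some image.jpg"));
    }
}
```

Under these assumptions, the widened `throws` clauses and the broadened `catch (MalformedURLException | URISyntaxException | IllegalArgumentException e)` in the patches follow directly from the stricter parsing, rather than being independent behavior changes.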