diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index e7b646e5a..3733fb153 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -28,7 +28,7 @@ * Simplified ripper, designed for ripping from sites by parsing HTML. */ public abstract class AbstractHTMLRipper extends AbstractRipper { - + private final Map itemsPending = Collections.synchronizedMap(new HashMap<>()); private final Map itemsCompleted = Collections.synchronizedMap(new HashMap<>()); private final Map itemsErrored = Collections.synchronizedMap(new HashMap<>()); @@ -60,11 +60,15 @@ protected Document getCachedFirstPage() throws IOException, URISyntaxException { public Document getNextPage(Document doc) throws IOException, URISyntaxException { return null; } - protected abstract List getURLsFromPage(Document page) throws UnsupportedEncodingException; + + protected abstract List getURLsFromPage(Document page) throws UnsupportedEncodingException, URISyntaxException; + protected List getDescriptionsFromPage(Document doc) throws IOException { throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function? } + protected abstract void downloadURL(URL url, int index); + protected DownloadThreadPool getThreadPool() { return null; } @@ -130,7 +134,7 @@ public void rip() throws IOException, URISyntaxException { List doclocation = new ArrayList<>(); LOGGER.info("Got doc location " + doc.location()); - + while (doc != null) { LOGGER.info("Processing a doc..."); @@ -167,7 +171,7 @@ public void rip() throws IOException, URISyntaxException { for (String imageURL : imageURLs) { index += 1; LOGGER.debug("Found image url #" + index + ": '" + imageURL + "'"); - downloadURL(new URL(imageURL), index); + downloadURL(new URI(imageURL).toURL(), index); if (isStopped() || isThisATest()) { break; } @@ -182,19 +186,26 @@ public void rip() throws IOException, URISyntaxException { if (isStopped() || isThisATest()) { break; } + textindex += 1; LOGGER.debug("Getting description from " + textURL); String[] tempDesc = getDescription(textURL,doc); + if (tempDesc != null) { - if (Utils.getConfigBoolean("file.overwrite", false) || !(new File( - workingDir.getCanonicalPath() - + "" - + File.separator - + getPrefix(index) - + (tempDesc.length > 1 ? tempDesc[1] : fileNameFromURL(new URL(textURL))) - + ".txt").exists())) { + URL url = new URI(textURL).toURL(); + String filename = fileNameFromURL(url); + + boolean fileExists = new File( + workingDir.getCanonicalPath() + + "" + + File.separator + + getPrefix(index) + + (tempDesc.length > 1 ? tempDesc[1] : filename) + + ".txt").exists(); + + if (Utils.getConfigBoolean("file.overwrite", false) || !fileExists) { LOGGER.debug("Got description from " + textURL); - saveText(new URL(textURL), "", tempDesc[0], textindex, (tempDesc.length > 1 ? tempDesc[1] : fileNameFromURL(new URL(textURL)))); + saveText(url, "", tempDesc[0], textindex, (tempDesc.length > 1 ? tempDesc[1] : filename)); sleep(descSleepTime()); } else { LOGGER.debug("Description from " + textURL + " already exists."); @@ -225,12 +236,12 @@ public void rip() throws IOException, URISyntaxException { } waitForThreads(); } - + /** * Gets the file name from the URL - * @param url + * @param url * URL that you want to get the filename from - * @return + * @return * Filename of the URL */ private String fileNameFromURL(URL url) { @@ -244,7 +255,7 @@ private String fileNameFromURL(URL url) { return saveAs; } /** - * + * * @param url * Target URL * @param subdirectory @@ -253,7 +264,7 @@ private String fileNameFromURL(URL url) { * Text you want to save * @param index * Index in something like an album - * @return + * @return * True if ripped successfully * False if failed */ @@ -295,12 +306,12 @@ private boolean saveText(URL url, String subdirectory, String text, int index, S } return true; } - + /** * Gets prefix based on where in the index it is - * @param index + * @param index * The index in question - * @return + * @return * Returns prefix for a file. (?) */ protected String getPrefix(int index) { @@ -313,9 +324,9 @@ protected String getPrefix(int index) { /* * ------ Methods copied from AlbumRipper. ------ - * This removes AlbumnRipper's usage from this class. + * This removes AlbumnRipper's usage from this class. */ - + protected boolean allowDuplicates() { return false; } diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java index 1d8e688a0..8b00cec37 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java @@ -8,6 +8,7 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; +import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.StandardCharsets; @@ -94,7 +95,7 @@ public void rip() throws IOException, URISyntaxException { index += 1; LOGGER.debug("Found image url #" + index+ ": " + imageURL); - downloadURL(new URL(imageURL), index); + downloadURL(new URI(imageURL).toURL(), index); } if (isStopped() || isThisATest()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java index f1d41426a..7551d198e 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java @@ -6,6 +6,7 @@ import com.rarchives.ripme.utils.RipUtils; import java.io.IOException; import java.net.MalformedURLException; +import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.util.ArrayList; @@ -208,7 +209,7 @@ private boolean isURLBlacklisted(String url) { return false; } @Override - public List getURLsFromPage(Document page) { + public List getURLsFromPage(Document page) throws URISyntaxException { List imageURLs = new ArrayList<>(); Pattern p; Matcher m; for (Element link : page.select("a")) { @@ -254,7 +255,7 @@ public List getURLsFromPage(Document page) { //Copied code from RedditRipper, getFilesFromURL should also implement stuff like flickr albums URL originalURL; try { - originalURL = new URL(href); + originalURL = new URI(href).toURL(); } catch (MalformedURLException e) { continue; } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/MotherlessVideoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/MotherlessVideoRipper.java index 6af8840ba..035ab73ef 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/MotherlessVideoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/MotherlessVideoRipper.java @@ -2,6 +2,8 @@ import java.io.IOException; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.util.List; import java.util.regex.Matcher; @@ -51,7 +53,7 @@ public String getGID(URL url) throws MalformedURLException { } @Override - public void rip() throws IOException { + public void rip() throws IOException, URISyntaxException { LOGGER.info(" Retrieving " + this.url); String html = Http.url(this.url).get().toString(); if (html.contains("__fileurl = '")) { @@ -62,7 +64,7 @@ public void rip() throws IOException { throw new IOException("Could not find video URL at " + url); } String vidUrl = vidUrls.get(0); - addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url)); + addURLToDownload(new URI(vidUrl).toURL(), HOST + "_" + getGID(this.url)); waitForThreads(); } -} \ No newline at end of file +}