|
5 | 5 | import java.net.URI;
|
6 | 6 | import java.net.URISyntaxException;
|
7 | 7 | import java.net.URL;
|
| 8 | +import java.nio.file.Files; |
| 9 | +import java.nio.file.Paths; |
8 | 10 | import java.util.ArrayList;
|
9 | 11 | import java.util.List;
|
10 | 12 | import java.util.regex.Matcher;
|
@@ -61,14 +63,17 @@ protected Document getFirstPage() throws IOException {
|
61 | 63 | if (!notHome) {
|
62 | 64 | StringBuilder newPath = new StringBuilder(path);
|
63 | 65 | newPath.insert(2, "M");
|
64 |
| - firstURL = new URL(this.url, "https://" + DOMAIN + newPath); |
| 66 | + firstURL = URI.create("https://" + DOMAIN + newPath).toURL(); |
65 | 67 | LOGGER.info("Changed URL to " + firstURL);
|
66 | 68 | }
|
67 | 69 | return Http.url(firstURL).referrer("https://motherless.com").get();
|
68 | 70 | }
|
69 | 71 |
|
70 | 72 | @Override
|
71 | 73 | public Document getNextPage(Document doc) throws IOException, URISyntaxException {
|
| 74 | + |
| 75 | + Files.write(Paths.get("doc-next-page.txt"), doc.outerHtml().getBytes()); |
| 76 | + |
72 | 77 | Elements nextPageLink = doc.head().select("link[rel=next]");
|
73 | 78 | if (nextPageLink.isEmpty()) {
|
74 | 79 | throw new IOException("Last page reached");
|
@@ -111,7 +116,7 @@ protected List<String> getURLsFromPage(Document page) {
|
111 | 116 | @Override
|
112 | 117 | protected void downloadURL(URL url, int index) {
|
113 | 118 | // Create thread for finding image at "url" page
|
114 |
| - MotherlessImageThread mit = new MotherlessImageThread(url, index); |
| 119 | + MotherlessImageRunnable mit = new MotherlessImageRunnable(url, index); |
115 | 120 | motherlessThreadPool.addThread(mit);
|
116 | 121 | try {
|
117 | 122 | Thread.sleep(IMAGE_SLEEP_TIME);
|
@@ -150,15 +155,19 @@ public String getGID(URL url) throws MalformedURLException {
|
150 | 155 | throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url);
|
151 | 156 | }
|
152 | 157 |
|
153 |
| - |
| 158 | + @Override |
| 159 | + protected DownloadThreadPool getThreadPool() { |
| 160 | + return motherlessThreadPool; |
| 161 | + } |
| 162 | + |
154 | 163 | /**
|
155 | 164 | * Helper class to find and download images found on "image" pages
|
156 | 165 | */
|
157 |
| - private class MotherlessImageThread implements Runnable { |
| 166 | + private class MotherlessImageRunnable implements Runnable { |
158 | 167 | private final URL url;
|
159 | 168 | private final int index;
|
160 | 169 |
|
161 |
| - MotherlessImageThread(URL url, int index) { |
| 170 | + MotherlessImageRunnable(URL url, int index) { |
162 | 171 | super();
|
163 | 172 | this.url = url;
|
164 | 173 | this.index = index;
|
|
0 commit comments