Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Fapwiz Ripper #2086

Merged
merged 3 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 154 additions & 0 deletions src/main/java/com/rarchives/ripme/ripper/rippers/FapwizRipper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

/**
 * Ripper for fapwiz.com.
 *
 * <p>Three URL shapes are recognised:
 * <ul>
 *   <li>category pages: {@code fapwiz.com/category/CATEGORY}</li>
 *   <li>user pages: {@code fapwiz.com/USER}</li>
 *   <li>post pages: {@code fapwiz.com/USER/POST}</li>
 * </ul>
 *
 * <p>A category URL also satisfies the post pattern (two path segments), so
 * every place that classifies a URL must test the category pattern first.
 */
public class FapwizRipper extends AbstractHTMLRipper {

    private static final Logger logger = LogManager.getLogger(FapwizRipper.class);

    private static final Pattern CATEGORY_PATTERN = Pattern
            .compile("https?://fapwiz\\.com/category/([a-zA-Z0-9_-]+)/?$");

    private static final Pattern USER_PATTERN = Pattern.compile("https?://fapwiz\\.com/([a-zA-Z0-9_-]+)/?$");

    // Note that the last part of the pattern can contain unicode emoji which
    // get encoded as %-encoded UTF-8 bytes in the URL, so we allow % characters.
    private static final Pattern POST_PATTERN = Pattern
            .compile("https?://fapwiz\\.com/([a-zA-Z0-9_-]+)/([a-zA-Z0-9_%-]+)/?$");

    /**
     * Creates a ripper for the given fapwiz URL.
     *
     * @param url the category, user or post URL to rip
     * @throws IOException if the superclass constructor rejects the URL
     */
    public FapwizRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "fapwiz";
    }

    @Override
    public String getDomain() {
        return "fapwiz.com";
    }

    /**
     * Derives the album identifier from the URL.
     *
     * @param url the URL to classify
     * @return {@code category_NAME}, {@code user_NAME} or {@code post_USER_POST};
     *         the post slug is returned verbatim (still %-encoded, not truncated)
     * @throws MalformedURLException if the URL matches none of the supported shapes
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        String externalForm = url.toExternalForm();
        Matcher m;

        // Category must be checked before post: both have two path segments.
        m = CATEGORY_PATTERN.matcher(externalForm);
        if (m.matches()) {
            return "category_" + m.group(1);
        }

        m = USER_PATTERN.matcher(externalForm);
        if (m.matches()) {
            return "user_" + m.group(1);
        }

        m = POST_PATTERN.matcher(externalForm);
        if (m.matches()) {
            return "post_" + m.group(1) + "_" + m.group(2);
        }

        throw new MalformedURLException("Expected fapwiz URL format: " +
                "fapwiz.com/USER or fapwiz.com/USER/POST or " +
                "fapwiz.com/category/CATEGORY - got " + url + " instead");
    }

    /**
     * Collects video URLs from a listing page by rewriting thumbnail image URLs.
     *
     * <p>The category page looks a lot like the structure of a user page, so
     * this method is written to be compatible with both.
     *
     * @param doc     the listing page
     * @param results list the discovered URLs are appended to
     */
    void processUserOrCategoryPage(Document doc, List<String> results) {
        doc.select(".post-items-holder img").forEach(img -> {
            String imgSrc = img.attr("src");

            // An image without a src cannot be turned into a download URL.
            if (imgSrc.isEmpty()) {
                return;
            }

            // Skip the user profile picture thumbnail insets
            if (imgSrc.endsWith("-thumbnail-icon.jpg")) {
                return;
            }

            // Replace -thumbnail.jpg with .mp4
            results.add(imgSrc.replace("-thumbnail.jpg", ".mp4"));
        });
    }

    /** Logs and delegates to {@link #processUserOrCategoryPage(Document, List)}. */
    void processCategoryPage(Document doc, List<String> results) {
        logger.info("Processing category page: {}", url);
        processUserOrCategoryPage(doc, results);
    }

    /** Logs and delegates to {@link #processUserOrCategoryPage(Document, List)}. */
    void processUserPage(Document doc, List<String> results) {
        logger.info("Processing user page: {}", url);
        processUserOrCategoryPage(doc, results);
    }

    /**
     * Collects the {@code src} of every {@code <source>} nested in a
     * {@code <video>} element of a post page.
     *
     * @param doc     the post page
     * @param results list the discovered URLs are appended to
     */
    void processPostPage(Document doc, List<String> results) {
        logger.info("Processing post page: {}", url);
        doc.select("video source").forEach(source -> {
            String src = source.attr("src");
            if (!src.isEmpty()) {
                results.add(src);
            }
        });
    }

    /**
     * Extracts the downloadable URLs from a page, choosing the extraction
     * strategy from the shape of the URL this ripper was created with.
     *
     * @param doc the page to scan
     * @return the URLs found; empty if the ripper URL matches no known shape
     */
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> results = new ArrayList<>();
        String pageUrl = url.toExternalForm();

        // Exactly one branch may run: a category URL also matches POST_PATTERN,
        // so independent ifs would process a category page as a post page too.
        if (CATEGORY_PATTERN.matcher(pageUrl).matches()) {
            processCategoryPage(doc, results);
        } else if (USER_PATTERN.matcher(pageUrl).matches()) {
            processUserPage(doc, results);
        } else if (POST_PATTERN.matcher(pageUrl).matches()) {
            processPostPage(doc, results);
        }

        return results;
    }

    private Document getDocument(String url, int retries) throws IOException {
        return Http.url(url).userAgent(USER_AGENT).retries(retries).get();
    }

    private Document getDocument(String url) throws IOException {
        return getDocument(url, 1);
    }

    /**
     * Follows the {@code a.next} pagination link, if the page has one.
     *
     * @param page the page currently being processed
     * @return the document the pagination link points to
     * @throws IOException if there is no usable pagination link or the fetch fails
     */
    @Override
    public Document getNextPage(Document page) throws IOException {
        logger.info("Getting next page for url: {}", url);
        Elements next = page.select("a.next");
        if (next.isEmpty()) {
            logger.info("No more pages");
            throw new IOException("No more pages.");
        }

        // Resolve against the document's base URI so a relative link still works;
        // fall back to the raw attribute when no base URI is available.
        String href = next.attr("abs:href");
        if (href.isEmpty()) {
            href = next.attr("href");
        }
        if (href.isEmpty()) {
            logger.info("Next page link has no href");
            throw new IOException("No more pages.");
        }

        logger.info("Found next page: {}", href);
        return getDocument(href);
    }

    /**
     * Queues a URL for download after pausing for 2000 ms.
     *
     * @param url   the file to download
     * @param index position of the file within the rip, used for the filename prefix
     */
    @Override
    public void downloadURL(URL url, int index) {
        sleep(2000);
        addURLToDownload(url, getPrefix(index));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package com.rarchives.ripme.tst.ripper.rippers;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

import org.jsoup.nodes.Document;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ripper.rippers.FapwizRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;

/**
 * Tests for {@link FapwizRipper}: pagination, ripping of post pages, and GID
 * derivation for post and user URLs.
 */
public class FapwizRipperTest extends RippersTest {
    @Test
    @Tag("flaky") // It seems like fetching the document within the test can be flaky.
    public void testGetNextPage_NoNextPage() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/alison-esha/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);

        Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();

        // If nothing is thrown, we failed the test because there *was* a next
        // page even though there shouldn't be.
        Assertions.assertThrows(IOException.class, () -> ripper.getNextPage(firstPage),
                "Expected no next page for " + url);
    }

    @Test
    @Tag("flaky") // It seems like fetching the document within the test can be flaky.
    public void testGetNextPage_HasNextPage() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/miaipanema/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);

        Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();
        try {
            Document doc = ripper.getNextPage(firstPage);
            Assertions.assertNotNull(doc);
        } catch (IOException exception) {
            // We should have found a next page but didn't. Keep the cause so
            // the failure report shows why.
            Assertions.fail("Expected a next page for " + url, exception);
        }
    }

    @Test
    public void testRipPost() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    @Test
    public void testRipPostWithNumbersInUsername1() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/desperate_bug_7776/lets-be-friends-that-secretly-fuck-thanks/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    @Test
    public void testRipPostWithEmojiInShortUrl() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/miaipanema/my-grip-needs-a-name-%f0%9f%a4%ad%f0%9f%91%87%f0%9f%8f%bc/")
                .toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    @Test
    public void testRipPostWithEmojiInLongUrlAtEnd() throws IOException, URISyntaxException {
        URL url = new URI(
                "https://fapwiz.com/bimeat1998/just-imagine-youre-out-with-your-girl-and-your-buddies-and-then-she-makes-this-move-%f0%9f%98%8d/")
                .toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    @Test
    public void testRipPostWithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
        URL url = new URI(
                "https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
                .toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    // TODO Test rip user

    // TODO Test rip category

    // Test Post pages GetGID

    @Test
    public void testPostGetGID1_Simple() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("post_petiteasiantravels_riding-at-9-months-pregnant", ripper.getGID(url));
    }

    @Test
    public void testPostGetGID2_WithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
        URL url = new URI(
                "https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
                .toURL();
        FapwizRipper ripper = new FapwizRipper(url);

        // In this case the filesystem safe version of the GID is
        // "post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-"
        // but the GID doesn't truncate and doesn't remove non-filesystem-safe
        // characters.
        String gid = ripper.getGID(url);
        Assertions.assertEquals(
                "post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96",
                gid);

        // Test directory name on disk (filesystem safe sanitized as the ripper will
        // do).
        String directoryName = Utils.filesystemSafe(ripper.getHost() + "_" + gid);
        Assertions.assertEquals(
                "fapwiz_post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-",
                directoryName);
    }

    // Test User pages GetGID

    @Test
    public void testUserGetGID1_Simple() throws IOException, URISyntaxException {
        // Test a "simple" username that is all letters.
        URL url = new URI("https://fapwiz.com/petiteasiantravels/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("user_petiteasiantravels", ripper.getGID(url));
    }

    @Test
    public void testUserGetGID2_Numbers() throws IOException, URISyntaxException {
        // Test a more complex username that contains numbers.
        URL url = new URI("https://fapwiz.com/bimeat1998/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("user_bimeat1998", ripper.getGID(url));
    }

    @Test
    public void testUserGetGID3_HyphensAndNumbers() throws IOException, URISyntaxException {
        // Test a more complex username that contains hyphens and numbers.
        URL url = new URI("https://fapwiz.com/used-airport-4076/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("user_used-airport-4076", ripper.getGID(url));
    }

    @Test
    public void testUserGetGID4_Underscores() throws IOException, URISyntaxException {
        // Test a more complex username that contains underscores.
        URL url = new URI("https://fapwiz.com/desperate_bug_7776/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("user_desperate_bug_7776", ripper.getGID(url));
    }
}
Loading