Skip to content

Commit

Permalink
Added scraping an entire series [#1]
Browse files Browse the repository at this point in the history
  • Loading branch information
mcpierce committed Dec 15, 2024
1 parent 2e46e6a commit 817bf3c
Show file tree
Hide file tree
Showing 12 changed files with 413 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,7 @@
import lombok.extern.log4j.Log4j2;
import org.comixedproject.metadata.MetadataException;
import org.comixedproject.metadata.adaptors.AbstractMetadataAdaptor;
import org.comixedproject.metadata.marvel.actions.AbstractMarvelScrapingAction;
import org.comixedproject.metadata.marvel.actions.MarvelGetIssueAction;
import org.comixedproject.metadata.marvel.actions.MarvelGetIssueDetailsAction;
import org.comixedproject.metadata.marvel.actions.MarvelGetVolumesAction;
import org.comixedproject.metadata.marvel.actions.*;
import org.comixedproject.metadata.model.IssueDetailsMetadata;
import org.comixedproject.metadata.model.IssueMetadata;
import org.comixedproject.metadata.model.VolumeMetadata;
Expand All @@ -54,6 +51,7 @@ public class MarvelMetadataAdaptor extends AbstractMetadataAdaptor {
MarvelGetVolumesAction getVolumesAction = new MarvelGetVolumesAction();
MarvelGetIssueAction getIssueAction = new MarvelGetIssueAction();
MarvelGetIssueDetailsAction getIssueDetailsAction = new MarvelGetIssueDetailsAction();
MarvelGetAllIssuesAction getAllIssuesAction = new MarvelGetAllIssuesAction();

public MarvelMetadataAdaptor() {
super("ComiXed Marvel Scraper", PROVIDER_NAME);
Expand All @@ -80,33 +78,42 @@ public List<VolumeMetadata> getVolumes(
final String seriesName, final Integer maxRecords, final MetadataSource metadataSource)
throws MetadataException {
log.debug("Fetching volumes from Marvel: name={}", seriesName);

getVolumesAction.setSeries(seriesName);
getVolumesAction.setMaxRecords(maxRecords);

doSetCommonProperties(getVolumesAction, metadataSource);

log.debug("Executing action");
log.debug("Getting all volumes");
return getVolumesAction.execute();
}

@Override
public List<IssueDetailsMetadata> getAllIssues(
final String volume, final MetadataSource metadataSource) throws MetadataException {
return List.of();
final String seriesId, final MetadataSource metadataSource) throws MetadataException {
log.debug("Setting series id: {}", seriesId);
this.getAllIssuesAction.setSeriesId(seriesId);

doSetCommonProperties(getAllIssuesAction, metadataSource);

log.debug("Getting all issues for series");
return this.getAllIssuesAction.execute();
}

/**
 * Fetches the details for a single issue.
 *
 * <p>Configures the adaptor's issue details action with the issue id and the common properties
 * taken from the metadata source, then executes it.
 *
 * @param issueId the id of the issue to fetch
 * @param metadataSource the metadata source supplying the common properties
 * @return the result of executing the issue details action
 * @throws MetadataException if the action fails
 */
@Override
public IssueDetailsMetadata getIssueDetails(
    final String issueId, final MetadataSource metadataSource) throws MetadataException {
  final MarvelGetIssueDetailsAction action = this.getIssueDetailsAction;

  log.debug("Setting issue id: {}:", issueId);
  action.setComicId(issueId);
  doSetCommonProperties(action, metadataSource);

  log.debug("Getting issue details");
  return action.execute();
}

@Override
public String getReferenceId(final String webAddress) {

log.debug("Parsing web addresss: {}", webAddress);
final Pattern pattern = Pattern.compile(REFERENCE_ID_PATTERN);
final Matcher matches = pattern.matcher(webAddress);
String referenceId = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import lombok.Getter;
import lombok.Setter;
Expand Down Expand Up @@ -87,13 +88,24 @@ protected String doCreateUrl(final String path, final String parameters) {
.getHashForRequest(timestamp, publicKey, privateKey));
}

protected Date doConverDate(final Optional<MarvelDate> date) {
if (date.isEmpty()) return null;
try {
return dateFormat.parse(date.get().getDate());
} catch (ParseException error) {
log.error("Failed to parse date", error);
return null;
/**
 * Returns the cover date, which is looked up under the date type {@code focDate}.
 *
 * @param dates the dates to search
 * @return the parsed date, or {@code null} if no such entry exists or it cannot be parsed
 */
protected Date getCoverDate(final List<MarvelDate> dates) {
  final Date coverDate = findDate("focDate", dates);
  return coverDate;
}

/**
 * Returns the store date, which is looked up under the date type {@code onsaleDate}.
 *
 * @param dates the dates to search
 * @return the parsed date, or {@code null} if no such entry exists or it cannot be parsed
 */
protected Date getStoreDate(final List<MarvelDate> dates) {
  final Date storeDate = findDate("onsaleDate", dates);
  return storeDate;
}

/**
 * Finds the first date entry of the given type and parses its text with the action's date
 * format.
 *
 * <p>Missing data is treated as "no date" rather than an error: a null list, null entries,
 * entries without a type and entries without date text all result in {@code null}.
 *
 * @param dateType the type of date entry to look for
 * @param dates the dates to search, may be {@code null}
 * @return the parsed date, or {@code null} if no usable entry exists or parsing failed
 */
private Date findDate(final String dateType, final List<MarvelDate> dates) {
  // NOTE(review): presumably the list can be null when the response omits it; guard instead
  // of letting a NullPointerException escape from the stream below
  if (dates == null) {
    return null;
  }

  // constant-first equals so an entry without a type cannot throw
  final Optional<MarvelDate> result =
      dates.stream()
          .filter(entry -> entry != null && dateType.equals(entry.getType()))
          .findFirst();
  if (!result.isPresent()) {
    return null;
  }

  final String text = result.get().getDate();
  if (text == null) {
    // parsing a null string would throw a NullPointerException
    return null;
  }

  try {
    return dateFormat.parse(text);
  } catch (ParseException error) {
    log.error("Failed to parse date", error);
    return null;
  }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* ComiXed - A digital comic book library management application.
* Copyright (C) 2024, The ComiXed Project
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses>
*/

package org.comixedproject.metadata.marvel.actions;

import static org.comixedproject.metadata.marvel.MarvelMetadataAdaptor.PUBLISHER_NAME;

import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils;
import org.comixedproject.metadata.MetadataException;
import org.comixedproject.metadata.marvel.models.MarvelGetAllIssuesQueryResponse;
import org.comixedproject.metadata.model.IssueDetailsMetadata;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.publisher.Mono;

/**
 * <code>MarvelGetAllIssuesAction</code> retrieves the metadata for all comics in a given series.
 *
 * @author Darryl L. Pierce
 */
@Log4j2
public class MarvelGetAllIssuesAction
    extends AbstractMarvelScrapingAction<List<IssueDetailsMetadata>> {
  // URL:
  // https://gateway.marvel.com:443/v1/public/series/{seriesId}/comics?noVariants=true&apikey={publicKey}

  /** The id of the series whose issues are retrieved. */
  @Getter @Setter private String seriesId;

  /**
   * Fetches the issues for the configured series and converts each record to an {@link
   * IssueDetailsMetadata}.
   *
   * @return the metadata for every issue received
   * @throws MetadataException if the series id is blank, the request fails, or no response is
   *     received
   */
  @Override
  public List<IssueDetailsMetadata> execute() throws MetadataException {
    this.doCheckSetup();

    if (StringUtils.isBlank(this.seriesId)) throw new MetadataException("Missing series id");

    final List<IssueDetailsMetadata> result = new ArrayList<>();
    boolean done = false;

    // NOTE(review): every iteration issues a request built from the same path and parameters;
    // no offset is added here. Unless doCreateUrl()/isDone() track paging in the base class,
    // a multi-page series would refetch the same page -- confirm against
    // AbstractMarvelScrapingAction.
    while (!done) {
      final MarvelGetAllIssuesQueryResponse response = this.doFetchPage();

      log.debug("Received: {} issue(s)", response.getData().getResults().size());
      for (final var issue : response.getData().getResults()) {
        log.trace("Processing issue record: {} name={}", issue.getId(), issue.getTitle());
        final IssueDetailsMetadata entry = new IssueDetailsMetadata();
        entry.setSourceId(issue.getId());
        entry.setPublisher(PUBLISHER_NAME);
        entry.setSeries(issue.getSeries().getName());
        entry.setIssueNumber(issue.getIssueNumber());
        // TODO get the start year
        // entry.setVolume(issue.getStartYear());
        entry.setVolume("");
        entry.setTitle(issue.getTitle());
        entry.setCoverDate(this.getCoverDate(issue.getDates()));
        entry.setStoreDate(this.getStoreDate(issue.getDates()));
        result.add(entry);
      }
      done = isDone(response);
    }

    log.debug("Returning {} issue(s)", result.size());
    return result;
  }

  /**
   * Performs a single request for the series' comics and waits for the response.
   *
   * @return the response, never {@code null}
   * @throws MetadataException if the request fails or yields no response
   */
  private MarvelGetAllIssuesQueryResponse doFetchPage() throws MetadataException {
    log.trace("Generating request URL: series id={}", this.seriesId);
    // the URL is rebuilt for every request rather than cached
    final String url =
        this.doCreateUrl(String.format("series/%s/comics", this.seriesId), "noVariants=true");
    final WebClient client = this.createWebClient(url);
    final Mono<MarvelGetAllIssuesQueryResponse> request =
        client.get().uri(url).retrieve().bodyToMono(MarvelGetAllIssuesQueryResponse.class);

    final MarvelGetAllIssuesQueryResponse response;
    try {
      response = request.block();
    } catch (Exception error) {
      throw new MetadataException("Failed to get response", error);
    }

    if (response == null) {
      throw new MetadataException("Failed to receive response");
    }
    return response;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,16 +84,8 @@ public List<IssueMetadata> execute() throws MetadataException {
metadata.setVolumeName(issue.getSeries().getName());
metadata.setIssueNumber(issue.getIssueNumber());
metadata.setDescription(issue.getDescription());
metadata.setCoverDate(
this.doConverDate(
issue.getDates().stream()
.filter(date -> date.getType().equals("onsaleDate"))
.findFirst()));
metadata.setCoverDate(
this.doConverDate(
issue.getDates().stream()
.filter(date -> date.getType().equals("onsaleDate"))
.findFirst()));
metadata.setCoverDate(this.getCoverDate(issue.getDates()));
metadata.setStoreDate(this.getStoreDate(issue.getDates()));
metadata.setCoverUrl(
String.format(
"%s.%s",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,13 @@

package org.comixedproject.metadata.marvel.actions;

import java.text.ParseException;
import java.util.Optional;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils;
import org.comixedproject.metadata.MetadataException;
import org.comixedproject.metadata.marvel.MarvelMetadataAdaptor;
import org.comixedproject.metadata.marvel.models.MarvelDate;
import org.comixedproject.metadata.marvel.models.MarvelGetIssueQueryResponse;
import org.comixedproject.metadata.marvel.models.MarvelGetIssueRecord;
import org.comixedproject.metadata.marvel.models.MarvelUrl;
Expand Down Expand Up @@ -81,27 +79,8 @@ public IssueDetailsMetadata execute() throws MetadataException {
result.setSeries(detail.getSeries().getName());
// TODO where to get the volume result.setVolume(?);
result.setIssueNumber(detail.getIssueNumber());

Optional<MarvelDate> date =
detail.getDates().stream().filter(entry -> entry.getType().equals("focDate")).findFirst();
if (date.isEmpty()) {
try {
result.setCoverDate(dateFormat.parse(date.get().getDate()));
} catch (ParseException error) {
log.error("Failed to parse cover date", error);
}
}
date =
detail.getDates().stream()
.filter(entry -> entry.getType().equals("onsaleDate"))
.findFirst();
if (date.isPresent()) {
try {
result.setStoreDate(dateFormat.parse(date.get().getDate()));
} catch (ParseException error) {
log.error("Failed to parse store date", error);
}
}
result.setCoverDate(this.getCoverDate(detail.getDates()));
result.setStoreDate(this.getStoreDate(detail.getDates()));
result.setDescription(detail.getDescription());
Optional<MarvelUrl> address =
detail.getUrls().stream().filter(entry -> entry.getType().equals(("detail"))).findFirst();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* ComiXed - A digital comic book library management application.
* Copyright (C) 2024, The ComiXed Project
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses>
*/

package org.comixedproject.metadata.marvel.models;

/**
 * <code>MarvelGetAllIssuesQueryResponse</code> is the response body received when all issues for
 * a series are requested. It binds {@link BaseMarvelResponse} to {@link
 * MarvelSeriesIssueDetailRecord} and declares no members of its own.
 *
 * @author Darryl L. Pierce
 */
public class MarvelGetAllIssuesQueryResponse
    extends BaseMarvelResponse<MarvelSeriesIssueDetailRecord> {
  // intentionally empty: only fixes the generic record type
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public class MarvelIssueDetailRecord {
@JsonProperty("creators")
@Getter
private List<MarvelCreditEntry> creators;

/* TODO the following need fleshing out
@JsonProperty("characters") @Getter private List<?> characters;
@JsonProperty("stories") @Getter private List<?> stories;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* ComiXed - A digital comic book library management application.
* Copyright (C) 2024, The ComiXed Project
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses>
*/

package org.comixedproject.metadata.marvel.models;

import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.List;
import lombok.Getter;

/**
* <code>MarvelSeriesIssueCreators</code> represents all credits for a single issue when scraping a
* series.
*
* @author Darryl L. Pierce
*/
@Getter
public class MarvelSeriesIssueCreators {
  // mapped from the "available" JSON property
  @JsonProperty("available")
  private int available;

  // mapped from the "collectionURI" JSON property
  @JsonProperty("collectionURI")
  private String collectionURI;

  // mapped from the "items" JSON property
  @JsonProperty("items")
  private List<MarvelCreditEntry> items;
}
Loading

0 comments on commit 817bf3c

Please sign in to comment.