Skip to content

Commit

Permalink
Adds a field to the Add Performer form
Browse files Browse the repository at this point in the history
The new field takes a URL and scrapes a jococruise.com performer page to fill in the form data.

Uses SwiftSoup to do the scraping.
  • Loading branch information
challf committed Dec 18, 2024
1 parent 903939d commit feee63c
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 23 deletions.
19 changes: 14 additions & 5 deletions Package.resolved
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"originHash" : "eb3b3f145a012949abb7ceccce032c45bb3babfff1b407704107d689cca2fa3b",
"originHash" : "2ed775d8528a95e12cbe1c2e0fc86c6513483f021c36686a13e1d78c43a98d14",
"pins" : [
{
"identity" : "async-http-client",
Expand Down Expand Up @@ -195,8 +195,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-async-algorithms.git",
"state" : {
"revision" : "5c8bd186f48c16af0775972700626f0b74588278",
"version" : "1.0.2"
"revision" : "4c3ea81f81f0a25d0470188459c6d4bf20cf2f97",
"version" : "1.0.3"
}
},
{
Expand Down Expand Up @@ -334,13 +334,22 @@
"version" : "1.4.0"
}
},
{
"identity" : "swiftsoup",
"kind" : "remoteSourceControl",
"location" : "https://github.com/scinfu/SwiftSoup.git",
"state" : {
"revision" : "0837db354faf9c9deb710dc597046edaadf5360f",
"version" : "2.7.6"
}
},
{
"identity" : "vapor",
"kind" : "remoteSourceControl",
"location" : "https://github.com/vapor/vapor.git",
"state" : {
"revision" : "ec23f07eb2eda35f6f179a8c4607769287073f8d",
"version" : "4.107.0"
"revision" : "e1002f35edf92e2a579580f2d1df92e01287c6c7",
"version" : "4.108.0"
}
},
{
Expand Down
2 changes: 2 additions & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ let package = Package(
.package(url: "https://github.com/johnsundell/ink.git", from: "0.6.0"),
.package(url: "https://github.com/weichsel/ZIPFoundation.git", .upToNextMajor(from: "0.9.0")),
.package(url: "https://github.com/challfry/CoreXLSX.git", .upToNextMinor(from: "0.14.1")),
.package(url: "https://github.com/scinfu/SwiftSoup.git", from: "2.6.0"),
],
targets: [
.systemLibrary(name: "gd", pkgConfig: "gdlib", providers: [.apt(["libgd-dev"]), .brew(["gd"]), .yum(["gd-devel"])]),
Expand All @@ -37,6 +38,7 @@ let package = Package(
.product(name: "Prometheus", package: "swift-prometheus"),
.product(name: "Ink", package: "ink"),
.product(name: "CoreXLSX", package: "CoreXLSX"),
.product(name: "SwiftSoup", package: "SwiftSoup"),
"gd",
"jpeg",
"gdOverrides",
Expand Down
23 changes: 17 additions & 6 deletions Sources/swiftarr/Resources/Assets/js/swiftarr.js
Original file line number Diff line number Diff line change
Expand Up @@ -335,22 +335,33 @@ function updatePhotoCardState(cardElement) {
let imgContainer = cardElement.querySelector('.img-for-upload-container');
let noImgElem = cardElement.querySelector('.no-image-marker');
let fileInputElem = cardElement.querySelector('.image-upload-input');
let hiddenFormElem = cardElement.querySelector('input[type="hidden"]');
let externalPhotoElem = cardElement.querySelector('input[type="hidden"].external-photo-url');
let serverPhotoElem = cardElement.querySelector('input[type="hidden"].current-server-photo');
let imageSwapButton = cardElement.querySelector('.twitarr-image-swap');
let imageRemoveButton = cardElement.querySelector('.twitarr-image-remove');
let imageVisible = true;
if (fileInputElem.files.length > 0) {
imgElem.src = window.URL.createObjectURL(fileInputElem.files[0]);
imgContainer.style.display = "block";
noImgElem.style.display = "none";
hiddenFormElem.value = "";
if (externalPhotoElem) {
externalPhotoElem.value = "";
}
if (serverPhotoElem) {
serverPhotoElem.value = "";
}
}
else if (hiddenFormElem.value) {
if (hiddenFormElem.value.startsWith('/api/v3') || hiddenFormElem.value.startsWith('/avatar')) {
imgElem.src = hiddenFormElem.value;
else if (externalPhotoElem?.value) {
imgElem.src = externalPhotoElem.value;
imgContainer.style.display = "block";
noImgElem.style.display = "none";
}
else if (serverPhotoElem?.value) {
if (serverPhotoElem.value.startsWith('/api/v3') || serverPhotoElem.value.startsWith('/avatar')) {
imgElem.src = serverPhotoElem.value;
}
else {
imgElem.src = "/api/v3/image/thumb/" + hiddenFormElem.value;
imgElem.src = "/api/v3/image/thumb/" + serverPhotoElem.value;
}
imgContainer.style.display = "block";
noImgElem.style.display = "none";
Expand Down
2 changes: 1 addition & 1 deletion Sources/swiftarr/Resources/Views/Fez/singleFez.html
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ <h5 class="card-header">Photo</h5>
<input class="image-upload-input d-none" type="file" accept="#(post.allowedImageTypes)" name="localPhoto1">
Choose Photo
</label>
<input type="hidden" value="" name="serverPhoto1">
<input type="hidden" class="current-server-photo" value="" name="serverPhoto1">
</div>
</div>
<div class="row justify-content-between m-0">
Expand Down
4 changes: 2 additions & 2 deletions Sources/swiftarr/Resources/Views/Performers/addOrganizer.html
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ <h3>to Post Their Bio</h3>
<input class="image-upload-input d-none" type="file" accept="image/png, image/jpeg" name="photo" aria-describedby="photoHelp">
Choose Photo
</label>
<input type="hidden" value="#(performer.header.photo)" name="serverPhoto">
<input type="hidden" id="photo-upload-url" name="photoURL" class="photo-upload-url">
<input type="hidden" class="external-photo-url photo-upload-url" name="photoURL" id="photo-upload-url">
<input type="hidden" class="current-server-photo" name="serverPhoto" value="#(performer.header.photo)">
</div>
</div>
<div class="row justify-content-between m-0">
Expand Down
25 changes: 21 additions & 4 deletions Sources/swiftarr/Resources/Views/Performers/addPerformer.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,28 @@
#if(performer.header.isOfficialPerformer):
<p>This form is for TwitarrTeam and above to manually add a bio page for an official performer. This form does not attach
the performer to any events (that step is done by bulk import of a Sched spreadsheet).</p>
<form action="/admin/performer/add">
<div class="card mb-2">
<div class="card-header bg-primary">
<span class="text-white">
Autofill from Performer's page on jococruise.com:
</span>
</div>
<div class="card-body bg-light">
<input type="text" class="form-control" id="performerurl" name="performerurl" aria-describedby="performerurlhelp" placeholder="jococruise.com URL">
<div id="performerurlhelp" class="form-text">
</div>
<div class="col col-auto">
<button type="submit" class="btn btn-primary">Fill Form<span class="spinner-border spinner-border-sm d-none" role="status" aria-hidden="true"></span></button>
</div>
</div>
</div>
</form>
#else:
<p>This form is for TwitarrTeam and above to edit the bio page for an shadow event organizer. This form does not attach
the performer to any events (Users need to do that themselves).</p>
#endif

<form class="ajax" action="#(formAction)" enctype="multipart/form-data" method="POST" data-successurl="/admin/performer/root">
#if(performer.header.id != nil):
<input type="hidden" name="performerID" id="performerID" value="#(performer.header.id)">
Expand Down Expand Up @@ -54,7 +71,7 @@
<div class="card" style="width: 312px;">
<div class="m-1 border img-for-upload-container">
<div class="ratio ratio-1x1">
<img src="data:," width=300 height=300 class="img-for-upload" alt="">
<img src="#if(performer.header.photo):#(performer.header.photo)#elseif(performerImageURL):#(performerImageURL)#else::data:,#endif" width=300 height=300 class="img-for-upload" alt="">
</div>
</div>
<p class="text-center no-image-marker"><i>no photo selected</i></p>
Expand All @@ -65,8 +82,8 @@
<input class="image-upload-input d-none" type="file" accept="image/png, image/jpeg" name="photo" aria-describedby="photoHelp">
Choose Photo
</label>
<input type="hidden" value="#(performer.header.photo)" name="serverPhoto">
<input type="hidden" id="photo-upload-url" name="photoURL" class="photo-upload-url">
<input type="hidden" class="external-photo-url photo-upload-url" id="photo-upload-url" name="photoURL" value="#(performerImageURL)">
<input type="hidden" class="current-server-photo" name="serverPhoto" value="#(performer.header.photo)">
</div>
</div>
<div class="row justify-content-between m-0">
Expand Down
2 changes: 1 addition & 1 deletion Sources/swiftarr/Resources/Views/User/userProfileEdit.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ <h5 class="card-header">New Avatar</h5>
<input class="image-upload-input" type="file" accept="image/*" name="avatarPhotoInput">
Choose Photo
</label>
<input type="hidden" value="/avatar/full/#(profile.header.userID)"
<input type="hidden" class="current-server-photo" value="/avatar/full/#(profile.header.userID)"
data-defaultvalue="/api/v3/image/user/identicon/#(profile.header.userID)"
data-originalvalue="/avatar/full/#(profile.header.userID)"
name="serverAvatarPhoto">
Expand Down
2 changes: 1 addition & 1 deletion Sources/swiftarr/Resources/Views/messagePostForm.html
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ <h5 class="card-header">Photo #(index + 1)</h5>
<input class="image-upload-input d-none" type="file" accept="#(post.allowedImageTypes)" name="localPhoto#(index + 1)">
Choose Photo
</label>
<input type="hidden" value="#(fn)" name="serverPhoto#(index + 1)">
<input type="hidden" class="current-server-photo" value="#(fn)" name="serverPhoto#(index + 1)">
</div>
</div>
<div class="row justify-content-between m-0">
Expand Down
106 changes: 103 additions & 3 deletions Sources/swiftarr/Site/SitePerformerController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import FluentSQL
import LeafKit
import Vapor
import RegexBuilder
import SwiftSoup

struct PerformersListContext: Encodable {
var trunk: TrunkContext
Expand Down Expand Up @@ -213,38 +214,48 @@ struct SitePerformerController: SiteControllerUtils {
// Adds an official performer to the performer list, or edits an existing official or shadow performer.
// Only TwitarrTeam and above can access.
// - Parameter performer: UUID. In URL Query. Set if this is an edit of an existing performer.
// - Parameter performerurl: String. In URL Query. Set if the form should be filled in with data from jococruise.com.
//
// The form is pre-populated from one of three sources, checked in this order:
//   1. `performer` query value: the existing performer is fetched from the API endpoint "/performer/<id>".
//   2. `performerurl` query value: the page at that URL is scraped by buildPerformerFromURL().
//   3. Neither: a blank PerformerData marked as an official performer.
// Renders the "Performers/addPerformer" template.
func upsertPerformer(_ req: Request) async throws -> View {
var performer: PerformerData?
var performerImageURL: String?
if let performerID = req.query[UUID.self, at: "performer"] {
performer = try await apiQuery(req, endpoint: "/performer/\(performerID)").content.decode(PerformerData.self)
}
else if let performerURL = req.query[String.self, at: "performerurl"] {
performer = try await buildPerformerFromURL(performerURL, on: req)
// The scraped photo is a 3rd party URL, not a server image. Hand it to the page context separately and
// clear it from the header so the two values stay distinct in the rendered form.
performerImageURL = performer?.header.photo
performer?.header.photo = nil
performer?.header.isOfficialPerformer = true
}
else {
performer = PerformerData()
performer?.header.isOfficialPerformer = true
}
// Leaf context for the add/edit performer page.
struct AddPerformerPageContext: Encodable {
var trunk: TrunkContext
var performer: PerformerData?
var performerImageURL: String? // For 3rd party images that should appear on load
var formAction: String
var deleteAction: String?
var attendedYears: [String]

init(_ req: Request, performer: PerformerData?) throws {
init(_ req: Request, performer: PerformerData?, image: String?) throws {
trunk = .init(req, title: "Add/Modify Official Performer Bio", tab: .events)
self.performer = performer
self.performerImageURL = image
self.formAction = "/admin/performer/add"
// A delete action is only offered when the performer already has an id.
if let _ = performer?.header.id {
self.deleteAction = "/admin/performer/delete"
}
let currentYear = Settings.shared.cruiseStartDateComponents.year ?? 2024
let currentYear = Settings.shared.cruiseStartDateComponents.year ?? 2025
// The list of attended years always includes the current cruise year.
var years = performer?.yearsAttended ?? [currentYear]
if !years.contains(currentYear) {
years.append(currentYear)
}
attendedYears = years.map { String($0) }
}
}
let ctx = try AddPerformerPageContext(req, performer: performer)
let ctx = try AddPerformerPageContext(req, performer: performer, image: performerImageURL)
return try await req.view.render("Performers/addPerformer", ctx)
}

Expand Down Expand Up @@ -337,6 +348,95 @@ struct SitePerformerController: SiteControllerUtils {
}
}

extension SitePerformerController {
// Scrapes the HTML found at the given url, builds a PerformerData out of what it finds.
// Meant to work with urls of the form: "https://jococruise.com/jonathan-coulton/"
// Like all scrapers, this code is fragile to changes in the HTML structure in the page being scraped.
//
// - Parameter urlString: Absolute http(s) URL of the performer page to scrape.
// - Parameter req: The incoming request; its HTTP client is used to fetch the page.
// - Returns: A PerformerData filled in with whatever fields could be found on the page. `header.photo` holds the
//   URL of the image on the scraped page, not a server image filename.
// - Throws: Abort(.badRequest) if the URL is not an absolute http(s) URL, the fetch does not return a 2xx status,
//   or the response has no readable body. Rethrows any SwiftSoup parsing/selector errors.
fileprivate func buildPerformerFromURL(_ urlString: String, on req: Request) async throws -> PerformerData {
    // URL(string:) happily accepts relative strings such as "foo", so also insist on a web scheme and a host.
    guard let url = URL(string: urlString), let scheme = url.scheme?.lowercased(),
            ["http", "https"].contains(scheme), let host = url.host, !host.isEmpty else {
        throw Abort(.badRequest, reason: "Invalid performer URL: \(urlString)")
    }
    let response = try await req.client.get(URI(string: urlString))
    // Don't scrape an error page as if it were a performer page.
    guard (200..<300).contains(response.status.code) else {
        throw Abort(.badRequest, reason: "Could not load URL: \(urlString) (HTTP status \(response.status.code))")
    }
    guard var body = response.body, let html = body.readString(length: body.readableBytes) else {
        throw Abort(.badRequest, reason: "No HTML returned from URL: \(urlString)")
    }
    let doc = try SwiftSoup.parse(html)

    // Returns the named attribute of the first element matching `selector`, or nil if there is no match or the
    // attribute is missing/empty (SwiftSoup reports an absent attribute as an empty string).
    func firstAttribute(_ attribute: String, matching selector: String) throws -> String? {
        guard let value = try doc.select(selector).first()?.attr(attribute), !value.isEmpty else {
            return nil
        }
        return value
    }

    var result = PerformerData()
    result.youtubeURL = try firstAttribute("href", matching: "li.et-social-youtube a")
    result.instagramURL = try firstAttribute("href", matching: "li.et-social-instagram a")
    result.facebookURL = try firstAttribute("href", matching: "li.et-social-facebook a")
    result.website = try firstAttribute("href", matching: "li.et-social-google-plus a")
    result.xURL = try firstAttribute("href", matching: "li.et-social-twitter a")
    result.header.name = try doc.select("div.et_pb_text_0_tb_body div").first()?.text() ?? ""
    result.pronouns = try doc.select("div.et_pb_text_1_tb_body div").first()?.text()
    result.header.photo = try firstAttribute("src", matching: "div.et_pb_image_0_tb_body img")

    // Pull every run of ASCII digits out of the "years attended" blurb. Splitting on anything that isn't a digit
    // keeps this working no matter which separator (spaces, commas, bullets) the page puts between the years.
    let yearsText = try doc.select("div.et_pb_column_2_tb_body div.et_pb_blurb_description").first()?.text() ?? ""
    result.yearsAttended = yearsText.split(whereSeparator: { !($0.isASCII && $0.isNumber) }).compactMap { Int($0) }

    if let bio = try doc.select("div.et_pb_text_2_tb_body div").first() {
        result.bio = try processHTMLIntoMarkdown(bio)
    }
    // Clear out any values scraped from the page footer (they're all JoCo links, not specific to this performer)
    if result.youtubeURL == "https://www.youtube.com/jococruise" {
        result.youtubeURL = nil
    }
    if result.instagramURL == "https://www.instagram.com/jococruise/" {
        result.instagramURL = nil
    }
    if result.facebookURL == "https://www.facebook.com/JoCoCruise" {
        result.facebookURL = nil
    }
    return result
}

// A really bad implementation of HTML to Markdown. Only works with a few Markdown tags, but these seem to be the only
// ones used by the Performer Bio html sections on jococruise.com.
//
// Any HTML tags not recognized and converted into their Markdown equivalent are removed from the output
// (their text content is kept).
//
// Walks the subtree rooted at `rootNode` depth-first without recursion. Text nodes are copied verbatim; recognized
// elements emit an opening marker when entered and a closing marker ("tail") once all their children are done.
//
// - Parameter rootNode: The node whose subtree should be converted. Nodes outside this subtree are never visited.
// - Returns: The Markdown-ish text for the subtree.
// - Throws: Rethrows SwiftSoup errors from reading an element's attributes.
fileprivate func processHTMLIntoMarkdown(_ rootNode: Node) throws -> String {
    let accum = StringBuilder()
    var curNode: Node = rootNode
    // Open ancestors of curNode (all inside the rootNode subtree), each paired with the text to emit when it closes.
    var parentStack: [(Node, String)] = []

    traversal: while true {
        var tailText = ""
        if let textNode = curNode as? TextNode {
            accum.append(textNode.getWholeText())
        } else if let element = curNode as? Element {
            switch element.tagName() {
            case "strong", "b": accum.append("**"); tailText = "**"
            case "i", "em": accum.append("*"); tailText = "*"
            case "p": tailText = "\n"
            case "a": accum.append("["); tailText = try "](\(element.attr("href")))"
            default: break
            }
        }
        if curNode.childNodeSize() > 0 {
            // Descend; the tail gets emitted when we climb back out of this node.
            parentStack.append((curNode, tailText))
            curNode = curNode.childNode(0)
            continue
        }
        // Leaf node: close it immediately, then find the next node to visit.
        accum.append(tailText)
        while true {
            // Never step to a sibling of the root: that would leak content from outside the requested subtree.
            if curNode === rootNode {
                break traversal
            }
            if let sibling = curNode.nextSibling() {
                curNode = sibling
                break
            }
            // No more siblings at this level: close the parent and continue climbing from there.
            guard let (parent, tail) = parentStack.popLast() else {
                break traversal
            }
            accum.append(tail)
            curNode = parent
        }
    }
    return accum.toString()
}
}

// Used to create a PerformerUploadData from the web form. Used in a couple of places.
extension PerformerUploadData {
fileprivate init(form: AddPerformerFormContent, photo: ImageUploadData, overrideIsOfficial: Bool? = nil ) {
Expand Down

0 comments on commit feee63c

Please sign in to comment.