diff --git a/pkg/scraper/mapped.go b/pkg/scraper/mapped.go index a6b70565fd7..0ba58388a34 100644 --- a/pkg/scraper/mapped.go +++ b/pkg/scraper/mapped.go @@ -82,7 +82,7 @@ func (s mappedConfig) postProcess(ctx context.Context, q mappedQuery, attrConfig if attrConfig.hasSplit() { results := attrConfig.splitString(result) // skip cleaning when the query is used for searching - if q.getType() == SearchQuery { + if q.getType() == SearchQuery || attrConfig.hasDuplicate() { return results } results = attrConfig.cleanResults(results) @@ -100,7 +100,7 @@ func (s mappedConfig) postProcess(ctx context.Context, q mappedQuery, attrConfig ret = append(ret, text) } // skip cleaning when the query is used for searching - if q.getType() == SearchQuery { + if q.getType() == SearchQuery || attrConfig.hasDuplicate() { return ret } ret = attrConfig.cleanResults(ret) @@ -660,6 +660,7 @@ type mappedScraperAttrConfig struct { PostProcess []mappedPostProcessAction `yaml:"postProcess"` Concat string `yaml:"concat"` Split string `yaml:"split"` + Duplicate bool `yaml:"duplicate"` postProcessActions []postProcessAction @@ -743,6 +744,10 @@ func (c mappedScraperAttrConfig) hasSplit() bool { return c.Split != "" } +func (c mappedScraperAttrConfig) hasDuplicate() bool { + return c.Duplicate +} + func (c mappedScraperAttrConfig) concatenateResults(nodes []string) string { separator := c.Concat return strings.Join(nodes, separator) diff --git a/pkg/scraper/xpath_test.go b/pkg/scraper/xpath_test.go index 06b6ad5b686..0aa3e96f61b 100644 --- a/pkg/scraper/xpath_test.go +++ b/pkg/scraper/xpath_test.go @@ -464,13 +464,13 @@ const sceneHTML = `
Pornstars:  Alex D + data-mxptext="Alex D" data-gender="male" href="/pornstar/alex-d">Alex D , + data-mxptext="Mia Malkova" data-gender="female" href="/pornstar/mia-malkova"> , Riley Reid + data-mxptext="Riley Reid" data-gender="female" href="/pornstar/riley-reid">Riley Reid
+ @@ -570,6 +570,10 @@ func makeSceneXPathConfig() mappedScraper { performerConfig := make(mappedConfig) performerConfig["Name"] = makeSimpleAttrConfig(`$performerElem/@data-mxptext`) performerConfig["URL"] = makeSimpleAttrConfig(`$performerElem/@href`) + performerConfig["Gender"] = mappedScraperAttrConfig{ + Selector: `$performerElem/@data-gender`, + Duplicate: true, + } config.Performers.mappedConfig = performerConfig studioConfig := make(mappedConfig) @@ -636,7 +640,7 @@ func verifyMovies(t *testing.T, expectedMovieNames []string, actualMovies []*mod } } -func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []string, actualPerformers []*models.ScrapedPerformer) { +func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []string, expectedGenders []string, actualPerformers []*models.ScrapedPerformer) { t.Helper() i := 0 @@ -645,24 +649,35 @@ func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []strin actualName := "" expectedURL := "" actualURL := "" + expectedGender := "" + actualGender := "" if i < len(expectedNames) { expectedName = expectedNames[i] } if i < len(expectedURLs) { expectedURL = expectedURLs[i] } + if i < len(expectedGenders) { + expectedGender = expectedGenders[i] + } if i < len(actualPerformers) { actualName = *actualPerformers[i].Name if actualPerformers[i].URL != nil { actualURL = *actualPerformers[i].URL } + if actualPerformers[i].Gender != nil { + actualGender = *actualPerformers[i].Gender + } } if expectedName != actualName { t.Errorf("Expected performer name %s, got %s", expectedName, actualName) } if expectedURL != actualURL { - t.Errorf("Expected performer URL %s, got %s", expectedName, actualName) + t.Errorf("Expected performer URL %s, got %s", expectedURL, actualURL) + } + if expectedGender != actualGender { + t.Errorf("Expected performer Gender %s, got %s", expectedGender, actualGender) } i++ } @@ -729,7 +744,13 @@ func TestApplySceneXPathConfig(t *testing.T) { 
"/pornstar/riley-reid", } - verifyPerformers(t, expectedPerformerNames, expectedPerformerURLs, scene.Performers) + expectedPerformerGenders := []string{ + "male", + "female", + "female", + } + + verifyPerformers(t, expectedPerformerNames, expectedPerformerURLs, expectedPerformerGenders, scene.Performers) const expectedStudioName = "Sis Loves Me" const expectedStudioURL = "/channels/sis-loves-me" diff --git a/ui/v2.5/src/docs/en/Manual/ScraperDevelopment.md b/ui/v2.5/src/docs/en/Manual/ScraperDevelopment.md index caa3d41dc80..576f2a11075 100644 --- a/ui/v2.5/src/docs/en/Manual/ScraperDevelopment.md +++ b/ui/v2.5/src/docs/en/Manual/ScraperDevelopment.md @@ -420,6 +420,7 @@ Replaces `2001 to 2003` with `2001-2003`. Additionally, there are a number of fixed post-processing fields that are specified at the attribute level (not in `postProcess`) that are performed after the `postProcess` operations: * `concat`: if an xpath matches multiple elements, and `concat` is present, then all of the elements will be concatenated together * `split`: the inverse of `concat`. Splits a string to more elements using the separator given. For more info and examples have a look at PR [#579](https://github.com/stashapp/stash/pull/579) +* `duplicate`: if an xpath matches multiple elements (or `split` produces multiple elements), and `duplicate` is `true`, then all of the elements will be returned without removing duplicates. This is useful when the values must stay aligned with other per-element attributes, for example one `Gender` value per performer. Example: ```yaml