Skip to content

Commit

Permalink
Merge pull request #4622 from owid/entity-selector-multi-lang
Browse files Browse the repository at this point in the history
feat(entitySelector): allow searching for entities in multiple languages
  • Loading branch information
marcelgerber authored Mar 4, 2025
2 parents 3c88d95 + 2ee269b commit d26fa4c
Show file tree
Hide file tree
Showing 8 changed files with 311 additions and 9 deletions.
70 changes: 68 additions & 2 deletions devTools/regionsUpdater/update.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,64 @@ const ETL_REGIONS_URL =
},
// we want to exclude income groups for now, until we can properly display the user's
// income group in the UI
REGIONS_TO_EXCLUDE = ["OWID_HIC", "OWID_UMC", "OWID_LMC", "OWID_LIC"]
REGIONS_TO_EXCLUDE = ["OWID_HIC", "OWID_UMC", "OWID_LMC", "OWID_LIC"],
// Maps region codes (OWID, UNSD, World Bank, WHO) to the numeric region codes
// used for Intl.DisplayNames mapping.
// A value is either a single code or, where one region is only approximated by
// several regions, an array of codes.
// Several keys intentionally share a code (e.g. OWID_AFR and WHO_AFR are both "002").
// see https://github.com/unicode-org/cldr/blob/480029bab5301d79e762b872b463e9101ba91a40/common/main/en.xml#L927-L957 for these codes
// note that these currently only work in Firefox and Safari, not Chrome: see https://issues.chromium.org/issues/40801814
TRANSLATION_CODES = {
OWID_WRL: "001",

// Continents & aggregates according to OWID
OWID_AFR: "002",
OWID_ASI: "142",
OWID_EUR: "150",
OWID_MNS: "054",
OWID_NAM: "003",
OWID_OCE: "009",
OWID_PYA: "061",
OWID_SAM: "005",

// Regions according to UNSD
UNSD_AUS: "053",
UNSD_CAM: "013",
UNSD_CAR: "029",
UNSD_CAS: "143",
UNSD_EAF: "014",
UNSD_EAS: "030",
UNSD_EEU: "151",
UNSD_MAF: "017",
UNSD_MEL: "054",
UNSD_MIC: "057",
UNSD_NAF: "015",
// NOTE(review): same code as OWID_NAM; if this region means "Northern America"
// rather than "North America", CLDR "021" may be the closer match - confirm.
UNSD_NAM: "003",
UNSD_NEU: "154",
UNSD_POL: "061",
UNSD_SAF: "018",
UNSD_SAM: "005",
UNSD_SAS: "034",
UNSD_SEA: "035",
UNSD_SEU: "039",
UNSD_WAF: "011",
UNSD_WAS: "145",
UNSD_WEU: "155",

// Regions according to World Bank
WB_EAP: ["030", "009"], // East Asia; Pacific ≈ Oceania
WB_ECA: ["143", "150"], // same codes as UNSD_CAS and OWID_EUR
WB_LAC: ["419", "029"], // "029" is the code of UNSD_CAR; "419" is presumably Latin America - verify against CLDR
WB_MENA: ["145", "015"], // Middle East ≈ Western Asia; Northern Africa
WB_NA: "003", // NOTE(review): same code as OWID_NAM / UNSD_NAM - confirm it fits the World Bank definition
WB_SA: "034",
WB_SSA: "202",

// Regions according to WHO
WHO_AFR: "002",
WHO_AMR: "019",
WHO_EMR: "145", // same code as UNSD_WAS, i.e. an approximation
WHO_EUR: "150",
WHO_SEAR: "035",
WHO_WPAC: ["030", "035", "009"], // Western Pacific ≈ East Asia + Southeast Asia + Oceania
}

interface Entity {
code: string
Expand All @@ -47,6 +104,7 @@ interface Entity {
is_historical?: boolean
is_unlisted?: boolean
variant_names?: string[]
translation_codes?: string[]
members?: string[]
}

Expand Down Expand Up @@ -236,8 +294,15 @@ async function main() {
]
}

// merge in alternate search terms
// merge in alternate search names and translation codes
entity.variant_names = _.get(SEARCH_ALIASES, entity.code)
entity.translation_codes = _.get(TRANSLATION_CODES, entity.code)

if (
entity.translation_codes &&
!Array.isArray(entity.translation_codes)
)
entity.translation_codes = [entity.translation_codes]

return _.chain(entity)
.mapKeys((_val, key) =>
Expand All @@ -260,6 +325,7 @@ async function main() {
"isHistorical",
"isUnlisted",
"variantNames",
"translationCodes",
"members"
)
.value()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import {
excludeUndefined,
intersection,
FuzzySearch,
getUserNavigatorLanguagesNonEnglish,
getRegionAlternativeNames,
} from "@ourworldindata/utils"
import {
Checkbox,
Expand Down Expand Up @@ -81,9 +83,10 @@ interface SortConfig {

/**
 * An entity as handled by the entity selector's search and sort logic.
 *
 * `sortColumnValues` was listed twice in this type (at its old and its new
 * position); it is declared exactly once here.
 */
type SearchableEntity = {
    /** The entity's name; always part of the search keys. */
    name: string
    /** Presumably marks entities local to the user - confirm against where it is set. */
    local?: boolean
    /** True for the entity named "World". */
    isWorld?: boolean
    /** Values used for sorting, keyed by column slug. */
    sortColumnValues: Record<ColumnSlug, CoreValueType | undefined>
    /** Additional names (e.g. in the user's languages) the entity can be found under. */
    alternativeNames?: string[]
}

interface PartitionedEntities {
Expand Down Expand Up @@ -443,11 +446,14 @@ export class EntitySelector extends React.Component<{
}

@computed private get availableEntities(): SearchableEntity[] {
const langs = getUserNavigatorLanguagesNonEnglish()

return this.availableEntityNames.map((entityName) => {
const searchableEntity: SearchableEntity = {
name: entityName,
isWorld: entityName === "World",
sortColumnValues: {},
alternativeNames: getRegionAlternativeNames(entityName, langs),
}

if (this.localEntityNames) {
Expand Down Expand Up @@ -548,8 +554,9 @@ export class EntitySelector extends React.Component<{
}

/**
 * Fuzzy-search index over `sortedAvailableEntities`.
 *
 * Every entity is searchable by its name and by each of its alternative
 * names. Because one entity can match under several keys, results are
 * de-duplicated by entity name.
 *
 * The leftover `return FuzzySearch.withKey(` line that preceded the
 * `withKeyArray` call has been removed; only one call remains.
 */
@computed get fuzzy(): FuzzySearch<SearchableEntity> {
    return FuzzySearch.withKeyArray(
        this.sortedAvailableEntities,
        (entity) => [entity.name, ...(entity.alternativeNames ?? [])],
        (entity) => entity.name
    )
}
Expand Down
78 changes: 78 additions & 0 deletions packages/@ourworldindata/utils/src/FuzzySearch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,84 @@ describe(FuzzySearch, () => {
})
})

// Fixture for the `withKeyArray` tests: each country lists its own name
// first, followed by alternative spellings it should also be found under.
const countriesWithAliases: { name: string; aliases: string[] }[] = [
    { name: "Netherlands", aliases: ["Netherlands", "Nederland", "Holland"] },
    { name: "Spain", aliases: ["Spain", "España"] },
    { name: "Germany", aliases: ["Germany", "Deutschland"] },
]

// Tests for FuzzySearch.withKeyArray, which indexes every object under all of
// the keys returned by the key function (here: a country's aliases).
describe("withKeyArray", () => {
// Smoke test: the factory returns a FuzzySearch instance.
it("creates a fuzzy search instance with multiple keys per object", () => {
const search = FuzzySearch.withKeyArray(
countriesWithAliases,
(country) => country.aliases
)
expect(search).toBeInstanceOf(FuzzySearch)
})

// An object must be found via an alias, not only via its primary name.
it("finds results based on any of the keys", () => {
const search = FuzzySearch.withKeyArray(
countriesWithAliases,
(country) => country.aliases
)

const hollandResults = search.search("holland")
expect(hollandResults).toHaveLength(1)
expect(hollandResults[0].name).toBe("Netherlands")

const espanaResults = search.search("españa")
expect(espanaResults).toHaveLength(1)
expect(espanaResults[0].name).toBe("Spain")
})

// NOTE(review): despite the title, the two results asserted here are two
// different objects that share the key "fruit"; the test does not show the
// same object being returned more than once.
it("may return duplicate objects if multiple keys match", () => {
const duplicateKeyData = [
{ id: 1, keys: ["apple", "fruit", "red"] },
{ id: 2, keys: ["banana", "fruit", "yellow"] },
]

const search = FuzzySearch.withKeyArray(
duplicateKeyData,
(item) => item.keys
)
const results = search.search("fruit")
expect(results).toHaveLength(2)
})

// With a third argument, results are de-duplicated by its return value (the
// country name). "land" presumably matches several aliases of the Netherlands
// plus "Deutschland", leaving two distinct countries.
it("can make use of a 'unique by' function", () => {
const search = FuzzySearch.withKeyArray(
countriesWithAliases,
(item) => item.aliases,
(item) => item.name
)
const results = search.search("land")
expect(results).toHaveLength(2)
})

// An upper-case query is expected to match the mixed-case key.
it("handles case sensitivity in searches", () => {
const search = FuzzySearch.withKeyArray(
countriesWithAliases,
(item) => item.aliases
)
const results = search.search("NETHERLANDS")
expect(results).toHaveLength(1)
expect(results[0].name).toBe("Netherlands")
})

// An unaccented query ("espana") is expected to match the accented key
// "España"; this presumably relies on fuzzysort's own diacritics handling.
it("handles accented characters in searches", () => {
const search = FuzzySearch.withKeyArray(
countriesWithAliases,
(item) => item.aliases
)
const results = search.search("espana")
expect(results).toHaveLength(1)
expect(results[0].name).toBe("Spain")
})
})

describe("searchResults", () => {
it("returns raw fuzzysort results", () => {
const search = FuzzySearch.withKey(
Expand Down
40 changes: 35 additions & 5 deletions packages/@ourworldindata/utils/src/FuzzySearch.ts
Original file line number Diff line number Diff line change
@@ -1,34 +1,64 @@
import { groupBy } from "./Util.js"
import { PrimitiveType } from "@ourworldindata/types"
import { groupBy, uniqBy } from "./Util.js"
import fuzzysort from "fuzzysort"

export class FuzzySearch<T> {
strings: Fuzzysort.Prepared[]
datamap: Record<string, T[]>
uniqByFn: ((obj: T) => PrimitiveType) | undefined
opts: Fuzzysort.Options | undefined

/**
 * Private: instances are created through the static factory methods.
 *
 * @param datamap - maps each searchable key to the objects stored under it
 * @param uniqByFn - optional; when set, `search` de-duplicates its results by this function's return value
 * @param opts - optional fuzzysort options, passed on to every search
 */
private constructor(
    datamap: Record<string, T[]>,
    uniqByFn?: (obj: T) => PrimitiveType,
    opts?: Fuzzysort.Options
) {
    this.datamap = datamap
    this.uniqByFn = uniqByFn
    this.opts = opts
    // Prepare every key once up front so that searches can reuse the prepared targets.
    this.strings = Object.keys(datamap).map((key) => fuzzysort.prepare(key))
}

/**
 * Builds a FuzzySearch in which every object is indexed under exactly one key.
 * Objects that produce the same key are grouped under that key.
 *
 * The superseded lines that were still present (a second `key` parameter and
 * the old `groupBy(data, key)` / `new FuzzySearch(datamap, opts)` pair) have
 * been removed, so the parameter, the `datamap` declaration and the return
 * statement each appear once.
 *
 * @param data - objects to index
 * @param keyFn - returns the search key of an object
 * @param opts - optional fuzzysort options, passed on to every search
 */
static withKey<T>(
    data: T[],
    keyFn: (obj: T) => string,
    opts?: Fuzzysort.Options
): FuzzySearch<T> {
    const datamap = groupBy(data, keyFn)
    // No "unique by" function: each object lives under a single key.
    return new FuzzySearch(datamap, undefined, opts)
}

/**
 * Builds a FuzzySearch in which every object can be found under several keys,
 * e.g. aliases:
 * [
 *   { name: "Netherlands", "keys": ["Netherlands", "Nederland"] },
 *   { name: "Spain", "keys": ["Spain", "España"] },
 * ]
 *
 * @param data - objects to index
 * @param keysFn - returns all search keys of an object
 * @param uniqByFn - optional; when given, search results are de-duplicated by
 *   its return value (useful because one object can match under several keys)
 * @param opts - optional fuzzysort options, passed on to every search
 */
static withKeyArray<T>(
    data: T[],
    keysFn: (obj: T) => string[],
    uniqByFn?: (obj: T) => PrimitiveType,
    opts?: Fuzzysort.Options
): FuzzySearch<T> {
    // Null-prototype object: a key such as "constructor" or "toString" must not
    // resolve to an inherited Object.prototype member (which would make the
    // existence check truthy and the subsequent `.push` throw).
    const datamap: Record<string, T[]> = Object.create(null)
    for (const d of data) {
        // De-duplicate the keys of a single object so that it is never stored
        // twice under the same key (e.g. when an alias equals the primary name).
        for (const key of new Set(keysFn(d))) {
            const bucket = datamap[key]
            if (bucket) bucket.push(d)
            else datamap[key] = [d]
        }
    }
    return new FuzzySearch(datamap, uniqByFn, opts)
}

/**
 * Runs a fuzzy search for `input` over all indexed keys and returns the
 * objects stored under the matching keys, in the order in which fuzzysort
 * returns the matches.
 *
 * If a `uniqByFn` was supplied at construction time, the results are
 * de-duplicated by it; otherwise an object that matches under several keys
 * appears once per matching key.
 *
 * The stray `return fuzzysort` line that preceded the body (and would have
 * returned the fuzzysort module itself) has been removed.
 */
search(input: string): T[] {
    const results = fuzzysort
        .go(input, this.strings, this.opts)
        .flatMap((result) => this.datamap[result.target])

    if (this.uniqByFn) {
        return uniqBy(results, this.uniqByFn)
    }
    return results
}

searchResults(input: string): Fuzzysort.Results {
Expand Down
8 changes: 8 additions & 0 deletions packages/@ourworldindata/utils/src/Util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2154,3 +2154,11 @@ export function readFromAssetMap(
throw new Error(`Entry for asset not found in asset map: ${path}`)
return assetValue
}

/**
 * Returns the user's preferred languages as reported by the browser.
 *
 * Uses `navigator.languages` when available and falls back to the single
 * `navigator.language` otherwise. In environments without a global
 * `navigator` (e.g. server-side rendering) an empty list is returned instead
 * of throwing.
 */
export const getUserNavigatorLanguages = (): readonly string[] => {
    if (typeof navigator === "undefined") return []
    return navigator.languages ?? [navigator.language]
}

/**
 * Returns the user's preferred languages (see `getUserNavigatorLanguages`)
 * without any variant of English.
 *
 * A tag counts as English when its primary language subtag is exactly "en"
 * ("en", "en-US", "en-GB", ...), compared case-insensitively. Tags whose
 * primary subtag merely starts with those letters (e.g. "enm") are kept.
 */
export const getUserNavigatorLanguagesNonEnglish = (): readonly string[] => {
    return getUserNavigatorLanguages().filter(
        (lang) => !/^en(?:[-_]|$)/i.test(lang)
    )
}
3 changes: 3 additions & 0 deletions packages/@ourworldindata/utils/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ export {
isArrayDifferentFromReference,
readFromAssetMap,
downloadImage,
getUserNavigatorLanguages,
getUserNavigatorLanguagesNonEnglish,
} from "./Util.js"

export {
Expand Down Expand Up @@ -264,6 +266,7 @@ export {
type Aggregate,
getOthers,
countriesByName,
getRegionAlternativeNames,
} from "./regions.js"

export { getStylesForTargetHeight } from "./react-select.js"
Expand Down
Loading

0 comments on commit d26fa4c

Please sign in to comment.