From 4833c9a47521654f853d11641c87c7f3f3afc96a Mon Sep 17 00:00:00 2001 From: Bogdan Abaev Date: Thu, 13 Jul 2023 01:15:36 +0000 Subject: [PATCH] strip titles of html tags + update regex Strip all html tags from title in getSortTitle function Update the regex in getSortTitle to not skip double brackets Fixes: #159 --- misc/db-updates/2023-07-12/sortTitleClean | 34 +++++++++++++++++++++++ model/Items.inc.php | 3 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100755 misc/db-updates/2023-07-12/sortTitleClean diff --git a/misc/db-updates/2023-07-12/sortTitleClean b/misc/db-updates/2023-07-12/sortTitleClean new file mode 100755 index 00000000..765ab26e --- /dev/null +++ b/misc/db-updates/2023-07-12/sortTitleClean @@ -0,0 +1,34 @@ +#!/usr/bin/php -d mysqlnd.net_read_timeout=86400 += ? ORDER BY shardID", $startShard); + +foreach ($shardIDs as $shardID) { + echo "Shard: $shardID\n"; + + # Fetch all title fields with special characters: [, ], (, ), {, }, <, >, ", ', ”, ’, “, ‘ + $rows = Zotero_DB::query("SELECT i.itemID, i.value, s.sortTitle FROM itemData as i INNER JOIN itemSortFields as s ON s.itemID = i.itemID WHERE fieldID = 110 AND value REGEXP '[{}><()\\\\[\\\\]\"\'”’“‘]' ", false, $shardID); + Zotero_DB::beginTransaction(); + foreach($rows as $row) { + # Find the desired sort title for that title field + $sortTitle = Zotero_Items::getSortTitle($row['value']); + # Do nothing if first characters of existing sort title are the same as of desired sort title + if (isset($row['sortTitle']) && mb_substr($sortTitle ?? '', 0, 5) == mb_substr($row['sortTitle'] ?? '', 0, 5)) { + continue; + } + # Do nothing if first characters of desired sort title are the same as of title + if (mb_substr($sortTitle ?? '', 0, 5) == mb_substr($row['value'] ?? '', 0, 5)) { + continue; + } + # Update item sort title + Zotero_DB::query("UPDATE itemSortFields SET sortTitle = ? 
WHERE itemID = ?", [$sortTitle, $row['itemID']], $shardID);
+	}
+	Zotero_DB::commit();
+
+}
+echo "Done";
\ No newline at end of file
diff --git a/model/Items.inc.php b/model/Items.inc.php
index df08d0c6..9652a4b2 100644
--- a/model/Items.inc.php
+++ b/model/Items.inc.php
@@ -2580,7 +2580,14 @@ public static function getSortTitle($title) {
 		if (!$title) {
 			return '';
 		}
-		return mb_strcut(preg_replace('/^[[({\-"\'“‘ ]+(.*)[\])}\-"\'”’ ]*?$/Uu', '$1', $title), 0, Zotero_Notes::$MAX_TITLE_LENGTH);
+		// Strip HTML first: strip_tags() uses quote characters to locate the end
+		// of a tag, so they must not be removed before it runs.
+		$withoutTags = strip_tags($title);
+		// Remove brackets, parentheses, braces, hyphens and straight/curly quotes
+		// wherever they occur in the title.
+		$cleaned = preg_replace('/[\[\](){}\-"\'“”‘’]/u', '', $withoutTags);
+		// preg_replace() yields null on error (e.g. malformed UTF-8 under /u).
+		return mb_strcut($cleaned ?? $withoutTags, 0, Zotero_Notes::$MAX_TITLE_LENGTH);
 	}
 }