diff --git a/.cspell.json b/.cspell.json
index f74492eb55..928068b9fb 100644
--- a/.cspell.json
+++ b/.cspell.json
@@ -150,6 +150,7 @@
     "graphviz",
     "greenbutton",
     "gte",
+    "guillemets",
     "Gunter",
     "Gzip",
     "hardcoded",
@@ -206,6 +207,7 @@
     "lookup",
     "lte",
     "lxml",
+    "malcriado",
     "MapItem",
     "mappable",
     "mapquest",
@@ -225,6 +227,7 @@
     "MyBrowser",
     "MyModel",
     "namespace",
+    "Ñaño",
     "natively",
     "netcat",
     "newdomain",
@@ -307,6 +310,7 @@
     "runtime",
     "salesforce",
     "scalable",
+    "schön",
     "seeddb",
     "seedorg",
     "seedpass",
@@ -371,6 +375,7 @@
     "tsts",
     "tuples",
     "typechecking",
+    "über",
     "ubid",
     "UBID",
     "ubidmodel",
@@ -416,6 +421,7 @@
     "webdriver",
     "webserver",
     "webservers",
+    "Welstone",
     "whitelist",
     "wildcards",
     "workflow",
@@ -432,7 +438,9 @@
     "xpaths",
     "XSLX",
     "yasg",
-    "yml"
+    "yml",
+    "اضافية",
+    "بيانات"
   ],
   "flagWords": [
     "hte"
diff --git a/requirements/base.txt b/requirements/base.txt
index 1efc35f0d5..3cc4f626bb 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -42,7 +42,6 @@ Markdown==3.1.1
 polling==0.3.2
 pyyaml==6.0.1
 street-address==0.4.0
-unidecode==1.1.1
 xlrd==1.2.0
 xlsxwriter==1.2.7
 xmltodict==0.12.0
diff --git a/seed/data_importer/tasks.py b/seed/data_importer/tasks.py
index ff78a25c02..47d536eed9 100644
--- a/seed/data_importer/tasks.py
+++ b/seed/data_importer/tasks.py
@@ -33,7 +33,6 @@
 from django.utils import timezone as tz
 from django.utils.timezone import make_naive
 from past.builtins import basestring
-from unidecode import unidecode
 
 from seed.building_sync import validation_client
 from seed.building_sync.building_sync import BuildingSync
@@ -50,6 +49,7 @@
 from seed.data_importer.sensor_readings_parser import SensorsReadingsParser
 from seed.data_importer.utils import usage_point_id
 from seed.lib.mcm import cleaners, mapper, reader
+from seed.lib.mcm.cleaners import normalize_unicode_and_characters
 from seed.lib.mcm.mapper import expand_rows
 from seed.lib.mcm.utils import batch
 from seed.lib.progress_data.progress_data import ProgressData
@@ -741,7 +741,7 @@ def _save_raw_data_chunk(chunk, file_pk, progress_key):
             elif key == "_source_filename":  # grab source filename (for BSync)
                 source_filename = v
             elif isinstance(v, basestring):
-                new_chunk[key] = unidecode(v)
+                new_chunk[key] = normalize_unicode_and_characters(v)
             elif isinstance(v, (datetime, date)):
                 raise TypeError(
                     "Datetime class not supported in Extra Data. Needs to be a string.")
@@ -1559,9 +1559,10 @@ def add_dictionary_repr_to_hash(hash_obj, dict_obj):
         if isinstance(value, dict):
            add_dictionary_repr_to_hash(hash_obj, value)
         else:
-            hash_obj.update(str(unidecode(key)).encode('utf-8'))
+            # TODO: Do we need to normalize_unicode_and_characters (formerly unidecode) here?
+            hash_obj.update(str(normalize_unicode_and_characters(key)).encode('utf-8'))
             if isinstance(value, basestring):
-                hash_obj.update(unidecode(value).encode('utf-8'))
+                hash_obj.update(normalize_unicode_and_characters(value).encode('utf-8'))
             else:
                 hash_obj.update(str(value).encode('utf-8'))
     return hash_obj
diff --git a/seed/data_importer/tests/data/example-data-properties-2-invalid-footprints.xlsx b/seed/data_importer/tests/data/example-data-properties-2-invalid-footprints.xlsx
index d71b885280..b56624661d 100644
Binary files a/seed/data_importer/tests/data/example-data-properties-2-invalid-footprints.xlsx and b/seed/data_importer/tests/data/example-data-properties-2-invalid-footprints.xlsx differ
diff --git a/seed/data_importer/tests/data/example-data-properties-small-changes.xlsx b/seed/data_importer/tests/data/example-data-properties-small-changes.xlsx
index e87eb934f5..eb9ed26e69 100644
Binary files a/seed/data_importer/tests/data/example-data-properties-small-changes.xlsx and b/seed/data_importer/tests/data/example-data-properties-small-changes.xlsx differ
diff --git a/seed/data_importer/tests/data/example-data-properties-unicode.xlsx b/seed/data_importer/tests/data/example-data-properties-unicode.xlsx
new file mode 100644
index 0000000000..64e00c2e24
Binary files /dev/null and b/seed/data_importer/tests/data/example-data-properties-unicode.xlsx differ
diff --git a/seed/data_importer/tests/data/example-data-properties.xlsx b/seed/data_importer/tests/data/example-data-properties.xlsx
index 7a3e590069..1f9ebe8770 100644
Binary files a/seed/data_importer/tests/data/example-data-properties.xlsx and b/seed/data_importer/tests/data/example-data-properties.xlsx differ
diff --git a/seed/data_importer/tests/integration/test_merge_duplicate_rows.py b/seed/data_importer/tests/integration/test_merge_duplicate_rows.py
index 66553b48a7..5d38fc5a70 100644
--- a/seed/data_importer/tests/integration/test_merge_duplicate_rows.py
+++ b/seed/data_importer/tests/integration/test_merge_duplicate_rows.py
@@ -120,7 +120,9 @@ def test_hash_quantity_unicode(self):
             data_state=DATA_STATE_IMPORT,
             import_file_id=0,
         )
-        self.assertEqual(ps1.hash_object, ps2.hash_object)
+        # Now that we support unicode in the fields, the hashes should no longer be the
+        # same. TODO: Should we strip all unicode characters in extra data fields?
+        self.assertNotEqual(ps1.hash_object, ps2.hash_object)
 
     def test_hash_release_date(self):
         """The hash_state_object method makes the timezones naive, so this should work because
diff --git a/seed/data_importer/tests/test_mapping.py b/seed/data_importer/tests/test_mapping.py
index 2f6d79b6f6..687a966bb6 100644
--- a/seed/data_importer/tests/test_mapping.py
+++ b/seed/data_importer/tests/test_mapping.py
@@ -88,7 +88,7 @@ def test_mapping(self):
         # for p in props:
         #     pp(p)
 
-    def test_remapping_with_and_without_unit_aware_columns_doesnt_lose_data(self):
+    def test_remapping_with_and_without_unit_aware_columns_does_not_lose_data(self):
         """
         During import, when the initial -State objects are created from the extra_data values,
         ColumnMapping objects are used to take the extra_data dictionary values and create the
diff --git a/seed/data_importer/tests/test_match_unicode.py b/seed/data_importer/tests/test_match_unicode.py
new file mode 100644
index 0000000000..e14646e05e
--- /dev/null
+++ b/seed/data_importer/tests/test_match_unicode.py
@@ -0,0 +1,156 @@
+
+# !/usr/bin/env python
+# encoding: utf-8
+"""
+SEED Platform (TM), Copyright (c) Alliance for Sustainable Energy, LLC, and other contributors.
+See also https://github.com/seed-platform/seed/main/LICENSE.md
+"""
+import logging
+import os.path as osp
+import pathlib
+
+from django.core.files.uploadedfile import SimpleUploadedFile
+
+from seed.data_importer import tasks
+from seed.data_importer.tests.util import FAKE_MAPPINGS
+from seed.lib.mcm.cleaners import normalize_unicode_and_characters
+from seed.models import (
+    ASSESSED_RAW,
+    DATA_STATE_MAPPING,
+    Column,
+    PropertyState,
+    PropertyView
+)
+from seed.test_helpers.fake import (
+    FakePropertyStateFactory,
+    FakeTaxLotStateFactory
+)
+from seed.tests.util import DataMappingBaseTestCase
+
+logger = logging.getLogger(__name__)
+
+
+class TestUnicodeNormalization(DataMappingBaseTestCase):
+    def test_unicode_normalization(self):
+        """Test a few cases. The unicodedata.normalize('NFC', text) method combines
+        the letter and diacritics, which seems to provide the best compatibility."""
+        # Guillemets
+        unicode_text = "Café «Déjà Vu»"
+        expected_out = "Café \"Déjà Vu\""
+        normalized_text = normalize_unicode_and_characters(unicode_text)
+        self.assertEqual(normalized_text, expected_out)
+
+        # This passes straight through (no diacritics)
+        unicode_text = "شكرا لك"
+        normalized_text = normalize_unicode_and_characters(unicode_text)
+        self.assertEqual(normalized_text, unicode_text)
+
+        # mdash to `--`
+        unicode_text = "– über schön! —"
+        expected_out = "- über schön! --"
+        normalized_text = normalize_unicode_and_characters(unicode_text)
+        self.assertEqual(normalized_text, expected_out)
+
+        # \u004E\u0303 is Ñ (N + tilde) and the normalization converts it to a
+        # single unicode character. ñ stays and combines the diacritic and letter
+        unicode_text = "\u004E\u0303a\u006E\u0303o malcriado"
+        expected_out = "Ñaño malcriado"
+        normalized_text = normalize_unicode_and_characters(unicode_text)
+        self.assertEqual(normalized_text, expected_out)
+
+
+class TestUnicodeImport(DataMappingBaseTestCase):
+    def setUp(self):
+        filename = getattr(self, 'filename', 'example-data-properties-unicode.xlsx')
+        import_file_source_type = ASSESSED_RAW
+        self.fake_mappings = FAKE_MAPPINGS['unicode']
+        selfvars = self.set_up(import_file_source_type)
+        self.user, self.org, self.import_file, self.import_record, self.cycle = selfvars
+        filepath = osp.join(osp.dirname(__file__), 'data', filename)
+        self.import_file.file = SimpleUploadedFile(
+            name=filename,
+            content=pathlib.Path(filepath).read_bytes()
+        )
+        self.import_file.save()
+
+    def test_unicode_import(self):
+        """Test that unicode characters are imported correctly"""
+        tasks.save_raw_data(self.import_file.pk)
+        Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)
+        tasks.map_data(self.import_file.pk)
+
+        # Check to make sure all the properties imported
+        ps = PropertyState.objects.filter(
+            data_state=DATA_STATE_MAPPING,
+            organization=self.org,
+            import_file=self.import_file,
+        )
+        self.assertEqual(len(ps), 3)
+
+        # check that the property has the unicode characters
+        ps = PropertyState.objects.filter(
+            data_state=DATA_STATE_MAPPING,
+            organization=self.org,
+            import_file=self.import_file,
+            custom_id_1='unicode-1',
+        )[0]
+        self.assertEqual(ps.property_name, 'Déjà vu Café')
+        # check if there is an extra data key with unicode
+        self.assertEqual('بيانات اضافية' in ps.extra_data, True)
+
+        # check that we can query on a unicode character
+        ps = PropertyState.objects.filter(
+            data_state=DATA_STATE_MAPPING,
+            organization=self.org,
+            import_file=self.import_file,
+            property_name='🏦 Bank',
+        )[0]
+        self.assertIsNotNone(ps)
+
+        tasks.geocode_and_match_buildings_task(self.import_file.id)
+
+        qry = PropertyView.objects.filter(state__custom_id_1='unicode-1')
+        self.assertEqual(qry.count(), 1)
+        state = qry.first().state
+
+        self.assertEqual(state.property_name, "Déjà vu Café")
+
+
+class TestUnicodeMatching(DataMappingBaseTestCase):
+    """Test the matching of two properties with unicode characters
+    and changing one of the matching criteria with a unicode character and
+    having it fail."""
+
+    def setUp(self):
+        selfvars = self.set_up(ASSESSED_RAW)
+        self.user, self.org, self.import_file_1, self.import_record_1, self.cycle_1 = selfvars
+
+        self.property_state_factory = FakePropertyStateFactory(organization=self.org)
+        self.taxlot_state_factory = FakeTaxLotStateFactory(organization=self.org)
+
+    def test_unicode_matching(self):
+        """If the file did not come from excel or a csv, then the unicode characters will
+        not be normalized."""
+        base_state_details = {
+            'pm_property_id': 'Building — 1',  # <- that is an m-dash
+            'city': 'City 1',
+            'import_file_id': self.import_file_1.id,
+            'data_state': DATA_STATE_MAPPING,
+            'no_default_data': False,
+        }
+        self.property_state_factory.get_property_state(**base_state_details)

+        # Should normalize some characters, eg. mdash to `--`
+        base_state_details['pm_property_id'] = 'Building — 1'  # <- new state with mdash normalized
+        base_state_details['city'] = 'New City'
+        self.property_state_factory.get_property_state(**base_state_details)
+
+        # Import file and create -Views and canonical records.
+        self.import_file_1.mapping_done = True
+        self.import_file_1.save()
+        tasks.geocode_and_match_buildings_task(self.import_file_1.id)
+
+        # there should only be one property view
+        self.assertEqual(PropertyView.objects.count(), 1)
+        only_view = PropertyView.objects.first()
+        self.assertEqual(only_view.state.city, 'New City')
diff --git a/seed/data_importer/tests/util.py b/seed/data_importer/tests/util.py
index 1a4de200af..ce9b6e8058 100644
--- a/seed/data_importer/tests/util.py
+++ b/seed/data_importer/tests/util.py
@@ -249,6 +249,33 @@
             "to_field": 'Double Tester',
         }
     ],
+    'unicode': [
+        {
+            "from_field": 'Custom ID 1',
+            "to_table_name": 'PropertyState',
+            "to_field": 'custom_id_1',
+        }, {
+            "from_field": 'Property Name',
+            "to_table_name": 'PropertyState',
+            "to_field": 'property_name',
+        }, {
+            "from_field": 'Extra Data - String',
+            "to_table_name": 'PropertyState',
+            "to_field": 'Extra Data - String',
+        }, {
+            "from_field": 'Extra Data - Float',
+            "to_table_name": 'PropertyState',
+            "to_field": 'Extra Data - Float',
+        }, {
+            "from_field": 'بيانات اضافية',
+            "to_table_name": 'PropertyState',
+            "to_field": 'بيانات اضافية',
+        }, {
+            "from_field": 'Notes',
+            "to_table_name": 'PropertyState',
+            "to_field": 'Notes',
+        }
+    ],
     'short': {  # Short should no longer be used and probably does not work anymore.
         'property_name': 'Name',
         'address_line_1': 'Address Line 1',
@@ -261,6 +288,7 @@
     "to_table_name": 'TaxLotState',
     "to_field": 'taxlot_footprint',
 }
+
 PROPERTY_FOOTPRINT_MAPPING = {
     "from_field": 'Property Coordinates',
     "to_table_name": 'PropertyState',
diff --git a/seed/lib/mappings/mapper.py b/seed/lib/mappings/mapper.py
index 5c76376684..1fb553ec61 100644
--- a/seed/lib/mappings/mapper.py
+++ b/seed/lib/mappings/mapper.py
@@ -14,11 +14,13 @@
 from os.path import dirname, join, realpath
 
 from past.builtins import basestring
-from unidecode import unidecode
+
+from seed.lib.mcm.cleaners import normalize_unicode_and_characters
 
 LINEAR_UNITS = set(['ft', 'm', 'in'])
 MAPPING_DATA_DIR = join(dirname(realpath(__file__)), 'data')
 
+
 _log = logging.getLogger(__name__)
 
 
@@ -34,7 +36,7 @@ def _sanitize_and_convert_keys_to_regex(key):
     # force unicode
     # TODO: python3 check if this to run in python3
     if isinstance(key, basestring):
-        key = unidecode(key)
+        key = normalize_unicode_and_characters(key)
 
     # fix superscripts - copied from old code
     found = False
diff --git a/seed/lib/mcm/cleaners.py b/seed/lib/mcm/cleaners.py
index 923dd2109b..47830906a8 100644
--- a/seed/lib/mcm/cleaners.py
+++ b/seed/lib/mcm/cleaners.py
@@ -6,6 +6,7 @@
 """
 import re
 import string
+import unicodedata
 from datetime import date, datetime
 
 import dateutil
@@ -33,6 +34,46 @@
 PUNCT_REGEX = re.compile('[{0}]'.format(
     re.escape(string.punctuation.replace('.', '').replace('-', '')))
 )
+# Mapping of specific characters to their normalized versions (need to expand this list)
+CHAR_MAPPING = {
+    ord('“'): '"',
+    ord('”'): '"',
+    ord('‘'): "'",
+    ord('’'): "'",
+    ord('′'): "'",
+    ord('″'): '"',
+    ord('‴'): "'''",
+    ord('…'): '...',
+    ord('•'): '*',
+    ord('⁄'): '/',
+    ord('×'): 'x',
+    ord('⁓'): '~',
+    # mdash, ndash, horizontal bar
+    ord('–'): '-',
+    ord('—'): '--',
+    ord('―'): '-',
+    ord('¬'): '-',
+    # guillemets to single and double quotes
+    ord('‹'): "'",
+    ord('›'): "'",
+    ord('«'): '"',
+    ord('»'): '"',
+}
+
+
+def normalize_unicode_and_characters(text):
+    """Method to normalize unicode characters and replace specific characters with their normalized versions."""
+    # Normalize Unicode characters to their canonical composed form (NFC) --
+    # combines characters and diacritics when possible.
+
+    # Unicode standardizes on a single code point for accented characters such as é, ü, and ñ.
+    # More info can be seen here: https://docs.python.org/2/library/unicodedata.html#unicodedata.normalize
+    normalized_text = unicodedata.normalize('NFC', text)
+
+    # Apply CHAR_MAPPING to replace specific characters with their normalized equivalents.
+    normalized_text = normalized_text.translate(CHAR_MAPPING)
+
+    return normalized_text
 
 
 def default_cleaner(value, *args):
diff --git a/seed/lib/mcm/reader.py b/seed/lib/mcm/reader.py
index 502a13dcde..e88395a3da 100644
--- a/seed/lib/mcm/reader.py
+++ b/seed/lib/mcm/reader.py
@@ -18,11 +18,11 @@
 import xmltodict
 from past.builtins import basestring
-from unidecode import unidecode
 from xlrd import XLRDError, empty_cell, open_workbook, xldate
 from xlrd.xldate import XLDateAmbiguous
 
 from seed.data_importer.utils import kbtu_thermal_conversion_factors
+from seed.lib.mcm.cleaners import normalize_unicode_and_characters
 
 # Create a list of Excel cell types. This is copied
 # directly from the xlrd source code.
@@ -56,7 +56,7 @@ def clean_fieldnames(fieldnames):
     num_generated_headers = 0
     new_fieldnames = []
     for fieldname in fieldnames:
-        new_fieldname = unidecode(fieldname)
+        new_fieldname = normalize_unicode_and_characters(fieldname)
         if fieldname == '':
             num_generated_headers += 1
             new_fieldname = f'{SEED_GENERATED_HEADER_PREFIX} {num_generated_headers}'
@@ -389,7 +389,7 @@ def get_value(self, item, **kwargs):
                 value = " ".join(value.split())
             else:
                 value = item.value
-            return unidecode(value)
+            return normalize_unicode_and_characters(value)
 
         # only remaining items should be booleans
         return item.value
@@ -605,7 +605,7 @@ def first_five_rows(self):
             for x in first_row:
                 row_field = r[x]
                 if isinstance(row_field, basestring):
-                    row_field = unidecode(r[x])
+                    row_field = normalize_unicode_and_characters(r[x])
                 else:
                     row_field = str(r[x])
                 row_arr.append(row_field.strip())
diff --git a/seed/views/v3/data_quality_checks.py b/seed/views/v3/data_quality_checks.py
index 07de4ee228..071bf06342 100644
--- a/seed/views/v3/data_quality_checks.py
+++ b/seed/views/v3/data_quality_checks.py
@@ -11,10 +11,10 @@
 from drf_yasg.utils import swagger_auto_schema
 from rest_framework import status, viewsets
 from rest_framework.decorators import action
-from unidecode import unidecode
 
 from seed.data_importer.tasks import do_checks
 from seed.decorators import ajax_request_class
+from seed.lib.mcm.cleaners import normalize_unicode_and_characters
 from seed.lib.superperms.orgs.decorators import has_perm_class
 from seed.models import PropertyView, TaxLotView
 from seed.models.data_quality import DataQualityCheck
@@ -149,8 +149,9 @@ def results_csv(self, request):
                 result['formatted_field'],
                 result.get('label', None),
                 result['condition'],
-                # the detailed_message field can have units which has superscripts/subscripts, so unidecode it!
-                unidecode(result['detailed_message']),
+                # the detailed_message field can have units which have superscripts/subscripts,
+                # so normalize_unicode_and_characters it!
+                normalize_unicode_and_characters(result['detailed_message']),
                 result['severity']
             ])
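
A minimal usage sketch of the new cleaner follows (not part of the patch). It assumes a SEED checkout where the seed.lib.mcm.cleaners module added in this diff is importable; the expected values simply restate the cases asserted in TestUnicodeNormalization above.

# Sketch only: mirrors the TestUnicodeNormalization cases and assumes
# seed.lib.mcm.cleaners from this diff is on the Python path.
from seed.lib.mcm.cleaners import normalize_unicode_and_characters

# NFC composition: N (U+004E) + combining tilde (U+0303) collapses to the single code point Ñ.
assert normalize_unicode_and_characters("\u004E\u0303a\u006E\u0303o malcriado") == "Ñaño malcriado"

# CHAR_MAPPING swaps punctuation look-alikes: guillemets become quotes, dashes become hyphens.
assert normalize_unicode_and_characters("Café «Déjà Vu»") == 'Café "Déjà Vu"'
assert normalize_unicode_and_characters("– über schön! —") == "- über schön! --"

# Text with no diacritics or mapped characters passes through unchanged, whereas the old
# unidecode call would have transliterated it to ASCII.
assert normalize_unicode_and_characters("شكرا لك") == "شكرا لك"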