diff --git a/examples/example_data/mappings_equipment.csv b/examples/example_data/mappings_equipment.csv
new file mode 100644
index 0000000..caed027
--- /dev/null
+++ b/examples/example_data/mappings_equipment.csv
@@ -0,0 +1,25 @@
+in,out_
+combiner,combiner
+comb,combiner
+cb,combiner
+battery,battery
+bess,battery
+inverter,inverter
+invert,inverter
+inv,inverter
+met,met
+meter,meter
+module,module
+mod,module
+recloser,recloser
+reclose,recloser
+relay,relay
+substation,substation
+switchgear,switchgear
+switch,switchgear
+tracker,tracker
+transformer,transformer
+xfmr,transformer
+wiring,wiring
+wire,wiring
+wires,wiring
\ No newline at end of file
diff --git a/examples/example_data/mappings_pv_terms.csv b/examples/example_data/mappings_pv_terms.csv
new file mode 100644
index 0000000..79acb06
--- /dev/null
+++ b/examples/example_data/mappings_pv_terms.csv
@@ -0,0 +1,20 @@
+in,out_
+comm,communication
+energy,energy
+kwh,energy
+mwh,energy
+grid,grid
+curtailment,grid
+curtail,grid
+poi,grid
+offline,outage
+solar,solar
+pv,solar
+photovoltaic,solar
+system,system
+site,system
+farm,system
+project,system
+sma,make_model
+cm,corrective_maintenance
+pm,preventative_maintenance
\ No newline at end of file
diff --git a/examples/tutorial_text_classify_regex_example.ipynb b/examples/tutorial_text_classify_regex_example.ipynb
new file mode 100644
index 0000000..d7c9a67
--- /dev/null
+++ b/examples/tutorial_text_classify_regex_example.ipynb
@@ -0,0 +1,187 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Adding keyword labels to O&M data\n",
+    "This notebook demonstrates the use of the `pvops.text.classify.get_attributes_from_keywords` function for adding asset labels based on O&M notes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "\n",
+    "from pvops.text import utils, preprocess\n",
+    "from pvops.text.classify import get_attributes_from_keywords\n",
+    "from pvops.text.visualize import visualize_classification_confusion_matrix"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Step 0: Get sample data, remap assets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pull in sample data and remap assets for ease of comparison\n",
+    "\n",
+    "om_df = pd.read_csv('example_data/example_ML_ticket_data.csv')\n",
+    "col_dict = {\n",
+    "    \"data\" : \"CompletionDesc\",\n",
+    "    \"eventstart\" : \"Date_EventStart\",\n",
+    "    \"save_data_column\" : \"processed_data\",\n",
+    "    \"save_date_column\" : \"processed_date\",\n",
+    "    \"attribute_col\" : \"Asset\",\n",
+    "    \"predicted_col\" : \"Keyword_Asset\",\n",
+    "    \"remapping_col_from\": \"in\",\n",
+    "    \"remapping_col_to\": \"out_\"\n",
+    "}\n",
+    "\n",
+    "# remap assets\n",
+    "remapping_df = pd.read_csv('example_data/remappings_asset.csv')\n",
+    "remapping_df['out_'] = remapping_df['out_'].replace({'met station': 'met',\n",
+    "                                                     'energy storage': 'battery',\n",
+    "                                                     'energy meter': 'meter'})\n",
+    "om_df = utils.remap_attributes(om_df, remapping_df, col_dict, allow_missing_mappings=True)\n",
+    "om_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Step 1: Text preprocessing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# preprocessing steps\n",
+    "om_df[col_dict['attribute_col']] = om_df.apply(lambda row: row[col_dict['attribute_col']].lower(), axis=1)\n",
+    "om_df = preprocess.preprocessor(om_df, lst_stopwords=[], col_dict=col_dict, print_info=False, extract_dates_only=False)\n",
+    "\n",
+    "DATA_COL = col_dict['data']\n",
+    "om_df[DATA_COL] = om_df['processed_data']\n",
+    "\n",
+    "# replace terms\n",
+    "equipment_df = pd.read_csv('~/pvOps/examples/example_data/mappings_equipment.csv')\n",
+    "pv_terms_df = pd.read_csv('~/pvOps/examples/example_data/mappings_pv_terms.csv')\n",
+    "pv_reference_df = pd.concat([equipment_df, pv_terms_df])\n",
+    "om_df = utils.remap_words_in_text(om_df=om_df, remapping_df=pv_reference_df, remapping_col_dict=col_dict)\n",
+    "\n",
+    "om_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Step 2: Search for keywords to use as labels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add asset labels from keyword reference dict\n",
+    "om_df = get_attributes_from_keywords(om_df=om_df,\n",
+    "                                     col_dict=col_dict,\n",
+    "                                     reference_df=equipment_df)\n",
+    "om_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Step 3: Metrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get accuracy measures and count metrics\n",
+    "PREDICT_COL = col_dict['predicted_col']\n",
+    "LABEL_COL = col_dict['attribute_col']\n",
+    "\n",
+    "# fraction of rows with a keyword of interest, out of all rows\n",
+    "label_count = om_df[PREDICT_COL].count() / len(om_df)\n",
+    "\n",
+    "# replace 'Other' values with 'Unknown'\n",
+    "om_df[LABEL_COL] = om_df[LABEL_COL].replace('other', 'unknown')\n",
+    "# replace NaN values to use accuracy score\n",
+    "om_df[[LABEL_COL, PREDICT_COL]] = om_df[[LABEL_COL, PREDICT_COL]].fillna('unknown')\n",
+    "acc_score = accuracy_score(y_true=om_df[LABEL_COL], y_pred=om_df[PREDICT_COL])\n",
+    "\n",
+    "msg = f'{label_count:.2%} of entries had a keyword of interest, with {acc_score:.2%} accuracy.'\n",
+    "print(msg)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Step 4: Visualization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# plot confusion matrix\n",
+    "title = 'Confusion Matrix of Actual and Predicted Asset Labels'\n",
+    "visualize_classification_confusion_matrix(om_df, col_dict, title)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pvops/text/classify.py b/pvops/text/classify.py
index 066600e..2756b2b 100644
--- a/pvops/text/classify.py
+++ b/pvops/text/classify.py
@@ -8,6 +8,7 @@
 import pandas as pd
 import copy
 
+from pvops.text.preprocess import get_keywords_of_interest
 
 def classification_deployer(
     X,
@@ -187,3 +188,53 @@ def classification_deployer(
             best_gs_instance = gs_clf
 
     return pd.concat(rows, axis=1).T, best_gs_instance.best_estimator_
+
+def get_attributes_from_keywords(om_df, col_dict, reference_df, reference_col_dict=None):
+    """Label each O&M entry with the keyword categories found in its text.
+
+    Each value of the ``data`` column is passed to ``get_keywords_of_interest``; the
+    categories found are stored in ``predicted_col``, one row per category.
+
+    Parameters
+    ----------
+    om_df : pd.DataFrame
+        Dataframe to search for keywords of interest. It is not modified.
+    col_dict : dict of {str : str}
+        A dictionary that contains the column names needed:
+
+        - data : string, should be assigned to associated column which stores the tokenized text logs
+        - predicted_col : string, will be used to create keyword search label column
+    reference_df : DataFrame
+        Holds columns that define the reference dictionary to search for keywords of interest,
+        Note: This function can currently only handle single words, no n-gram functionality.
+    reference_col_dict : dict of {str : str}, optional
+        A dictionary that contains the column names of reference_df. If omitted, the
+        ``remapping_col_from`` / ``remapping_col_to`` entries of col_dict are used instead:
+
+        - reference_col_from : string, column of reference_df with possible input reference values
+          Example: pd.Series(['inverter', 'invert', 'inv'])
+        - reference_col_to : string, column of reference_df with output reference values of interest
+          Example: pd.Series(['inverter', 'inverter', 'inverter'])
+
+    Returns
+    -------
+    om_df: pd.DataFrame
+        Copy of the input with predicted_col added. Entries with several keywords
+        are repeated once per keyword; entries with none get NaN.
+    """
+    if reference_col_dict is None:
+        reference_col_dict = {'reference_col_from': col_dict['remapping_col_from'],
+                              'reference_col_to': col_dict['remapping_col_to']}
+    # work on a copy so the caller's dataframe does not silently gain a column
+    om_df = om_df.copy()
+    om_df[col_dict['predicted_col']] = om_df[col_dict['data']].apply(get_keywords_of_interest,
+                                                                     reference_df=reference_df,
+                                                                     reference_col_dict=reference_col_dict)
+
+    # each multi-category now in its own row, some logs have multiple equipment issues
+    multiple_keywords_df = om_df[om_df[col_dict['predicted_col']].str.len() > 1]
+    om_df = om_df.explode(col_dict['predicted_col'])
+
+    print(f'{len(multiple_keywords_df)} entries had multiple keywords of interest. Reference: {multiple_keywords_df.index} in original dataframe.')
+
+    return om_df
\ No newline at end of file
diff --git a/pvops/text/preprocess.py b/pvops/text/preprocess.py
index 7384785..3936d95 100644
--- a/pvops/text/preprocess.py
+++ b/pvops/text/preprocess.py
@@ -449,3 +449,47 @@ def text_remove_numbers_stopwords(document, lst_stopwords):
     document = " ".join(document)
 
     return document
+
+
+def get_keywords_of_interest(document_tok, reference_df, reference_col_dict):
+    """Find keywords of interest in a tokenized document using a reference table.
+
+    Tokens that appear in the ``reference_col_from`` column of reference_df are
+    mapped to their ``reference_col_to`` category. For example, if the string
+    'inv' is among the tokens and maps to 'inverter', return ['inverter'].
+
+    Parameters
+    ----------
+    document_tok : list of str, or str
+        Tokenized text. A plain string is split on whitespace first, since
+        intersecting it directly would compare single characters, not words.
+    reference_df : DataFrame
+        Holds columns that define the reference dictionary to search for keywords of interest,
+        Note: This function can currently only handle single words, no n-gram functionality.
+    reference_col_dict : dict of {str : str}
+        A dictionary that contains the column names that describes how
+        referencing is going to be done
+
+        - reference_col_from : string, should be assigned to
+          associated column name in reference_df that are possible input reference values
+          Example: pd.Series(['inverter', 'invert', 'inv'])
+        - reference_col_to : string, should be assigned to
+          associated column name in reference_df that are the output reference values
+          of interest
+          Example: pd.Series(['inverter', 'inverter', 'inverter'])
+
+    Returns
+    -------
+    included_keywords: list of str
+        Sorted, de-duplicated categories found in the document; empty if none match.
+    """
+    REFERENCE_COL_FROM = reference_col_dict["reference_col_from"]
+    REFERENCE_COL_TO = reference_col_dict["reference_col_to"]
+    reference_dict = dict(zip(reference_df[REFERENCE_COL_FROM], reference_df[REFERENCE_COL_TO]))
+
+    if isinstance(document_tok, str):
+        document_tok = document_tok.split()
+
+    # set intersection keeps only tokens that are reference keys; sort for a stable order
+    overlap_keywords = reference_dict.keys() & document_tok
+    return sorted({reference_dict[x] for x in overlap_keywords}, key=str)
diff --git a/pvops/text/utils.py b/pvops/text/utils.py
index f4095c3..7f411d6 100644
--- a/pvops/text/utils.py
+++ b/pvops/text/utils.py
@@ -80,3 +80,52 @@ def remap_attributes(om_df, remapping_df, remapping_col_dict,
               "{sum(df[ATTRIBUTE_COL].isna())}")
 
     return df
+
+def remap_words_in_text(om_df, remapping_df, remapping_col_dict):
+    """Remap whole words in a text column of om_df using columns of remapping_df.
+
+    Matching is case-insensitive: the text column and both remapping columns are
+    lower-cased first, so the returned text is lower case. Neither input is modified.
+
+    Parameters
+    ----------
+    om_df : DataFrame
+        A pandas dataframe containing O&M note data
+    remapping_df : DataFrame
+        Holds columns that define the remappings
+    remapping_col_dict : dict of {str : str}
+        A dictionary that contains the column names that describes how
+        remapping is going to be done
+
+        - data : string, should be assigned to associated
+          column name in om_df which will have its text remapped
+        - remapping_col_from : string, should be assigned
+          to associated column name in remapping_df that holds the
+          words (treated as regular expressions) to look for
+        - remapping_col_to : string, should be assigned to
+          associated column name in remapping_df that contains the
+          final mapped entries
+
+    Returns
+    -------
+    DataFrame
+        copy of om_df with the text column lower-cased and remapped
+    """
+    df = om_df.copy()
+    TEXT_COL = remapping_col_dict["data"]
+    REMAPPING_COL_FROM = remapping_col_dict["remapping_col_from"]
+    REMAPPING_COL_TO = remapping_col_dict["remapping_col_to"]
+
+    # case-insensitive matching: compare everything in lower case
+    words_from = remapping_df[REMAPPING_COL_FROM].str.lower()
+    words_to = remapping_df[REMAPPING_COL_TO].str.lower()
+    df[TEXT_COL] = df[TEXT_COL].str.lower()
+
+    # Anchor every term at word boundaries so that e.g. 'inv' -> 'inverter' cannot
+    # rewrite the inside of 'inverter'; identity and non-string (NaN) terms are skipped.
+    pairs = zip(words_from, words_to)
+    renamer = {rf"\b(?:{old})\b": new for old, new in pairs if isinstance(old, str) and old != new}
+    if renamer:
+        df[TEXT_COL] = df[TEXT_COL].replace(renamer, regex=True)
+
+    return df
\ No newline at end of file
diff --git a/pvops/text/visualize.py b/pvops/text/visualize.py
index 062ecd5..467ed73 100644
--- a/pvops/text/visualize.py
+++ b/pvops/text/visualize.py
@@ -2,6 +2,7 @@
 import matplotlib
 import matplotlib.pyplot as plt
 import networkx as nx
+from sklearn.metrics import ConfusionMatrixDisplay
 
 # data structures
 import numpy as np
@@ -381,3 +382,46 @@ def visualize_word_frequency_plot(
     fig = plt.figure(figsize=(12, 6))
     fd.plot(30, cumulative=False, title=title, figure=fig, **graph_aargs)
     return fd
+
+
+def visualize_classification_confusion_matrix(om_df, col_dict, title=''):
+    """Visualize confusion matrix comparing known categorical values, and predicted categorical values.
+
+    Rows whose predicted label never occurs as an actual label are left out of the
+    matrix; the labels dropped this way are listed in a caption at the bottom of the figure.
+
+    Parameters
+    ----------
+    om_df : DataFrame
+        A pandas dataframe containing O&M data, which contains columns specified in col_dict
+    col_dict : dict of {str : str}
+        A dictionary that contains the column names needed:
+
+        - attribute_col : string, column of om_df holding the actual labels
+        - predicted_col : string, column of om_df holding the predicted labels
+    title : str
+        Optional, title of plot
+
+    Returns
+    -------
+    sklearn.metrics.ConfusionMatrixDisplay
+        Display object; the Matplotlib figure and axes are its figure_ and ax_ attributes.
+    """
+    act_col = col_dict['attribute_col']
+    pred_col = col_dict['predicted_col']
+
+    # drop any predicted labels with no actual labels in the data, for a cleaner visual
+    actual_labels = om_df[act_col].unique()
+    no_real_values = [cat for cat in om_df[pred_col].unique() if cat not in actual_labels]
+    om_df = om_df[~om_df[pred_col].isin(no_real_values)]
+
+    plt.rcParams.update({'font.size': 8})
+    # from_predictions already draws the matrix; calling .plot() again would open a second figure
+    cm_display = ConfusionMatrixDisplay.from_predictions(y_true=om_df[act_col], y_pred=om_df[pred_col],
+                                                         normalize='true', xticks_rotation='vertical')
+    cm_display.ax_.set_title(title)
+    cm_display.figure_.tight_layout()
+    if no_real_values:
+        caption_txt = f'NOTE: Predicted values {no_real_values} had no actual values in the dataset.'
+        cm_display.figure_.text(0.00, 0.01, caption_txt, wrap=True, fontsize=7)
+    return cm_display