diff --git a/code/markdown_to_json_parser.py b/code/markdown_to_json_parser.py
index f782408..20671a3 100644
--- a/code/markdown_to_json_parser.py
+++ b/code/markdown_to_json_parser.py
@@ -346,7 +346,30 @@ def parse_paper_links(html):
 def extract_paper_data(paper_section, columns):
     title_column = columns[0]
     # title = title_column.get_text(strip=True)
-    title = title_column.a.encode_contents().decode("utf-8")
+    title = (
+        title_column.a.encode_contents().decode("utf-8")
+        if title_column.a is not None
+        else (
+            title_column.encode_contents().decode("utf-8")
+            if title_column.get_text(strip=True) is not None
+            else None
+        )
+    )
+
+    title = re.sub(r"<(?:br\s*/?>|img[^>]*>)", "", title)
+    title = title.strip()
+
+    html_entities = {
+        "&amp;": "&",
+        "&lt;": "<",
+        "&gt;": ">",
+        "&quot;": '"',
+        "&apos;": "'",
+    }
+    title = re.sub(
+        r"(&\w+;)", lambda x: html_entities.get(x.group(0), x.group(0)), title
+    )
+
     title_link = title_column.find("a")
     title_page = title_link["href"] if title_link else None