From 9d50c87d5dcc193a2a39a1044863bb185337dc35 Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 11:38:08 +1300 Subject: [PATCH 01/16] don't send covers: [None] to infobase --- openlibrary/catalog/add_book/__init__.py | 1 + openlibrary/plugins/importapi/code.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/openlibrary/catalog/add_book/__init__.py b/openlibrary/catalog/add_book/__init__.py index 8e3c48e07da..f7bc0b7e2f1 100644 --- a/openlibrary/catalog/add_book/__init__.py +++ b/openlibrary/catalog/add_book/__init__.py @@ -542,6 +542,7 @@ def load_data(rec, account=None): cover_id = None if cover_url: cover_id = add_cover(cover_url, ekey, account=account) + if cover_id: edition['covers'] = [cover_id] edits = [] # Things (Edition, Work, Authors) to be saved diff --git a/openlibrary/plugins/importapi/code.py b/openlibrary/plugins/importapi/code.py index 4d2ba2b9800..9106bafccd8 100644 --- a/openlibrary/plugins/importapi/code.py +++ b/openlibrary/plugins/importapi/code.py @@ -313,8 +313,8 @@ def populate_edition_data(self, edition, identifier): :return: Edition record """ edition['ocaid'] = identifier - edition['source_records'] = "ia:" + identifier - edition['cover'] = "{0}/download/{1}/{1}/page/title.jpg".format(IA_BASE_URL, identifier) + edition['source_records'] = 'ia:' + identifier + edition['cover'] = '{0}/download/{1}/page/title.jpg'.format(IA_BASE_URL, identifier) return edition def get_marc_record(self, identifier): From 1d4e612e2e8433cc7f9964b30744b97059023e55 Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 14:43:03 +1300 Subject: [PATCH 02/16] remove fake subjects from ia code --- openlibrary/core/ia.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 40a12680bac..fe3b84d8bb9 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -1,4 +1,4 @@ -"""Library for interacting wih archive.org. +"""Library for interacting with archive.org. """ import os import urllib2 @@ -18,6 +18,7 @@ VALID_READY_REPUB_STATES = ['4', '19', '20', '22'] + def get_item_json(itemid): itemid = web.safestr(itemid.strip()) url = 'http://archive.org/metadata/%s' % itemid @@ -30,6 +31,7 @@ def get_item_json(itemid): stats.end() return {} + def extract_item_metadata(item_json): metadata = process_metadata_dict(item_json.get('metadata', {})) if metadata: @@ -42,12 +44,14 @@ def extract_item_metadata(item_json): metadata['_filenames'] = [f['name'] for f in files] return metadata + def get_metadata(itemid): item_json = get_item_json(itemid) return extract_item_metadata(item_json) get_metadata = cache.memcache_memoize(get_metadata, key_prefix='ia.get_metadata', timeout=5*60) + def process_metadata_dict(metadata): """Process metadata dict to make sure multi-valued fields like collection and external-identifier are always lists. @@ -66,6 +70,7 @@ def process_item(k, v): return (k, v) return dict(process_item(k, v) for k, v in metadata.items() if v) + def _old_get_meta_xml(itemid): """Returns the contents of meta_xml as JSON. """ @@ -92,10 +97,12 @@ def _old_get_meta_xml(itemid): logger.error("Failed to parse metaxml for %s", itemid, exc_info=True) return web.storage() + def get_meta_xml(itemid): # use metadata API instead of parsing meta xml manually return get_metadata(itemid) + def xml2dict(xml, **defaults): """Converts xml to python dictionary assuming that the xml is not nested. 
@@ -122,9 +129,9 @@ def xml2dict(xml, **defaults): d[key].add(value) else: d[key] = value - return d + def _get_metadata(itemid): """Returns metadata by querying the archive.org metadata API. """ @@ -140,12 +147,14 @@ def _get_metadata(itemid): # cache the results in memcache for a minute _get_metadata = web.memoize(_get_metadata, expires=60) + def locate_item(itemid): """Returns (hostname, path) for the item. """ d = _get_metadata(itemid) return d.get('server'), d.get('dir') + def edition_from_item_metadata(itemid, metadata): """Converts the item metadata into a form suitable to be used as edition in Open Library. @@ -158,6 +167,7 @@ def edition_from_item_metadata(itemid, metadata): e.add_metadata(metadata) return e + def get_item_manifest(item_id, item_server, item_path): url = 'https://%s/BookReader/BookReaderJSON.php' % item_server url += "?itemPath=%s&itemId=%s&server=%s" % (item_path, item_id, item_server) @@ -170,12 +180,14 @@ def get_item_manifest(item_id, item_server, item_path): stats.end() return {} + def get_item_status(itemid, metadata, **server): item_server = server.pop('item_server', None) item_path = server.pop('item_path', None) return ItemEdition.get_item_status(itemid, metadata, item_server=item_server, item_path=item_path) + class ItemEdition(dict): """Class to convert item metadata into edition dict. """ @@ -264,15 +276,12 @@ def is_valid_item(cls, itemid, metadata): def add_metadata(self, metadata): self.metadata = metadata - self.add('title') self.add('description', 'description') self.add_list('publisher', 'publishers') - self.add_list("creator", "author_names") + self.add_list('creator', 'author_names') self.add('date', 'publish_date') - self.add_isbns() - self.add_subjects() def add(self, key, key2=None): metadata = self.metadata @@ -298,7 +307,7 @@ def add_list(self, key, key2): metadata = self.metadata key2 = key2 or key - # sometimes the empty values are represneted as {} in metadata API. Avoid them. + # sometimes the empty values are represented as {} in metadata API. Avoid them. if key in metadata and metadata[key] != {}: value = metadata[key] if not isinstance(value, list): @@ -321,15 +330,6 @@ def add_isbns(self): if isbn_13: self["isbn_13"] = isbn_13 - def add_subjects(self): - collections = self.metadata.get("collection", []) - mapping = { - "inlibrary": "In library", - "lendinglibrary": "Lending library" - } - subjects = [subject for c, subject in mapping.items() if c in collections] - if subjects: - self['subjects'] = subjects _ia_db = None def get_ia_db(configfile=None): From eb5e215a0e9b06c0189006cbe61e1655fac6d4b5 Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 14:43:47 +1300 Subject: [PATCH 03/16] remove unused old method --- openlibrary/core/ia.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index fe3b84d8bb9..30855561ce9 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -71,33 +71,6 @@ def process_item(k, v): return dict(process_item(k, v) for k, v in metadata.items() if v) -def _old_get_meta_xml(itemid): - """Returns the contents of meta_xml as JSON. 
- """ - itemid = web.safestr(itemid.strip()) - url = 'http://www.archive.org/download/%s/%s_meta.xml' % (itemid, itemid) - try: - stats.begin('archive.org', url=url) - metaxml = urllib2.urlopen(url).read() - stats.end() - except IOError: - logger.error("Failed to download _meta.xml for %s", itemid, exc_info=True) - stats.end() - return web.storage() - - # archive.org returns html on internal errors. - # Checking for valid xml before trying to parse it. - if not metaxml.strip().startswith(" Date: Fri, 10 Jan 2020 14:52:02 +1300 Subject: [PATCH 04/16] remove custom, and unused, XML parser --- openlibrary/core/ia.py | 30 ------------------------------ openlibrary/tests/core/test_ia.py | 10 ---------- 2 files changed, 40 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 30855561ce9..8240b86770c 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -3,7 +3,6 @@ import os import urllib2 import datetime -from xml.dom import minidom import simplejson import web import logging @@ -76,35 +75,6 @@ def get_meta_xml(itemid): return get_metadata(itemid) -def xml2dict(xml, **defaults): - """Converts xml to python dictionary assuming that the xml is not nested. - - To get some tag as a list/set, pass a keyword argument with list/set as value. - - >>> xml2dict('12') - {'x': 2} - >>> xml2dict('12', x=[]) - {'x': [1, 2]} - """ - d = defaults - dom = minidom.parseString(xml) - - for node in dom.documentElement.childNodes: - if node.nodeType == node.TEXT_NODE or len(node.childNodes) == 0: - continue - else: - key = node.tagName - value = node.childNodes[0].data - - if key in d and isinstance(d[key], list): - d[key].append(value) - elif key in d and isinstance(d[key], set): - d[key].add(value) - else: - d[key] = value - return d - - def _get_metadata(itemid): """Returns metadata by querying the archive.org metadata API. 
""" diff --git a/openlibrary/tests/core/test_ia.py b/openlibrary/tests/core/test_ia.py index 6a57a22ec26..41b0d9286d1 100644 --- a/openlibrary/tests/core/test_ia.py +++ b/openlibrary/tests/core/test_ia.py @@ -1,13 +1,5 @@ -from __future__ import print_function from openlibrary.core import ia -def test_xml2dict(): - assert ia.xml2dict("12") == {"x": "1", "y": "2"} - assert ia.xml2dict("12", x=[]) == {"x": ["1"], "y": "2"} - - assert ia.xml2dict("12") == {"x": "2"} - assert ia.xml2dict("12", x=[]) == {"x": ["1", "2"]} - def test_get_metaxml(monkeypatch, mock_memcache): import StringIO import urllib2 @@ -27,8 +19,6 @@ def urlopen(url): } } """ - - print(ia.get_meta_xml("foo00bar")) assert ia.get_meta_xml("foo00bar") == { "title": "Foo", "identifier": "foo00bar", From 0e2983c6bdd45282f7b7b22d0832d0d6ed4e4a4c Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 15:05:03 +1300 Subject: [PATCH 05/16] use ia URL from config, and move near duplicate methods close together --- openlibrary/core/ia.py | 59 +++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 8240b86770c..eb189a66c18 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -15,12 +15,13 @@ logger = logging.getLogger('openlibrary.ia') +IA_BASE_URL = config.get('ia_base_url') VALID_READY_REPUB_STATES = ['4', '19', '20', '22'] def get_item_json(itemid): itemid = web.safestr(itemid.strip()) - url = 'http://archive.org/metadata/%s' % itemid + url = '%s/metadata/%s' % (IA_BASE_URL, itemid) try: stats.begin('archive.org', url=url) metadata_json = urllib2.urlopen(url).read() @@ -31,6 +32,34 @@ def get_item_json(itemid): return {} +def get_metadata(itemid): + item_json = get_item_json(itemid) + return extract_item_metadata(item_json) + +get_metadata = cache.memcache_memoize(get_metadata, key_prefix='ia.get_metadata', timeout=5*60) + + +def _get_metadata(itemid): + """Returns metadata by querying the archive.org metadata API. + """ + url = '%s/metadata/%s' % (IA_BASE_URL, itemid) + try: + stats.begin("archive.org", url=url) + text = urllib2.urlopen(url).read() + stats.end() + return simplejson.loads(text) + except (IOError, ValueError): + return None + +# cache the results in memcache for a minute +_get_metadata = web.memoize(_get_metadata, expires=60) + + +def get_meta_xml(itemid): + # use metadata API instead of parsing meta xml manually + return get_metadata(itemid) + + def extract_item_metadata(item_json): metadata = process_metadata_dict(item_json.get('metadata', {})) if metadata: @@ -44,13 +73,6 @@ def extract_item_metadata(item_json): return metadata -def get_metadata(itemid): - item_json = get_item_json(itemid) - return extract_item_metadata(item_json) - -get_metadata = cache.memcache_memoize(get_metadata, key_prefix='ia.get_metadata', timeout=5*60) - - def process_metadata_dict(metadata): """Process metadata dict to make sure multi-valued fields like collection and external-identifier are always lists. @@ -70,27 +92,6 @@ def process_item(k, v): return dict(process_item(k, v) for k, v in metadata.items() if v) -def get_meta_xml(itemid): - # use metadata API instead of parsing meta xml manually - return get_metadata(itemid) - - -def _get_metadata(itemid): - """Returns metadata by querying the archive.org metadata API. 
- """ - url = "http://www.archive.org/metadata/%s" % itemid - try: - stats.begin("archive.org", url=url) - text = urllib2.urlopen(url).read() - stats.end() - return simplejson.loads(text) - except (IOError, ValueError): - return None - -# cache the results in memcache for a minute -_get_metadata = web.memoize(_get_metadata, expires=60) - - def locate_item(itemid): """Returns (hostname, path) for the item. """ From 0f2dedfa3ef06ebd24c87003acd7cfa0ff40b81f Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 15:32:06 +1300 Subject: [PATCH 06/16] deprecation warning to one method for getting metadata --- openlibrary/core/ia.py | 12 ++++++++---- openlibrary/tests/core/test_ia.py | 24 +++++++++--------------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index eb189a66c18..6de3a7b576c 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -1,14 +1,17 @@ """Library for interacting with archive.org. """ + +import cache +import logging import os -import urllib2 -import datetime import simplejson +import urllib2 +import warnings import web -import logging + from infogami import config from infogami.utils import stats -import cache + from openlibrary.utils.dateutil import date_n_days_ago import six @@ -57,6 +60,7 @@ def _get_metadata(itemid): def get_meta_xml(itemid): # use metadata API instead of parsing meta xml manually + warnings.warn('Deprecated, use ia.get_metadata(itemid) instead.', DeprecationWarning) return get_metadata(itemid) diff --git a/openlibrary/tests/core/test_ia.py b/openlibrary/tests/core/test_ia.py index 41b0d9286d1..d58cf2e55f4 100644 --- a/openlibrary/tests/core/test_ia.py +++ b/openlibrary/tests/core/test_ia.py @@ -1,16 +1,9 @@ -from openlibrary.core import ia - -def test_get_metaxml(monkeypatch, mock_memcache): - import StringIO - import urllib2 +from StringIO import StringIO +import urllib2 - metadata_json = None - def urlopen(url): - return StringIO.StringIO(metadata_json) - - monkeypatch.setattr(urllib2, "urlopen", urlopen) +from openlibrary.core import ia - # test with correct xml +def test_get_metadata(monkeypatch, mock_memcache): metadata_json = """{ "metadata": { "title": "Foo", @@ -19,7 +12,8 @@ def urlopen(url): } } """ - assert ia.get_meta_xml("foo00bar") == { + monkeypatch.setattr(urllib2, "urlopen", lambda url: StringIO(metadata_json)) + assert ia.get_metadata('foo00bar') == { "title": "Foo", "identifier": "foo00bar", "collection": ["printdisabled", "inlibrary"], @@ -27,6 +21,6 @@ def urlopen(url): "_filenames": [] } - # test with metadata errors - metadata_json = "{}" - assert ia.get_meta_xml("foo02bar") == {} +def test_get_metadata_empty(monkeypatch, mock_memcache): + monkeypatch.setattr(urllib2, "urlopen", lambda url: StringIO("{}")) + assert ia.get_metadata('foo02bar') == {} From 3f5dbd459ed2165a7252b06f234d776d1c6743cd Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 15:54:55 +1300 Subject: [PATCH 07/16] replace deprecated metadata call --- openlibrary/plugins/books/dynlinks.py | 2 +- openlibrary/plugins/books/tests/test_dynlinks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openlibrary/plugins/books/dynlinks.py b/openlibrary/plugins/books/dynlinks.py index 6d31c9e22be..cc4e1d6a244 100644 --- a/openlibrary/plugins/books/dynlinks.py +++ b/openlibrary/plugins/books/dynlinks.py @@ -364,7 +364,7 @@ def process_result_for_viewapi(result): def get_ia_availability(itemid): - collections = ia.get_meta_xml(itemid).get("collection", 
[]) + collections = ia.get_metadata(itemid).get('collection', []) if 'lendinglibrary' in collections or 'inlibrary' in collections: return 'borrow' diff --git a/openlibrary/plugins/books/tests/test_dynlinks.py b/openlibrary/plugins/books/tests/test_dynlinks.py index fe7f2795962..42e476e6d47 100644 --- a/openlibrary/plugins/books/tests/test_dynlinks.py +++ b/openlibrary/plugins/books/tests/test_dynlinks.py @@ -236,7 +236,7 @@ def monkeypatch_ol(monkeypatch): mock.default = [] monkeypatch.setattr(dynlinks, "ol_get_many", mock) - monkeypatch.setattr(ia, "get_meta_xml", lambda itemid: web.storage()) + monkeypatch.setattr(ia, "get_metadata", lambda itemid: web.storage()) def test_query_keys(monkeypatch): monkeypatch_ol(monkeypatch) From fdea6497803fb42446e89d9e2b8e8214e97c678e Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 16:11:07 +1300 Subject: [PATCH 08/16] replace deprecated metadata method --- openlibrary/mocks/mock_ia.py | 18 +++++++++--------- openlibrary/plugins/books/readlinks.py | 2 +- openlibrary/plugins/openlibrary/home.py | 2 +- openlibrary/plugins/upstream/models.py | 6 +++--- openlibrary/solr/process_stats.py | 2 +- openlibrary/tests/core/test_ia.py | 4 ++-- scripts/2011/09/generate_deworks.py | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/openlibrary/mocks/mock_ia.py b/openlibrary/mocks/mock_ia.py index c596663030b..032f6322f88 100644 --- a/openlibrary/mocks/mock_ia.py +++ b/openlibrary/mocks/mock_ia.py @@ -10,21 +10,21 @@ def mock_ia(request, monkeypatch): from openlibrary.core import ia def test_ia(mock_ia): - assert ia.get_meta_xml("foo") == {} + assert ia.get_metadata("foo") == {} - mock_ia.set_meta_xml("foo", {"collection": ["a", "b"]}) - assert ia.get_meta_xml("foo") == {"collection": ["a", "b"]} + mock_ia.set_metadata("foo", {"collection": ["a", "b"]}) + assert ia.get_metadata("foo") == {"collection": ["a", "b"]} """ - metaxml = {} + metadata = {} class IA: - def set_meta_xml(self, itemid, meta): - metaxml[itemid] = meta + def set_metadata(self, itemid, meta): + metadata[itemid] = meta - def get_meta_xml(self, itemid): - return metaxml.get(itemid, {}) + def get_metadata(self, itemid): + return metadata.get(itemid, {}) mock_ia = IA() - monkeypatch.setattr(ia, "get_meta_xml", ia.get_meta_xml) + monkeypatch.setattr(ia, 'get_metadata', ia.get_metadata) return mock_ia diff --git a/openlibrary/plugins/books/readlinks.py b/openlibrary/plugins/books/readlinks.py index 011bcffe5ca..e9ef54da173 100644 --- a/openlibrary/plugins/books/readlinks.py +++ b/openlibrary/plugins/books/readlinks.py @@ -331,7 +331,7 @@ def process(self, req): self.wkey_to_iaids = dict((wkey, get_work_iaids(wkey)[:iaid_limit]) for wkey in self.works) iaids = sum(self.wkey_to_iaids.values(), []) - self.iaid_to_meta = dict((iaid, ia.get_meta_xml(iaid)) for iaid in iaids) + self.iaid_to_meta = dict((iaid, ia.get_metadata(iaid)) for iaid in iaids) def lookup_iaids(iaids): step = 10 diff --git a/openlibrary/plugins/openlibrary/home.py b/openlibrary/plugins/openlibrary/home.py index ff7ce08eae9..d21c079c99c 100644 --- a/openlibrary/plugins/openlibrary/home.py +++ b/openlibrary/plugins/openlibrary/home.py @@ -248,7 +248,7 @@ def get_authors(doc): d.cover_url = 'https://archive.org/services/img/%s' % d.ocaid if d.ocaid: - collections = ia.get_meta_xml(d.ocaid).get("collection", []) + collections = ia.get_metadata(d.ocaid).get('collection', []) if 'lendinglibrary' in collections or 'inlibrary' in collections: d.borrow_url = book.url("/borrow") diff --git 
a/openlibrary/plugins/upstream/models.py b/openlibrary/plugins/upstream/models.py index 3bfe36c5a3a..275639c732b 100644 --- a/openlibrary/plugins/upstream/models.py +++ b/openlibrary/plugins/upstream/models.py @@ -148,9 +148,9 @@ def get_ia_meta_fields(self): if not self.get('ocaid', None): meta = {} else: - meta = ia.get_meta_xml(self.ocaid) - meta.setdefault("external-identifier", []) - meta.setdefault("collection", []) + meta = ia.get_metadata(self.ocaid) + meta.setdefault('external-identifier', []) + meta.setdefault('collection', []) self._ia_meta_fields = meta return self._ia_meta_fields diff --git a/openlibrary/solr/process_stats.py b/openlibrary/solr/process_stats.py index 64ff3b4c3d4..99994d83e21 100644 --- a/openlibrary/solr/process_stats.py +++ b/openlibrary/solr/process_stats.py @@ -76,7 +76,7 @@ def _get_metadata(ia_id): if meta: meta['collection'] = meta['collection'].split(";") else: - meta = ia.get_meta_xml(ia_id) + meta = ia.get_metadata(ia_id) return meta def preload(entries): diff --git a/openlibrary/tests/core/test_ia.py b/openlibrary/tests/core/test_ia.py index d58cf2e55f4..72a6d36f535 100644 --- a/openlibrary/tests/core/test_ia.py +++ b/openlibrary/tests/core/test_ia.py @@ -12,7 +12,7 @@ def test_get_metadata(monkeypatch, mock_memcache): } } """ - monkeypatch.setattr(urllib2, "urlopen", lambda url: StringIO(metadata_json)) + monkeypatch.setattr(urllib2, 'urlopen', lambda url: StringIO(metadata_json)) assert ia.get_metadata('foo00bar') == { "title": "Foo", "identifier": "foo00bar", @@ -22,5 +22,5 @@ def test_get_metadata(monkeypatch, mock_memcache): } def test_get_metadata_empty(monkeypatch, mock_memcache): - monkeypatch.setattr(urllib2, "urlopen", lambda url: StringIO("{}")) + monkeypatch.setattr(urllib2, 'urlopen', lambda url: StringIO('{}')) assert ia.get_metadata('foo02bar') == {} diff --git a/scripts/2011/09/generate_deworks.py b/scripts/2011/09/generate_deworks.py index be6d74e817e..5ee642529e2 100644 --- a/scripts/2011/09/generate_deworks.py +++ b/scripts/2011/09/generate_deworks.py @@ -315,7 +315,7 @@ def make_ia_db(editions_dump_file): print >> f, ocaid """ if ocaid: - metaxml = ia.get_meta_xml(ocaid) + metaxml = ia.get_metadata(ocaid) db[ocaid] = simplejson.dumps(metaxml) """ f.close() From c93d0bb16a5cf746d01114183c047ee0ab0db867 Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 16:16:52 +1300 Subject: [PATCH 09/16] remove deprecated def now nothing uses it --- openlibrary/core/ia.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 6de3a7b576c..b843c761625 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -6,7 +6,6 @@ import os import simplejson import urllib2 -import warnings import web from infogami import config @@ -58,12 +57,6 @@ def _get_metadata(itemid): _get_metadata = web.memoize(_get_metadata, expires=60) -def get_meta_xml(itemid): - # use metadata API instead of parsing meta xml manually - warnings.warn('Deprecated, use ia.get_metadata(itemid) instead.', DeprecationWarning) - return get_metadata(itemid) - - def extract_item_metadata(item_json): metadata = process_metadata_dict(item_json.get('metadata', {})) if metadata: From f6f6f49a99c2061be62cce22078e1495b2de6eca Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 16:18:11 +1300 Subject: [PATCH 10/16] fix typo in variable name --- openlibrary/core/ia.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 
b843c761625..84698d0233e 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -79,11 +79,11 @@ def process_metadata_dict(metadata): non-list cases. This function makes sure the known multi-valued fields are always lists. """ - mutlivalued = set(['collection', 'external-identifier', 'isbn', 'subject', 'oclc-id']) + multivalued = set(['collection', 'external-identifier', 'isbn', 'subject', 'oclc-id']) def process_item(k, v): - if k in mutlivalued and not isinstance(v, list): + if k in multivalued and not isinstance(v, list): v = [v] - elif k not in mutlivalued and isinstance(v, list): + elif k not in multivalued and isinstance(v, list): v = v[0] return (k, v) return dict(process_item(k, v) for k, v in metadata.items() if v) From 7ffc79f634497bfdc9b1dec786582cb9615d68c1 Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 16:48:08 +1300 Subject: [PATCH 11/16] DRY up get_metadata methods --- openlibrary/core/ia.py | 25 +++++++------------------ openlibrary/plugins/importapi/code.py | 19 ++++++------------- 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 84698d0233e..307875e69fc 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -21,7 +21,9 @@ VALID_READY_REPUB_STATES = ['4', '19', '20', '22'] -def get_item_json(itemid): +def _get_metadata(itemid): + """Returns metadata by querying the archive.org metadata API. + """ itemid = web.safestr(itemid.strip()) url = '%s/metadata/%s' % (IA_BASE_URL, itemid) try: @@ -33,30 +35,17 @@ def get_item_json(itemid): stats.end() return {} +# cache the results in memcache for a minute +_get_metadata = web.memoize(_get_metadata, expires=60) + def get_metadata(itemid): - item_json = get_item_json(itemid) + item_json = _get_metadata(itemid) return extract_item_metadata(item_json) get_metadata = cache.memcache_memoize(get_metadata, key_prefix='ia.get_metadata', timeout=5*60) -def _get_metadata(itemid): - """Returns metadata by querying the archive.org metadata API. - """ - url = '%s/metadata/%s' % (IA_BASE_URL, itemid) - try: - stats.begin("archive.org", url=url) - text = urllib2.urlopen(url).read() - stats.end() - return simplejson.loads(text) - except (IOError, ValueError): - return None - -# cache the results in memcache for a minute -_get_metadata = web.memoize(_get_metadata, expires=60) - - def extract_item_metadata(item_json): metadata = process_metadata_dict(item_json.get('metadata', {})) if metadata: diff --git a/openlibrary/plugins/importapi/code.py b/openlibrary/plugins/importapi/code.py index 9106bafccd8..1cac17f37f4 100644 --- a/openlibrary/plugins/importapi/code.py +++ b/openlibrary/plugins/importapi/code.py @@ -206,31 +206,24 @@ def get_subfield(field, id_subfield): return json.dumps(result) # Case 1 - Is this a valid Archive.org item? - try: - item_json = ia.get_item_json(identifier) - item_server = item_json['server'] - item_path = item_json['dir'] - except KeyError: - return self.error("invalid-ia-identifier", "%s not found" % identifier) - metadata = ia.extract_item_metadata(item_json) + metadata = ia.get_metadata(identifier) if not metadata: - return self.error("invalid-ia-identifier") + return self.error('invalid-ia-identifier', '%s not found' % identifier) # Case 2 - Does the item have an openlibrary field specified? # The scan operators search OL before loading the book and add the # OL key if a match is found. We can trust them and attach the item # to that edition. 
- if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"): + if metadata.get('mediatype') == 'texts' and metadata.get('openlibrary'): edition_data = self.get_ia_record(metadata) - edition_data["openlibrary"] = metadata["openlibrary"] + edition_data['openlibrary'] = metadata['openlibrary'] edition_data = self.populate_edition_data(edition_data, identifier) return self.load_book(edition_data) # Case 3 - Can the item be loaded into Open Library? - status = ia.get_item_status(identifier, metadata, - item_server=item_server, item_path=item_path) + status = ia.get_item_status(identifier, metadata) if status != 'ok': - return self.error(status, "Prohibited Item") + return self.error(status, 'Prohibited Item %s' % identifier) # Case 4 - Does this item have a marc record? marc_record = self.get_marc_record(identifier) From 25e40f0f7bd5878cea1ea2a9dcc6029ece846745 Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 19:57:16 +1300 Subject: [PATCH 12/16] move ia cover logic to ia module --- openlibrary/core/ia.py | 6 ++++++ openlibrary/plugins/importapi/code.py | 25 +++++++------------------ 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 307875e69fc..80586084af0 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -98,6 +98,12 @@ def edition_from_item_metadata(itemid, metadata): return e +def get_cover_url(item_id): + """Gets the URL of the archive.org item's title (or cover) page. + """ + return '{0}/download/{1}/page/title.jpg'.format(IA_BASE_URL, item_id) + + def get_item_manifest(item_id, item_server, item_path): url = 'https://%s/BookReader/BookReaderJSON.php' % item_server url += "?itemPath=%s&itemId=%s&server=%s" % (item_path, item_id, item_server) diff --git a/openlibrary/plugins/importapi/code.py b/openlibrary/plugins/importapi/code.py index 1cac17f37f4..99a9debfb7f 100644 --- a/openlibrary/plugins/importapi/code.py +++ b/openlibrary/plugins/importapi/code.py @@ -2,15 +2,14 @@ """ from infogami.plugins.api.code import add_hook -from infogami import config + from openlibrary.plugins.openlibrary.code import can_write from openlibrary.catalog.marc.marc_binary import MarcBinary, MarcException from openlibrary.catalog.marc.marc_xml import MarcXml from openlibrary.catalog.marc.parse import read_edition from openlibrary.catalog import add_book from openlibrary.catalog.get_ia import get_marc_record_from_ia, get_from_archive_bulk -from openlibrary import accounts -from openlibrary import records +from openlibrary import accounts, records from openlibrary.core import ia import web @@ -26,7 +25,6 @@ from lxml import etree import logging -IA_BASE_URL = config.get('ia_base_url') MARC_LENGTH_POS = 5 logger = logging.getLogger('openlibrary.importapi') @@ -226,18 +224,16 @@ def get_subfield(field, id_subfield): return self.error(status, 'Prohibited Item %s' % identifier) # Case 4 - Does this item have a marc record? 
- marc_record = self.get_marc_record(identifier) + marc_record = get_marc_record_from_ia(identifier) if marc_record: self.reject_non_book_marc(marc_record) try: edition_data = read_edition(marc_record) except MarcException as e: - logger.error("failed to read from MARC record %s: %s", identifier, str(e)) - return self.error("invalid-marc-record") - + logger.error('failed to read from MARC record %s: %s', identifier, str(e)) + return self.error('invalid-marc-record') elif require_marc: - return self.error("no-marc-record") - + return self.error('no-marc-record') else: try: edition_data = self.get_ia_record(metadata) @@ -246,7 +242,6 @@ def get_subfield(field, id_subfield): # Add IA specific fields: ocaid, source_records, and cover edition_data = self.populate_edition_data(edition_data, identifier) - return self.load_book(edition_data) def get_ia_record(self, metadata): @@ -307,15 +302,9 @@ def populate_edition_data(self, edition, identifier): """ edition['ocaid'] = identifier edition['source_records'] = 'ia:' + identifier - edition['cover'] = '{0}/download/{1}/page/title.jpg'.format(IA_BASE_URL, identifier) + edition['cover'] = ia.get_cover_url(identifier) return edition - def get_marc_record(self, identifier): - try: - return get_marc_record_from_ia(identifier) - except IOError: - return None - def find_edition(self, identifier): """ Checks if the given identifier has already been imported into OL. From 52d9765f91423c6a12c238ff68907c0d0b7b0ca9 Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 20:21:48 +1300 Subject: [PATCH 13/16] use requests instead of urllib2 for ia module --- openlibrary/core/ia.py | 16 +++++++--------- openlibrary/tests/core/test_ia.py | 11 ++++------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 80586084af0..55c8532b49d 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -4,8 +4,7 @@ import cache import logging import os -import simplejson -import urllib2 +import requests import web from infogami import config @@ -28,9 +27,9 @@ def _get_metadata(itemid): url = '%s/metadata/%s' % (IA_BASE_URL, itemid) try: stats.begin('archive.org', url=url) - metadata_json = urllib2.urlopen(url).read() + metadata = requests.get(url) stats.end() - return simplejson.loads(metadata_json) + return metadata.json() except IOError: stats.end() return {} @@ -106,12 +105,12 @@ def get_cover_url(item_id): def get_item_manifest(item_id, item_server, item_path): url = 'https://%s/BookReader/BookReaderJSON.php' % item_server - url += "?itemPath=%s&itemId=%s&server=%s" % (item_path, item_id, item_server) + url += '?itemPath=%s&itemId=%s&server=%s' % (item_path, item_id, item_server) try: - stats.begin("archive.org", url=url) - manifest_json = urllib2.urlopen(url).read() + stats.begin('archive.org', url=url) + manifest = requests.get(url) stats.end() - return simplejson.loads(manifest_json) + return manifest.json() except IOError: stats.end() return {} @@ -197,7 +196,6 @@ def get_item_status(cls, itemid, metadata, item_server=None, item_path=None): # items with metadata no_ol_import=true will be not imported if metadata.get("no_ol_import", '').lower() == 'true': return "no-ol-import" - return "ok" @classmethod diff --git a/openlibrary/tests/core/test_ia.py b/openlibrary/tests/core/test_ia.py index 72a6d36f535..13efe3aebc2 100644 --- a/openlibrary/tests/core/test_ia.py +++ b/openlibrary/tests/core/test_ia.py @@ -1,18 +1,15 @@ -from StringIO import StringIO -import urllib2 - from openlibrary.core 
import ia def test_get_metadata(monkeypatch, mock_memcache): - metadata_json = """{ + metadata = { "metadata": { "title": "Foo", "identifier": "foo00bar", "collection": ["printdisabled", "inlibrary"] } } - """ - monkeypatch.setattr(urllib2, 'urlopen', lambda url: StringIO(metadata_json)) + + monkeypatch.setattr(ia, '_get_metadata', lambda _id: metadata) assert ia.get_metadata('foo00bar') == { "title": "Foo", "identifier": "foo00bar", @@ -22,5 +19,5 @@ def test_get_metadata(monkeypatch, mock_memcache): } def test_get_metadata_empty(monkeypatch, mock_memcache): - monkeypatch.setattr(urllib2, 'urlopen', lambda url: StringIO('{}')) + monkeypatch.setattr(ia, '_get_metadata', lambda _id: {}) assert ia.get_metadata('foo02bar') == {} From 43ee8ac402e7f92d102bfd78a0995cbd83399c81 Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 20:43:52 +1300 Subject: [PATCH 14/16] fallback to archive.org cover.jpg if title.jpg does not exist --- openlibrary/core/ia.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 55c8532b49d..40dc2eb2ba7 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -100,7 +100,11 @@ def edition_from_item_metadata(itemid, metadata): def get_cover_url(item_id): """Gets the URL of the archive.org item's title (or cover) page. """ - return '{0}/download/{1}/page/title.jpg'.format(IA_BASE_URL, item_id) + base_url = '{0}/download/{1}/page/'.format(IA_BASE_URL, item_id) + title_response = requests.head(base_url + 'title.jpg', allow_redirects=True) + if title_response.status_code == 404: + return base_url + 'cover.jpg' + return base_url + 'title.jpg' def get_item_manifest(item_id, item_server, item_path): From b58237abc0136e0a7c99a9a7ea042b88e0028f40 Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Fri, 10 Jan 2020 20:55:10 +1300 Subject: [PATCH 15/16] add TODO for better response codes --- openlibrary/plugins/importapi/code.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openlibrary/plugins/importapi/code.py b/openlibrary/plugins/importapi/code.py index 99a9debfb7f..01e19a85a1f 100644 --- a/openlibrary/plugins/importapi/code.py +++ b/openlibrary/plugins/importapi/code.py @@ -118,12 +118,13 @@ def POST(self): try: reply = add_book.load(edition) + # TODO: If any records have been created, return a 201, otherwise 200 + return json.dumps(reply) except add_book.RequiredField as e: return self.error('missing-required-field', str(e)) - return json.dumps(reply) def reject_non_book_marc(self, marc_record, **kwargs): - details = "Item rejected" + details = 'Item rejected' # Is the item a serial instead of a book? marc_leaders = marc_record.leader() if marc_leaders[7] == 's': From 06df165003912899a264548f68bb2d6573f14d6c Mon Sep 17 00:00:00 2001 From: Charles Horn Date: Sat, 11 Jan 2020 01:39:46 +1300 Subject: [PATCH 16/16] improve import: --- openlibrary/core/ia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/core/ia.py b/openlibrary/core/ia.py index 40dc2eb2ba7..a972c115f36 100644 --- a/openlibrary/core/ia.py +++ b/openlibrary/core/ia.py @@ -1,7 +1,6 @@ """Library for interacting with archive.org. """ -import cache import logging import os import requests @@ -10,6 +9,7 @@ from infogami import config from infogami.utils import stats +from openlibrary.core import cache from openlibrary.utils.dateutil import date_n_days_ago import six
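
Taken together, patches 05 and 11-16 leave the metadata and cover helpers in openlibrary/core/ia.py in roughly the following shape. This is a condensed sketch recombined from the hunks above for review convenience, not an additional patch: surrounding code is omitted, variable names are lightly adjusted, and extract_item_metadata() is the existing helper in the same module that this series leaves unchanged.

    import logging
    import requests
    import web

    from infogami import config
    from infogami.utils import stats

    from openlibrary.core import cache

    logger = logging.getLogger('openlibrary.ia')

    IA_BASE_URL = config.get('ia_base_url')


    def _get_metadata(itemid):
        """Queries the archive.org metadata API and returns the raw JSON response."""
        itemid = web.safestr(itemid.strip())
        url = '%s/metadata/%s' % (IA_BASE_URL, itemid)
        try:
            stats.begin('archive.org', url=url)
            response = requests.get(url)
            stats.end()
            return response.json()
        except IOError:
            stats.end()
            return {}

    # cache the raw API response in-process for a minute
    _get_metadata = web.memoize(_get_metadata, expires=60)


    def get_metadata(itemid):
        """Returns processed item metadata, with multi-valued fields normalised
        to lists by the existing extract_item_metadata() helper."""
        return extract_item_metadata(_get_metadata(itemid))

    # cache the processed metadata in memcache for five minutes
    get_metadata = cache.memcache_memoize(
        get_metadata, key_prefix='ia.get_metadata', timeout=5 * 60)


    def get_cover_url(item_id):
        """Returns the URL of the item's title page image, falling back to
        cover.jpg when the item has no title.jpg."""
        base_url = '{0}/download/{1}/page/'.format(IA_BASE_URL, item_id)
        title_response = requests.head(base_url + 'title.jpg', allow_redirects=True)
        if title_response.status_code == 404:
            return base_url + 'cover.jpg'
        return base_url + 'title.jpg'

The caching is layered: the raw JSON response is memoized in-process for sixty seconds via web.memoize, while the processed dict is cached in memcache for five minutes via cache.memcache_memoize, matching the timeouts in the hunks above. Callers now use ia.get_metadata(identifier) directly (PATCH 07, 08 and 11) and ia.get_cover_url(identifier) when populating edition data in the import API (PATCH 12).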