diff --git a/README.rst b/README.rst
index 0d9f9a0..0386865 100644
--- a/README.rst
+++ b/README.rst
@@ -263,17 +263,23 @@ datalogistik_metadata.ini
     Schema of the table.
 
 ``url``
-    Download url in case this is a single-file table.
-
-``base_url``
-    Base download url in case this is a multi-file table. Each file will append
-    their `rel_path` to this to form the full download url.
+    Download url for the table. This can be either:
+    * A URL pointing to the data to be downloaded for the table (either a single
+      file or a directory that contains many files to be downloaded)
+    * A base URL that is concatenated with the ``rel_url_path`` of each entry in the
+      ``files`` attribute, for multi-file tables that list their files individually
 
 ``files``
     A list of files in this table. Each entry in the list has the following properties:
 
     ``rel_path``
-        Path to the file, relative to the directory of this table.
+        Path to the file(s), relative to the directory of this table. This is the
+        location of the file(s) on disk in the cache.
+
+    ``rel_url_path``
+        URL path to the file(s), relative to the remote directory where the table is
+        stored. This is used only when downloading the file, and is only necessary
+        when a multi-file table lists the files that make up the table individually.
 
     ``file_size``
         Size of the file.
diff --git a/datalogistik/dataset.py b/datalogistik/dataset.py
index 0c6f2a8..f412cae 100644
--- a/datalogistik/dataset.py
+++ b/datalogistik/dataset.py
@@ -357,7 +357,6 @@ def validate_table_files(self, table):
                 f"No metadata found for table {table}, could not perform validation (assuming valid)"
             )
             return True
-        new_file_listing = self.create_file_listing(table)
 
         # we can't perform a simple equality check on the whole listing,
         # because the orig_file_listing does not contain the metadata file.
@@ -372,6 +371,8 @@
             found = None
             for new_file in new_file_listing:
                 if new_file["rel_path"] == orig_file["rel_path"]:
+                    # drop the rel_url_path for comparison, because it's not relevant here
+                    orig_file.pop("rel_url_path", None)
                     found = new_file
                     break
 
@@ -448,7 +449,7 @@ def download(self):
             msg = (
                 "No table entries were found. "
                 "To download a dataset, at least 1 table entry must exist "
-                "that has a 'url' or 'base_url' property."
+                "that has a 'url' property."
             )
             log.error(msg)
             raise ValueError(msg)
@@ -460,51 +461,48 @@ def download(self):
         # For now, we always download all tables. So we need to loop through each table
         for table in self.tables:
-
-            # There are 2 possible types downloads:
-            # 1 - The table entry has a url property. Either this table is a single-file, or it is a single
-            #     download that will produce multiple files.
-            # 2 - multi file (either single or multi table). The table entry has a base_url property,
-            #     and each file has a rel_path property. This is appended to the base_url to form
-            #     the download link. The files will be placed in the table directory (generated from the table name).
             # create table dir
             table_path = self.ensure_table_loc(table)
-            # Type 1
-            if table.url:
-                # Note that table_path will be a file if this is a single-file table,
-                # and a dir if it is a multi-file table (download_file will produce multiple files)
-                util.download_file(table.url, output_path=table_path)
-                util.set_readonly(table_path)
-
-            # Type 2
-            elif table.base_url:
-                if len(table.files) <= 1:
-                    msg = f"Single-file table '{table.table}' has 'base_url' property set. It should only have a 'url'."
+            for file in table.files:
+                # Validate that there is a url at all
+                if not table.url:
+                    msg = (
+                        f"Could not find a url property for Table '{table.table}'."
+                    )
                     log.error(msg)
-                    raise ValueError(msg)
-                for file in table.files:
-                    # contains the suffix for the download url
-                    rel_path = file.get("rel_path")
-                    if not rel_path:
-                        msg = f"Missing rel_path property for multi-file table '{table.table}'."
+                    raise RuntimeError(msg)
+
+                # contains the suffix for the download url
+                rel_url_path = file.get("rel_url_path")
+
+                # if the rel_url_path is not already at the end of the url, join them
+                table_url = table.url
+                if rel_url_path and not table_url.endswith(rel_url_path):
+                    table_url = table_url + "/" + rel_url_path
+
+                download_path = table_path
+
+                # Set the rel_path
+                file["rel_path"] = download_path.name
+
+                # if this is a multi-file table, then we need to validate that
+                # there are rel_url_paths and append them. All files constituting a
+                # table must be in a dir named table.name (created by ensure_table_loc);
+                # note that the resulting dir structure is not necessarily flat,
+                # because the table can have multiple levels of partitioning.
+                if len(table.files) > 1 or table.multi_file:
+                    if not rel_url_path:
+                        msg = f"Missing rel_url_path property for multi-file table '{table.table}'."
                         log.error(msg)
                         raise ValueError(msg)
+                    download_path = download_path / rel_url_path
 
-                    # All files constituting a table must be in a dir with name table.name (created by ensure_table_loc)
-                    # note that the resulting dir structure is not necessarily flat,
-                    # because the table can have multiple levels of partitioning.
-                    download_path = table_path / rel_path
-                    url = table.base_url + "/" + rel_path
-
-                    util.download_file(url, output_path=download_path)
-                    util.set_readonly(download_path)
+                    # but for multi-file tables, we override this with the rel_url_path
+                    file["rel_path"] = rel_url_path
 
-            else:
-                msg = f"Could not find a url or base_url property for Table '{table.table}'."
-                log.error(msg)
-                raise RuntimeError(msg)
+                util.download_file(table_url, output_path=download_path)
+                util.set_readonly(download_path)
 
             # Try validation in case the dataset info contained checksums
             if not self.validate_table_files(table):
diff --git a/datalogistik/table.py b/datalogistik/table.py
index eb9a0ef..f5b4245 100644
--- a/datalogistik/table.py
+++ b/datalogistik/table.py
@@ -33,4 +33,3 @@ class Table:
     header_line: Optional[bool] = None
     dim: Optional[List] = field(default_factory=list)
     url: Optional[str] = None
-    base_url: Optional[str] = None
diff --git a/repo.json b/repo.json
index b2e30b7..4d7d49c 100644
--- a/repo.json
+++ b/repo.json
@@ -12,7 +12,6 @@
         "header_line": false,
         "files": [
           {
-            "rel_path": "2016Q4.csv.gz",
             "file_size": 262125134,
             "md5": "6e8ff7ae76033c452a9b72350461a4e6"
           }
         ]
@@ -77,7 +76,6 @@
         ],
         "files": [
           {
-            "rel_path": "nyctaxi_2010-01.csv.gz",
             "file_size": 591876633,
             "md5": "22bd86bf54da91faf26fa8a243644a9f"
           }
         ]
@@ -98,7 +96,6 @@
         ],
         "files": [
           {
-            "rel_path": "chi_traffic_2020_Q1.parquet",
             "file_size": 182895135,
             "md5": "b53ea738eda7ee051bcf33b3056b83f6"
           }
         ]
@@ -119,7 +116,6 @@
         ],
         "files": [
           {
-            "rel_path": "type_strings.parquet",
             "file_size": 87174822,
             "md5": "927c0cf481fb3c4870806b096b525d90"
           }
         ]
@@ -140,7 +136,6 @@
         ],
         "files": [
           {
-            "rel_path": "type_dict.parquet",
             "file_size": 2890770,
             "md5": "4186ad2f3072db6558aba3c535da1b97"
           }
         ]
@@ -161,7 +156,6 @@
         ],
         "files": [
           {
-            "rel_path": "type_integers.parquet",
             "file_size": 15882666,
             "md5": "7d44caa83025c9c7ef2ded67081d6d68"
           }
         ]
@@ -182,7 +176,6 @@
         ],
         "files": [
           {
-            "rel_path": "type_floats.parquet",
             "file_size": 23851672,
             "md5": "d083e46ddeff5cc5a3be3ea5c2f7f9d7"
           }
         ]
@@ -203,7 +196,6 @@
         ],
         "files": [
           {
-            "rel_path": "type_nested.parquet",
             "file_size": 130538033,
             "md5": "4bcf6735b00b32fccb6da338262def23"
           }
         ]
@@ -224,7 +216,6 @@
         ],
         "files": [
           {
-            "rel_path": "type_simple_features.parquet",
             "file_size": 28637722,
             "md5": "7776bea2a08e0466f77058676e2bb567"
           }
         ]
@@ -232,4 +223,4 @@
       }
     ]
   }
-]
+]
\ No newline at end of file
diff --git a/tests/fixtures/test_cache/taxi_2013/face7ed/datalogistik_metadata.ini b/tests/fixtures/test_cache/taxi_2013/face7ed/datalogistik_metadata.ini
index 3f47b6b..37e636d 100644
--- a/tests/fixtures/test_cache/taxi_2013/face7ed/datalogistik_metadata.ini
+++ b/tests/fixtures/test_cache/taxi_2013/face7ed/datalogistik_metadata.ini
@@ -7,7 +7,7 @@
     "tables": [
         {
             "table": "taxi_2013",
-            "base_url": "http://www.example.com",
+            "url": "http://www.example.com",
             "files": [
                 {
                     "file_size": 5653,
diff --git a/tests/test_datalogistik.py b/tests/test_datalogistik.py
index d0c3ea1..1597c34 100644
--- a/tests/test_datalogistik.py
+++ b/tests/test_datalogistik.py
@@ -297,9 +297,12 @@ def test_compress(comp_string):
 
 
 # Integration-style tests
+@pytest.mark.skipif(sys.platform == "win32", reason="windows path separator")
 def test_main(capsys):
     # This should be in the cache already, so no conversion needed
-    exact_dataset = dataset.Dataset(name="fanniemae_sample", format="csv", delim="|")
+    exact_dataset = dataset.Dataset(
+        name="fanniemae_sample", format="csv", delim="|", compression="gzip"
+    )
 
     with pytest.raises(SystemExit) as e:
         datalogistik.main(exact_dataset)
@@ -310,6 +313,11 @@ def test_main(capsys):
     assert captured["name"] == "fanniemae_sample"
     assert captured["format"] == "csv"
     assert isinstance(captured["tables"], dict)
+    # this is the path from the fixtures; if this doesn't match, we've actually converted and not just found the extant one
+    assert (
+        captured["tables"]["fanniemae_sample"]["path"]
+        == "tests/fixtures/test_cache/fanniemae_sample/a77e575/fanniemae_sample.csv.gz"
+    )
 
 
 @pytest.mark.skipif(sys.platform == "win32", reason="windows errors on the cleanup")
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 74b8247..a85c80d 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import json
 import os
 import pathlib
@@ -249,17 +250,71 @@ def test_validate():
     multi_file_ds.tables[0].files[0]["md5"] = orig_md5  # restore for following tests
 
 
-def test_download_dataset(monkeypatch):
+@pytest.mark.parametrize(
+    "table_url,rel_url_path,final_url",
+    [
+        (
+            "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet",
+            None,
+            "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet",
+        ),
+        # we can append rel_url_path
+        (
+            "https://ursa-qa.s3.amazonaws.com/chitraffic",
+            "chi_traffic_2020_Q1.parquet",
+            "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet",
+        ),
+        # we don't duplicate rel_url_path when it's already there
+        (
+            "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet",
+            "chi_traffic_2020_Q1.parquet",
+            "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet",
+        ),
+        # we can have a table name that is different from rel_url_path
+        (
+            "https://ursa-qa.s3.amazonaws.com/chitraffic",
+            "traffic.parquet",
+            "https://ursa-qa.s3.amazonaws.com/chitraffic/traffic.parquet",
+        ),
+    ],
+)
+def test_download_single_dataset(monkeypatch, table_url, rel_url_path, final_url):
     def _fake_download(url, output_path):
-        assert (
-            url
-            == "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet"
-        )
+        assert url == final_url
+
         assert output_path == pathlib.Path(
             "tests/fixtures/test_cache/chi_traffic_2020_Q1/raw/chi_traffic_2020_Q1.parquet"
         )
 
     monkeypatch.setattr("datalogistik.util.download_file", _fake_download)
+
+    # We probably should move write_metadata out from download, but let's mock it for now
+    def _fake_write_metadata(self):
+        return True
+
+    monkeypatch.setattr(Dataset, "write_metadata", _fake_write_metadata)
+
+    # This probably also means that set_readonly isn't in the right place, but let's mock it away nonetheless
+    def _fake_set_readonly(path):
+        return True
+
+    monkeypatch.setattr("datalogistik.util.set_readonly", _fake_set_readonly)
+
+    # Don't even set rel_url_path if it's None:
+    if rel_url_path is None:
+        files_dict = {}
+    else:
+        files_dict = {"rel_url_path": rel_url_path}
+
+    ds_variant_available = Dataset(
+        name="chi_traffic_2020_Q1",
+        format="parquet",
+        compression=None,
+        tables=[Table(table="chi_traffic_2020_Q1", url=table_url, files=[files_dict])],
+    )
+    ds_variant_available.download()
+
+    # but a variant with no table entries errors as well
     ds_variant_not_available = Dataset(
         name="chi_traffic_2020_Q1",
         format="csv",
@@ -268,7 +323,46 @@ def _fake_download(url, output_path):
     with pytest.raises(ValueError):
         ds_variant_not_available.download()
 
-    # download a dataset with wrong checksums, verify that validation fails
+
+def test_multi_file_download_dataset(monkeypatch):
+    def _fake_multi_download(url, output_path):
+        file_numbers = [1, 10, 11, 12, 2, 3, 4, 5, 6, 7, 8, 9]
+        file_number = file_numbers[_fake_multi_download.file_index]
+        assert url == f"http://www.example.com/taxi_2013_{file_number}.csv.gz"
+        assert output_path == pathlib.Path(
+            f"tests/fixtures/test_cache/taxi_2013/face7ed/taxi_2013/taxi_2013_{file_number}.csv.gz"
+        )
+        _fake_multi_download.file_index += 1
+
+    _fake_multi_download.file_index = 0  # init "static variable"
+    monkeypatch.setattr("datalogistik.util.download_file", _fake_multi_download)
+
+    # We probably should move write_metadata out from download, but let's mock it for now
+    def _fake_write_metadata(self):
+        return True
+
+    monkeypatch.setattr(Dataset, "write_metadata", _fake_write_metadata)
+
+    # This probably also means that set_readonly isn't in the right place, but let's mock it away nonetheless
+    def _fake_set_readonly(path):
+        return True
+
+    monkeypatch.setattr("datalogistik.util.set_readonly", _fake_set_readonly)
+
+    # add rel_url_path as if this dataset were coming from a repo file that has it
+    ds = copy.deepcopy(multi_file_ds)
+    files = ds.tables[0].files
+    # add rel_url_path
+    [fl.update({"rel_url_path": fl["rel_path"]}) for fl in files]
+    # remove rel_path
+    [fl.pop("rel_path") for fl in files]
+
+    ds.tables[0].files = files
+
+    ds.download()
+
+
+def test_failed_validation_download_dataset(monkeypatch):
     with tempfile.TemporaryDirectory() as tmpcachedir:
         tmpcachepath = pathlib.Path(tmpcachedir)
 
@@ -298,7 +392,8 @@ def _simulate_download_by_copying(url, output_path):
     # Note that if we were not working in a tmp dir, the dir should have been deleted
     # and the exception message would be: "File integrity check for newly downloaded dataset failed."
 
-    # Test a succeeding download
+
+def test_validated_download_dataset(monkeypatch):
     with tempfile.TemporaryDirectory() as tmpcachedir:
         tmpcachepath = pathlib.Path(tmpcachedir)
         dataset = Dataset.from_json(
@@ -324,20 +419,6 @@ def _simulate_download_by_copying(url, output_path):
                 tmp_ds.ensure_table_loc()
             ) == util.calculate_checksum(dataset.ensure_table_loc())
 
-        # multi-file download
-        def _fake_multi_download(url, output_path):
-            file_numbers = [1, 10, 11, 12, 2, 3, 4, 5, 6, 7, 8, 9]
-            file_number = file_numbers[_fake_multi_download.file_index]
-            assert url == f"http://www.example.com/taxi_2013_{file_number}.csv.gz"
-            assert output_path == pathlib.Path(
-                f"tests/fixtures/test_cache/taxi_2013/face7ed/taxi_2013/taxi_2013_{file_number}.csv.gz"
-            )
-            _fake_multi_download.file_index += 1
-
-        _fake_multi_download.file_index = 0  # init "static variable"
-        monkeypatch.setattr("datalogistik.util.download_file", _fake_multi_download)
-        multi_file_ds.download()
-
 
 def test_to_json():
     json_string = simple_parquet_ds.to_json()
@@ -498,21 +579,6 @@ def test_find_dataset():
     assert dataset_search.find_exact_dataset(ds_variant_not_found) is None
 
 
-# def test_find_close_dataset():
-#     ds_variant_not_found = Dataset(
-#         name="chi_traffic_sample", format="csv", compression="gzip"
-#     )
-#     close = find_close_dataset(ds_variant_not_found)
-#     # We prefer Parquet if we have it
-#     assert close.format == "parquet"
-#     assert simple_parquet_ds == close
-
-#     ds_variant_not_found = Dataset(name="nyctaxi_sample", format="parquet")
-#     close = find_close_dataset(ds_variant_not_found)
-#     # But can fall back
-#     assert close.format == "csv"
-
-
 def test_find_close_dataset_sf_mismatch(monkeypatch):
     # Mock the generation, cause all we care about here is that that would be called
     good_return = Dataset(name="tpc-h", scale_factor=10, format="tpc-raw")
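For reference, the URL resolution exercised by test_download_single_dataset and test_multi_file_download_dataset above boils down to the following sketch. The resolve_download_url helper is hypothetical (it does not exist in this patch, which inlines the same logic in Dataset.download); it only illustrates how a table's single url property now covers both the single-file case and the multi-file rel_url_path case.

# Hypothetical sketch (not part of the patch): how a table's url and an optional
# per-file rel_url_path combine into the final download url in Dataset.download.
def resolve_download_url(table_url, rel_url_path=None):
    # Append rel_url_path unless the url already ends with it, so a url that
    # already spells out the file name is used unchanged.
    if rel_url_path and not table_url.endswith(rel_url_path):
        return table_url + "/" + rel_url_path
    return table_url


# Single-file table: the url already points at the file.
assert (
    resolve_download_url(
        "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet"
    )
    == "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet"
)

# Multi-file table: each file's rel_url_path is appended to the base url.
assert (
    resolve_download_url("http://www.example.com", "taxi_2013_1.csv.gz")
    == "http://www.example.com/taxi_2013_1.csv.gz"
)

# The rel_url_path is not appended again when the url already ends with it.
assert (
    resolve_download_url(
        "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet",
        "chi_traffic_2020_Q1.parquet",
    )
    == "https://ursa-qa.s3.amazonaws.com/chitraffic/chi_traffic_2020_Q1.parquet"
)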