
Commit

Minor bug fixes - fixed a proxy issue in EsriConnector and data downloading in SmartLinker
Ilkka-LBL committed Nov 26, 2024
1 parent 8f9b615 commit 1468034
Showing 7 changed files with 43 additions and 71 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
@@ -14,12 +14,17 @@ TODO and future improvements
9. Rework LocalMerger module. It currently isn't fully implemented.


Version 1.1.4
-------------

Bug: Fixed a bug where ``EsriConnector()`` instances would not run unless a proxy was provided or set by the ``ConfigManager()``.
Bug: Fixed a bug where the ``SmartLinker()`` class did not handle cases where no geographic limitations were set and there were two or more tables to download data for.

Version 1.1.3
-------------

Bug: Fixed a bug where SmartLinker() would not initialise.


Version 1.1.2
-------------

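As a usage-level illustration of both 1.1.4 fixes, here is a minimal sketch mirroring the new test_5_smart_coding added in tests/test_GeocodeMerger.py further down. The import path and the asyncio wrapper are assumptions; with no proxy in the config and no geographic_areas passed to run_graph(), both code paths now complete instead of failing.

import asyncio

from Consensus.GeocodeMerger import SmartLinker  # import path assumed from the changed file paths

async def main():
    # No proxy configured and no geographic limitation: both cases fixed in 1.1.4.
    gss = SmartLinker(server='OGP', max_retries=25)
    await gss.initialise()
    gss.allow_geometry()
    gss.run_graph(starting_column='LAD21CD', ending_column='OA21CD')  # no geographic_areas given
    codes = await gss.geodata(selected_path=0, chunk_size=2500)
    print(codes['table_data'][0]['OA21CD'].nunique())

asyncio.run(main())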
6 changes: 5 additions & 1 deletion Consensus/EsriConnector.py
@@ -467,7 +467,11 @@ def __init__(self, max_retries: int = 10, retry_delay: int = 2, server_type: str
self.services = []
self.service_table = None
config = load_config()
self.proxy = proxy if proxy is not None else config.get('proxies', None).get('https', None)
try:
self.proxy = proxy if proxy is not None else config.get('proxies', None).get('https', None)
except Exception:
print("No proxy found in config file. Using no proxy.")
self.proxy = None

async def field_matching_condition(self, field: Dict[str, str]) -> bool:
"""
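The fallback added above reads as: use the proxy argument if given, otherwise try config['proxies']['https'], otherwise run without a proxy. A standalone sketch of that logic follows; the helper name and the {'proxies': {'https': ...}} config shape are assumptions inferred from the lookup in the diff.

from typing import Any, Dict, Optional

def resolve_https_proxy(proxy: Optional[str], config: Dict[str, Any]) -> Optional[str]:
    # An explicitly supplied proxy always wins.
    if proxy is not None:
        return proxy
    try:
        # config.get('proxies', None) returns None when the key is missing,
        # so the chained .get() raises and we fall through to "no proxy".
        return config.get('proxies', None).get('https', None)
    except Exception:
        print("No proxy found in config file. Using no proxy.")
        return None

# resolve_https_proxy(None, {}) -> None (prints the message)
# resolve_https_proxy(None, {'proxies': {'https': 'http://proxy.local:8080'}}) -> 'http://proxy.local:8080'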
28 changes: 17 additions & 11 deletions Consensus/GeocodeMerger.py
@@ -373,23 +373,29 @@ async def geodata(self, selected_path: int = None, retun_all: bool = False, **kw
else:
for enum, pathway in enumerate(chosen_path[1:]):
connecting_column = pathway[1]
string_list = [f'{i}' for i in start_table[connecting_column].unique()]
next_chunks = []
for enum, i in enumerate(range(0, len(string_list), 100)):
print(f"Downloading tranche {i}-{i+100} of connected table {pathway[0]}")
print(f"Total items to download: {len(string_list)}")
string_chunk = string_list[i:i + 100]
where_clause = where_clause_maker(string_chunk, connecting_column)
next_chunk = await self._get_ogp_table(pathway[0], where_clause=where_clause, **kwargs)
next_chunks.append(next_chunk)

next_table = pd.concat(next_chunks)
if self.geographic_areas:
string_list = [f'{i}' for i in start_table[connecting_column].unique()]
next_chunks = []
for enum, i in enumerate(range(0, len(string_list), 100)):
print(f"Downloading tranche {i}-{i+100} of connected table {pathway[0]}")
print(f"Total items to download: {len(string_list)}")
string_chunk = string_list[i:i + 100]
where_clause = where_clause_maker(string_chunk, connecting_column)
next_chunk = await self._get_ogp_table(pathway[0], where_clause=where_clause, **kwargs)
next_chunks.append(next_chunk)

next_table = pd.concat(next_chunks)

else:
next_table = await self._get_ogp_table(pathway[0], **kwargs)

next_table.columns = [col.upper() for col in list(next_table.columns)]
table_downloads['table_name'].append(pathway[0])
table_downloads['download_order'].append(enum + 1)
table_downloads['connected_to_previous_table_by_column'].append(pathway[1])
table_downloads['data'].append(next_table)
start_table = start_table.merge(next_table, on=connecting_column, how='left', suffixes=('', '_DROP')).filter(regex='^(?!.*_DROP)') # always perform left join on the common column (based on its name), add "_DROP" to column names that are duplicated and then filter them out.

start_table = start_table.drop_duplicates()
start_table.dropna(axis='columns', how='all', inplace=True)
if "GEOMETRY" in start_table.columns:
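The merge at the end of the loop above relies on a pandas idiom: left-join on the shared column, suffix any duplicated right-hand columns with "_DROP", then filter them out with a negative-lookahead regex. An isolated sketch with toy data follows; the column names here are only illustrative.

import pandas as pd

start_table = pd.DataFrame({'WD22CD': ['E05000001', 'E05000002'],
                            'LAD22CD': ['E09000001', 'E09000001']})
next_table = pd.DataFrame({'WD22CD': ['E05000001', 'E05000002'],
                           'LAD22CD': ['E09000001', 'E09000001'],
                           'LAD22NM': ['City of London', 'City of London']})

# Duplicated columns coming from next_table are suffixed '_DROP' and then removed,
# so only the genuinely new LAD22NM column survives the join.
merged = (start_table
          .merge(next_table, on='WD22CD', how='left', suffixes=('', '_DROP'))
          .filter(regex='^(?!.*_DROP)')
          .drop_duplicates())
print(list(merged.columns))  # ['WD22CD', 'LAD22CD', 'LAD22NM']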
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -13,7 +13,7 @@
project = 'Consensus'
copyright = '2024, Ilkka Sipila - Lewisham Council Data Science and Insight team'
author = 'Ilkka Sipila'
release = '1.1.3'
release = '1.1.4'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
2 changes: 1 addition & 1 deletion setup.py
@@ -45,7 +45,7 @@ def package_files(directory):

setup(
name='Consensus',
version='1.1.3',
version='1.1.4',
author='Ilkka Sipila',
author_email='[email protected]',
url='https://ilkka-lbl.github.io/Consensus/',
54 changes: 0 additions & 54 deletions tests/test.py

This file was deleted.

15 changes: 13 additions & 2 deletions tests/test_GeocodeMerger.py
@@ -41,7 +41,7 @@ async def test_3_smart_coding(self):
gss.run_graph(starting_column='LAD21CD', ending_column='OA21CD', geographic_areas=['Lewisham'], geographic_area_columns=['LAD21NM']) # the starting and ending columns should end in CD

async def test_4_smart_coding(self):
gss = SmartLinker(server='OGP')
gss = SmartLinker(server='OGP', max_retries=25)
await gss.initialise()
gss.allow_geometry()
gss.run_graph(starting_column='WD22CD', ending_column='LAD22CD', geographic_areas=['Lewisham', 'Southwark'], geographic_area_columns=['LAD22NM']) # the starting and ending columns should end in CD
@@ -51,7 +51,18 @@ async def test_4_smart_coding(self):
assert gss.fs.chunk_size == 5
assert codes['table_data'][0]['WD22CD'].nunique() == 42

def test_5_geo_helper(self):
async def test_5_smart_coding(self):
gss = SmartLinker(server='OGP', max_retries=25)
await gss.initialise()
gss.allow_geometry()
gss.run_graph(starting_column='LAD21CD', ending_column='OA21CD') # the starting and ending columns should end in CD

codes = await gss.geodata(selected_path=0, chunk_size=2500)
print(codes['table_data'][0])
print(codes['table_data'][0]['OA21CD'].nunique())
assert codes['table_data'][0]['OA21CD'].nunique() == 188880

def test_6_geo_helper(self):
geo_help = GeoHelper(server='OGP')
print(geo_help.available_geographies())
geo_keys = geo_help.geography_keys()
