Skip to content

Commit

Permalink
reduce chunkstore memory footprint
Browse files Browse the repository at this point in the history
  • Loading branch information
TomTaylorLondon committed Apr 19, 2019
1 parent 57e110b commit d4ccf47
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 5 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
### 1.75
* Bugfix: pypandoc not rendering README correctly for PYPI
* Bugfix: #744 get_info on an empty dataframe raises an exception
* Feature: Chunkstore: Duplicate columns specified when filtering by columns are now de-duplicated instead of raising an error
* Feature: Chunkstore: Reduced memory footprint when reading data (chunks are now deserialized in place and removed from the chunk list as they are converted)

### 1.74 (2019-02-28)
* Bugfix: #712 Pandas deprecation warning in chunkstore serializer
Expand Down
1 change: 1 addition & 0 deletions arctic/chunkstore/chunkstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ def read(self, symbol, chunk_range=None, filter_data=True, **kwargs):
chunks[segments[0][SYMBOL]].append({DATA: chunk_data, METADATA: mdata})

skip_filter = not filter_data or chunk_range is None
kwargs['inplace'] = kwargs.get('inplace', True)

if len(symbol) > 1:
return {sym: deser(chunks[sym], **kwargs) if skip_filter else chunker.filter(deser(chunks[sym], **kwargs), chunk_range) for sym in symbol}
Expand Down
17 changes: 12 additions & 5 deletions arctic/serialization/numpy_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def serialize(self, df):
ret[METADATA][TYPE] = dtype
return ret

def deserialize(self, data, columns=None):
def deserialize(self, data, columns=None, inplace=False):
"""
Deserializes SON to a DataFrame
Expand All @@ -203,13 +203,17 @@ def deserialize(self, data, columns=None):
columns: None, or list of strings
optionally you can deserialize a subset of the data in the SON. Index
columns are ALWAYS deserialized, and should not be specified
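inplace: bool, default False
if True and data is a list, items are removed from data as they are converted,
so the caller's list is modified; if False, a shallow copy of data is consumed instead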
Returns
-------
pandas dataframe or series
"""
if not data:
return pd.DataFrame()
if not inplace:
data = data[:]

meta = data[0][METADATA] if isinstance(data, list) else data[METADATA]
index = INDEX in meta
Expand All @@ -218,16 +222,19 @@ def deserialize(self, data, columns=None):
if index:
columns = columns[:]
columns.extend(meta[INDEX])
if len(columns) > len(set(columns)):
raise Exception("Duplicate columns specified, cannot de-serialize")
columns = list(set(columns))

if not isinstance(data, list):
df = self.converter.objify(data, columns)
else:
df = pd.concat([self.converter.objify(d, columns) for d in data], ignore_index=not index)
dfs = []
while len(data):
dfs.append(self.converter.objify(data.pop(0), columns))
df = pd.concat(dfs, ignore_index=not index)
del dfs

if index:
df = df.set_index(meta[INDEX])
df = df.set_index(meta[INDEX], inplace=True)
if meta[TYPE] == 'series':
return df[df.columns[0]]
return df
Expand Down

0 comments on commit d4ccf47

Please sign in to comment.