Skip to content

Commit

Permalink
Added functionality to Datasource, to add an empty extract, and to filter the extract
Browse files Browse the repository at this point in the history
  • Loading branch information
JustinGrilli committed Mar 19, 2024
1 parent 6c71c47 commit 4b01b81
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 6 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ requests
pandas
tabulate
pytest
tableauhyperapi
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
long_description=readme,
long_description_content_type='text/markdown',
name="tableau_utilities",
version="2.1.16",
version="2.1.2",
packages=[
'tableau_utilities',
'tableau_utilities.general',
Expand All @@ -26,7 +26,8 @@
'pyyaml>=6.0,<7.0.0',
'requests>=2.27.1,<3.0.0',
'pandas>=1.4.1,<2.0.0',
'tabulate>=0.8.9,<1.0.0'],
'tabulate>=0.8.9,<1.0.0',
'tableauhyperapi==0.0.18825'],
entry_points={
'console_scripts': [
'tableau_utilities = tableau_utilities.scripts.cli:main',
Expand Down
5 changes: 5 additions & 0 deletions tableau_utilities/scripts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@
help='The datatype persona of the column. Required for adding a new column')
parser_datasource.add_argument('--desc', help='A Tableau column description')
parser_datasource.add_argument('--calculation', help='A Tableau calculation')
parser_datasource.add_argument('-E', '--empty_extract', action='store_true',
help='Adds an empty extract to the Datasource if specified.')
parser_datasource.add_argument('-F', '--filter_extract',
help='Deletes data from the extract based on the condition string provided. '
"""E.g. "CREATED_AT" < '1/1/2024'""")
parser_datasource.set_defaults(func=datasource)

# GENERATE CONFIG
Expand Down
19 changes: 17 additions & 2 deletions tableau_utilities/scripts/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def datasource(args, server=None):
project_name = args.project_name
location = args.location
enforce_connection = args.enforce_connection
empty_extract = args.empty_extract
filter_extract = args.filter_extract

# Folder/Fields Args
persona = args.persona
Expand Down Expand Up @@ -69,6 +71,18 @@ def datasource(args, server=None):
datasource_file_name = os.path.basename(datasource_path)
ds = Datasource(datasource_path)

# Add an empty .hyper file to the Datasource; Useful for publishing without data
if empty_extract:
ds.empty_extract()
print(f'{color.fg_green}Added empty .hyper extract for {datasource_path}{color.reset}')
# Otherwise, filter the extract if filter_extract string provided
elif filter_extract:
start = time()
print(f'{color.fg_cyan}...Filtering extract data...{color.reset}')
ds.filter_extract(filter_extract)
print(f'{color.fg_green}{symbol.success} (Done in {round(time() - start)} sec) '
f'Filtered extract data for {datasource_path}{color.reset}')

if save_tds:
start = time()
print(f'{color.fg_cyan}...Extracting {datasource_file_name}...{color.reset}')
Expand Down Expand Up @@ -168,11 +182,12 @@ def datasource(args, server=None):
ds.connection.update(connection)

# Save the datasource if an edit may have happened
if column_name or folder_name or delete or enforce_connection:
if column_name or folder_name or delete or enforce_connection or empty_extract:
start = time()
print(f'{color.fg_cyan}...Saving datasource changes...{color.reset}')
ds.save()
print(f'{color.fg_green}{symbol.success} (Done in {round(time() - start)} sec) '
f'Saved changes to: {color.fg_yellow}{datasource_path}{color.reset}')
f'Saved datasource changes: {color.fg_yellow}{datasource_path}{color.reset}')

if save_tds:
start = time()
Expand Down
104 changes: 102 additions & 2 deletions tableau_utilities/tableau_file/tableau_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import xml.etree.ElementTree as ET
import os
import shutil

import xmltodict
from zipfile import ZipFile, ZIP_DEFLATED
from tableauhyperapi import HyperProcess, Connection, Telemetry, CreateMode, TableDefinition, TableName, SqlType
from zipfile import ZipFile

import tableau_utilities.tableau_file.tableau_file_objects as tfo
from tableau_utilities.general.funcs import transform_tableau_object
Expand Down Expand Up @@ -302,6 +302,106 @@ def enforce_column(self, column, folder_name=None, remote_name=None):
if not found:
self.extract.connection.cols.append(extract_col)

def empty_extract(self):
    """ Creates an empty extract (.hyper file) for the Tableau file.
    If the extract exists, it will be overwritten.

    The Tableau file is rebuilt as a .tdsx archive that contains the TDS/TWB file and an empty
    `Extract`.`Extract` hyper table, whose columns come from the connection's metadata records.
    On success, `file_path`, `file_basename` and `extension` point at the new .tdsx file.
    If anything fails, the original file is moved back to where it was and the temp folder is removed.

    Raises:
        TableauFileError: If a metadata record has an unsupported type,
            or a .tdsx archive does not contain a .tds / .twb file.
    """
    # Build the hyper columns first, so that an unsupported type fails before any file is touched
    sql_types = {
        'integer': lambda record: SqlType.int(),
        'real': lambda record: SqlType.double(),
        'string': lambda record: SqlType.varchar(record.width or 1020),
        'boolean': lambda record: SqlType.bool(),
        'datetime': lambda record: SqlType.timestamp(),
        'date': lambda record: SqlType.date(),
    }
    columns = []
    for metadata in self.connection.metadata_records:
        make_sql_type = sql_types.get(metadata.local_type)
        if make_sql_type is None:
            raise TableauFileError(f'Got unexpected metadata type for hyper table: {metadata.local_type}')
        columns.append(TableDefinition.Column(metadata.remote_name, make_sql_type(metadata)))

    # Get relevant paths, and create a temp folder and move the Tableau file into it
    temp_folder = os.path.join(self.file_directory, f'__TEMP_{self.file_name}')
    extract_folder = os.path.join(temp_folder, 'Data', 'Extracts')
    hyper_basename = f'{self.file_name}.hyper'
    hyper_path = os.path.join(extract_folder, hyper_basename)
    # Path inside the archive; always forward slashes, regardless of the OS separator
    hyper_rel_path = f'Data/Extracts/{hyper_basename}'
    temp_path = os.path.join(temp_folder, self.file_basename)
    tdsx_basename = f'{self.file_name}.tdsx'
    final_path = os.path.join(self.file_directory, tdsx_basename)
    # The new archive gets its own name: for a .tdsx input, `temp_path` already ends in
    # `tdsx_basename`, and writing there would destroy the original before the rebuild succeeded
    new_tdsx_path = os.path.join(temp_folder, f'__NEW_{tdsx_basename}')
    os.makedirs(extract_folder, exist_ok=True)
    shutil.move(self.file_path, temp_path)
    try:
        if self.extension == 'tdsx':
            # Unzip the TDS file
            tds_path = None
            with ZipFile(temp_path) as z:
                for f in z.filelist:
                    if f.filename.split('.')[-1] in ('tds', 'twb'):
                        tds_path = z.extract(member=f, path=temp_folder)
            if tds_path is None:
                raise TableauFileError(f'No .tds or .twb file found in: {self.file_basename}')
        else:
            tds_path = temp_path
        # Create an empty .hyper file based on the metadata of the Tableau file
        params = {"default_database_version": "2"}
        with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU, parameters=params) as hyper:
            with Connection(hyper.endpoint, hyper_path, CreateMode.CREATE_AND_REPLACE) as connection:
                # Create an `Extract` table inside an `Extract` schema
                connection.catalog.create_schema('Extract')
                table = TableDefinition(TableName('Extract', 'Extract'), columns)
                connection.catalog.create_table(table)
        # Archive the extract with the TDS file
        with ZipFile(new_tdsx_path, 'w') as z:
            z.write(tds_path, arcname=os.path.basename(tds_path))
            z.write(hyper_path, arcname=hyper_rel_path)
        # Move the tdsx out of the temp_folder
        shutil.move(new_tdsx_path, final_path)
    except BaseException:
        # Put the original file back, so a failure never strands it in the temp folder
        shutil.move(temp_path, self.file_path)
        shutil.rmtree(temp_folder, ignore_errors=True)
        raise
    # Update datasource extract to reference .hyper file
    if self.extract:
        self.extract.connection.class_name = 'hyper'
        self.extract.connection.authentication = 'auth-none'
        self.extract.connection.author_locale = 'en_US'
        self.extract.connection.extract_engine = None
        self.extract.connection.dbname = hyper_rel_path
    # Point the object at the new tdsx and delete temp_folder
    self.file_path = final_path
    self.file_basename = tdsx_basename
    self.extension = 'tdsx'
    shutil.rmtree(temp_folder, ignore_errors=True)

def filter_extract(self, delete_condition: str):
    """ Filters the data in the extract (.hyper file) for the Tableau file.

    Does nothing unless the file is a .tdsx with extract data. Otherwise the archive is unpacked
    into a temp folder, matching rows are deleted from the `Extract`.`Extract` table, and the
    archive is rebuilt and moved back to `file_path`. If anything fails, the original archive
    is moved back unchanged and the temp folder is removed.

    Args:
        delete_condition (str): A condition string to add to the WHERE clause of data to delete.
            It is inserted into the SQL statement verbatim, so it must come from a trusted source.

    Raises:
        TableauFileError: If the archive does not contain a .hyper file.
    """
    if self.extension != 'tdsx' or not self.has_extract_data:
        return None
    # Get relevant paths, and create a temp folder and move the Tableau file into it
    temp_folder = os.path.join(self.file_directory, f'__TEMP_{self.file_name}')
    temp_path = os.path.join(temp_folder, self.file_basename)
    # The rebuilt archive is written next to the original, which is only replaced once it is complete
    rebuilt_path = os.path.join(temp_folder, f'__NEW_{self.file_basename}')
    os.makedirs(temp_folder, exist_ok=True)
    shutil.move(self.file_path, temp_path)
    try:
        # Unzip the TDS file
        unzipped_files = []
        hyper_path = None
        with ZipFile(temp_path) as z:
            for f in z.filelist:
                path = z.extract(member=f, path=temp_folder)
                unzipped_files.append(path)
                if f.filename.split('.')[-1] == 'hyper':
                    hyper_path = path
        if hyper_path is None:
            raise TableauFileError(f'No .hyper file found in: {self.file_basename}')
        # Update .hyper file based on the filter condition
        # NOTE(security): delete_condition is raw SQL; never pass untrusted input here
        with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
            with Connection(hyper.endpoint, hyper_path, CreateMode.NONE) as connection:
                connection.execute_command(f'DELETE FROM "Extract"."Extract" WHERE {delete_condition}')
        # Archive the extract with the TDS file
        with ZipFile(rebuilt_path, 'w') as z:
            for file in unzipped_files:
                # relpath keeps the archive layout even if the extracted path was normalised
                z.write(file, arcname=os.path.relpath(file, temp_folder))
        os.replace(rebuilt_path, temp_path)
    finally:
        # Move the tdsx out of the temp_folder and delete temp_folder;
        # on failure this restores the untouched original
        shutil.move(temp_path, self.file_path)
        shutil.rmtree(temp_folder, ignore_errors=True)

def save(self):
""" Save all changes made to each section of the Datasource """
parent = self._root.find('.')
Expand Down

0 comments on commit 4b01b81

Please sign in to comment.