diff --git a/requirements.txt b/requirements.txt
index 43e8f4d8..a4ede22c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ requests
 pandas
 tabulate
 pytest
+tableauhyperapi
diff --git a/setup.py b/setup.py
index be063c60..0e27074c 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
     long_description=readme,
     long_description_content_type='text/markdown',
     name="tableau_utilities",
-    version="2.1.16",
+    version="2.1.2",
     packages=[
         'tableau_utilities',
         'tableau_utilities.general',
@@ -26,7 +26,8 @@
         'pyyaml>=6.0,<7.0.0',
         'requests>=2.27.1,<3.0.0',
         'pandas>=1.4.1,<2.0.0',
-        'tabulate>=0.8.9,<1.0.0'],
+        'tabulate>=0.8.9,<1.0.0',
+        'tableauhyperapi==0.0.18825'],
     entry_points={
         'console_scripts': [
             'tableau_utilities = tableau_utilities.scripts.cli:main',
diff --git a/tableau_utilities/scripts/cli.py b/tableau_utilities/scripts/cli.py
index 7f9cb173..df602011 100644
--- a/tableau_utilities/scripts/cli.py
+++ b/tableau_utilities/scripts/cli.py
@@ -157,6 +157,11 @@
                                help='The datatype persona of the column. Required for adding a new column')
 parser_datasource.add_argument('--desc', help='A Tableau column description')
 parser_datasource.add_argument('--calculation', help='A Tableau calculation')
+parser_datasource.add_argument('-E', '--empty_extract', action='store_true',
+                               help='Adds an empty extract to the Datasource if specified.')
+parser_datasource.add_argument('-F', '--filter_extract',
+                               help='Deletes data from the extract based on the condition string provided. '
+                                    """E.g. "CREATED_AT" < '1/1/2024'""")
 parser_datasource.set_defaults(func=datasource)
 
 # GENERATE CONFIG
diff --git a/tableau_utilities/scripts/datasource.py b/tableau_utilities/scripts/datasource.py
index 58a2bd03..b22cb819 100644
--- a/tableau_utilities/scripts/datasource.py
+++ b/tableau_utilities/scripts/datasource.py
@@ -32,6 +32,8 @@ def datasource(args, server=None):
     project_name = args.project_name
     location = args.location
     enforce_connection = args.enforce_connection
+    empty_extract = args.empty_extract
+    filter_extract = args.filter_extract
 
     # Folder/Fields Args
     persona = args.persona
@@ -69,6 +71,18 @@ def datasource(args, server=None):
     datasource_file_name = os.path.basename(datasource_path)
 
     ds = Datasource(datasource_path)
+    # Add an empty .hyper file to the Datasource; Useful for publishing without data
+    if empty_extract:
+        ds.empty_extract()
+        print(f'{color.fg_green}Added empty .hyper extract for {datasource_path}{color.reset}')
+    # Otherwise, filter the extract if filter_extract string provided
+    elif filter_extract:
+        start = time()
+        print(f'{color.fg_cyan}...Filtering extract data...{color.reset}')
+        ds.filter_extract(filter_extract)
+        print(f'{color.fg_green}{symbol.success} (Done in {round(time() - start)} sec) '
+              f'Filtered extract data for {datasource_path}{color.reset}')
+
     if save_tds:
         start = time()
         print(f'{color.fg_cyan}...Extracting {datasource_file_name}...{color.reset}')
@@ -168,11 +182,12 @@ def datasource(args, server=None):
             ds.connection.update(connection)
 
     # Save the datasource if an edit may have happened
-    if column_name or folder_name or delete or enforce_connection:
+    if column_name or folder_name or delete or enforce_connection or empty_extract:
         start = time()
+        print(f'{color.fg_cyan}...Saving datasource changes...{color.reset}')
         ds.save()
         print(f'{color.fg_green}{symbol.success} (Done in {round(time() - start)} sec) '
-              f'Saved changes to: {color.fg_yellow}{datasource_path}{color.reset}')
+              f'Saved datasource changes: {color.fg_yellow}{datasource_path}{color.reset}')
 
     if save_tds:
         start = time()
diff --git a/tableau_utilities/tableau_file/tableau_file.py b/tableau_utilities/tableau_file/tableau_file.py
index 8bbdaebf..7af3a0ea 100644
--- a/tableau_utilities/tableau_file/tableau_file.py
+++ b/tableau_utilities/tableau_file/tableau_file.py
@@ -2,9 +2,9 @@
 import xml.etree.ElementTree as ET
 import os
 import shutil
-
 import xmltodict
-from zipfile import ZipFile, ZIP_DEFLATED
+from tableauhyperapi import HyperProcess, Connection, Telemetry, CreateMode, TableDefinition, TableName, SqlType
+from zipfile import ZipFile
 
 import tableau_utilities.tableau_file.tableau_file_objects as tfo
 from tableau_utilities.general.funcs import transform_tableau_object
@@ -302,6 +302,106 @@ def enforce_column(self, column, folder_name=None, remote_name=None):
         if not found:
             self.extract.connection.cols.append(extract_col)
 
+    def empty_extract(self):
+        """ Creates an empty extract (.hyper file) for the Tableau file.
+            If the extract exists, it will be overwritten. """
+        # Get relevant paths, and create a temp folder and move the Tableau file into it
+        temp_folder = os.path.join(self.file_directory, f'__TEMP_{self.file_name}')
+        extract_folder = os.path.join(temp_folder, 'Data', 'Extracts')
+        hyper_rel_path = os.path.join('Data', 'Extracts', f'{self.file_name}.hyper')
+        temp_path = os.path.join(temp_folder, self.file_basename)
+        tdsx_basename = f'{self.file_name}.tdsx'
+        tdsx_path = os.path.join(temp_folder, tdsx_basename)
+        os.makedirs(extract_folder, exist_ok=True)
+        shutil.move(self.file_path, temp_path)
+        if self.extension == 'tdsx':
+            # Unzip the TDS file
+            with ZipFile(temp_path) as z:
+                for f in z.filelist:
+                    ext = f.filename.split('.')[-1]
+                    if ext in ['tds', 'twb']:
+                        tds_path = z.extract(member=f, path=temp_folder)
+        else:
+            tds_path = temp_path
+        hyper_path = os.path.join(extract_folder, f'{self.file_name}.hyper')
+        params = {"default_database_version": "2"}
+        # Get columns from the metadata
+        columns = list()
+        for metadata in self.connection.metadata_records:
+            if metadata.local_type == 'integer':
+                columns.append(TableDefinition.Column(metadata.remote_name, SqlType.int()))
+            elif metadata.local_type == 'real':
+                columns.append(TableDefinition.Column(metadata.remote_name, SqlType.double()))
+            elif metadata.local_type == 'string':
+                columns.append(TableDefinition.Column(metadata.remote_name, SqlType.varchar(metadata.width or 1020)))
+            elif metadata.local_type == 'boolean':
+                columns.append(TableDefinition.Column(metadata.remote_name, SqlType.bool()))
+            elif metadata.local_type == 'datetime':
+                columns.append(TableDefinition.Column(metadata.remote_name, SqlType.timestamp()))
+            elif metadata.local_type == 'date':
+                columns.append(TableDefinition.Column(metadata.remote_name, SqlType.date()))
+            else:
+                raise TableauFileError(f'Got unexpected metadata type for hyper table: {metadata.local_type}')
+        # Create an empty .hyper file based on the metadata of the Tableau file
+        with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU, parameters=params) as hyper:
+            with Connection(hyper.endpoint, hyper_path, CreateMode.CREATE_AND_REPLACE) as connection:
+                # Create an `Extract` table inside an `Extract` schema
+                connection.catalog.create_schema('Extract')
+                table = TableDefinition(TableName('Extract', 'Extract'), columns)
+                connection.catalog.create_table(table)
+        # Archive the extract with the TDS file
+        with ZipFile(tdsx_path, 'w') as z:
+            z.write(tds_path, arcname=os.path.basename(tds_path))
+            z.write(hyper_path, arcname=hyper_rel_path)
+        # Update datasource extract to reference .hyper file
+        if self.extract:
+            self.extract.connection.class_name = 'hyper'
+            self.extract.connection.authentication = 'auth-none'
+            self.extract.connection.author_locale = 'en_US'
+            self.extract.connection.extract_engine = None
+            self.extract.connection.dbname = hyper_rel_path
+        # Move the tdsx out of the temp_folder and delete temp_folder
+        self.file_path = os.path.join(self.file_directory, tdsx_basename)
+        self.file_basename = tdsx_basename
+        self.extension = 'tdsx'
+        shutil.move(tdsx_path, self.file_path)
+        shutil.rmtree(temp_folder, ignore_errors=True)
+
+    def filter_extract(self, delete_condition: str):
+        """ Filters the data in the extract (.hyper file) for the Tableau file.
+
+        Args:
+            delete_condition (str): A condition string to add to the WHERE clause of data to delete.
+        """
+        if self.extension != 'tdsx' or not self.has_extract_data:
+            return None
+        # Get relevant paths, and create a temp folder and move the Tableau file into it
+        temp_folder = os.path.join(self.file_directory, f'__TEMP_{self.file_name}')
+        temp_path = os.path.join(temp_folder, self.file_basename)
+        os.makedirs(temp_folder, exist_ok=True)
+        shutil.move(self.file_path, temp_path)
+        # Unzip the TDS file
+        unzipped_files = list()
+        with ZipFile(temp_path) as z:
+            for f in z.filelist:
+                ext = f.filename.split('.')[-1]
+                path = z.extract(member=f, path=temp_folder)
+                unzipped_files.append(path)
+                if ext == 'hyper':
+                    hyper_path = path
+        # Update .hyper file based on the filter condition
+        with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
+            with Connection(hyper.endpoint, hyper_path, CreateMode.NONE) as connection:
+                connection.execute_command(f'DELETE FROM "Extract"."Extract" WHERE {delete_condition}')
+        # Archive the extract with the TDS file
+        with ZipFile(temp_path, 'w') as z:
+            for file in unzipped_files:
+                arcname = file.split(temp_folder)[-1]
+                z.write(file, arcname=arcname)
+        # Move the tdsx out of the temp_folder and delete temp_folder
+        shutil.move(temp_path, self.file_path)
+        shutil.rmtree(temp_folder, ignore_errors=True)
+
     def save(self):
         """ Save all changes made to each section of the Datasource """
         parent = self._root.find('.')
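A minimal usage sketch of the two new methods, not part of the patch itself; it assumes Datasource is importable from tableau_utilities.tableau_file.tableau_file and that a local 'My Datasource.tdsx' with extract data exists (both the import path and the file name are illustrative assumptions):

# Illustrative sketch only; file name and import path are assumptions, not part of the patch.
from tableau_utilities.tableau_file.tableau_file import Datasource

ds = Datasource('My Datasource.tdsx')

# Replace any existing extract with an empty .hyper file built from the
# datasource's metadata records, then persist the updated extract XML
# (mirrors what scripts/datasource.py does when --empty_extract is passed).
ds.empty_extract()
ds.save()

# Or delete rows from an existing extract; the string becomes the WHERE clause
# of a DELETE against the Extract.Extract table (mirrors --filter_extract).
ds.filter_extract(""" "CREATED_AT" < '1/1/2024' """)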