diff --git a/HISTORY.rst b/HISTORY.rst index e0c89430..83c03602 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,12 @@ History ------- +5.8.0 (2024-04-05) +~~~~~~~~~~~~~~~~~~ + +- Adding option to bigmler whizzml subcommand that stores scripts and + libraries as packages in the local file system. + 5.7.0 (2023-12-14) ~~~~~~~~~~~~~~~~~~ diff --git a/bigmler/__init__.py b/bigmler/__init__.py index 98aa24aa..5411dd58 100644 --- a/bigmler/__init__.py +++ b/bigmler/__init__.py @@ -1,2 +1,2 @@ # -*- coding: utf-8 -*- -__version__ = '5.7.0' +__version__ = '5.8.0' diff --git a/bigmler/options/whizzml.py b/bigmler/options/whizzml.py index e093ffc5..6b60206d 100644 --- a/bigmler/options/whizzml.py +++ b/bigmler/options/whizzml.py @@ -43,6 +43,14 @@ def get_whizzml_options(defaults=None): "default": defaults.get('upgrade', False), "help": "Create resource only if it doesn't exist."}, + # export script or library to be stored in a package directory + '--from': { + "action": 'store', + "dest": 'from_id', + "default": defaults.get('from_id', None), + "help": ("Script or library ID to be exported" + " to a package directory")}, + # any imported library's code will be embedded in the script '--embed-libs': { "action": 'store_true', diff --git a/bigmler/whizzml/dispatcher.py b/bigmler/whizzml/dispatcher.py index 8f8d1d39..c68f50a1 100644 --- a/bigmler/whizzml/dispatcher.py +++ b/bigmler/whizzml/dispatcher.py @@ -27,7 +27,7 @@ import bigmler.utils as u -from bigmler.whizzml.package import create_package +from bigmler.whizzml.package import create_package, export_as_package from bigmler.dispatcher import SESSIONS_LOG, clear_log_files from bigmler.command import get_context @@ -57,7 +57,13 @@ def whizzml_dispatcher(args=sys.argv[1:]): # package_dir if command_args.package_dir is not None: command_args.package_dir = os.path.expanduser(command_args.package_dir) - create_package(command_args, api, command, + if command_args.from_id is not None: + # the command can create a local 
WHIZZML_LIBRARY = "library"
WHIZZML_RESOURCES = [WHIZZML_LIBRARY, "script"]
DFT_CATEGORY = 0  # Miscellaneous
# Attributes of a remote script or library that are copied into the local
# package description when the resource is exported
WHIZZML_ATTRS = ["name", "description", "source_code", "imports",
                 "inputs", "outputs", "category", "project", "resource"]


def get_package_structure(resource_id, api, package_structure=None):
    """Retrieves a script or library and, recursively, every library that
    it imports, and returns a dictionary describing all of them.

    :param resource_id: ID of the script or library to be retrieved
    :param api: connection object. Its ``getters`` attribute is indexed by
                resource type to obtain the function that retrieves the
                resource
    :param package_structure: dictionary filled by previous recursive calls.
                              Leave it as ``None`` in the top-level call
    :return: dictionary whose ``"resources"`` key lists the retrieved IDs in
             the order they were visited. Every ID is also a key that maps
             to the resource metadata: its ``"kind"`` plus the non-empty
             attributes listed in ``WHIZZML_ATTRS``
    :raises ValueError: if the information of a resource cannot be retrieved
    """
    if package_structure is None:
        package_structure = {"resources": []}
    elif resource_id in package_structure["resources"]:
        # already visited: shared (or circular) imports are retrieved once
        return package_structure
    package_structure["resources"].append(resource_id)

    resource_type = bigml.api.get_resource_type(resource_id)
    resource = api.getters[resource_type](resource_id)
    remote_info = resource["object"]
    if remote_info is None:
        # fail with a readable message instead of an attribute error on None
        raise ValueError("Failed to retrieve the information of %s" %
                         resource_id)

    metadata = {"kind": resource_type}
    for attr in WHIZZML_ATTRS:
        attr_value = remote_info.get(attr)
        # empty attributes are not stored, so consumers of this metadata
        # must not assume that any of the WHIZZML_ATTRS keys is present
        if attr_value:
            metadata[attr] = attr_value
    package_structure[resource_id] = metadata

    for library_id in metadata.get("imports", []):
        get_package_structure(library_id, api, package_structure)
    return package_structure


def export_as_package(args, api, command_obj, resume=False):
    """Exports the script or library in ``args.from_id``, and the libraries
    it imports, to the file structure of a package in ``args.package_dir``.

    :param args: parsed command options. The function reads ``output_dir``,
                 ``verbosity``, ``package_dir`` and ``from_id``
    :param api: connection object used to retrieve the resources
    :param command_obj: not used here. Kept so that the signature matches
                        the one of ``create_package``
    :param resume: when true, ``retrieve_subcommands`` is called before
                   exporting
    """
    set_subcommand_file(args.output_dir)
    if resume:
        retrieve_subcommands()
    message = ('Reading the WhizzML resources.........\n')
    u.log_message(message, log_file=session_file,
                  console=args.verbosity)

    # the folder is created before downloading, as in the original flow
    os.makedirs(args.package_dir, exist_ok=True)
    package_structure = get_package_structure(args.from_id, api)
    write_package(package_structure, args)


def write_package(package_structure, args):
    """Writes the package information in the user-given folder

    :param package_structure: dictionary built by ``get_package_structure``
    :param args: parsed command options. The function reads ``package_dir``
                 and ``verbosity``
    """
    package_dir = args.package_dir
    # write the package information
    message = ('Writting the package structure........\n')
    u.log_message(message, log_file=session_file,
                  console=args.verbosity)

    if len(package_structure["resources"]) == 1:
        # simple case: only one script or library, no structure
        resource_id = package_structure["resources"][0]
        write_code(package_structure[resource_id], package_dir)
    else:
        # complex case: script or library with imports
        write_package_folder(package_structure, package_dir)
    message = ('Local package created.................\n')
    u.log_message(message, log_file=session_file,
                  console=args.verbosity)


def write_package_folder(package_structure, package_dir):
    """Creates one subfolder per script or library and the ``metadata.json``
    file that describes the package.

    Subfolders are named ``<kind>_<counter>``. Libraries are numbered first
    and the rest of resources follow. The ``imports`` of every resource,
    libraries included, are rewritten as paths relative to its own
    subfolder (``../<kind>_<counter>``).

    :param package_structure: dictionary built by ``get_package_structure``.
                              It is not modified
    :param package_dir: existing folder where the package is written
    """
    resource_ids = package_structure["resources"]
    libraries = [resource_id for resource_id in resource_ids
                 if package_structure[resource_id]["kind"] == WHIZZML_LIBRARY]
    others = [resource_id for resource_id in resource_ids
              if package_structure[resource_id]["kind"] != WHIZZML_LIBRARY]

    # Every folder name is assigned before anything is written, so that any
    # resource can refer to the folder of any library it imports
    components = {}
    for counter, resource_id in enumerate(libraries + others, start=1):
        components[resource_id] = "%s_%s" % (
            package_structure[resource_id]["kind"], counter)

    for resource_id, folder_name in components.items():
        # work on a copy: the caller's structure is left untouched
        metadata = dict(package_structure[resource_id])
        if metadata.get("imports") is not None:
            metadata["imports"] = [
                os.path.join("..", components[library_id])
                for library_id in metadata["imports"]]
        folder = os.path.join(package_dir, folder_name)
        os.makedirs(folder, exist_ok=True)
        write_code(metadata, folder)

    # The package is named after the resource that was asked for. Its name
    # or description can be missing because empty attributes are not stored
    main_resource = package_structure[resource_ids[0]]
    package_info = {"kind": "package",
                    "components": list(components.values()),
                    "name": main_resource.get("name", ""),
                    "description": main_resource.get("description", "")}
    with open(os.path.join(package_dir, "metadata.json"),
              "wt", encoding="utf-8") as handler:
        json.dump(package_info, handler)


def write_code(resource_info, package_dir):
    """Creates a metadata.json, script.whizzml | library.whizzml and
    a README.md file in the package_dir

    :param resource_info: metadata of the script or library. ``kind`` and
                          ``resource`` are required. It is not modified
    :param package_dir: existing folder where the files are written
    """
    filename = "%s.whizzml" % resource_info["kind"]
    with open(os.path.join(package_dir, filename),
              "wt", encoding="utf-8") as handler:
        # an empty source code is not stored in the metadata
        handler.write(resource_info.get("source_code", ""))

    project = resource_info.get("project")
    project_info = "" if project is None else " in project %s" % project
    with open(os.path.join(package_dir, "README.md"),
              "wt", encoding="utf-8") as handler:
        handler.write("Extracted from %s%s by bigmler" % (
            resource_info["resource"], project_info))

    # The stored metadata refers to the code file instead of embedding the
    # code. The resource and project IDs are only mentioned in the README
    metadata = {key: value for key, value in resource_info.items()
                if key not in ("resource", "project")}
    metadata["source_code"] = filename
    with open(os.path.join(package_dir, "metadata.json"),
              "wt", encoding="utf-8") as handler:
        json.dump(metadata, handler)
The ``--package-dir`` containing folder will in this +case also contain a ``metadata.json`` where the list of subfolders is stored +in its ``components`` attribute so that each of them can be generated and +imported correctly. It also contains the name and description of the +downloaded script and the ``kind`` attribute will be set to ``package``. + .. _bigmler-retrain: diff --git a/setup.py b/setup.py index 8ad87f21..81f553cd 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2012-2017 BigML, Inc +# Copyright 2012-2024 BigML, Inc # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -47,7 +47,6 @@ url="https://bigml.com/developers", download_url="https://github.com/bigmlcom/bigmler", license="http://www.apache.org/licenses/LICENSE-2.0", - setup_requires=['nose'], packages=['bigmler', 'bigmler.processing', 'bigmler.analyze', 'bigmler.cluster', 'bigmler.anomaly', 'bigmler.report', 'bigmler.options', 'bigmler.delete', 'bigmler.sample',