Skip to content

Commit

Permalink
Add UI for dataset settings to edit tags of a dataset. (databricks#824)
Browse files Browse the repository at this point in the history
- Move tags up to the top-level dataset settings in Python (`DatasetSettings.tags`); `DatasetConfig.tags` is now deprecated.
- Allow the UI to change the tags.
- When some datasets are tagged, show a "no tag" label for the untagged ones. If
no datasets are tagged, no tag UI is shown.

Demo: https://huggingface.co/spaces/lilacai/nikhil_staging


https://github.com/lilacai/lilac/assets/1100749/d0077f40-fd2b-4d24-b983-e299306a2e7c

Other fixes:
- Move the skeleton text for the row_id query to the bottom so it's not
always at the top.
- Fix some padding in the settings modal that caused the focus outline to be
cut off.
  • Loading branch information
nsthorat authored Nov 6, 2023
1 parent 9b5dc5e commit 6189fc5
Show file tree
Hide file tree
Showing 12 changed files with 168 additions and 42 deletions.
11 changes: 9 additions & 2 deletions lilac/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,15 +121,22 @@ class DatasetSettings(BaseModel):
ui: Optional[DatasetUISettings] = None
preferred_embedding: Optional[str] = DEFAULT_EMBEDDING
model_config = ConfigDict(extra='forbid')
tags: Optional[list[str]] = PydanticField(
description='A list of tags for the dataset to organize in the UI.', default=[]
)


class DatasetConfig(BaseModel):
"""Configures a dataset with a source and transformations."""

namespace: str = PydanticField(description='The namespace of the dataset.')
name: str = PydanticField(description='The name of the dataset.')
tags: list[str] = PydanticField(
description='A list of tags for the dataset to organize in the UI.', default=[]

# Deprecated.
tags: Optional[list[str]] = PydanticField(
description='[Deprecated] This field is *deprecated* in favor of DatasetSettings.tags and '
'will be removed in a later release.',
default=[],
)

# The source configuration.
Expand Down
6 changes: 3 additions & 3 deletions lilac/db_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ def list_datasets(project_dir: Optional[Union[str, pathlib.Path]] = None) -> lis
continue

dataset_config = get_dataset_config(project_config, namespace, dataset_name)
tags = []
if dataset_config:
tags = dataset_config.tags
tags: list[str] = []
if dataset_config and dataset_config.settings and dataset_config.settings.tags:
tags = dataset_config.settings.tags

dataset_infos.append(DatasetInfo(namespace=namespace, dataset_name=dataset_name, tags=tags))

Expand Down
20 changes: 10 additions & 10 deletions lilac_hf_space.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
datasets:
- name: imdb
namespace: lilac
tags: [machine-learning]
settings:
tags: [machine-learning]
ui:
media_paths:
- text
Expand All @@ -13,8 +13,8 @@ datasets:

- name: open-asssistant-conversations
namespace: lilac
tags: [machine-learning]
settings:
tags: [machine-learning]
ui:
media_paths:
- text
Expand All @@ -25,8 +25,8 @@ datasets:

- name: wikitext-2-raw-v1
namespace: lilac
tags: [machine-learning]
settings:
tags: [machine-learning]
ui:
media_paths:
- text
Expand All @@ -39,11 +39,11 @@ datasets:

- name: textbook_quality_programming
namespace: lilac
tags: [machine-learning]
source:
dataset_name: vikp/textbook_quality_programming
source_name: huggingface
settings:
tags: [machine-learning]
ui:
media_paths:
- - outline
Expand All @@ -57,8 +57,8 @@ datasets:

- name: databricks-dolly-15k-curated-en
namespace: lilac
tags: [machine-learning]
settings:
tags: [machine-learning]
ui:
media_paths:
- original-instruction
Expand All @@ -80,8 +80,8 @@ datasets:

- name: 'OpenOrca-100k'
namespace: lilac
tags: [machine-learning]
settings:
tags: [machine-learning]
ui:
media_paths:
- question
Expand All @@ -94,8 +94,8 @@ datasets:

- name: opus100-en-es-validation
namespace: lilac
tags: [machine-learning]
settings:
tags: [machine-learning]
ui:
media_paths:
- [translation, es]
Expand All @@ -110,8 +110,8 @@ datasets:
# Science datasets
- name: science-qa-derek-thomas
namespace: lilac
tags: [science]
settings:
tags: [science]
ui:
media_paths: [lecture]
preferred_embedding: 'gte-small'
Expand All @@ -122,8 +122,8 @@ datasets:
# Business datasets.
- name: enron-emails
namespace: lilac
tags: [business]
settings:
tags: [business]
ui:
media_paths: [text]
preferred_embedding: 'gte-small'
Expand All @@ -136,8 +136,8 @@ datasets:
# Other datasets.
- name: the_movies_dataset
namespace: lilac
tags: [other]
settings:
tags: [other]
ui:
media_paths: [overview]
preferred_embedding: 'gte-small'
Expand Down
47 changes: 30 additions & 17 deletions scripts/deploy_staging.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
files.
--skip_data_upload: When true, only uploads the wheel + ts files without any other changes.
--create_space: When true, creates the space if it doesn't exist.
"""

import os
Expand All @@ -27,7 +28,6 @@

import click
from huggingface_hub import CommitOperationAdd, CommitOperationDelete, HfApi

from lilac.deploy import PY_DIST_DIR, deploy_project_operations
from lilac.env import env
from lilac.utils import log
Expand All @@ -38,51 +38,60 @@
'--hf_space',
help='The huggingface space. Defaults to env.HF_STAGING_DEMO_REPO. '
'Should be formatted like `SPACE_ORG/SPACE_NAME`.',
type=str)
type=str,
)
@click.option('--dataset', help='The name of a dataset to upload', type=str, multiple=True)
@click.option(
'--concept',
help='The name of a concept to upload. By default all lilac/ concepts are uploaded.',
type=str,
multiple=True)
multiple=True,
)
@click.option(
'--skip_ts_build',
help='Skip building the web server TypeScript. '
'Useful to speed up the build if you are only changing python or data.',
type=bool,
is_flag=True,
default=False)
default=False,
)
@click.option(
'--skip_cache_upload',
help='Skip uploading the cache files from .cache/lilac which contain cached concept pkl models.',
type=bool,
is_flag=True,
default=False)
default=False,
)
@click.option(
'--skip_ts_build',
help='Skip building the web server TypeScript. '
'Useful to speed up the build if you are only changing python or data.',
type=bool,
is_flag=True,
default=False)
default=False,
)
@click.option(
'--create_space',
help='When True, creates the HuggingFace space if it doesnt exist. The space will be created '
'with the storage type defined by --hf_space_storage.',
is_flag=True,
default=False)
default=False,
)
@click.option(
'--skip_data_upload',
help='When true, only uploads the wheel files without any other changes.',
is_flag=True,
default=False)
def deploy_staging(hf_space: Optional[str] = None,
dataset: Optional[list[str]] = None,
concept: Optional[list[str]] = None,
skip_cache_upload: Optional[bool] = False,
skip_ts_build: Optional[bool] = False,
skip_data_upload: Optional[bool] = False,
create_space: Optional[bool] = False) -> None:
default=False,
)
def deploy_staging(
hf_space: Optional[str] = None,
dataset: Optional[list[str]] = None,
concept: Optional[list[str]] = None,
skip_cache_upload: Optional[bool] = False,
skip_ts_build: Optional[bool] = False,
skip_data_upload: Optional[bool] = False,
create_space: Optional[bool] = False,
) -> None:
"""Generate the huggingface space app."""
hf_space = hf_space or env('HF_STAGING_DEMO_REPO')
if not hf_space:
Expand Down Expand Up @@ -119,7 +128,9 @@ def deploy_staging(hf_space: Optional[str] = None,
make_datasets_public=False,
skip_data_upload=skip_data_upload,
hf_space_storage=None,
create_space=create_space))
create_space=create_space,
)
)

# Unconditionally remove dist. dist is unconditionally uploaded so it is empty when using
# the public package.
Expand All @@ -143,7 +154,9 @@ def deploy_staging(hf_space: Optional[str] = None,
operations.append(
CommitOperationAdd(
path_in_repo=os.path.join(PY_DIST_DIR, upload_file),
path_or_fileobj=os.path.join(PY_DIST_DIR, upload_file)))
path_or_fileobj=os.path.join(PY_DIST_DIR, upload_file),
)
)

# Atomically commit all the operations so we don't kick the server multiple times.
hf_api.create_commit(
Expand Down
10 changes: 8 additions & 2 deletions web/blueprint/src/lib/components/NavigationGroup.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
export let isFetching: boolean;
export let tagGroups: NavigationTagGroup[];
export let expanded = true;
$: hasTags = tagGroups.some(({tag}) => tag != '');
</script>

<div class="my-1 w-full px-1">
Expand All @@ -51,13 +53,17 @@
{:else}
<div class="mt-1">
{#each tagGroups as { tag, groups }}
{#if tag != ''}
{#if hasTags}
<div
class="flex flex-row justify-between pl-3
text-sm opacity-80"
>
<div class="py-1 text-xs">
<Tag type="purple" size="sm">{tag}</Tag>
{#if tag != ''}
<Tag type="purple" size="sm">{tag}</Tag>
{:else}
<Tag type="cool-gray" size="sm">no tag</Tag>
{/if}
</div>
</div>
{/if}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<button
data-active={item === _item.value}
on:click={() => dispatch('select', _item.value)}
class="mb-1 flex items-center justify-between"
class="my-1 flex items-center justify-between"
>
{_item.title}
{#if _item.tag} <Tag size="sm" type={_item.tag.type}>{_item.tag.value}</Tag> {/if}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
$: {
if (selectedMediaFields != null) {
settings = {
...settings,
ui: {
media_paths: selectedMediaFields.map(f => f.path),
markdown_paths: markdownMediaFields?.map(f => f.path)
Expand Down
Loading

0 comments on commit 6189fc5

Please sign in to comment.