Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update tasks with new models and apps #1229

Merged
merged 9 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/tasks/src/tasks/depth-estimation/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ const taskData: TaskDataCustom = {
},
{
description: "A robust depth estimation model.",
id: "apple/DepthPro",
id: "apple/DepthPro-hf",
},
],
spaces: [
Expand Down
16 changes: 10 additions & 6 deletions packages/tasks/src/tasks/image-text-to-text/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ const taskData: TaskDataCustom = {
},
{
description: "A screenshot understanding model used to control computers.",
id: "showlab/ShowUI-2B",
id: "microsoft/OmniParser-v2.0",
},
{
description: "Cutting-edge vision language model.",
Expand All @@ -63,12 +63,16 @@ const taskData: TaskDataCustom = {
id: "Qwen/Qwen2.5-VL-7B-Instruct",
},
{
description: "Image-text-to-text model with reasoning capabilities.",
id: "Qwen/QVQ-72B-Preview",
description: "Image-text-to-text model with agentic capabilities.",
id: "microsoft/Magma-8B",
},
{
description: "Strong image-text-to-text model focused on documents.",
id: "stepfun-ai/GOT-OCR2_0",
id: "allenai/olmOCR-7B-0225-preview",
},
{
description: "Small yet strong image-text-to-text model.",
id: "ibm-granite/granite-vision-3.2-2b",
},
],
spaces: [
Expand All @@ -85,8 +89,8 @@ const taskData: TaskDataCustom = {
id: "akhaliq/Molmo-7B-D-0924",
},
{
description: "An image-text-to-text application focused on documents.",
id: "stepfun-ai/GOT_official_online_demo",
description: "Powerful vision language assistant that can understand multiple images.",
id: "HuggingFaceTB/SmolVLM2",
},
{
description: "An application for chatting with an image-text-to-text model.",
Expand Down
4 changes: 4 additions & 0 deletions packages/tasks/src/tasks/keypoint-detection/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ const taskData: TaskDataCustom = {
description: "A robust keypoint detection model.",
id: "magic-leap-community/superpoint",
},
{
description: "A robust keypoint matching model.",
id: "magic-leap-community/superglue_outdoor",
},
{
description: "Strong keypoint detection model used to detect human pose.",
id: "facebook/sapiens-pose-1b",
Expand Down
10 changes: 5 additions & 5 deletions packages/tasks/src/tasks/object-detection/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ const taskData: TaskDataCustom = {
id: "facebook/detr-resnet-50",
},
{
description: "Real-time and accurate object detection model.",
id: "jameslahm/yolov10x",
description: "Accurate object detection model.",
id: "IDEA-Research/dab-detr-resnet-50",
},
{
description: "Fast and accurate object detection model trained on COCO and Object365 datasets.",
id: "PekingU/rtdetr_r18vd_coco_o365",
description: "Fast and accurate object detection model.",
id: "PekingU/rtdetr_v2_r50vd",
},
{
description: "Object detection model for low-lying objects.",
Expand All @@ -70,7 +70,7 @@ const taskData: TaskDataCustom = {
},
{
description: "A cutting-edge object detection application.",
id: "Ultralytics/YOLO11",
id: "sunsmarterjieleaf/yolov12",
},
{
description: "An object tracking, segmentation and inpainting application.",
Expand Down
2 changes: 1 addition & 1 deletion packages/tasks/src/tasks/text-generation/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ const taskData: TaskDataCustom = {
},
{
description: "A very powerful model with reasoning capabilities.",
id: "PowerInfer/SmallThinker-3B-Preview",
id: "simplescaling/s1.1-32B",
},
{
description: "Strong conversational model that supports very long instructions.",
Expand Down
4 changes: 4 additions & 0 deletions packages/tasks/src/tasks/text-to-speech/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ const taskData: TaskDataCustom = {
description: "An application that synthesizes emotional speech for diverse speaker prompts.",
id: "parler-tts/parler-tts-expresso",
},
{
description: "An application that generates podcast episodes.",
id: "ngxson/kokoro-podcast-generator",
},
],
summary:
"Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
Expand Down
6 changes: 5 additions & 1 deletion packages/tasks/src/tasks/text-to-video/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ const taskData: TaskDataCustom = {
description: "A text-to-video model focusing on physics-aware applications like robotics.",
id: "nvidia/Cosmos-1.0-Diffusion-7B-Text2World",
},
{
description: "A robust model for video generation.",
id: "Wan-AI/Wan2.1-T2V-1.3B",
},
],
spaces: [
{
Expand All @@ -86,7 +90,7 @@ const taskData: TaskDataCustom = {
},
{
description: "Consistent video generation application.",
id: "TIGER-Lab/T2V-Turbo-V2",
id: "Wan-AI/Wan2.1",
},
{
description: "A cutting edge video generation application.",
Expand Down
8 changes: 8 additions & 0 deletions packages/tasks/src/tasks/video-text-to-text/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ const taskData: TaskDataCustom = {
description: "Strong video-text-to-text model with reasoning capabilities.",
id: "GoodiesHere/Apollo-LMMs-Apollo-7B-t32",
},
{
description: "Strong video-text-to-text model.",
id: "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
},
],
spaces: [
{
Expand All @@ -56,6 +60,10 @@ const taskData: TaskDataCustom = {
description: "A leaderboard for various video-text-to-text models.",
id: "opencompass/openvlm_video_leaderboard",
},
{
description: "An application to generate highlights from a video.",
id: "HuggingFaceTB/SmolVLM2-HighlightGenerator",
},
],
summary:
"Video-text-to-text models take in a video and a text prompt and output text. These models are also called video-language models.",
Expand Down
4 changes: 4 additions & 0 deletions packages/tasks/src/tasks/zero-shot-classification/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ const taskData: TaskDataCustom = {
description: "Cutting-edge zero-shot multilingual text classification model.",
id: "MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
},
{
description: "Zero-shot text classification model that can be used for topic and sentiment classification.",
id: "knowledgator/gliclass-modern-base-v2.0-init",
},
],
spaces: [],
summary:
Expand Down
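4 changes: 2 additions & 2 deletions packages/tasks/src/tasks/zero-shot-image-classification/data.ts
Original file line number Diff line number Diff line change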
Expand Up @@ -53,11 +53,11 @@ const taskData: TaskDataCustom = {
},
{
description: "Strong zero-shot image classification model.",
id: "google/siglip-so400m-patch14-224",
id: "google/siglip2-base-patch16-224",
},
{
description: "Robust zero-shot image classification model.",
id: "microsoft/LLM2CLIP-EVA02-L-14-336",
id: "intfloat/mmE5-mllama-11b-instruct",
},
{
description: "Powerful zero-shot image classification model supporting 94 languages.",
Expand Down