fix(llm-validation): accept stepfun multimodal models and return 422 on capability mismatch
This commit is contained in:
@@ -1239,7 +1239,7 @@ async def _dispatch_intent(
|
||||
)
|
||||
provider = LLMProviderService(db).get_provider(provider_id)
|
||||
provider_model = provider.default_model if provider else ""
|
||||
if not is_multimodal_model(provider_model):
|
||||
if not is_multimodal_model(provider_model, provider.provider_type if provider else None):
|
||||
raise HTTPException(
|
||||
status_code=422,
|
||||
detail=(
|
||||
|
||||
@@ -83,9 +83,13 @@ async def create_task(
|
||||
db_provider = llm_service.get_provider(provider_id)
|
||||
if not db_provider:
|
||||
raise ValueError(f"LLM Provider {provider_id} not found")
|
||||
if request.plugin_id == "llm_dashboard_validation" and not is_multimodal_model(db_provider.default_model):
|
||||
raise ValueError(
|
||||
"Selected provider model is not multimodal for dashboard validation"
|
||||
if request.plugin_id == "llm_dashboard_validation" and not is_multimodal_model(
|
||||
db_provider.default_model,
|
||||
db_provider.provider_type,
|
||||
):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||
detail="Selected provider model is not multimodal for dashboard validation",
|
||||
)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -109,7 +109,7 @@ class DashboardValidationPlugin(PluginBase):
|
||||
llm_log.debug(f" Base URL: {db_provider.base_url}")
|
||||
llm_log.debug(f" Default Model: {db_provider.default_model}")
|
||||
llm_log.debug(f" Is Active: {db_provider.is_active}")
|
||||
if not is_multimodal_model(db_provider.default_model):
|
||||
if not is_multimodal_model(db_provider.default_model, db_provider.provider_type):
|
||||
raise ValueError(
|
||||
"Dashboard validation requires a multimodal model (image input support)."
|
||||
)
|
||||
|
||||
@@ -74,6 +74,7 @@ def test_render_prompt_replaces_known_placeholders():
|
||||
def test_is_multimodal_model_detects_known_vision_models():
|
||||
assert is_multimodal_model("gpt-4o") is True
|
||||
assert is_multimodal_model("claude-3-5-sonnet") is True
|
||||
assert is_multimodal_model("stepfun/step-3.5-flash:free", "openrouter") is True
|
||||
assert is_multimodal_model("text-only-model") is False
|
||||
# [/DEF:test_is_multimodal_model_detects_known_vision_models:Function]
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import Dict, Any
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
|
||||
# [DEF:DEFAULT_LLM_PROMPTS:Constant]
|
||||
@@ -131,10 +131,21 @@ def normalize_llm_settings(llm_settings: Any) -> Dict[str, Any]:
|
||||
# @PURPOSE: Heuristically determine whether model supports image input required for dashboard validation.
|
||||
# @PRE: model_name may be empty or mixed-case.
|
||||
# @POST: Returns True when model likely supports multimodal input.
|
||||
def is_multimodal_model(model_name: str) -> bool:
|
||||
def is_multimodal_model(model_name: str, provider_type: Optional[str] = None) -> bool:
|
||||
token = (model_name or "").strip().lower()
|
||||
if not token:
|
||||
return False
|
||||
provider = (provider_type or "").strip().lower()
|
||||
text_only_markers = (
|
||||
"text-only",
|
||||
"embedding",
|
||||
"rerank",
|
||||
"whisper",
|
||||
"tts",
|
||||
"transcribe",
|
||||
)
|
||||
if any(marker in token for marker in text_only_markers):
|
||||
return False
|
||||
multimodal_markers = (
|
||||
"gpt-4o",
|
||||
"gpt-4.1",
|
||||
@@ -143,8 +154,21 @@ def is_multimodal_model(model_name: str) -> bool:
|
||||
"gemini",
|
||||
"claude-3",
|
||||
"claude-sonnet-4",
|
||||
"omni",
|
||||
"multimodal",
|
||||
"pixtral",
|
||||
"llava",
|
||||
"internvl",
|
||||
"qwen-vl",
|
||||
"qwen2-vl",
|
||||
"stepfun/step-3.5",
|
||||
)
|
||||
return any(marker in token for marker in multimodal_markers)
|
||||
if any(marker in token for marker in multimodal_markers):
|
||||
return True
|
||||
# OpenRouter model ids are heterogeneous; keep permissive path for known StepFun family.
|
||||
if provider == "openrouter" and token.startswith("stepfun/step-3.5"):
|
||||
return True
|
||||
return False
|
||||
# [/DEF:is_multimodal_model:Function]
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user