fix(llm-validation): accept stepfun multimodal models and return 422 on capability mismatch
This commit is contained in:
@@ -1239,7 +1239,7 @@ async def _dispatch_intent(
|
|||||||
)
|
)
|
||||||
provider = LLMProviderService(db).get_provider(provider_id)
|
provider = LLMProviderService(db).get_provider(provider_id)
|
||||||
provider_model = provider.default_model if provider else ""
|
provider_model = provider.default_model if provider else ""
|
||||||
if not is_multimodal_model(provider_model):
|
if not is_multimodal_model(provider_model, provider.provider_type if provider else None):
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=422,
|
status_code=422,
|
||||||
detail=(
|
detail=(
|
||||||
|
|||||||
@@ -83,9 +83,13 @@ async def create_task(
|
|||||||
db_provider = llm_service.get_provider(provider_id)
|
db_provider = llm_service.get_provider(provider_id)
|
||||||
if not db_provider:
|
if not db_provider:
|
||||||
raise ValueError(f"LLM Provider {provider_id} not found")
|
raise ValueError(f"LLM Provider {provider_id} not found")
|
||||||
if request.plugin_id == "llm_dashboard_validation" and not is_multimodal_model(db_provider.default_model):
|
if request.plugin_id == "llm_dashboard_validation" and not is_multimodal_model(
|
||||||
raise ValueError(
|
db_provider.default_model,
|
||||||
"Selected provider model is not multimodal for dashboard validation"
|
db_provider.provider_type,
|
||||||
|
):
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||||
|
detail="Selected provider model is not multimodal for dashboard validation",
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ class DashboardValidationPlugin(PluginBase):
|
|||||||
llm_log.debug(f" Base URL: {db_provider.base_url}")
|
llm_log.debug(f" Base URL: {db_provider.base_url}")
|
||||||
llm_log.debug(f" Default Model: {db_provider.default_model}")
|
llm_log.debug(f" Default Model: {db_provider.default_model}")
|
||||||
llm_log.debug(f" Is Active: {db_provider.is_active}")
|
llm_log.debug(f" Is Active: {db_provider.is_active}")
|
||||||
if not is_multimodal_model(db_provider.default_model):
|
if not is_multimodal_model(db_provider.default_model, db_provider.provider_type):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Dashboard validation requires a multimodal model (image input support)."
|
"Dashboard validation requires a multimodal model (image input support)."
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ def test_render_prompt_replaces_known_placeholders():
|
|||||||
def test_is_multimodal_model_detects_known_vision_models():
|
def test_is_multimodal_model_detects_known_vision_models():
|
||||||
assert is_multimodal_model("gpt-4o") is True
|
assert is_multimodal_model("gpt-4o") is True
|
||||||
assert is_multimodal_model("claude-3-5-sonnet") is True
|
assert is_multimodal_model("claude-3-5-sonnet") is True
|
||||||
|
assert is_multimodal_model("stepfun/step-3.5-flash:free", "openrouter") is True
|
||||||
assert is_multimodal_model("text-only-model") is False
|
assert is_multimodal_model("text-only-model") is False
|
||||||
# [/DEF:test_is_multimodal_model_detects_known_vision_models:Function]
|
# [/DEF:test_is_multimodal_model_detects_known_vision_models:Function]
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any, Optional
|
||||||
|
|
||||||
|
|
||||||
# [DEF:DEFAULT_LLM_PROMPTS:Constant]
|
# [DEF:DEFAULT_LLM_PROMPTS:Constant]
|
||||||
@@ -131,10 +131,21 @@ def normalize_llm_settings(llm_settings: Any) -> Dict[str, Any]:
|
|||||||
# @PURPOSE: Heuristically determine whether model supports image input required for dashboard validation.
|
# @PURPOSE: Heuristically determine whether model supports image input required for dashboard validation.
|
||||||
# @PRE: model_name may be empty or mixed-case.
|
# @PRE: model_name may be empty or mixed-case.
|
||||||
# @POST: Returns True when model likely supports multimodal input.
|
# @POST: Returns True when model likely supports multimodal input.
|
||||||
def is_multimodal_model(model_name: str) -> bool:
|
def is_multimodal_model(model_name: str, provider_type: Optional[str] = None) -> bool:
|
||||||
token = (model_name or "").strip().lower()
|
token = (model_name or "").strip().lower()
|
||||||
if not token:
|
if not token:
|
||||||
return False
|
return False
|
||||||
|
provider = (provider_type or "").strip().lower()
|
||||||
|
text_only_markers = (
|
||||||
|
"text-only",
|
||||||
|
"embedding",
|
||||||
|
"rerank",
|
||||||
|
"whisper",
|
||||||
|
"tts",
|
||||||
|
"transcribe",
|
||||||
|
)
|
||||||
|
if any(marker in token for marker in text_only_markers):
|
||||||
|
return False
|
||||||
multimodal_markers = (
|
multimodal_markers = (
|
||||||
"gpt-4o",
|
"gpt-4o",
|
||||||
"gpt-4.1",
|
"gpt-4.1",
|
||||||
@@ -143,8 +154,21 @@ def is_multimodal_model(model_name: str) -> bool:
|
|||||||
"gemini",
|
"gemini",
|
||||||
"claude-3",
|
"claude-3",
|
||||||
"claude-sonnet-4",
|
"claude-sonnet-4",
|
||||||
|
"omni",
|
||||||
|
"multimodal",
|
||||||
|
"pixtral",
|
||||||
|
"llava",
|
||||||
|
"internvl",
|
||||||
|
"qwen-vl",
|
||||||
|
"qwen2-vl",
|
||||||
|
"stepfun/step-3.5",
|
||||||
)
|
)
|
||||||
return any(marker in token for marker in multimodal_markers)
|
if any(marker in token for marker in multimodal_markers):
|
||||||
|
return True
|
||||||
|
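# OpenRouter model ids are heterogeneous; keep a permissive path for the known StepFun family.
# NOTE(review): "stepfun/step-3.5" is already listed in multimodal_markers, so any token that reaches this point cannot start with it — this branch looks redundant; confirm and remove, or drop the marker if the provider check is the intended gate.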
|
||||||
|
if provider == "openrouter" and token.startswith("stepfun/step-3.5"):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
# [/DEF:is_multimodal_model:Function]
|
# [/DEF:is_multimodal_model:Function]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user