fix(llm-validation): accept stepfun multimodal models and return 422 on capability mismatch
This commit is contained in:
@@ -74,6 +74,7 @@ def test_render_prompt_replaces_known_placeholders():
|
||||
def test_is_multimodal_model_detects_known_vision_models():
    # Table-driven check: each entry pairs the positional arguments passed to
    # is_multimodal_model with the exact bool object expected back.
    expectations = (
        (("gpt-4o",), True),
        (("claude-3-5-sonnet",), True),
        (("stepfun/step-3.5-flash:free", "openrouter"), True),
        (("text-only-model",), False),
    )
    for call_args, expected in expectations:
        # Identity comparison keeps the original strictness: a truthy or
        # falsy non-bool return value would still fail.
        assert is_multimodal_model(*call_args) is expected
|
||||
# [/DEF:test_is_multimodal_model_detects_known_vision_models:Function]
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import Dict, Any
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
|
||||
# [DEF:DEFAULT_LLM_PROMPTS:Constant]
|
||||
@@ -131,10 +131,21 @@ def normalize_llm_settings(llm_settings: Any) -> Dict[str, Any]:
|
||||
# @PURPOSE: Heuristically determine whether the model supports the image input required for dashboard validation.
|
||||
# @PRE: model_name may be empty or mixed-case; provider_type may be None, empty, or mixed-case.
|
||||
# @POST: Returns True when the model likely supports multimodal input; False otherwise.
|
||||
def is_multimodal_model(model_name: str) -> bool:
|
||||
def is_multimodal_model(model_name: str, provider_type: Optional[str] = None) -> bool:
|
||||
token = (model_name or "").strip().lower()
|
||||
if not token:
|
||||
return False
|
||||
provider = (provider_type or "").strip().lower()
|
||||
text_only_markers = (
|
||||
"text-only",
|
||||
"embedding",
|
||||
"rerank",
|
||||
"whisper",
|
||||
"tts",
|
||||
"transcribe",
|
||||
)
|
||||
if any(marker in token for marker in text_only_markers):
|
||||
return False
|
||||
multimodal_markers = (
|
||||
"gpt-4o",
|
||||
"gpt-4.1",
|
||||
@@ -143,8 +154,21 @@ def is_multimodal_model(model_name: str) -> bool:
|
||||
"gemini",
|
||||
"claude-3",
|
||||
"claude-sonnet-4",
|
||||
"omni",
|
||||
"multimodal",
|
||||
"pixtral",
|
||||
"llava",
|
||||
"internvl",
|
||||
"qwen-vl",
|
||||
"qwen2-vl",
|
||||
"stepfun/step-3.5",
|
||||
)
|
||||
return any(marker in token for marker in multimodal_markers)
|
||||
if any(marker in token for marker in multimodal_markers):
|
||||
return True
|
||||
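# OpenRouter model IDs are heterogeneous; keep a permissive path for the known StepFun family.
# NOTE(review): the "stepfun/step-3.5" entry in multimodal_markers already matches these ids via the substring check, so this branch looks redundant — confirm before relying on provider_type here.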
|
||||
if provider == "openrouter" and token.startswith("stepfun/step-3.5"):
|
||||
return True
|
||||
return False
|
||||
# [/DEF:is_multimodal_model:Function]
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user