# [DEF:backend.src.api.routes.__tests__.test_datasets:Module]
# @TIER: STANDARD
# @SEMANTICS: datasets, api, tests, pagination, mapping, docs
# @PURPOSE: Unit tests for Datasets API endpoints
# @LAYER: API
# @RELATION: TESTS -> backend.src.api.routes.datasets
# @INVARIANT: Endpoint contracts remain stable for success and validation failure paths.
import pytest
from unittest.mock import MagicMock, patch, AsyncMock
from fastapi.testclient import TestClient

from src.app import app
from src.api.routes.datasets import DatasetsResponse, DatasetDetailResponse
from src.dependencies import (
    get_current_user,
    has_permission,
    get_config_manager,
    get_task_manager,
    get_resource_service,
    get_mapping_service,
)

# Global mock user for get_current_user dependency overrides.
# Carries an "Admin" role so permission-gated routes accept it.
mock_user = MagicMock()
mock_user.username = "testuser"
mock_user.roles = []
admin_role = MagicMock()
admin_role.name = "Admin"
mock_user.roles.append(admin_role)


@pytest.fixture(autouse=True)
def mock_deps():
    """Override all route dependencies with fresh mocks for every test.

    Yields a dict exposing the per-test mocks:
    {"config", "task", "resource", "mapping"}; overrides are cleared on teardown
    so state never leaks between tests.
    """
    config_manager = MagicMock()
    task_manager = MagicMock()
    resource_service = MagicMock()
    mapping_service = MagicMock()

    app.dependency_overrides[get_config_manager] = lambda: config_manager
    app.dependency_overrides[get_task_manager] = lambda: task_manager
    app.dependency_overrides[get_resource_service] = lambda: resource_service
    app.dependency_overrides[get_mapping_service] = lambda: mapping_service
    app.dependency_overrides[get_current_user] = lambda: mock_user
    # NOTE(review): overriding the *result* of has_permission(...) only works if
    # has_permission returns a stable/cached dependency object per
    # (resource, action) pair; if it builds a fresh closure on each call, the
    # dict key here will not match the one inside the route's Depends().
    # Confirm against src.dependencies.
    app.dependency_overrides[has_permission("plugin:migration", "READ")] = lambda: mock_user
    app.dependency_overrides[has_permission("plugin:migration", "EXECUTE")] = lambda: mock_user
    app.dependency_overrides[has_permission("plugin:backup", "EXECUTE")] = lambda: mock_user
    app.dependency_overrides[has_permission("tasks", "READ")] = lambda: mock_user

    yield {
        "config": config_manager,
        "task": task_manager,
        "resource": resource_service,
        "mapping": mapping_service,
    }
    app.dependency_overrides.clear()


client = TestClient(app)

# [DEF:test_get_datasets_success:Function]
# @PURPOSE: Validate successful datasets listing contract for an existing environment.
# @TEST: GET /api/datasets returns 200 and valid schema
# @PRE: env_id exists
# @POST: Response matches DatasetsResponse schema
def test_get_datasets_success(mock_deps):
    # Environment lookup resolves env_id=prod.
    mock_env = MagicMock()
    mock_env.id = "prod"
    mock_deps["config"].get_environments.return_value = [mock_env]

    # Resource service returns exactly one dataset row.
    mock_deps["resource"].get_datasets_with_status = AsyncMock(
        return_value=[
            {
                "id": 1,
                "table_name": "sales_data",
                "schema": "public",
                "database": "sales_db",
                "mapped_fields": {"total": 10, "mapped": 5},
                "last_task": {"task_id": "task-1", "status": "SUCCESS"},
            }
        ]
    )

    response = client.get("/api/datasets?env_id=prod")

    assert response.status_code == 200
    data = response.json()
    assert "datasets" in data
    # The mock supplies exactly one dataset, so exactly one must come back.
    # (The previous `>= 0` assertion was vacuous — it could never fail.)
    assert len(data["datasets"]) == 1
    # Validate against Pydantic model
    DatasetsResponse(**data)
# [/DEF:test_get_datasets_success:Function]


# [DEF:test_get_datasets_env_not_found:Function]
# @TEST: GET /api/datasets returns 404 if env_id missing
# @PRE: env_id does not exist
# @POST: Returns 404 error
def test_get_datasets_env_not_found(mock_deps):
    mock_deps["config"].get_environments.return_value = []

    response = client.get("/api/datasets?env_id=nonexistent")

    assert response.status_code == 404
    assert "Environment not found" in response.json()["detail"]
# [/DEF:test_get_datasets_env_not_found:Function]


# [DEF:test_get_datasets_invalid_pagination:Function]
# @TEST: GET /api/datasets returns 400 for invalid page/page_size
# @PRE: page < 1 or page_size > 100
# @POST: Returns 400 error
def test_get_datasets_invalid_pagination(mock_deps):
    mock_env = MagicMock()
    mock_env.id = "prod"
    mock_deps["config"].get_environments.return_value = [mock_env]

    # Invalid page (below minimum of 1).
    response = client.get("/api/datasets?env_id=prod&page=0")
    assert response.status_code == 400
    assert "Page must be >= 1" in response.json()["detail"]

    # Invalid page_size (too small).
    response = client.get("/api/datasets?env_id=prod&page_size=0")
    assert response.status_code == 400
    assert "Page size must be between 1 and 100" in response.json()["detail"]

    # @TEST_EDGE: page_size > 100 exceeds max
    response = client.get("/api/datasets?env_id=prod&page_size=101")
    assert response.status_code == 400
    assert "Page size must be between 1 and 100" in response.json()["detail"]
# [/DEF:test_get_datasets_invalid_pagination:Function]


# [DEF:test_map_columns_success:Function]
# @TEST: POST /api/datasets/map-columns creates mapping task
# @PRE: Valid env_id, dataset_ids, source_type
# @POST: Returns task_id
def test_map_columns_success(mock_deps):
    # Environment lookup resolves env_id=prod.
    mock_env = MagicMock()
    mock_env.id = "prod"
    mock_deps["config"].get_environments.return_value = [mock_env]

    # Task manager hands back a task with a known id.
    mock_task = MagicMock()
    mock_task.id = "task-123"
    mock_deps["task"].create_task = AsyncMock(return_value=mock_task)

    response = client.post(
        "/api/datasets/map-columns",
        json={
            "env_id": "prod",
            "dataset_ids": [1, 2, 3],
            "source_type": "postgresql",
        },
    )

    assert response.status_code == 200
    data = response.json()
    assert "task_id" in data
    # @POST/@SIDE_EFFECT: create_task was called
    mock_deps["task"].create_task.assert_called_once()
# [/DEF:test_map_columns_success:Function]


# [DEF:test_map_columns_invalid_source_type:Function]
# @TEST: POST /api/datasets/map-columns returns 400 for invalid source_type
# @PRE: source_type is not 'postgresql' or 'xlsx'
# @POST: Returns 400 error
def test_map_columns_invalid_source_type(mock_deps):
    response = client.post(
        "/api/datasets/map-columns",
        json={
            "env_id": "prod",
            "dataset_ids": [1],
            "source_type": "invalid",
        },
    )

    assert response.status_code == 400
    assert "Source type must be 'postgresql' or 'xlsx'" in response.json()["detail"]
# [/DEF:test_map_columns_invalid_source_type:Function]
# [DEF:test_generate_docs_success:Function]
# @TEST: POST /api/datasets/generate-docs creates doc generation task
# @PRE: Valid env_id, dataset_ids, llm_provider
# @POST: Returns task_id
def test_generate_docs_success(mock_deps):
    # Environment lookup resolves env_id=prod.
    mock_env = MagicMock()
    mock_env.id = "prod"
    mock_deps["config"].get_environments.return_value = [mock_env]

    # Task manager hands back a task with a known id.
    mock_task = MagicMock()
    mock_task.id = "task-456"
    mock_deps["task"].create_task = AsyncMock(return_value=mock_task)

    response = client.post(
        "/api/datasets/generate-docs",
        json={
            "env_id": "prod",
            "dataset_ids": [1],
            "llm_provider": "openai",
        },
    )

    assert response.status_code == 200
    data = response.json()
    assert "task_id" in data
    # @POST/@SIDE_EFFECT: create_task was called
    mock_deps["task"].create_task.assert_called_once()
# [/DEF:test_generate_docs_success:Function]


# [DEF:test_map_columns_empty_ids:Function]
# @TEST: POST /api/datasets/map-columns returns 400 for empty dataset_ids
# @PRE: dataset_ids is empty
# @POST: Returns 400 error
def test_map_columns_empty_ids(mock_deps):
    """@PRE: dataset_ids must be non-empty."""
    response = client.post(
        "/api/datasets/map-columns",
        json={
            "env_id": "prod",
            "dataset_ids": [],
            "source_type": "postgresql",
        },
    )

    assert response.status_code == 400
    assert "At least one dataset ID must be provided" in response.json()["detail"]
# [/DEF:test_map_columns_empty_ids:Function]


# [DEF:test_generate_docs_empty_ids:Function]
# @TEST: POST /api/datasets/generate-docs returns 400 for empty dataset_ids
# @PRE: dataset_ids is empty
# @POST: Returns 400 error
def test_generate_docs_empty_ids(mock_deps):
    """@PRE: dataset_ids must be non-empty."""
    response = client.post(
        "/api/datasets/generate-docs",
        json={
            "env_id": "prod",
            "dataset_ids": [],
            "llm_provider": "openai",
        },
    )

    assert response.status_code == 400
    assert "At least one dataset ID must be provided" in response.json()["detail"]
# [/DEF:test_generate_docs_empty_ids:Function]


# [DEF:test_generate_docs_env_not_found:Function]
# @TEST: POST /api/datasets/generate-docs returns 404 for missing env
# @PRE: env_id does not exist
# @POST: Returns 404 error
def test_generate_docs_env_not_found(mock_deps):
    """@PRE: env_id must be a valid environment."""
    mock_deps["config"].get_environments.return_value = []

    response = client.post(
        "/api/datasets/generate-docs",
        json={
            "env_id": "ghost",
            "dataset_ids": [1],
            "llm_provider": "openai",
        },
    )

    assert response.status_code == 404
    assert "Environment not found" in response.json()["detail"]
# [/DEF:test_generate_docs_env_not_found:Function]


# [DEF:test_get_datasets_superset_failure:Function]
# @TEST_EDGE: external_superset_failure -> {status: 503}
def test_get_datasets_superset_failure(mock_deps):
    """@TEST_EDGE: external_superset_failure -> {status: 503}"""
    mock_env = MagicMock()
    mock_env.id = "bad_conn"
    mock_deps["config"].get_environments.return_value = [mock_env]
    mock_deps["task"].get_all_tasks.return_value = []

    # Simulate the upstream (Superset) connection failing.
    mock_deps["resource"].get_datasets_with_status = AsyncMock(
        side_effect=Exception("Connection refused")
    )

    response = client.get("/api/datasets?env_id=bad_conn")

    assert response.status_code == 503
    assert "Failed to fetch datasets" in response.json()["detail"]
# [/DEF:test_get_datasets_superset_failure:Function]

# [/DEF:backend.src.api.routes.__tests__.test_datasets:Module]