mitlist/be/app/api/v1/endpoints/ocr.py
2025-05-07 20:16:16 +02:00

96 lines
3.8 KiB
Python

# app/api/v1/endpoints/ocr.py
import logging
from typing import List
from fastapi import APIRouter, Depends, UploadFile, File
from google.api_core import exceptions as google_exceptions
from app.api.dependencies import get_current_user
from app.models import User as UserModel
from app.schemas.ocr import OcrExtractResponse
from app.core.gemini import extract_items_from_image_gemini, gemini_initialization_error
from app.core.exceptions import (
OcrServiceUnavailableError,
InvalidFileTypeError,
FileTooLargeError,
OcrProcessingError,
OcrQuotaExceededError
)
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router that the OCR endpoint in this module registers itself on.
# NOTE(review): presumably included by the v1 API router elsewhere - confirm.
router = APIRouter()
# Allowed image MIME types; uploads whose content_type is not in this list
# are rejected with InvalidFileTypeError by ocr_extract_items.
ALLOWED_IMAGE_TYPES = ["image/jpeg", "image/png", "image/webp"]
# Maximum accepted upload size in MiB; ocr_extract_items compares the upload
# length against MAX_FILE_SIZE_MB * 1024 * 1024 bytes.
MAX_FILE_SIZE_MB = 10
@router.post(
    "/extract-items",
    response_model=OcrExtractResponse,
    summary="Extract List Items via OCR (Gemini)",
    tags=["OCR"],
)
async def ocr_extract_items(
    current_user: UserModel = Depends(get_current_user),
    image_file: UploadFile = File(..., description="Image file (JPEG, PNG, WEBP) of the shopping list or receipt."),
):
    """
    Accepts an image upload, sends it to Gemini Flash with a prompt
    to extract shopping list items, and returns the parsed items.

    Args:
        current_user: The authenticated user, resolved by ``get_current_user``.
        image_file: The uploaded image; its content type must be one of
            ``ALLOWED_IMAGE_TYPES`` and its size at most ``MAX_FILE_SIZE_MB`` MiB.

    Returns:
        An ``OcrExtractResponse`` wrapping the items returned by
        ``extract_items_from_image_gemini``.

    Raises:
        OcrServiceUnavailableError: The Gemini client failed to initialize, a
            Google API or runtime error occurred, or any unexpected error was
            raised during extraction.
        InvalidFileTypeError: The upload's content type is not allowed.
        FileTooLargeError: The upload exceeds the size limit.
        OcrProcessingError: A ``ValueError`` was raised while processing the
            image or building the response.
        OcrQuotaExceededError: Gemini reported ``ResourceExhausted``.
    """
    max_bytes = MAX_FILE_SIZE_MB * 1024 * 1024

    # The outer try/finally guarantees the upload handle is closed on every
    # exit path, including the early validation failures below.
    try:
        # Check if Gemini client initialized correctly
        if gemini_initialization_error:
            logger.error("OCR endpoint called but Gemini client failed to initialize.")
            raise OcrServiceUnavailableError(gemini_initialization_error)

        logger.info(f"User {current_user.email} uploading image '{image_file.filename}' for OCR extraction.")

        # --- File Validation ---
        if image_file.content_type not in ALLOWED_IMAGE_TYPES:
            logger.warning(f"Invalid file type uploaded by {current_user.email}: {image_file.content_type}")
            raise InvalidFileTypeError(ALLOWED_IMAGE_TYPES)

        # Read one byte more than the limit: enough to detect an oversized
        # upload without pulling an arbitrarily large file into memory.
        contents = await image_file.read(max_bytes + 1)
        if len(contents) > max_bytes:
            logger.warning(f"File too large uploaded by {current_user.email}: exceeds {max_bytes} bytes")
            raise FileTooLargeError(MAX_FILE_SIZE_MB)

        # Validation errors above are raised outside this inner try so they
        # are never swallowed by the broad ``except Exception`` handler.
        try:
            # Call the Gemini helper function
            extracted_items = await extract_items_from_image_gemini(
                image_bytes=contents,
                mime_type=image_file.content_type,
            )
            logger.info(f"Successfully extracted {len(extracted_items)} items for user {current_user.email}.")
            return OcrExtractResponse(extracted_items=extracted_items)
        except ValueError as e:
            # Handle errors from Gemini processing (blocked, empty response, etc.)
            logger.warning(f"Gemini processing error for user {current_user.email}: {e}")
            raise OcrProcessingError(str(e)) from e
        except google_exceptions.ResourceExhausted as e:
            # Specific handling for quota errors
            logger.error(f"Gemini Quota Exceeded for user {current_user.email}: {e}", exc_info=True)
            raise OcrQuotaExceededError() from e
        except google_exceptions.GoogleAPIError as e:
            # Handle other Google API errors (e.g., invalid key, permissions)
            logger.error(f"Gemini API Error for user {current_user.email}: {e}", exc_info=True)
            raise OcrServiceUnavailableError(str(e)) from e
        except RuntimeError as e:
            # Runtime failures are reported as a service configuration problem
            logger.error(f"Gemini client runtime error during OCR request: {e}")
            raise OcrServiceUnavailableError(f"OCR service configuration error: {e}") from e
        except Exception as e:
            # Catch any other unexpected errors
            logger.exception(f"Unexpected error during OCR extraction for user {current_user.email}: {e}")
            raise OcrServiceUnavailableError("An unexpected error occurred during item extraction.") from e
    finally:
        # Ensure file handle is closed
        await image_file.close()