From b65a81faa41e8bc3052edb8422a6557200ea7737 Mon Sep 17 00:00:00 2001
From: medusa
Date: Mon, 30 Jun 2025 08:13:28 +0000
Subject: [PATCH] Update tech_docs/python/FastAPI.md

---
 tech_docs/python/FastAPI.md | 387 +++++++++++++++++++++++++++++++++++-
 1 file changed, 386 insertions(+), 1 deletion(-)

diff --git a/tech_docs/python/FastAPI.md b/tech_docs/python/FastAPI.md
index f2cc9ce..1377edc 100644
--- a/tech_docs/python/FastAPI.md
+++ b/tech_docs/python/FastAPI.md
@@ -105,4 +105,389 @@ async def send_notification(email: str, background_tasks: BackgroundTasks):

`FastAPI` is designed for building APIs that are easy to build, test, and use, with a focus on speed and type safety. This guide introduces the basics, but FastAPI's capabilities extend to more advanced features like security and authentication, more complex data modeling with Pydantic, WebSockets, GraphQL, and more, catering to modern web application development needs.

-FastAPI's design and performance make it an attractive option for high-speed API services, providing a robust foundation for building reliable, efficient web services and applications.
\ No newline at end of file
+FastAPI's design and performance make it an attractive option for high-speed API services, providing a robust foundation for building reliable, efficient web services and applications.

---

That's an interesting combination for an MVP! SQLite is excellent for traditional transactional data (e.g., users, settings), while DuckDB shines for analytical queries on local files (e.g., logs, temporary datasets, large CSVs/Parquets). This setup allows you to leverage the strengths of both without the overhead of a full-blown database server.

Here's a template idea for an MVP using FastAPI, SQLite, and DuckDB, with a clear separation of concerns:

-----

### MVP Template: FastAPI with SQLite (Transactional) & DuckDB (Analytical)

This template focuses on providing core CRUD functionality for simple transactional data using SQLite (via SQLAlchemy for an ORM experience) and analytical capabilities on separate data sources (e.g., CSV, Parquet files) using DuckDB.

**Key Features:**

  * **FastAPI:** For building the API endpoints.
  * **SQLite:** For persistent, transactional data that needs traditional CRUD operations (e.g., user management, small configurations). Managed with SQLAlchemy.
  * **DuckDB:** For analytical queries on local files (e.g., processing large datasets, generating reports, temporary data storage).
  * **Pydantic:** For data validation and serialization.
  * **Dependency Injection:** To manage database connections.
  * **Simple Project Structure:** Easy to understand and extend.

**Assumptions:**

  * You'll likely be dealing with two distinct types of data:
      * **"Structured" data:** Stored in SQLite (e.g., `users` table).
      * **"Analytical" data:** Stored in files (CSV, Parquet) that DuckDB will query directly (see the short sketch after this list).
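To make that last assumption concrete, DuckDB can run SQL against a CSV or Parquet file in place, with no load step. A minimal sketch, assuming the sample `data/my_data.csv` from the "How to Run" section exists; nothing here is part of the template code itself:

```python
import duckdb

# Open an in-memory database and query the CSV file directly by path.
con = duckdb.connect()
rows = con.execute(
    "SELECT name, SUM(value) AS total_value FROM 'data/my_data.csv' GROUP BY name"
).fetchall()
print(rows)  # e.g. [('A', 220), ('B', 150), ('C', 200)]
con.close()
```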
**Directory Structure:**

```
my-hybrid-fastapi-mvp/
├── app/
│   ├── api/
│   │   ├── endpoints/
│   │   │   ├── users.py          # Endpoints for SQLite data
│   │   │   └── analytics.py      # Endpoints for DuckDB queries
│   │   └── __init__.py
│   ├── core/
│   │   ├── config.py             # Application settings
│   │   ├── database_sqlite.py    # SQLite engine and session
│   │   ├── database_duckdb.py    # DuckDB connection (or connect on demand)
│   │   └── __init__.py
│   ├── models/
│   │   └── user.py               # SQLAlchemy models for SQLite
│   ├── schemas/
│   │   ├── user.py               # Pydantic models for request/response (SQLite)
│   │   └── analytics.py          # Pydantic models for analytical results (DuckDB)
│   ├── main.py
│   └── __init__.py
├── data/
│   ├── my_data.csv               # Example analytical data file
│   └── another_data.parquet      # Another example analytical data file
├── tests/
│   └── test_api.py
├── requirements.txt
├── .env                          # For environment variables
├── README.md
```

-----

**Code Snippets and Explanations:**

**1. `requirements.txt`:**

```
fastapi[all]
uvicorn
sqlalchemy
pydantic
pydantic-settings
duckdb
pandas
```

`pydantic-settings` is imported directly in `config.py`, and `pandas` is required because the DuckDB helpers below return results via `fetch_df()`.

**2. `app/core/config.py`:**

```python
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    PROJECT_NAME: str = "FastAPI Hybrid MVP"
    SQLITE_DATABASE_URL: str = "sqlite:///./sql_app.db"  # SQLite file path
    # DuckDB usually works on files directly, so it needs no "URL" of its own,
    # but you might keep a path to the directory where analytical files are stored.
    ANALYTICS_DATA_PATH: str = "data/"

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

settings = Settings()
```

**3. `app/core/database_sqlite.py`:**

```python
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base

from app.core.config import settings

# SQLite connection string
SQLALCHEMY_DATABASE_URL = settings.SQLITE_DATABASE_URL

# Create the SQLAlchemy engine.
# connect_args={"check_same_thread": False} is needed for SQLite because
# FastAPI may run sync routes and dependencies in a threadpool, so the same
# connection can be used from a thread other than the one that created it.
engine = create_engine(
    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)

# Create a SessionLocal class to get database sessions
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Base class for SQLAlchemy models
Base = declarative_base()

# Dependency to get a database session for FastAPI routes
def get_sqlite_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
```

**4. `app/core/database_duckdb.py`:**

DuckDB is usually queried directly against files, so you might not need a persistent "connection" in the same way as with SQLAlchemy. You'll often open and close connections per request or task, or connect to a persistent `.duckdb` file if you want to store analytical results.
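"Storing analytical results" in that persistent file is essentially a `CREATE TABLE ... AS SELECT`. A minimal sketch of the idea (the `daily_totals` table name is illustrative; `analytics.duckdb` and `data/my_data.csv` match the files used elsewhere in this template):

```python
import duckdb

# Open (or create) the persistent DuckDB file and materialise an aggregate
# computed straight from a CSV; the table survives across connections.
con = duckdb.connect("analytics.duckdb")
con.execute("""
    CREATE OR REPLACE TABLE daily_totals AS
    SELECT name, SUM(value) AS total_value
    FROM 'data/my_data.csv'
    GROUP BY name
""")
con.close()
```

The module itself only needs to hand out connections and a small query helper: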
```python
import duckdb
from app.core.config import settings

# Option 1: In-memory DuckDB for temporary analysis (data not persisted)
# def get_duckdb_conn():
#     conn = duckdb.connect(database=':memory:')
#     try:
#         yield conn
#     finally:
#         conn.close()

# Option 2: Persistent DuckDB database file (for analytical results).
# This is useful if you want DuckDB to store its own tables/views that persist.
DUCKDB_FILE_PATH = "analytics.duckdb"  # Name of your persistent DuckDB file

def get_duckdb_conn():
    conn = duckdb.connect(database=DUCKDB_FILE_PATH)
    try:
        yield conn
    finally:
        conn.close()

# You might also have helper functions to load/query data from files
def query_duckdb_file(conn: duckdb.DuckDBPyConnection, file_path: str, query: str):
    full_path = f"{settings.ANALYTICS_DATA_PATH}{file_path}"
    # DuckDB can directly query files! Expose the requested file as a view named
    # "data" so callers can write queries such as: SELECT COUNT(*) FROM data
    # (queries may also reference a file path directly, e.g. FROM 'data/my_data.csv').
    conn.execute(f"CREATE OR REPLACE VIEW data AS SELECT * FROM '{full_path}'")
    # fetch_df() returns a pandas DataFrame (hence pandas in requirements.txt).
    result = conn.execute(query).fetch_df()
    return result
```

**5. `app/models/user.py` (SQLite Model):**

```python
from sqlalchemy import Column, Integer, String
from app.core.database_sqlite import Base

class User(Base):
    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    username = Column(String, unique=True, index=True)
    email = Column(String, unique=True, index=True)
    hashed_password = Column(String)
```

**6. `app/schemas/user.py` (Pydantic for SQLite data):**

```python
from pydantic import BaseModel, EmailStr

class UserBase(BaseModel):
    username: str
    email: EmailStr

class UserCreate(UserBase):
    password: str

class UserResponse(UserBase):
    id: int

    class Config:
        from_attributes = True  # for Pydantic v2+
        # or `orm_mode = True` for Pydantic v1
```

**7. `app/schemas/analytics.py` (Pydantic for DuckDB results):**

This will vary based on your analytical data.

```python
from pydantic import BaseModel
from typing import List, Dict, Any

class AnalyticsQueryRequest(BaseModel):
    file_name: str  # e.g., "my_data.csv"
    sql_query: str  # e.g., "SELECT COUNT(*) FROM data"

class AnalyticsResult(BaseModel):
    # This could be a list of dictionaries, or a specific Pydantic model
    # if the analytical query results have a known, fixed schema.
    # For an MVP, a list of dicts or list of lists is often sufficient.
    data: List[Dict[str, Any]]
    # Or, if you know the columns:
    # results: List[MySpecificAnalyticsRowModel]
```

**8. `app/api/endpoints/users.py` (SQLite Endpoints):**

```python
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session
from typing import List

from app.core.database_sqlite import get_sqlite_db, engine, Base
from app.models.user import User
from app.schemas.user import UserCreate, UserResponse

router = APIRouter()

# Create tables on startup (on_event is deprecated in newer FastAPI;
# an app-level lifespan handler is the modern equivalent)
@router.on_event("startup")
def create_db_tables():
    Base.metadata.create_all(bind=engine)

@router.post("/", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
def create_user(user: UserCreate, db: Session = Depends(get_sqlite_db)):
    db_user = db.query(User).filter(User.username == user.username).first()
    if db_user:
        raise HTTPException(status_code=400, detail="Username already registered")
    # In a real app, hash the password!
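    # A minimal sketch of how that could look with passlib (an assumption;
    # passlib is not in requirements.txt):
    #   from passlib.context import CryptContext
    #   pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
    #   hashed = pwd_context.hash(user.password)
    # and pass `hashed` below instead of the plaintext password.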
+ db_user = User(username=user.username, email=user.email, hashed_password=user.password) + db.add(db_user) + db.commit() + db.refresh(db_user) + return db_user + +@router.get("/", response_model=List[UserResponse]) +def read_users(skip: int = 0, limit: int = 100, db: Session = Depends(get_sqlite_db)): + users = db.query(User).offset(skip).limit(limit).all() + return users + +@router.get("/{user_id}", response_model=UserResponse) +def read_user(user_id: int, db: Session = Depends(get_sqlite_db)): + user = db.query(User).filter(User.id == user_id).first() + if user is None: + raise HTTPException(status_code=404, detail="User not found") + return user +``` + +**9. `app/api/endpoints/analytics.py` (DuckDB Endpoints):** + +```python +from fastapi import APIRouter, Depends, HTTPException +import duckdb +import pandas as pd +from typing import Dict, Any + +from app.core.database_duckdb import get_duckdb_conn, query_duckdb_file +from app.schemas.analytics import AnalyticsQueryRequest, AnalyticsResult + +router = APIRouter() + +@router.post("/query_file", response_model=AnalyticsResult) +def run_analytics_query( + query_request: AnalyticsQueryRequest, + conn: duckdb.DuckDBPyConnection = Depends(get_duckdb_conn) +): + try: + # Construct the full file path. DuckDB can query various file types directly. + # Ensure the file_name is sanitized or validated in a real app to prevent path traversal. + df_result = query_duckdb_file(conn, query_request.file_name, query_request.sql_query) + # Convert DataFrame to list of dictionaries for JSON response + return AnalyticsResult(data=df_result.to_dict(orient="records")) + except duckdb.Error as e: + raise HTTPException(status_code=400, detail=f"DuckDB Query Error: {e}") + except FileNotFoundError: + raise HTTPException(status_code=404, detail=f"File not found: {query_request.file_name}") + except Exception as e: + raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}") + +@router.get("/example_report") +def get_example_report(conn: duckdb.DuckDBPyConnection = Depends(get_duckdb_conn)): + """ + An example of a pre-defined analytical report using DuckDB. + This avoids exposing raw SQL queries directly if you prefer. + """ + try: + # Assuming 'data/my_data.csv' exists and has a 'value' column + df_report = conn.execute("SELECT SUM(value) as total_value, AVG(value) as avg_value FROM 'data/my_data.csv'").fetch_df() + return df_report.to_dict(orient="records")[0] # Return first row as dict + except duckdb.Error as e: + raise HTTPException(status_code=500, detail=f"Report generation error: {e}") + +``` + +**10. `app/main.py`:** + +```python +from fastapi import FastAPI +from app.api.endpoints import users, analytics +from app.core.config import settings + +app = FastAPI( + title=settings.PROJECT_NAME, + description="A FastAPI MVP with SQLite for transactional data and DuckDB for analytical data.", + version="0.1.0" +) + +# Include routers +app.include_router(users.router, prefix="/users", tags=["users"]) +app.include_router(analytics.router, prefix="/analytics", tags=["analytics"]) + +@app.get("/") +async def read_root(): + return {"message": "Welcome to the FastAPI Hybrid MVP!"} +``` + +----- + +**How to Run:** + +1. **Create project directory and files:** Set up the directory structure as shown above. +2. **Install dependencies:** + ```bash + pip install -r requirements.txt + ``` +3. **Create `.env` file:** + ``` + PROJECT_NAME="My Hybrid FastAPI MVP" + SQLITE_DATABASE_URL="sqlite:///./sql_app.db" + ANALYTICS_DATA_PATH="./data/" + ``` +4. 
**Create `data/` directory and some dummy files:**
      * `data/my_data.csv`:
        ```csv
        id,name,value
        1,A,100
        2,B,150
        3,C,200
        4,A,120
        ```
      * `data/another_data.parquet` (you'd generate this with pandas and `pyarrow`, or similar):
        ```python
        import pandas as pd
        df = pd.DataFrame({'col1': [1, 2, 3], 'col2': ['x', 'y', 'z']})
        df.to_parquet('data/another_data.parquet')
        ```
5. **Run the application:**
    ```bash
    uvicorn app.main:app --reload
    ```
6. **Access the API:**
      * Go to `http://127.0.0.1:8000/docs` in your browser to see the OpenAPI (Swagger UI) documentation.
      * Test the `/users` endpoints (these will interact with your `sql_app.db` file).
      * Test the `/analytics/query_file` endpoint:
          * `file_name`: `my_data.csv`
          * `sql_query`: `SELECT name, SUM(value) AS total_value FROM data GROUP BY name` (the helper exposes the requested file as a view named `data`, so queries can reference `data` or the file path directly)
      * Test `/analytics/example_report`.

-----

**MVP Considerations and Future Enhancements:**

  * **Error Handling:** Implement more robust error handling for both database types.
  * **Security:** For a real application, implement proper password hashing for users (e.g., `passlib` with `bcrypt`). Add authentication (JWT, OAuth2) for API endpoints.
  * **Data Validation:** Use more detailed Pydantic models for analytical results if their schema is known.
  * **DuckDB Connection Management:** For very high-concurrency analytical workloads, you might need a more sophisticated DuckDB connection strategy; for a simple MVP, opening a connection per request is usually fine.
  * **File Path Validation:** Sanitize or validate `file_name` in the analytics endpoints to prevent directory traversal attacks.
  * **SQL Injection:** If you allow users to provide a raw `sql_query`, this is a significant security risk. For an internal-only MVP it might be acceptable, but for public APIs you should provide predefined analytical queries or a more controlled query builder. The example `run_analytics_query` is vulnerable if `sql_query` comes directly from user input without validation; `example_report` is safer.
  * **Testing:** Add unit and integration tests for all endpoints and database interactions.
  * **Logging:** Implement basic logging to track requests and errors.
  * **Dockerization:** Even for an MVP, a `Dockerfile` can be very helpful for consistent deployment.

This MVP template provides a solid foundation for your hybrid FastAPI application using SQLite for structured data and DuckDB for powerful, local analytical processing.
\ No newline at end of file