"""FastAPI service that renders HTML to PDF with WeasyPrint.

The ``/pdf`` endpoint is protected by a shared secret sent in the
``X-Secret-Key`` request header; ``/health`` is unauthenticated.
"""

import io
import logging
import os
import re
import secrets
from collections.abc import AsyncIterator

import logfire
from fastapi import Depends, FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from weasyprint import HTML
from weasyprint.text.fonts import FontConfiguration

# Initialize logging
logfire.configure()
# A second ``logging.basicConfig`` call is a no-op once the root logger has
# handlers, so the level and every handler must be supplied in ONE call.
# StreamHandler keeps the stderr output the service already produced; the
# Logfire handler forwards the same records to Logfire.
logging.basicConfig(
    level=logging.INFO,
    handlers=[logging.StreamHandler(), logfire.LogfireLoggingHandler()],
)
logger = logging.getLogger("weasyprint")
logger.handlers.clear()
# DEBUG on this logger lets WeasyPrint's debug records through. They propagate
# to the root handlers configured above, so no dedicated Logfire handler is
# attached here (that would deliver every record to Logfire twice).
logger.setLevel(logging.DEBUG)

# Load secret from environment variable
SECRET_KEY = os.getenv("SECRET_KEY")
if not SECRET_KEY:
    raise RuntimeError("SECRET_KEY environment variable is not set")

# File stem used when the request supplies no usable filename.
DEFAULT_FILENAME = "hinpdof"

# Size of each chunk yielded while streaming the PDF back to the client.
CHUNK_SIZE = 4096


class PdfRequest(BaseModel):
    """Request body accepted by the ``/pdf`` endpoint."""

    # Non-empty HTML document to render.
    html: str = Field(..., min_length=1, description="HTML content to convert to PDF")
    # Optional download name (without extension); sanitized before use.
    filename: str | None = None


app = FastAPI()
logfire.instrument_fastapi(app)

# NOTE(review): wildcard origins combined with allow_credentials=True is
# discouraged by the FastAPI CORS documentation. Authentication here uses a
# custom header rather than cookies, so credentials are probably unnecessary.
# Left unchanged to avoid breaking existing browser clients; confirm and
# tighten.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Compile the regular expression once
FILENAME_SANITIZE_REGEX = re.compile(r"[^a-zA-Z0-9_\-]")


def sanitize_filename(filename: str) -> str:
    """
    Sanitize the filename by replacing invalid characters with underscores.

    Every character other than ASCII letters, digits, ``_`` and ``-`` is
    replaced, which includes dots and path separators.

    Args:
        filename (str): The original filename.

    Returns:
        str: The sanitized filename.
    """
    return FILENAME_SANITIZE_REGEX.sub("_", filename)


async def pdf_generator(byte_string: bytes) -> AsyncIterator[bytes]:
    """
    Generator to yield PDF content in chunks.

    Args:
        byte_string (bytes): The PDF content as bytes.

    Yields:
        bytes: Chunks of the PDF content, each at most ``CHUNK_SIZE`` bytes.
    """
    byte_stream = io.BytesIO(byte_string)
    while chunk := byte_stream.read(CHUNK_SIZE):
        yield chunk


def verify_secret_key(x_secret_key: str = Header(...)) -> None:
    """
    Dependency to verify the secret key from the request header.

    Args:
        x_secret_key (str): The secret key from the request header.

    Raises:
        HTTPException: If the secret key is invalid.
    """
    # Constant-time comparison so response timing does not reveal how many
    # leading characters of the key were correct. Comparing bytes (not str)
    # avoids the TypeError compare_digest raises for non-ASCII strings.
    provided = x_secret_key.encode("utf-8")
    expected = SECRET_KEY.encode("utf-8")
    if not secrets.compare_digest(provided, expected):
        raise HTTPException(status_code=401, detail="Invalid secret key")


@app.post("/pdf", dependencies=[Depends(verify_secret_key)])
async def pdf(body: PdfRequest):
    """
    Endpoint to convert HTML content to a PDF file.

    Args:
        body (PdfRequest): Request body containing HTML content and an optional filename.

    Returns:
        StreamingResponse: A streaming response with the generated PDF file.

    Raises:
        HTTPException: With status 400 if rendering the HTML fails.
    """
    logging.info("Received request to generate PDF")

    font_config = FontConfiguration()
    try:
        # NOTE(review): write_pdf is a synchronous call inside an async
        # endpoint, so the event loop is blocked while rendering. Consider
        # offloading it to a worker thread once WeasyPrint's thread-safety is
        # confirmed.
        byte_string = HTML(string=body.html).write_pdf(font_config=font_config)
    except Exception as e:
        # logging.exception records the traceback alongside the message.
        logging.exception("Error generating PDF: %s", e)
        raise HTTPException(status_code=400, detail="Invalid HTML input") from e

    # Fall back to the default stem when the filename is missing, empty, or
    # whitespace-only; otherwise the download would be named just ".pdf".
    requested_name = (body.filename or "").strip()
    filename = sanitize_filename(requested_name or DEFAULT_FILENAME)

    headers = {
        "Content-Type": "application/pdf",
        "Content-Disposition": f'attachment; filename="{filename}.pdf"',
    }

    logging.info("PDF generated successfully: %s.pdf", filename)
    return StreamingResponse(pdf_generator(byte_string), headers=headers)


@app.get("/health")
async def health_check():
    """
    Endpoint to check the health status of the application.

    Returns:
        JSONResponse: A JSON response with the status of the application.
    """
    return JSONResponse(content={"status": "ok"})