Initial
Some checks failed
Build and Publish Docker Image / build (push) Failing after 17s

This commit is contained in:
2024-11-11 10:26:47 +13:00
commit 6c6c837301
10 changed files with 561 additions and 0 deletions

62
.dockerignore Normal file
View File

@@ -0,0 +1,62 @@
# Files listed here are left out of the Docker build context.
# NOTE: poetry.lock is deliberately NOT ignored - the Dockerfile runs
# `COPY pyproject.toml poetry.lock ./`, which fails if the lockfile is
# excluded from the context.

# Ignore Python bytecode files
__pycache__/
*.py[cod]

# Ignore distribution / packaging files
build/
dist/
*.egg-info/
.eggs/
wheels/

# Ignore virtual environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Ignore test and coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Ignore Jupyter Notebook checkpoints
.ipynb_checkpoints

# Ignore IPython profile directories
profile_default/
ipython_config.py

# Ignore pyenv files
.python-version

# Ignore VS Code settings
.vscode/

# Ignore Dockerfile and Dockerignore itself
Dockerfile
.dockerignore

# Ignore Git files
.git/
.gitignore

# Ignore logs and temporary files
*.log
*.tmp

34
.github/workflows/build.yaml vendored Normal file
View File

@@ -0,0 +1,34 @@
# Builds the Docker image on every push and pull request targeting main.
# Only pushes to main publish the image; pull-request runs just verify that
# the image still builds, so they never log in or overwrite the :latest tag.
name: Build and Publish Docker Image

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Registry credentials are only needed when the image is published.
      - name: Login to Docker registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: git.nice.net.nz
          username: hads
          password: ${{ secrets.PAT }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v6
        with:
          context: .
          # Build-only for pull requests; publish for pushes to main.
          push: ${{ github.event_name != 'pull_request' }}
          tags: git.nice.net.nz/hads/hinpdof:latest

97
.gitignore vendored Normal file
View File

@@ -0,0 +1,97 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# poetry
# poetry.lock is intentionally tracked: the Dockerfile copies it into the
# build (`COPY pyproject.toml poetry.lock ./`), so it must exist in the
# repository checkout for the image build to succeed.

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# VS Code
.vscode/

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# Pyre type checker
.pyre/

# End of https://www.toptal.com/developers/gitignore/api/python

25
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,25 @@
# pre-commit configuration: generic file checks, ruff lint/format, and a
# local hook that runs the test suite before each push.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-toml
      - id: check-json
      - id: check-merge-conflict
      - id: debug-statements
      - id: end-of-file-fixer
      - id: trailing-whitespace

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.7.3
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format

  - repo: local
    hooks:
      - id: tests
        name: run tests
        require_serial: true
        # The test module (test_app.py) sits at the repository root, so let
        # pytest discover it instead of pointing at a `tests` directory.
        entry: pytest -v
        language: system
        types: [python]
        # Run the whole suite; do not append the changed file names to the
        # pytest command line.
        pass_filenames: false
        stages: [pre-push]

57
Dockerfile Normal file
View File

@@ -0,0 +1,57 @@
# syntax=docker/dockerfile:1

# Shared base so the builder and runtime stages use the same interpreter.
FROM python:3.13-slim AS base

# ---- builder: install the locked runtime dependencies into /app/.venv ----
FROM base AS builder

RUN pip install --no-cache-dir poetry==1.8.3

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH=/app \
    POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=1 \
    POETRY_VIRTUALENVS_CREATE=1 \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Only the dependency manifests are copied, so this layer stays cached until
# they change. poetry.lock must be present in the build context.
COPY pyproject.toml poetry.lock ./
RUN poetry install --without dev --no-root --compile

# ---- runtime: system libraries for WeasyPrint + the prebuilt virtualenv ----
FROM base AS runtime

# ARG values are scoped to a single stage, so this is declared in the stage
# that actually runs apt-get. As an ARG it is not baked into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update \
    && apt-get dist-upgrade -y \
    && apt-get install -y --no-install-recommends \
        libharfbuzz-subset0 \
        libpango-1.0-0 \
        libpangoft2-1.0-0 \
    && rm -rf /var/lib/apt/lists/*

RUN adduser --system --uid 1000 --group app

USER app
WORKDIR /app

# ENV set in the builder stage is not inherited here, so the Python runtime
# flags are set again for the running container.
ENV VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:$PATH" \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY app.py ./

ARG PORT=8080
ENV PORT=$PORT
EXPOSE $PORT

# Probe the app's own /health route with the interpreter already in the image
# (no curl/wget is installed in the slim base).
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
    CMD ["python", "-c", "import os, urllib.request; urllib.request.urlopen('http://127.0.0.1:' + os.environ.get('PORT', '8080') + '/health', timeout=3)"]

# uvicorn is the ASGI server declared in pyproject.toml; gunicorn is not a
# project dependency, so it is not available in the copied virtualenv.
# `sh -c` is needed to expand ${PORT}; `exec` replaces the shell so the server
# receives signals directly.
CMD ["sh", "-c", "exec uvicorn app:app --host 0.0.0.0 --port ${PORT} --workers 2 --proxy-headers --forwarded-allow-ips '*'"]

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Bear Su
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

50
README.md Normal file
View File

@@ -0,0 +1,50 @@
# hinpdof
`hinpdof` is a FastAPI-based app that converts HTML content to PDF using WeasyPrint.
## Features
- Convert HTML content to PDF
- Customizable PDF filenames
- Health check endpoint
## Requirements
- Python 3.12+
- FastAPI
- WeasyPrint
- Uvicorn
## Installation
1. Clone the repository:
```sh
git clone https://git.nice.net.nz/hads/hinpdof.git
cd hinpdof
```
2. Install dependencies using Poetry:
```sh
poetry install
```
3. Run the application:
```sh
poetry run uvicorn app:app --reload
```
## Usage
### Convert HTML to PDF
Send a POST request to `/pdf` with the following JSON body:
```json
{
"html": "<h1>Hello, World!</h1>",
"filename": "testfile"
}
```

102
app.py Normal file
View File

@@ -0,0 +1,102 @@
import io
import logging
import re
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from weasyprint import HTML
# Configure the root logger once at import time so that records at INFO level
# and above (used by the request handlers below) are emitted.
logging.basicConfig(level=logging.INFO)
class PdfRequest(BaseModel):
    # Request body for POST /pdf.

    # Markup to render. Required; min_length=1 makes validation reject "".
    html: str = Field(
        default=...,
        min_length=1,
        description="HTML content to convert to PDF",
    )

    # Optional base name for the downloaded file; None when not supplied.
    filename: str | None = Field(default=None)
app = FastAPI()

# CORS: the API is open to every origin, method and header. Credentials are
# NOT allowed alongside the wildcard origin: combining allow_origins=["*"]
# with allow_credentials=True would let any website issue credentialed
# cross-origin requests, and this service defines no cookie/session auth that
# would need them.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Matches any single character that is not an ASCII letter, digit, underscore
# or hyphen. Built once at import time and reused for every call.
FILENAME_SANITIZE_REGEX = re.compile(r"[^a-zA-Z0-9_\-]")


def sanitize_filename(filename: str) -> str:
    """
    Make a filename safe by swapping every disallowed character for "_".

    Only ASCII letters, digits, underscores and hyphens are kept as-is.

    Args:
        filename (str): The name as supplied by the caller.

    Returns:
        str: The same name with each disallowed character replaced.
    """
    placeholder = "_"
    return re.sub(FILENAME_SANITIZE_REGEX, placeholder, filename)
async def pdf_generator(byte_string: bytes):
    """
    Stream an in-memory PDF as a sequence of fixed-size pieces.

    Args:
        byte_string (bytes): The complete PDF document.

    Yields:
        bytes: Consecutive pieces of at most 4096 bytes; nothing is yielded
        for empty input.
    """
    piece_size = 4096
    stream = io.BytesIO(byte_string)
    # iter() with a sentinel keeps reading until read() returns b"" at EOF.
    for piece in iter(lambda: stream.read(piece_size), b""):
        yield piece
def _render_pdf(html: str) -> bytes:
    """Render an HTML string to PDF bytes with WeasyPrint (blocking call)."""
    return HTML(string=html).write_pdf()


@app.post("/pdf")
async def pdf(body: PdfRequest):
    """
    Endpoint to convert HTML content to a PDF file.

    Args:
        body (PdfRequest): Request body containing HTML content and an optional filename.

    Returns:
        StreamingResponse: A streaming response with the generated PDF file.

    Raises:
        HTTPException: With status 400 when the HTML cannot be rendered.
    """
    # Imported locally so this handler does not depend on a file-level import
    # being added elsewhere.
    from fastapi.concurrency import run_in_threadpool

    logging.info("Received request to generate PDF")

    # NOTE(review): body.html is caller-supplied. WeasyPrint's default URL
    # fetcher loads resources referenced by the document; if callers are
    # untrusted, consider passing a restrictive url_fetcher - confirm against
    # the deployment's threat model.
    try:
        # Rendering is synchronous and CPU-bound; running it in the threadpool
        # keeps the event loop free to serve other requests (e.g. /health).
        byte_string = await run_in_threadpool(_render_pdf, body.html)
    except Exception as e:
        # logging.exception records the traceback alongside the message.
        logging.exception("Error generating PDF: %s", e)
        raise HTTPException(status_code=400, detail="Invalid HTML input") from e

    # Use the default name when the filename is missing, empty or only
    # whitespace; otherwise the download would be named just ".pdf".
    requested_name = (body.filename or "").strip()
    filename = sanitize_filename(requested_name) if requested_name else "hinpdof"

    headers = {
        "Content-Disposition": f'attachment; filename="{filename}.pdf"',
    }
    logging.info(f"PDF generated successfully: {filename}.pdf")
    return StreamingResponse(
        pdf_generator(byte_string),
        media_type="application/pdf",
        headers=headers,
    )
@app.get("/health")
async def health_check():
    """
    Endpoint to check the health status of the application.

    Returns:
        JSONResponse: A JSON response with the status of the application.
    """
    # Fixed payload: the handler performs no checks beyond being reachable.
    status_payload = {"status": "ok"}
    return JSONResponse(content=status_payload)

31
pyproject.toml Normal file
View File

@@ -0,0 +1,31 @@
[tool.poetry]
name = "hinpdof"
version = "0.1.0"
license = "MIT"
description = "Uses FastAPI to expose a REST API which takes HTML as input and converts to PDF output using Weasyprint"
authors = ["Hadley Rich <hads@nice.net.nz>"]
readme = "README.md"
# Application, not a library: Poetry only manages dependencies.
package-mode = false

[tool.poetry.dependencies]
python = "^3.12"
fastapi = "^0.115.4"
weasyprint = "^63.0"
uvicorn = "^0.32.0"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.3"
httpx = "^0.27.2"
pytest-cov = "^4.0.0"
# Kept in step with the ruff-pre-commit rev (v0.7.3) in
# .pre-commit-config.yaml so local runs and hooks lint identically.
ruff = "^0.7.3"
pre-commit = "^3.4.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.ruff]
line-length = 88
target-version = "py312"

# Rule selection lives under [tool.ruff.lint] in current ruff releases; the
# top-level `select`/`ignore` keys are deprecated.
[tool.ruff.lint]
select = ["E", "F", "W", "C", "N", "B"]
ignore = ["E501"]

82
test_app.py Normal file
View File

@@ -0,0 +1,82 @@
import pytest
from fastapi.testclient import TestClient
from app import app
@pytest.fixture
def client():
    """Provide a fresh TestClient bound to the FastAPI app for each test."""
    test_client = TestClient(app)
    return test_client
def test_health_check(client):
    """GET /health answers 200 with the fixed {"status": "ok"} body."""
    resp = client.get("/health")
    assert (resp.status_code, resp.json()) == (200, {"status": "ok"})
def test_pdf_generation(client):
    """POST /pdf with HTML and a filename returns a PDF attachment with that name."""
    request_data = {"html": "<h1>Hello, World!</h1>", "filename": "testfile"}
    response = client.post("/pdf", json=request_data)
    assert response.status_code == 200
    assert response.headers["Content-Type"] == "application/pdf"
    assert (
        response.headers["Content-Disposition"] == 'attachment; filename="testfile.pdf"'
    )
    # The headers alone do not prove a document was produced: also check that
    # the streamed body starts with the PDF file signature.
    assert response.content.startswith(b"%PDF-")
def test_pdf_generation_default_filename(client):
    """An explicit null filename falls back to the default "hinpdof.pdf"."""
    payload = {"html": "<h1>Hello, World!</h1>", "filename": None}
    resp = client.post("/pdf", json=payload)
    expected_disposition = 'attachment; filename="hinpdof.pdf"'
    assert resp.status_code == 200
    assert resp.headers["Content-Type"] == "application/pdf"
    assert resp.headers["Content-Disposition"] == expected_disposition
def test_pdf_generation_invalid_html(client):
    """An empty html string is rejected by request validation with 422."""
    payload = {"html": "", "filename": "testfile"}
    resp = client.post("/pdf", json=payload)
    # 422 Unprocessable Entity: the html field fails its min_length constraint.
    assert resp.status_code == 422
def test_pdf_generation_missing_html(client):
    """Omitting the required html field is rejected with 422."""
    payload = {"filename": "testfile"}
    resp = client.post("/pdf", json=payload)
    # 422 Unprocessable Entity: a required field is absent from the body.
    assert resp.status_code == 422
def test_pdf_generation_large_html(client):
    """A heading repeating the greeting 1000 times still renders as largefile.pdf."""
    repeated_text = "".join(["Hello, World! "] * 1000)
    payload = {"html": f"<h1>{repeated_text}</h1>", "filename": "largefile"}
    resp = client.post("/pdf", json=payload)
    expected_disposition = 'attachment; filename="largefile.pdf"'
    assert resp.status_code == 200
    assert resp.headers["Content-Type"] == "application/pdf"
    assert resp.headers["Content-Disposition"] == expected_disposition
def test_pdf_generation_invalid_filename(client):
    """A slash in the requested filename is replaced by an underscore."""
    payload = {"html": "<h1>Hello, World!</h1>", "filename": "invalid/filename"}
    resp = client.post("/pdf", json=payload)
    expected_disposition = 'attachment; filename="invalid_filename.pdf"'
    assert resp.status_code == 200
    assert resp.headers["Content-Type"] == "application/pdf"
    assert resp.headers["Content-Disposition"] == expected_disposition
def test_pdf_generation_missing_filename(client):
    """Leaving the filename out entirely also yields the default "hinpdof.pdf"."""
    payload = {"html": "<h1>Hello, World!</h1>"}
    resp = client.post("/pdf", json=payload)
    expected_disposition = 'attachment; filename="hinpdof.pdf"'
    assert resp.status_code == 200
    assert resp.headers["Content-Type"] == "application/pdf"
    assert resp.headers["Content-Disposition"] == expected_disposition