commit fa3ea2d21f
2025-07-30 14:10:55 +12:00
6 changed files with 498 additions and 0 deletions

.dockerignore Normal file (24 lines)

@@ -0,0 +1,24 @@
# Documentation
README.md
*.md
# Kubernetes files
k8s-*.yaml
# Git files
.git
.gitignore
# OS files
.DS_Store
Thumbs.db
# IDE files
.vscode/
.idea/
*.swp
*.swo
# Temporary files
*.tmp
*.log

Dockerfile Normal file (38 lines)

@@ -0,0 +1,38 @@
FROM postgres:17.5-alpine3.22
# Add metadata labels
LABEL maintainer="hads@nice.nz" \
      description="PostgreSQL backup container for S3-compatible storage" \
      version="1.0"
# Install packages, create user, and setup directories in a single layer
RUN apk update && apk upgrade --no-cache \
    && apk add --no-cache \
        bash \
        curl \
        gzip \
        rclone \
    && rm -rf /var/cache/apk/* \
    && addgroup -g 1001 backup \
    && adduser -D -u 1001 -G backup backup \
    && mkdir -p /backups \
    && chown backup:backup /backups
# Copy backup script with correct ownership
COPY --chown=backup:backup backup.sh /usr/local/bin/backup.sh
# Make script executable
RUN chmod +x /usr/local/bin/backup.sh
# Switch to non-root user
USER backup
# Set working directory
WORKDIR /backups
# Add health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD pgrep -f backup.sh > /dev/null || exit 1
# Use exec form for better signal handling
CMD ["/usr/local/bin/backup.sh"]

README.md Normal file (155 lines)

@@ -0,0 +1,155 @@
# PostgreSQL Backup to S3-Compatible Storage Container
This Docker container provides automated PostgreSQL database backups to S3-compatible storage services (MinIO, DigitalOcean Spaces, Backblaze B2, etc.), designed to run as a Kubernetes CronJob.
## Features
- Backs up one or more PostgreSQL databases
- Optimized for third-party S3-compatible storage services
- Uses lightweight `rclone` instead of heavy AWS CLI
- Automatic cleanup of old backups based on retention policy
- Compression support (gzip)
- Non-root container for security
- Notification support via webhooks
- Comprehensive logging
## Building the Container
```bash
docker build -t your-registry/postgres-backup:latest .
docker push your-registry/postgres-backup:latest
```
## Environment Variables
### Required Variables
- `POSTGRES_HOST`: PostgreSQL server hostname
- `POSTGRES_USER`: PostgreSQL username
- `POSTGRES_PASSWORD`: PostgreSQL password
- `S3_BUCKET`: S3 bucket name for backups
- `S3_ENDPOINT`: S3 endpoint URL (e.g., https://nyc3.digitaloceanspaces.com)
- `S3_ACCESS_KEY_ID`: S3 access key ID
- `S3_SECRET_ACCESS_KEY`: S3 secret access key
### Optional Variables
- `POSTGRES_PORT`: PostgreSQL port (default: 5432)
- `POSTGRES_DB`: Default database for connection (default: postgres)
- `POSTGRES_DATABASES`: Comma-separated list of databases to back up (default: all databases)
- `S3_PREFIX`: S3 key prefix for backups (default: postgres-backups)
- `S3_REGION`: S3 region (default: auto)
- `BACKUP_RETENTION_DAYS`: Number of days to keep backups (default: 7)
- `WEBHOOK_URL`: Optional webhook URL for notifications (payload example below)
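When `WEBHOOK_URL` is set, `backup.sh` sends a POST request whose JSON body looks like this (values are illustrative):
```json
{
  "status": "success",
  "message": "All PostgreSQL database backups completed successfully",
  "timestamp": "2024-01-30T02:05:12+12:00"
}
```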
## Running Locally
```bash
docker run --rm \
  -e POSTGRES_HOST=your-postgres-host \
  -e POSTGRES_USER=postgres \
  -e POSTGRES_PASSWORD=your-password \
  -e S3_BUCKET=your-backup-bucket \
  -e S3_ENDPOINT=https://nyc3.digitaloceanspaces.com \
  -e S3_ACCESS_KEY_ID=your-access-key \
  -e S3_SECRET_ACCESS_KEY=your-secret-key \
  your-registry/postgres-backup:latest
```
## Kubernetes Deployment
1. **Create the secret with your credentials:**
```bash
# Edit k8s-secret.yaml with your actual credentials (uses stringData for simplicity)
kubectl apply -f k8s-secret.yaml
```
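Alternatively, the same secret can be created straight from the command line instead of editing the file (values below are placeholders):
```bash
kubectl create secret generic postgres-backup-secret \
  --from-literal=postgres-user=postgres \
  --from-literal=postgres-password=your-password \
  --from-literal=s3-access-key-id=your-s3-access-key \
  --from-literal=s3-secret-access-key=your-s3-secret-key
```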
2. **Deploy the CronJob:**
```bash
# Edit k8s-cronjob.yaml with your settings
kubectl apply -f k8s-cronjob.yaml
```
3. **Monitor the CronJob:**
```bash
# Check CronJob status
kubectl get cronjobs
# Check recent jobs
kubectl get jobs
# Check logs
kubectl logs -l job-name=postgres-backup-<timestamp>
```
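To exercise the backup without waiting for the schedule, a one-off Job can be created from the CronJob (the job name here is arbitrary):
```bash
kubectl create job --from=cronjob/postgres-backup postgres-backup-manual
kubectl logs -f job/postgres-backup-manual
```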
## Backup Structure
Backups are stored in S3 with a simple flat structure:
```
s3://your-bucket/
└── postgres-backups/
    ├── database1_20240130_020000.sql.gz
    ├── database1_20240131_020000.sql.gz
    ├── database2_20240130_020000.sql.gz
    └── database2_20240131_020000.sql.gz
```
All backups are created as gzipped SQL dumps with:
- `--clean` and `--if-exists` flags for safer restores
- `gzip --rsyncable` for efficient incremental transfers
- Human-readable SQL format after decompression
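To see what is currently stored, the same prefix can be listed with rclone (this assumes a local `s3remote` remote configured the same way as the one `backup.sh` creates):
```bash
rclone lsf s3remote:your-bucket/postgres-backups/
```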
## Security Considerations
- Container runs as non-root user (UID 1001)
- Uses read-only root filesystem
- Drops all capabilities
- Credentials are supplied from Kubernetes Secrets via `secretKeyRef` rather than hard-coded in the manifests
- Network policies can be applied to restrict access (example below)
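A minimal egress-only NetworkPolicy sketch is shown below; it assumes the CronJob's pod template is labelled `app: postgres-backup` and that PostgreSQL runs in a namespace named `database` (adjust both to your cluster):
```yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: postgres-backup-egress
  namespace: default
spec:
  podSelector:
    matchLabels:
      app: postgres-backup
  policyTypes:
    - Egress
  egress:
    # PostgreSQL in the "database" namespace
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: database
      ports:
        - protocol: TCP
          port: 5432
    # DNS plus HTTPS to the S3 endpoint (kept broad here)
    - ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 443
```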
## Backup Restoration
To restore a backup:
1. Download the backup file from S3:
```bash
rclone copy s3remote:your-bucket/postgres-backups/ ./ --include "database1_20240130_020000.sql.gz"
```
2. Decompress and restore:
```bash
gunzip database1_20240130_020000.sql.gz
psql -h your-postgres-host -U postgres -d database1 < database1_20240130_020000.sql
```
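Alternatively, the dump can be streamed straight into `psql` without writing the uncompressed file to disk:
```bash
gunzip -c database1_20240130_020000.sql.gz | psql -h your-postgres-host -U postgres -d database1
```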
## Troubleshooting
### Common Issues
1. **Connection refused**: Check PostgreSQL host and port (see the connectivity check below)
2. **Authentication failed**: Verify username and password
3. **S3 upload failed**: Check S3 credentials, endpoint URL, and bucket permissions
4. **Out of space**: Ensure sufficient disk space in the /backups volume
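For connection issues, a throwaway pod can confirm that PostgreSQL is reachable from inside the cluster (host, image, and password are illustrative):
```bash
kubectl run pg-conn-test --rm -it --restart=Never \
  --image=postgres:17.5-alpine3.22 \
  --env=PGPASSWORD=your-password -- \
  psql -h postgres-service.database.svc.cluster.local -U postgres -c "SELECT 1"
```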
### Logs
Check container logs for detailed information:
```bash
kubectl logs -l job-name=postgres-backup-<timestamp> -f
```
## Customization
You can modify the `backup.sh` script to:
- Add custom backup validation
- Implement different notification methods
- Add encryption before upload (see the sketch after this list)
- Modify backup naming conventions
- Add database-specific backup options
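As an example of the encryption point, a symmetric GPG step could be spliced into the dump pipeline in `backup_database` (a sketch only: `gnupg` would need to be added to the image, and `BACKUP_PASSPHRASE` is a hypothetical extra variable):
```bash
# ... inside backup_database(), replacing the existing pg_dump | gzip line
PGPASSWORD="${POSTGRES_PASSWORD}" pg_dump -h "${POSTGRES_HOST}" -p "${POSTGRES_PORT}" \
    -U "${POSTGRES_USER}" -d "${db_name}" --clean --if-exists --no-owner --no-privileges \
  | gzip --rsyncable \
  | gpg --batch --yes --pinentry-mode loopback --symmetric --passphrase "${BACKUP_PASSPHRASE}" \
  > "${backup_file}.gpg"
```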
## License
This project is provided as-is for educational and production use.

backup.sh Normal file (178 lines)

@@ -0,0 +1,178 @@
#!/bin/bash
set -euo pipefail
# Configuration from environment variables
POSTGRES_HOST="${POSTGRES_HOST:-localhost}"
POSTGRES_PORT="${POSTGRES_PORT:-5432}"
POSTGRES_USER="${POSTGRES_USER:-postgres}"
POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-}"
POSTGRES_DB="${POSTGRES_DB:-postgres}"
POSTGRES_DATABASES="${POSTGRES_DATABASES:-}" # Comma-separated list of databases, if empty backs up all
# Required variables default to empty so the validation in main() can report them under "set -u"
S3_BUCKET="${S3_BUCKET:-}"
S3_PREFIX="${S3_PREFIX:-postgres-backups}"
S3_ENDPOINT="${S3_ENDPOINT:-}" # Required for third-party S3 services
S3_ACCESS_KEY_ID="${S3_ACCESS_KEY_ID:-}"
S3_SECRET_ACCESS_KEY="${S3_SECRET_ACCESS_KEY:-}"
S3_REGION="${S3_REGION:-auto}" # Default to 'auto' for S3-compatible services
BACKUP_RETENTION_DAYS="${BACKUP_RETENTION_DAYS:-7}"
# Generate timestamp
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
# Function to log messages
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}
# Function to setup rclone configuration
setup_rclone() {
    log "Setting up rclone configuration for S3-compatible storage"
    # Write the config under /backups, the only writable path when the container
    # runs with a read-only root filesystem (see k8s-cronjob.yaml)
    export RCLONE_CONFIG="/backups/rclone.conf"
    # Create rclone configuration
    cat > "${RCLONE_CONFIG}" << EOF
[s3remote]
type = s3
provider = Other
access_key_id = ${S3_ACCESS_KEY_ID}
secret_access_key = ${S3_SECRET_ACCESS_KEY}
endpoint = ${S3_ENDPOINT}
region = ${S3_REGION}
force_path_style = true
acl = private
EOF
    chmod 600 "${RCLONE_CONFIG}"
}
# Function to get list of databases
get_databases() {
    if [[ -n "${POSTGRES_DATABASES}" ]]; then
        echo "${POSTGRES_DATABASES}" | tr ',' '\n'
    else
        PGPASSWORD="${POSTGRES_PASSWORD}" psql -h "${POSTGRES_HOST}" -p "${POSTGRES_PORT}" -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -t -c "SELECT datname FROM pg_database WHERE datistemplate = false;" | grep -v '^$' | xargs
    fi
}
# Function to backup a single database
backup_database() {
    local db_name="$1"
    local backup_file="/backups/${db_name}_${TIMESTAMP}.sql.gz"
    log "Starting backup of database: ${db_name}"
    log "Creating compressed SQL dump"
    # Create database dump with gzip compression; fail the function if the dump fails
    if ! PGPASSWORD="${POSTGRES_PASSWORD}" pg_dump \
        -h "${POSTGRES_HOST}" \
        -p "${POSTGRES_PORT}" \
        -U "${POSTGRES_USER}" \
        -d "${db_name}" \
        --no-password \
        --format=plain \
        --clean \
        --if-exists \
        --no-privileges \
        --no-owner \
        | gzip --rsyncable > "${backup_file}"; then
        log "ERROR: pg_dump failed for ${db_name}"
        rm -f "${backup_file}"
        return 1
    fi
    # Upload to S3 with flat structure
    local s3_key="${S3_PREFIX}/$(basename "${backup_file}")"
    log "Uploading backup to S3: s3://${S3_BUCKET}/${s3_key}"
    rclone copy "${backup_file}" "s3remote:${S3_BUCKET}/${S3_PREFIX}/" --progress
    # Verify upload
    if ! rclone ls "s3remote:${S3_BUCKET}/${s3_key}" > /dev/null; then
        log "ERROR: Failed to verify backup upload"
        return 1
    fi
    log "Successfully uploaded backup for ${db_name}"
    # Clean up local file
    rm -f "${backup_file}"
}
# Function to cleanup old backups
cleanup_old_backups() {
    local db_name="$1"
    # Compute the cutoff from the epoch so this works with BusyBox date as well as GNU date
    local cutoff_epoch=$(( $(date +%s) - BACKUP_RETENTION_DAYS * 86400 ))
    local cutoff_date=$(date -d "@${cutoff_epoch}" +%Y%m%d_%H%M%S)
    log "Cleaning up backups older than ${BACKUP_RETENTION_DAYS} days for database: ${db_name}"
    # List and delete old backups using rclone with flat structure
    rclone lsf "s3remote:${S3_BUCKET}/${S3_PREFIX}/" --include "${db_name}_*.sql.gz" | while read -r backup_file; do
        # Extract timestamp from filename
        backup_date=$(echo "$backup_file" | grep -o '[0-9]\{8\}_[0-9]\{6\}' || true)
        if [[ -n "$backup_date" && "$backup_date" < "$cutoff_date" ]]; then
            log "Deleting old backup file: ${backup_file}"
            rclone delete "s3remote:${S3_BUCKET}/${S3_PREFIX}/${backup_file}" || log "Failed to delete ${backup_file}"
        fi
    done
}
# Function to send notification (placeholder for webhook/email integration)
send_notification() {
    local status="$1"
    local message="$2"
    if [[ -n "${WEBHOOK_URL:-}" ]]; then
        curl -X POST "${WEBHOOK_URL}" \
            -H "Content-Type: application/json" \
            -d "{\"status\": \"${status}\", \"message\": \"${message}\", \"timestamp\": \"$(date -Iseconds)\"}" \
            || log "Failed to send notification"
    fi
}
# Main execution
main() {
    log "Starting PostgreSQL backup process"
    # Validate required environment variables
    if [[ -z "${S3_BUCKET}" || -z "${S3_ACCESS_KEY_ID}" || -z "${S3_SECRET_ACCESS_KEY}" || -z "${S3_ENDPOINT}" ]]; then
        log "ERROR: Missing required environment variables (S3_BUCKET, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_ENDPOINT)"
        exit 1
    fi
    # Setup rclone
    setup_rclone
    # Test database connection
    log "Testing database connection"
    if ! PGPASSWORD="${POSTGRES_PASSWORD}" psql -h "${POSTGRES_HOST}" -p "${POSTGRES_PORT}" -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c "SELECT 1" > /dev/null 2>&1; then
        log "ERROR: Cannot connect to PostgreSQL database"
        send_notification "error" "Cannot connect to PostgreSQL database"
        exit 1
    fi
    # Get list of databases to back up
    databases=$(get_databases)
    log "Databases to back up: ${databases}"
    # Backup each database
    backup_success=true
    for db in ${databases}; do
        if backup_database "${db}"; then
            cleanup_old_backups "${db}"
        else
            log "ERROR: Failed to backup database: ${db}"
            backup_success=false
        fi
    done
    if [[ "${backup_success}" == "true" ]]; then
        log "All database backups completed successfully"
        send_notification "success" "All PostgreSQL database backups completed successfully"
    else
        log "Some database backups failed"
        send_notification "error" "Some PostgreSQL database backups failed"
        exit 1
    fi
}
# Execute main function
main "$@"

k8s-cronjob.yaml Normal file (91 lines)

@@ -0,0 +1,91 @@
apiVersion: batch/v1
kind: CronJob
metadata:
  name: postgres-backup
  namespace: default
spec:
  # Run daily at 2:00 AM
  schedule: "0 2 * * *"
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: postgres-backup
              image: your-registry/postgres-backup:latest
              imagePullPolicy: Always
              env:
                # PostgreSQL connection settings
                - name: POSTGRES_HOST
                  value: "postgres-service.database.svc.cluster.local"
                - name: POSTGRES_PORT
                  value: "5432"
                - name: POSTGRES_USER
                  valueFrom:
                    secretKeyRef:
                      name: postgres-backup-secret
                      key: postgres-user
                - name: POSTGRES_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: postgres-backup-secret
                      key: postgres-password
                # Optionally specify specific databases (comma-separated)
                # If not set, all databases will be backed up
                - name: POSTGRES_DATABASES
                  value: "myapp,analytics"
                # S3-compatible storage settings
                - name: S3_BUCKET
                  value: "my-postgres-backups"
                - name: S3_PREFIX
                  value: "production/postgres-backups"
                - name: S3_ENDPOINT
                  value: "https://s3.your-provider.com" # Required for third-party S3
                - name: S3_ACCESS_KEY_ID
                  valueFrom:
                    secretKeyRef:
                      name: postgres-backup-secret
                      key: s3-access-key-id
                - name: S3_SECRET_ACCESS_KEY
                  valueFrom:
                    secretKeyRef:
                      name: postgres-backup-secret
                      key: s3-secret-access-key
                - name: S3_REGION
                  value: "us-east-1"
                # Backup settings
                - name: BACKUP_RETENTION_DAYS
                  value: "7"
                - name: COMPRESSION
                  value: "gzip"
                # Optional webhook for notifications
                # - name: WEBHOOK_URL
                #   value: "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK"
              resources:
                requests:
                  memory: "256Mi"
                  cpu: "100m"
                limits:
                  memory: "512Mi"
                  cpu: "500m"
              # Security context
              securityContext:
                runAsNonRoot: true
                runAsUser: 1001
                runAsGroup: 1001
                allowPrivilegeEscalation: false
                capabilities:
                  drop:
                    - ALL
                readOnlyRootFilesystem: true
              # Temporary volume for backup files
              volumeMounts:
                - name: tmp-volume
                  mountPath: /backups
          volumes:
            - name: tmp-volume
              emptyDir: {}
      # Job settings
      activeDeadlineSeconds: 3600 # 1 hour timeout
      backoffLimit: 2

k8s-secret.yaml Normal file (12 lines)

@@ -0,0 +1,12 @@
apiVersion: v1
kind: Secret
metadata:
  name: postgres-backup-secret
  namespace: default
type: Opaque
stringData:
  # Plain text values - much easier to manage than base64
  postgres-user: postgres
  postgres-password: your-password
  s3-access-key-id: your-s3-access-key
  s3-secret-access-key: your-s3-secret-key