---
name: composable-rust-production
description: Expert knowledge for deploying and operating Composable Rust applications in production. Use when setting up database migrations, configuring connection pools, implementing backup/restore procedures, tuning performance, setting up monitoring and observability, or handling operational concerns like disaster recovery and production database management.
---

# Composable Rust Production Operations

Expert knowledge for production deployment and operations of Composable Rust applications — database migrations, connection pooling, backup/restore, performance tuning, monitoring, and operational excellence.
Automatically apply when:
- Setting up or writing database migrations
- Configuring connection pools
- Implementing backup/restore procedures
- Tuning database or application performance
- Setting up monitoring, observability, or health checks
- Handling disaster recovery and production database operations

## Database Migrations
Option 1: Helper Function (Deployment Scripts)
use composable_rust_postgres::run_migrations;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let database_url = std::env::var("DATABASE_URL")?;
// Run migrations during startup
run_migrations(&database_url).await?;
println!("Database ready!");
Ok(())
}
Option 2: EventStore Method (When Store Exists)
use composable_rust_postgres::PostgresEventStore;
let store = PostgresEventStore::new(&database_url).await?;
store.run_migrations().await?;
Option 3: sqlx CLI (Development)
# Install CLI
cargo install sqlx-cli --no-default-features --features postgres
# Run migrations
sqlx migrate run --database-url postgres://localhost/mydb
# Revert last migration
sqlx migrate revert --database-url postgres://localhost/mydb
Step 1: Create SQL file with sequential number
# migrations/003_add_user_context.sql
Step 2: Write idempotent SQL
-- Add user_context column to events table
ALTER TABLE events
ADD COLUMN IF NOT EXISTS user_context JSONB;
-- Add index
CREATE INDEX IF NOT EXISTS idx_events_user_context
ON events USING GIN (user_context);
Critical Rules:

Always use `IF NOT EXISTS` for idempotency:

-- ✅ GOOD: Idempotent
CREATE TABLE IF NOT EXISTS orders (
id UUID PRIMARY KEY,
customer_id UUID NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ✅ GOOD: Safe column addition
ALTER TABLE orders
ADD COLUMN IF NOT EXISTS status TEXT DEFAULT 'pending';
-- ❌ BAD: Not idempotent
CREATE TABLE orders (...); -- Fails on second run
-- ❌ BAD: Destructive
DROP TABLE old_orders; -- Can't be undone
use sqlx::postgres::PgPoolOptions;
use std::time::Duration;
let pool = PgPoolOptions::new()
// Connection limits
.max_connections(20) // Max concurrent
.min_connections(5) // Keep warm
// Timeouts
.acquire_timeout(Duration::from_secs(10)) // Wait for conn
.idle_timeout(Duration::from_secs(600)) // 10min idle
.max_lifetime(Duration::from_secs(1800)) // 30min recycle
// Health
.test_before_acquire(true) // Validate conn
.connect(&database_url)
.await?;
let store = PostgresEventStore::from_pool(pool);
Formula: max_connections = (req/sec × avg connection hold time in seconds) + buffer

Example:
1000 req/sec × 0.020s hold time + 5 buffer = 25 connections

Recommendations by Load:
| Environment | Max Connections | Use Case |
|---|---|---|
| Development | 5 | Minimal overhead |
| Staging | 10-20 | Simulate production |
| Low traffic | 20-50 | < 100 req/sec |
| Medium traffic | 50-100 | 100-1000 req/sec |
| High traffic | 100-200 | > 1000 req/sec |
PostgreSQL Limits:
# postgresql.conf
max_connections = 200 # Reserve some for admin/monitoring
// Check pool health
let size = pool.size(); // Current connections
let idle = pool.num_idle(); // Available connections
tracing::info!(
pool_size = size,
pool_idle = idle,
"Connection pool status"
);
// Metrics to track:
// - pool.size() < max_connections (not exhausted)
// - pool.num_idle() > 0 (connections available)
// - Acquire time < 100ms (fast acquisition)
// - Connection errors ≈ 0 (healthy pool)
# Backup with compression
pg_dump -h localhost -U postgres -d mydb \
--format=custom \
--compress=9 \
--file=backup_$(date +%Y%m%d_%H%M%S).dump
# Restore
pg_restore -h localhost -U postgres \
--clean --create \
--dbname=postgres \
backup_20250110_143022.dump
# Backup events table (source of truth)
pg_dump -h localhost -U postgres -d mydb \
--table=events \
--format=custom \
--file=events_backup_$(date +%Y%m%d_%H%M%S).dump
# Restore
pg_restore -h localhost -U postgres \
--dbname=mydb \
--table=events \
events_backup_20250110_143022.dump
#!/bin/bash
# daily-backup.sh
set -e
BACKUP_DIR="/backups/postgres"
DATE=$(date +%Y%m%d_%H%M%S)
DB_NAME="mydb"
RETENTION_DAYS=30
# Create backup
pg_dump -h localhost -U postgres -d $DB_NAME \
--format=custom --compress=9 \
--file=$BACKUP_DIR/backup_$DATE.dump
# Upload to S3 (optional)
aws s3 cp $BACKUP_DIR/backup_$DATE.dump \
s3://my-backups/postgres/backup_$DATE.dump
# Delete old backups
find $BACKUP_DIR -name "backup_*.dump" \
-mtime +$RETENTION_DAYS -delete
echo "Backup completed: backup_$DATE.dump"
Cron Schedule:
# Daily at 2 AM
0 2 * * * /usr/local/bin/daily-backup.sh >> /var/log/backup.log 2>&1
Critical: Test restoration monthly!
# 1. Create test database
createdb -h localhost -U postgres mydb_test
# 2. Restore backup
pg_restore -h localhost -U postgres \
--dbname=mydb_test \
backup_latest.dump
# 3. Verify data
psql -h localhost -U postgres -d mydb_test \
-c "SELECT COUNT(*) FROM events;"
# 4. Cleanup
dropdb -h localhost -U postgres mydb_test
For Event Sourcing Workloads (postgresql.conf):
# Memory (assume 16GB RAM server)
shared_buffers = 4GB # 25% of RAM
effective_cache_size = 12GB # 75% of RAM
work_mem = 64MB # Per-operation memory
# Write Performance
wal_buffers = 16MB
checkpoint_completion_target = 0.9
max_wal_size = 4GB
min_wal_size = 1GB
# Query Performance (SSDs)
random_page_cost = 1.1 # Default 4.0 for HDDs
effective_io_concurrency = 200
# Logging
log_min_duration_statement = 1000 # Log queries > 1s
log_checkpoints = on
Default Event Store Indexes:
-- Primary key (automatic)
PRIMARY KEY (stream_id, version)
-- Query by time
CREATE INDEX idx_events_created ON events(created_at);
-- Query by type
CREATE INDEX idx_events_type ON events(event_type);
Custom Indexes for Your Workload:
-- Query by metadata fields
CREATE INDEX idx_events_user_id
ON events ((metadata->>'user_id'));
-- Query by correlation ID
CREATE INDEX idx_events_correlation
ON events ((metadata->>'correlation_id'));
-- Partial index for recent events
CREATE INDEX idx_events_recent
ON events(created_at)
WHERE created_at > NOW() - INTERVAL '30 days';
# Regular maintenance
psql -c "VACUUM ANALYZE events;"
# Full vacuum (locks table - maintenance window)
psql -c "VACUUM FULL events;"
# Reindex if bloated
psql -c "REINDEX TABLE events;"
Automate with autovacuum (postgresql.conf):
autovacuum = on
autovacuum_vacuum_scale_factor = 0.1
autovacuum_analyze_scale_factor = 0.05
use axum::{Json, extract::State};
use serde::Serialize;
#[derive(Serialize)]
struct HealthStatus {
status: String,
database: String,
event_count: i64,
}
async fn health_check(
State(store): State<Arc<PostgresEventStore>>,
) -> Json<HealthStatus> {
// Test database connectivity
let db_status = match sqlx::query("SELECT 1")
.execute(store.pool())
.await
{
Ok(_) => "healthy",
Err(_) => "unhealthy",
};
// Get event count
let event_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM events")
.fetch_one(store.pool())
.await
.unwrap_or((0,));
Json(HealthStatus {
status: if db_status == "healthy" { "ok" } else { "error" }.into(),
database: db_status.into(),
event_count: event_count.0,
})
}
Database Metrics:
-- Active connections
SELECT count(*) FROM pg_stat_activity WHERE state = 'active';
-- Database size
SELECT pg_size_pretty(pg_database_size('mydb'));
-- Table size
SELECT pg_size_pretty(pg_total_relation_size('events'));
-- Slow queries (requires pg_stat_statements)
SELECT query, calls, total_exec_time, mean_exec_time
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;
Application Metrics:
use metrics::{counter, histogram, gauge};
// Track reducer execution time
let start = Instant::now();
let effects = reducer.reduce(&mut state, action, &env);
histogram!("reducer_duration_ms").record(start.elapsed().as_millis() as f64);
// Track effect execution
counter!("effects_executed_total").increment(effects.len() as u64);
// Track store state
gauge!("store_state_size_bytes").set(state_size as f64);
Set up alerts for:
- `pool.num_idle() == 0` for > 1 minute (pool exhaustion)
- Connection acquire timeouts or connection errors
- Backup job failures

## Disaster Recovery

### Recovery Time Objective (RTO)

Target: < 4 hours to full recovery

Steps: restore the latest backup, run migrations, verify event counts, and restart the application (see the recovery procedure below).

### Recovery Point Objective (RPO)

Target: < 1 hour of data loss

Strategies: daily full backups plus more frequent incremental backups or WAL archiving for point-in-time recovery.
Step 1: Assess the situation
# Check PostgreSQL logs
tail -100 /var/log/postgresql/postgresql.log
# Check disk space
df -h
# Check system resources
top
iostat
Step 2: Attempt quick recovery
# Restart PostgreSQL
systemctl restart postgresql
# Verify
systemctl status postgresql
psql -c "SELECT 1"
Step 3: If restart fails, restore from backup
# Stop application
systemctl stop myapp
# Restore latest backup
pg_restore -h localhost -U postgres \
--clean --create --dbname=postgres \
/backups/postgres/backup_latest.dump
# Run migrations
./myapp migrate
# Start application
systemctl start myapp
Step 4: Verify recovery
# Check event count
psql -c "SELECT COUNT(*) FROM events;"
# Check application health
curl http://localhost:8080/health
## Production Checklist

Before deploying to production, verify: migrations run cleanly and are idempotent, pool sizing matches expected load, backups are scheduled and a restore has been tested, and health checks, metrics, and alerts are configured.

## Troubleshooting

### Pool Exhaustion

Symptom: "Timeout acquiring connection from pool"

Diagnosis:
let size = pool.size();
let idle = pool.num_idle();
tracing::error!(size, idle, "Pool exhausted");
Solutions:
- Increase `max_connections` in pool config
- Release connections promptly (avoid holding them across long-running work)

### Slow Queries

Symptom: High latency on database operations
Diagnosis:
-- Enable query logging
SET log_min_duration_statement = 100;
-- Check slow queries
SELECT query, mean_exec_time
FROM pg_stat_statements
ORDER BY mean_exec_time DESC;
Solutions:
- Add indexes matching your query patterns
- Run `VACUUM ANALYZE` to refresh statistics
- Tune `work_mem` for complex queries

### High Memory Usage

Symptom: PostgreSQL using excessive memory
Diagnosis:
SELECT pg_size_pretty(pg_database_size('mydb'));
SELECT pg_size_pretty(pg_total_relation_size('events'));
Solutions:
- Reduce `shared_buffers` if too high
- Tune `work_mem` for query complexity

## Related Resources

- `composable-rust-architecture` skill for core patterns
- `composable-rust-event-sourcing` skill for event design
- `composable-rust-testing` skill for integration tests
- `docs/production-database.md` (800+ lines)
- `docs/observability.md` for tracing and metrics