원클릭으로
monitoring-observability
Monitoring and observability with OpenTelemetry, Prometheus, Grafana dashboards, and structured logging
Codex 또는 Claude로 설치: 이 Prompt를 복사해 Codex, Claude 또는 다른 어시스턴트에 붙여 넣으면 Skill 페이지를 검토하고 설치를 진행할 수 있습니다.
메뉴
Monitoring and observability with OpenTelemetry, Prometheus, Grafana dashboards, and structured logging
Codex 또는 Claude로 설치: 이 Prompt를 복사해 Codex, Claude 또는 다른 어시스턴트에 붙여 넣으면 Skill 페이지를 검토하고 설치를 진행할 수 있습니다.
SOC 직업 분류 기준
Route broad or ambiguous AgentKit SEO work to the right module while keeping context scoped. Use when a request spans multiple surfaces, asks for overall digital-presence strategy, involves provider or install architecture, needs agent-context planning, or the correct platform skill is unclear.
Persistent memory system for Claude Code. Two-layer architecture (hot cache + knowledge wiki), safety hooks, /close-day end-of-day synthesis. Zero external dependencies.
Claude-native deep research using DAG-based query planning, parallel subagent execution, and gap-driven iteration. No external API needed.
Web accessibility patterns for WCAG 2.2 compliance including ARIA, keyboard navigation, screen readers, and testing
Authentication and authorization patterns including OAuth2, JWT, RBAC, session management, and PKCE flows
AWS cloud patterns for Lambda, ECS, S3, DynamoDB, and Infrastructure as Code with CDK/Terraform
| name | monitoring-observability |
| description | Monitoring and observability with OpenTelemetry, Prometheus, Grafana dashboards, and structured logging |
import { NodeSDK } from "@opentelemetry/sdk-node";
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-http";
import { HttpInstrumentation } from "@opentelemetry/instrumentation-http";
import { PgInstrumentation } from "@opentelemetry/instrumentation-pg";
import { PeriodicExportingMetricReader } from "@opentelemetry/sdk-metrics";
// Bootstraps the OpenTelemetry Node SDK for "order-service": traces and
// metrics are sent to the collector over OTLP/HTTP, and the HTTP and pg
// instrumentations are registered.
const sdk = new NodeSDK({
  serviceName: "order-service",
  traceExporter: new OTLPTraceExporter({
    url: "http://otel-collector:4318/v1/traces",
  }),
  metricReader: new PeriodicExportingMetricReader({
    exporter: new OTLPMetricExporter({
      url: "http://otel-collector:4318/v1/metrics",
    }),
    // Export metrics every 15 seconds.
    exportIntervalMillis: 15000,
  }),
  instrumentations: [
    new HttpInstrumentation(),
    new PgInstrumentation(),
  ],
});

sdk.start();

// Shut the SDK down on SIGTERM. shutdown() is asynchronous, so a failure is
// caught and logged here instead of surfacing as an unhandled rejection.
process.on("SIGTERM", () => {
  sdk.shutdown().catch((error: unknown) => {
    console.error("OpenTelemetry SDK shutdown failed", error);
  });
});
import { trace, metrics, SpanStatusCode } from "@opentelemetry/api";
// Tracer and meter handles obtained from the OpenTelemetry API, both
// requested under the name "order-service".
const tracer = trace.getTracer("order-service");
const meter = metrics.getMeter("order-service");
// Counter instrument "orders.created"; createOrder adds to it with a
// `status` attribute.
const orderCounter = meter.createCounter("orders.created", {
description: "Number of orders created",
});
// Histogram instrument for order processing time; the unit is declared as
// milliseconds.
const orderDuration = meter.createHistogram("orders.processing_duration_ms", {
description: "Order processing duration in milliseconds",
unit: "ms",
});
/**
 * Creates an order inside a "createOrder" span and records order metrics.
 *
 * The span is tagged with the customer id and the item count. On success the
 * order counter is incremented with `status: "success"` and the time spent in
 * `db.order.create` is recorded in the duration histogram. On failure the
 * exception is recorded on the span, the span is marked ERROR, the counter is
 * incremented with `status: "error"`, and the original error is rethrown.
 * The span is always ended.
 *
 * @param input - Order payload; passed unchanged to `db.order.create`.
 * @returns Whatever `db.order.create` resolves to.
 * @throws Rethrows any error raised while creating the order.
 */
async function createOrder(input: CreateOrderInput) {
  return tracer.startActiveSpan("createOrder", async (span) => {
    try {
      span.setAttributes({
        "order.customer_id": input.customerId,
        "order.item_count": input.items.length,
      });
      const start = performance.now();
      const order = await db.order.create({ data: input });
      orderCounter.add(1, { status: "success" });
      orderDuration.record(performance.now() - start);
      span.setStatus({ code: SpanStatusCode.OK });
      return order;
    } catch (error: unknown) {
      // A thrown value is not guaranteed to be an Error, so narrow before
      // reading `.message`; anything else is stringified.
      const message = error instanceof Error ? error.message : String(error);
      // Attach the exception itself to the span, not just the status text.
      span.recordException(error instanceof Error ? error : message);
      span.setStatus({ code: SpanStatusCode.ERROR, message });
      orderCounter.add(1, { status: "error" });
      throw error;
    } finally {
      span.end();
    }
  });
}
# prometheus.yml
# Scrapes the API servers and the node exporter every 15 seconds.
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: "api-servers"
    static_configs:
      - targets: ["api-1:9090", "api-2:9090"]
    # Job-level setting: path on each target that serves the metrics.
    metrics_path: /metrics

  - job_name: "node-exporter"
    static_configs:
      - targets: ["node-exporter:9100"]
import { collectDefaultMetrics, Counter, Histogram, Registry } from "prom-client";
// Registry instance that both the default metrics and the request-duration
// histogram are registered on.
const registry = new Registry();
collectDefaultMetrics({ register: registry });
// Histogram of HTTP request durations, labelled by method, route and status.
const httpRequestDuration = new Histogram({
name: "http_request_duration_seconds",
help: "HTTP request duration in seconds",
labelNames: ["method", "route", "status"],
// Bucket upper bounds; in seconds, per the metric name and help text.
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5],
registers: [registry],
});
// Middleware that times each request and observes the elapsed time in
// httpRequestDuration when the response emits "finish".
app.use((req, res, next) => {
  const stopTimer = httpRequestDuration.startTimer();

  res.on("finish", () => {
    // Use the matched route pattern when there is one, otherwise the request path.
    // NOTE(review): raw paths as label values may create many distinct
    // series for unmatched URLs — confirm this is acceptable.
    const route = req.route?.path ?? req.path;
    stopTimer({ method: req.method, route, status: res.statusCode });
  });

  next();
});
// Exposes the registry's metrics at /metrics.
// Serialization is awaited inside try/catch: a rejection from an async
// handler would otherwise go unhandled and leave the request without a
// response, so a failure is answered with a 500 instead.
app.get("/metrics", async (req, res) => {
  try {
    const body = await registry.metrics();
    res.set("Content-Type", registry.contentType);
    res.end(body);
  } catch (error: unknown) {
    res.status(500).end(error instanceof Error ? error.message : String(error));
  }
});
import pino from "pino";
// Application logger. The level comes from LOG_LEVEL (default "info"), the
// level is emitted as its label, and the listed paths are redacted.
const logger = pino({
  level: process.env.LOG_LEVEL ?? "info",
  formatters: {
    // Emit the level as its label under the `level` key.
    level(label) {
      return { level: label };
    },
  },
  redact: ["req.headers.authorization", "password", "token"],
});
/**
 * Middleware that logs one info entry per response once it emits "finish".
 *
 * The entry contains the request method and URL, the response status code,
 * the elapsed time in milliseconds since the middleware ran, and the value of
 * the incoming `x-trace-id` header.
 */
function requestLogger(req, res, next) {
  const startedAt = Date.now();

  const logCompletion = () => {
    logger.info({
      method: req.method,
      url: req.url,
      status: res.statusCode,
      duration_ms: Date.now() - startedAt,
      trace_id: req.headers["x-trace-id"],
    });
  };

  res.on("finish", logCompletion);
  next();
}
groups:
- name: api-alerts
rules:
- alert: HighErrorRate
  # Share of 5xx responses per route over the last 5 minutes. Both sides are
  # aggregated by route before dividing: dividing the raw series would match
  # each 5xx series with itself (identical label sets), giving a ratio of 1
  # whenever any 5xx traffic exists.
  expr: sum by (route) (rate(http_request_duration_seconds_count{status=~"5.."}[5m])) / sum by (route) (rate(http_request_duration_seconds_count[5m])) > 0.05
  for: 5m
  labels:
    severity: critical
  annotations:
    summary: "Error rate above 5% for {{ $labels.route }}"
- alert: HighLatency
expr: histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 2
for: 10m
labels:
severity: warning