一键导入
monitoring-observability
Monitoring and observability with OpenTelemetry, Prometheus, Grafana dashboards, and structured logging
用 Codex 或 Claude 帮你安装：复制这段 Prompt，粘贴到 Codex、Claude 或其他助手里，让它检查 Skill 页面并帮你完成安装。
菜单
Monitoring and observability with OpenTelemetry, Prometheus, Grafana dashboards, and structured logging
用 Codex 或 Claude 帮你安装：复制这段 Prompt，粘贴到 Codex、Claude 或其他助手里，让它检查 Skill 页面并帮你完成安装。
基于 SOC 职业分类
Route broad or ambiguous AgentKit SEO work to the right module while keeping context scoped. Use when a request spans multiple surfaces, asks for overall digital-presence strategy, involves provider or install architecture, needs agent-context planning, or the correct platform skill is unclear.
Persistent memory system for Claude Code. Two-layer architecture (hot cache + knowledge wiki), safety hooks, /close-day end-of-day synthesis. Zero external dependencies.
Claude-native deep research using DAG-based query planning, parallel subagent execution, and gap-driven iteration. No external API needed.
Web accessibility patterns for WCAG 2.2 compliance including ARIA, keyboard navigation, screen readers, and testing
Authentication and authorization patterns including OAuth2, JWT, RBAC, session management, and PKCE flows
AWS cloud patterns for Lambda, ECS, S3, DynamoDB, and Infrastructure as Code with CDK/Terraform
| name | monitoring-observability |
| description | Monitoring and observability with OpenTelemetry, Prometheus, Grafana dashboards, and structured logging |
import { NodeSDK } from "@opentelemetry/sdk-node";
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-http";
import { HttpInstrumentation } from "@opentelemetry/instrumentation-http";
import { PgInstrumentation } from "@opentelemetry/instrumentation-pg";
import { PeriodicExportingMetricReader } from "@opentelemetry/sdk-metrics";
/**
 * OpenTelemetry bootstrap for the "order-service" process.
 *
 * - Traces are exported over OTLP/HTTP to `otel-collector:4318/v1/traces`.
 * - Metrics are exported over OTLP/HTTP to `otel-collector:4318/v1/metrics`
 *   by a periodic reader that runs every 15 000 ms.
 * - The HTTP and `pg` instrumentations are registered.
 */
const sdk = new NodeSDK({
  serviceName: "order-service",
  traceExporter: new OTLPTraceExporter({
    url: "http://otel-collector:4318/v1/traces",
  }),
  metricReader: new PeriodicExportingMetricReader({
    exporter: new OTLPMetricExporter({
      url: "http://otel-collector:4318/v1/metrics",
    }),
    exportIntervalMillis: 15000,
  }),
  instrumentations: [
    new HttpInstrumentation(),
    new PgInstrumentation(),
  ],
});

sdk.start();

// `sdk.shutdown()` returns a promise. Without a rejection handler a failed
// flush surfaces as an unhandled rejection instead of a diagnosable log line.
// NOTE(review): installing a SIGTERM listener removes Node's default
// exit-on-signal behaviour; confirm that something else ends the process.
process.on("SIGTERM", () => {
  void sdk.shutdown().catch((error: unknown) => {
    console.error("Failed to shut down OpenTelemetry SDK", error);
  });
});
import { trace, metrics, SpanStatusCode } from "@opentelemetry/api";
// Scope name used for both the tracer and the meter below.
const ORDER_SERVICE_SCOPE = "order-service";

const tracer = trace.getTracer(ORDER_SERVICE_SCOPE);
const meter = metrics.getMeter(ORDER_SERVICE_SCOPE);

// Counter: number of orders created.
const orderCounter = meter.createCounter("orders.created", {
  description: "Number of orders created",
});

// Histogram: order processing duration, recorded in milliseconds.
const orderDuration = meter.createHistogram("orders.processing_duration_ms", {
  unit: "ms",
  description: "Order processing duration in milliseconds",
});
/**
 * Creates an order via `db.order.create` inside an active "createOrder" span.
 *
 * On success the order counter is incremented with `status: "success"`, the
 * elapsed time of the database call is recorded on the duration histogram,
 * and the span is marked OK. On failure the exception is recorded on the
 * span, the span is marked ERROR, the counter is incremented with
 * `status: "error"`, and the original error is rethrown. The span is always
 * ended.
 *
 * @param input - Order payload; `customerId` and `items.length` are attached
 *   to the span as attributes and the whole object is passed as `data`.
 * @returns The value resolved by `db.order.create`.
 * @throws Whatever was thrown while creating the order, unchanged.
 */
async function createOrder(input: CreateOrderInput) {
  return tracer.startActiveSpan("createOrder", async (span) => {
    try {
      span.setAttributes({
        "order.customer_id": input.customerId,
        "order.item_count": input.items.length,
      });

      const start = performance.now();
      const order = await db.order.create({ data: input });

      orderCounter.add(1, { status: "success" });
      orderDuration.record(performance.now() - start);
      span.setStatus({ code: SpanStatusCode.OK });
      return order;
    } catch (error: unknown) {
      // A thrown value is not guaranteed to be an Error; normalise it before
      // reading `.message` so the span status always carries a string.
      const failure = error instanceof Error ? error : new Error(String(error));
      span.recordException(failure);
      span.setStatus({ code: SpanStatusCode.ERROR, message: failure.message });
      orderCounter.add(1, { status: "error" });
      // Rethrow the original value so callers see exactly what was thrown.
      throw error;
    } finally {
      span.end();
    }
  });
}
# prometheus.yml
# Scrape configuration: every target is scraped at the global 15s interval.
global:
  scrape_interval: 15s

scrape_configs:
  # Application servers, scraped at /metrics.
  - job_name: "api-servers"
    static_configs:
      - targets: ["api-1:9090", "api-2:9090"]
    metrics_path: /metrics

  # Node exporter target.
  - job_name: "node-exporter"
    static_configs:
      - targets: ["node-exporter:9100"]
import { collectDefaultMetrics, Counter, Histogram, Registry } from "prom-client";
// Bucket upper bounds for the HTTP request duration histogram, in seconds.
const HTTP_DURATION_BUCKETS_SECONDS = [0.01, 0.05, 0.1, 0.5, 1, 5];

// Registry that holds both the default metrics and the HTTP histogram.
const registry = new Registry();
collectDefaultMetrics({ register: registry });

// Request duration histogram, labelled by method, route and status.
const httpRequestDuration = new Histogram({
  name: "http_request_duration_seconds",
  help: "HTTP request duration in seconds",
  labelNames: ["method", "route", "status"],
  buckets: HTTP_DURATION_BUCKETS_SECONDS,
  registers: [registry],
});
/**
 * Middleware that times every request and records it on
 * `httpRequestDuration` once the response has finished, labelled with the
 * request method, the matched route pattern and the response status code.
 */
app.use((req, res, next) => {
  const stopTimer = httpRequestDuration.startTimer();

  res.on("finish", () => {
    // `req.route` is only present when a route matched. Falling back to the
    // raw request path would create a new time series for every distinct
    // unmatched URL, so unmatched requests share one fixed label value.
    // NOTE(review): for routes mounted on a sub-router, `route.path` is
    // relative to the mount point; consider prefixing `req.baseUrl`.
    const route = req.route?.path ?? "unmatched";
    stopTimer({ method: req.method, route, status: res.statusCode });
  });

  next();
});
/**
 * Metrics endpoint: serves the serialised contents of `registry` with the
 * registry's content type. If serialisation fails, responds with HTTP 500
 * and the error message instead of leaving the request hanging.
 */
app.get("/metrics", async (_req, res) => {
  try {
    // Serialise first so a failure is not sent with the metrics content type.
    const body = await registry.metrics();
    res.set("Content-Type", registry.contentType);
    res.end(body);
  } catch (error: unknown) {
    res.status(500).end(error instanceof Error ? error.message : String(error));
  }
});
import pino from "pino";
/**
 * Shared pino logger.
 *
 * The minimum level comes from the `LOG_LEVEL` environment variable and
 * defaults to "info". The level formatter writes the level label under the
 * `level` key. The listed paths are redacted from log output.
 */
const logger = pino({
  level: process.env.LOG_LEVEL ?? "info",
  redact: ["req.headers.authorization", "password", "token"],
  formatters: {
    level(label) {
      return { level: label };
    },
  },
});
/**
 * Middleware that logs one structured line per request once the response
 * has finished: method, URL, status code, elapsed wall-clock milliseconds
 * and the value of the incoming `x-trace-id` header.
 *
 * The parameters are typed structurally with only the members this function
 * reads, so any request/response pair exposing them can be passed in.
 *
 * @param req - Incoming request; `method`, `url` and `headers` are read.
 * @param res - Outgoing response; `statusCode` is read on "finish".
 * @param next - Continuation invoked after the listener is attached.
 */
function requestLogger(
  req: {
    method?: string;
    url?: string;
    headers: Record<string, string | string[] | undefined>;
  },
  res: {
    statusCode: number;
    on(event: "finish", listener: () => void): unknown;
  },
  next: () => void,
): void {
  const start = Date.now();

  res.on("finish", () => {
    logger.info({
      method: req.method,
      url: req.url,
      status: res.statusCode,
      duration_ms: Date.now() - start,
      trace_id: req.headers["x-trace-id"],
    });
  });

  next();
}
groups:
  - name: api-alerts
    rules:
      # Share of requests answered with a 5xx status, per route.
      # Both sides are aggregated by route so the division matches on that
      # label alone. Dividing the raw series would pair every 5xx series
      # with itself and always yield 1.
      - alert: HighErrorRate
        expr: |
          sum by (route) (rate(http_request_duration_seconds_count{status=~"5.."}[5m]))
            /
          sum by (route) (rate(http_request_duration_seconds_count[5m]))
            > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Error rate above 5% for {{ $labels.route }}"

      # 99th percentile request duration above 2 seconds for 10 minutes.
      - alert: HighLatency
        expr: histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 2
        for: 10m
        labels:
          severity: warning