| name | sns-workflow:observe |
| description | 智能体可观测性 —— 汇总工作流执行状态、成功率、耗时指标、错误模式。读取 .snsplay/task/ 产物文件和日志,生成运维视角的运行报告(覆盖 review/heal/ui-verify artifact + 技能执行日志)。未来增强: 支持 LogQL/PromQL 查询能力。 |
智能体可观测性
汇总展示当前项目的 sns-workflow 执行状态、成功率、耗时指标、错误模式。
数据来源:
.snsplay/task/ — 工作流产物文件(manifest、impl-result、review 输出)
.snsplay/sns-workflow.log — 调试日志(需启用 debug 模式)
.snsplay/task/cli_trace.log — CLI 进程 stderr 追踪
~/.sns-workflow/skill-executions.log — 技能执行跟踪日志(所有技能)
步骤 1: 检查工作流状态
SHELL_DIR="${CLAUDE_PLUGIN_ROOT:-plugins/sns-workflow}/scripts"
source "$SHELL_DIR/skill-logger.sh"
sns_skill_start "observe" "$*"
ROOT=$(git rev-parse --show-toplevel 2>/dev/null || echo "${PWD}")
TASK_DIR="$ROOT/.snsplay/task"
if [[ ! -d "$TASK_DIR" ]]; then
echo "=== 可观测性报告 ==="
echo ""
echo "无活动工作流(.snsplay/task/ 目录不存在)"
echo ""
echo "提示: 运行 /sns-workflow:once 启动工作流"
exit 0
fi
echo "=== 工作流状态 ==="
if [[ -f "$TASK_DIR/workflow-tasks.json" ]]; then
workflow_type=$(grep -o '"workflow_type"[[:space:]]*:[[:space:]]*"[^"]*"' "$TASK_DIR/workflow-tasks.json" | head -1 | sed 's/.*:"//;s/"$//')
echo "工作流类型: ${workflow_type:-未知}"
stages=$(grep -o '"output_file"[[:space:]]*:[[:space:]]*"[^"]*"' "$TASK_DIR/workflow-tasks.json" | sed 's/.*:"//;s/"$//')
echo ""
echo "阶段列表:"
echo "$stages" | while read stage_file; do
status="pending"
if [[ -n "$stage_file" ]]; then
artifact="$TASK_DIR/$stage_file"
if [[ -f "$artifact" ]]; then
art_status=$(grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' "$artifact" | head -1 | sed 's/.*:"//;s/"$//')
if [[ -n "$art_status" ]]; then
status="$art_status"
else
status="done"
fi
fi
fi
case "$status" in
approved) icon="✓" ;;
needs_changes|needs_clarification) icon="~" ;;
rejected) icon="✗" ;;
done) icon="✓" ;;
*) icon="○" ;;
esac
echo " $icon $stage_file ($status)"
done
else
echo "工作流类型: 无 workflow-tasks.json"
fi
echo ""
echo "阶段完成状态:"
[[ -f "$TASK_DIR/user-story/manifest.json" ]] && echo " ✓ 需求分析 (user-story)" || echo " ○ 需求分析"
[[ -f "$TASK_DIR/plan/manifest.json" ]] && echo " ✓ 架构设计 (plan)" || echo " ○ 架构设计"
[[ -f "$TASK_DIR/impl-result.json" ]] && echo " ✓ 实现 (impl-result)" || echo " ○ 实现"
if [[ -f "$TASK_DIR/impl-result.json" ]]; then
impl_status=$(grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' "$TASK_DIR/impl-result.json" | head -1 | sed 's/.*:"//;s/"$//')
steps=$(grep -o '"steps_completed"[[:space:]]*:[[:space:]]*[0-9]*' "$TASK_DIR/impl-result.json" | head -1 | sed 's/.*://')
echo ""
echo "实现状态: $impl_status (完成步骤: $steps)"
fi
步骤 2: 输出审核统计
TASK_DIR="$ROOT/.snsplay/task"
echo ""
echo "=== 审核统计 ==="
plan_review_count=$(ls "$TASK_DIR"/plan-review-*.json 2>/dev/null | wc -l | tr -d ' ')
echo "计划审核: $plan_review_count 份输出"
code_review_count=$(ls "$TASK_DIR"/code-review-*.json 2>/dev/null | wc -l | tr -d ' ')
echo "代码审核: $code_review_count 份输出"
rca_count=$(ls "$TASK_DIR"/rca-*.json 2>/dev/null | wc -l | tr -d ' ')
echo "根因分析: $rca_count 份输出"
req_count=$(ls "$TASK_DIR"/analysis-*.json 2>/dev/null | wc -l | tr -d ' ')
echo "需求分析: $req_count 份输出"
if [[ -f "$TASK_DIR/review-findings-to-fix.json" ]]; then
echo ""
echo "⚠ 有审核未解决问题(review-findings-to-fix.json 存在)"
fi
review_count=$(ls "$TASK_DIR"/review-*.json 2>/dev/null | wc -l | tr -d ' ')
echo "交叉审查: $review_count 份输出"
heal_count=$(ls "$TASK_DIR"/heal-*.json 2>/dev/null | wc -l | tr -d ' ')
echo "恢复计划: $heal_count 份输出"
uiv_count=$(ls "$TASK_DIR"/ui-verify-*.json 2>/dev/null | wc -l | tr -d ' ')
echo "UI 验证: $uiv_count 份输出"
plan_count=$(ls "$TASK_DIR"/plan-*.json 2>/dev/null | wc -l | tr -d ' ')
plan_confirmed=$(ls "$TASK_DIR"/plan-*.json 2>/dev/null | xargs grep -l '"status"[[:space:]]*:[[:space:]]*"confirmed"' 2>/dev/null | wc -l | tr -d ' ')
echo "执行计划: $plan_count 份 (已确认: $plan_confirmed)"
latest_plan=$(ls -t "$TASK_DIR"/plan-*.json 2>/dev/null | head -1)
if [[ -n "$latest_plan" ]] && [[ -f "$latest_plan" ]]; then
plan_scope=$(grep -o '"scope"[[:space:]]*:[[:space:]]*"[^"]*"' "$latest_plan" | head -1 | sed 's/.*:"//;s/"$//')
plan_status=$(grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' "$latest_plan" | head -1 | sed 's/.*:"//;s/"$//')
plan_desc=$(grep -o '"description"[[:space:]]*:[[:space:]]*"[^"]*"' "$latest_plan" | head -1 | sed 's/.*:"//;s/"$//')
echo " 最新: [$plan_scope/$plan_status] $plan_desc"
fi
qagate_count=$(ls "$TASK_DIR"/qa-gate-*.json 2>/dev/null | wc -l | tr -d ' ')
echo "质量门禁: $qagate_count 份输出"
latest_qagate=$(ls -t "$TASK_DIR"/qa-gate-*.json 2>/dev/null | head -1)
if [[ -n "$latest_qagate" ]] && [[ -f "$latest_qagate" ]]; then
qagate_verdict=$(grep -o '"verdict"[[:space:]]*:[[:space:]]*"[^"]*"' "$latest_qagate" | head -1 | sed 's/.*:"//;s/"$//')
qagate_score=$(grep -o '"score"[[:space:]]*:[[:space:]]*[0-9.]*' "$latest_qagate" | head -1 | sed 's/.*://')
qagate_round=$(grep -o '"round"[[:space:]]*:[[:space:]]*[0-9]*' "$latest_qagate" | head -1 | sed 's/.*://')
echo " 最新: [$qagate_verdict] score=$qagate_score round=$qagate_round"
fi
步骤 2.5: UI 验证详细报告
TASK_DIR="$ROOT/.snsplay/task"
echo ""
echo "=== UI 验证详细报告 ==="
if [[ "$uiv_count" -eq 0 ]] 2>/dev/null || [[ ! -f "$(ls -t "$TASK_DIR"/ui-verify-*.json 2>/dev/null | head -1)" ]]; then
echo "无 UI 验证产物"
else
snapshot_count=$(ls "$TASK_DIR"/ui-verify-*.json 2>/dev/null | xargs grep -l '"mode"[[:space:]]*:[[:space:]]*"snapshot"' 2>/dev/null | wc -l | tr -d ' ')
verify_count=$(ls "$TASK_DIR"/ui-verify-*.json 2>/dev/null | xargs grep -l '"mode"[[:space:]]*:[[:space:]]*"verify"' 2>/dev/null | wc -l | tr -d ' ')
reproduce_count=$(ls "$TASK_DIR"/ui-verify-*.json 2>/dev/null | xargs grep -l '"mode"[[:space:]]*:[[:space:]]*"reproduce"' 2>/dev/null | wc -l | tr -d ' ')
audit_count=$(ls "$TASK_DIR"/ui-verify-*.json 2>/dev/null | xargs grep -l '"mode"[[:space:]]*:[[:space:]]*"audit"' 2>/dev/null | wc -l | tr -d ' ')
echo "模式分布: snapshot=$snapshot_count verify=$verify_count reproduce=$reproduce_count audit=$audit_count"
latest_verify=$(ls -t "$TASK_DIR"/ui-verify-*.json 2>/dev/null | xargs grep -l '"mode"[[:space:]]*:[[:space:]]*"verify"' 2>/dev/null | head -1)
if [[ -n "$latest_verify" ]] && [[ -f "$latest_verify" ]]; then
severity=$(grep -o '"severity"[[:space:]]*:[[:space:]]*"[^"]*"' "$latest_verify" | head -1 | sed 's/.*:"//;s/"$//')
total_changes=$(grep -o '"total_changes"[[:space:]]*:[[:space:]]*[0-9]*' "$latest_verify" | head -1 | sed 's/.*://')
echo ""
echo "最新差异验证:"
echo " 严重度: ${severity:-unknown}"
echo " 变更总数: ${total_changes:-0}"
if [[ "$severity" == "significant" ]] || [[ "$severity" == "moderate" ]]; then
echo " ⚠ 存在需关注的页面变更"
fi
fi
latest_repro=$(ls -t "$TASK_DIR"/ui-verify-*.json 2>/dev/null | xargs grep -l '"mode"[[:space:]]*:[[:space:]]*"reproduce"' 2>/dev/null | head -1)
if [[ -n "$latest_repro" ]] && [[ -f "$latest_repro" ]]; then
reproducible=$(grep -o '"reproducible"[[:space:]]*:[[:space:]]*[a-z]*' "$latest_repro" | head -1 | sed 's/.*://')
echo ""
echo "最新 Bug 复现:"
echo " 可复现: ${reproducible:-unknown}"
fi
latest_audit=$(ls -t "$TASK_DIR"/ui-verify-*.json 2>/dev/null | xargs grep -l '"mode"[[:space:]]*:[[:space:]]*"audit"' 2>/dev/null | head -1)
if [[ -n "$latest_audit" ]] && [[ -f "$latest_audit" ]]; then
health_val=$(grep -o '"overall_health"[[:space:]]*:[[:space:]]*"[^"]*"' "$latest_audit" | head -1 | sed 's/.*:"//;s/"$//')
critical=$(grep -o '"critical_issues"[[:space:]]*:[[:space:]]*[0-9]*' "$latest_audit" | head -1 | sed 's/.*://')
echo ""
echo "最新审计健康度:"
echo " 整体: ${health_val:-unknown}"
echo " 关键问题: ${critical:-0}"
if [[ "$health_val" == "poor" ]] || [[ -n "$critical" ]] && [[ "$critical" -gt 0 ]] 2>/dev/null; then
echo " ⚠ 页面健康度需关注"
fi
fi
if [[ -f "$TASK_DIR/ui-verify-baseline.json" ]]; then
baseline_ts=$(grep -o '"timestamp"[[:space:]]*:[[:space:]]*"[^"]*"' "$TASK_DIR/ui-verify-baseline.json" | head -1 | sed 's/.*:"//;s/"$//')
echo ""
echo "基线: 已保存 (${baseline_ts:-unknown})"
else
echo ""
echo "基线: 未建立(运行 /sns-workflow:ui-verify --snapshot)"
fi
fi
步骤 3: 成功率与重试统计
TASK_DIR="$ROOT/.snsplay/task"
echo ""
echo "=== 成功率与重试 ==="
if [[ -f "$TASK_DIR/impl-result.json" ]]; then
impl_status=$(grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' "$TASK_DIR/impl-result.json" | head -1 | sed 's/.*:"//;s/"$//')
echo "实现结果: $impl_status"
test_pass=$(grep -o '"tests_passed"[[:space:]]*:[[:space:]]*[0-9]*' "$TASK_DIR/impl-result.json" | head -1 | sed 's/.*://')
test_fail=$(grep -o '"tests_failed"[[:space:]]*:[[:space:]]*[0-9]*' "$TASK_DIR/impl-result.json" | head -1 | sed 's/.*://')
[[ -n "$test_pass" ]] && echo " 测试通过: $test_pass"
[[ -n "$test_fail" ]] && echo " 测试失败: $test_fail"
else
echo "实现结果: 无"
fi
echo ""
echo "修订轮次:"
for manifest in "$TASK_DIR"/user-story/manifest.json "$TASK_DIR"/plan/manifest.json; do
if [[ -f "$manifest" ]]; then
rev=$(grep -o '"revision_number"[[:space:]]*:[[:space:]]*[0-9]*' "$manifest" | head -1 | sed 's/.*://')
name=$(basename "$(dirname "$manifest")")
echo " $name: ${rev:-0} 轮"
fi
done
impl_steps=$(ls "$TASK_DIR"/impl-steps/impl-step-*.json 2>/dev/null | wc -l | tr -d ' ')
echo " 实现步骤: $impl_steps"
步骤 4: 调试日志分析
LOG_DIR="$ROOT/.snsplay"
LOG_FILE="$LOG_DIR/sns-workflow.log"
echo ""
echo "=== 调试日志 ==="
if [[ ! -f "$LOG_FILE" ]]; then
echo "日志文件不存在(debug 模式未启用)"
echo "启用: 在 ~/.snsplay/config.json 设置 { \"debug\": true }"
else
log_size=$(wc -c < "$LOG_FILE" | tr -d ' ')
log_lines=$(wc -l < "$LOG_FILE" | tr -d ' ')
echo "日志大小: $((log_size / 1024)) KB"
echo "日志行数: $log_lines"
echo ""
echo "事件分布:"
grep -o '\[[^]]*\]' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | while read count event; do
echo " $event: $count"
done
error_count=$(grep -c 'error\|FAILED\|Error' "$LOG_FILE" 2>/dev/null || echo "0")
if [[ "$error_count" -gt 0 ]]; then
echo ""
echo "错误 ($error_count 项):"
grep 'error\|FAILED\|Error' "$LOG_FILE" 2>/dev/null | tail -5 | while IFS= read -r line; do
echo " $line"
done
fi
fi
步骤 4.5: 技能执行日志分析
EXEC_LOG_DIR="${HOME}/.sns-workflow"
EXEC_LOG_FILE="$EXEC_LOG_DIR/skill-executions.log"
echo ""
echo "=== 技能执行日志 ==="
if [[ ! -f "$EXEC_LOG_FILE" ]]; then
echo "技能执行日志不存在(首次执行技能后将生成)"
else
exec_lines=$(wc -l < "$EXEC_LOG_FILE" | tr -d ' ')
exec_size=$(du -h "$EXEC_LOG_FILE" | cut -f1)
echo "日志大小: $exec_size ($exec_lines 条记录)"
echo ""
echo "技能调用排行:"
grep '"action":"start"' "$EXEC_LOG_FILE" 2>/dev/null | grep -o '"skill":"[^"]*"' | sed 's/"skill":"//;s/"//' | sort | uniq -c | sort -rn | head -10 | while read count skill; do
success_count=$(grep "\"skill\":\"$skill\"" "$EXEC_LOG_FILE" 2>/dev/null | grep -c '"action":"end"')
error_count=$(grep "\"skill\":\"$skill\"" "$EXEC_LOG_FILE" 2>/dev/null | grep -c '"action":"error"')
echo " $skill: 调用 $count 次 (成功 $success_count, 错误 $error_count)"
done
recent_errors=$(grep '"action":"error"' "$EXEC_LOG_FILE" 2>/dev/null | tail -3)
if [[ -n "$recent_errors" ]]; then
echo ""
echo "最近错误:"
echo "$recent_errors" | while IFS= read -r err_line; do
err_skill=$(echo "$err_line" | grep -o '"skill":"[^"]*"' | sed 's/"skill":"//;s/"//')
err_msg=$(echo "$err_line" | grep -o '"error":"[^"]*"' | sed 's/"error":"//;s/"//')
echo " [$err_skill] $err_msg"
done
fi
fi
步骤 5: CLI 错误追踪
TRACE_FILE="$ROOT/.snsplay/task/cli_trace.log"
echo ""
echo "=== CLI 错误追踪 ==="
if [[ ! -f "$TRACE_FILE" ]]; then
echo "无 CLI 追踪记录"
else
trace_lines=$(wc -l < "$TRACE_FILE" | tr -d ' ')
echo "追踪记录: $trace_lines 行"
auth_errors=$(grep -ci 'auth\|login\|credential' "$TRACE_FILE" 2>/dev/null || echo "0")
timeout_errors=$(grep -ci 'timeout\|timed out\|ETIMEDOUT' "$TRACE_FILE" 2>/dev/null || echo "0")
not_found=$(grep -ci 'not found\|ENOENT\|command not found' "$TRACE_FILE" 2>/dev/null || echo "0")
terminal_errors=$(grep -ci 'stdin is not a terminal\|TTY' "$TRACE_FILE" 2>/dev/null || echo "0")
echo ""
echo "错误分类:"
[[ "$auth_errors" -gt 0 ]] && echo " 认证错误: $auth_errors"
[[ "$timeout_errors" -gt 0 ]] && echo " 超时: $timeout_errors"
[[ "$not_found" -gt 0 ]] && echo " 命令未找到: $not_found"
[[ "$terminal_errors" -gt 0 ]] && echo " 终端错误: $terminal_errors"
if [[ "$auth_errors" -eq 0 ]] && [[ "$timeout_errors" -eq 0 ]] && [[ "$not_found" -eq 0 ]] && [[ "$terminal_errors" -eq 0 ]]; then
echo " 无分类错误"
fi
fi
步骤 6: 输出汇总报告
echo ""
echo "=== 可观测性汇总 ==="
health="healthy"
issues=""
if [[ -f "$TASK_DIR/impl-result.json" ]]; then
impl_status=$(grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' "$TASK_DIR/impl-result.json" | head -1 | sed 's/.*:"//;s/"$//')
[[ "$impl_status" == "failed" ]] && health="degraded" && issues="$issues 实现失败\n"
[[ "$impl_status" == "partial" ]] && health="warning" && issues="$issues 部分实现\n"
fi
if [[ -f "$TASK_DIR/review-findings-to-fix.json" ]]; then
health="warning"
issues="$issues 待修复审核问题\n"
fi
latest_review=$(ls -t "$TASK_DIR"/review-*.json 2>/dev/null | head -1)
if [[ -n "$latest_review" ]]; then
review_status=$(grep -o '"overall_status"[[:space:]]*:[[:space:]]*"[^"]*"' "$latest_review" | head -1 | sed 's/.*:"//;s/"$//')
[[ "$review_status" == "needs_changes" ]] && health="warning" && issues="$issues 最新审查待修复\n"
fi
latest_verify=$(ls -t "$TASK_DIR"/ui-verify-*.json 2>/dev/null | xargs grep -l '"mode"[[:space:]]*:[[:space:]]*"verify"' 2>/dev/null | head -1)
if [[ -n "$latest_verify" ]] && [[ -f "$latest_verify" ]]; then
sev=$(grep -o '"severity"[[:space:]]*:[[:space:]]*"[^"]*"' "$latest_verify" | head -1 | sed 's/.*:"//;s/"$//')
[[ "$sev" == "significant" ]] && health="degraded" && issues="$issues UI 差异严重 (significant)\n"
[[ "$sev" == "moderate" ]] && [[ "$health" == "healthy" ]] && health="warning" && issues="$issues UI 差异中等 (moderate)\n"
fi
latest_audit=$(ls -t "$TASK_DIR"/ui-verify-*.json 2>/dev/null | xargs grep -l '"mode"[[:space:]]*:[[:space:]]*"audit"' 2>/dev/null | head -1)
if [[ -n "$latest_audit" ]] && [[ -f "$latest_audit" ]]; then
audit_health=$(grep -o '"overall_health"[[:space:]]*:[[:space:]]*"[^"]*"' "$latest_audit" | head -1 | sed 's/.*:"//;s/"$//')
[[ "$audit_health" == "poor" ]] && health="degraded" && issues="$issues 页面审计健康度差 (poor)\n"
[[ "$audit_health" == "fair" ]] && [[ "$health" == "healthy" ]] && health="warning" && issues="$issues 页面审计健康度一般 (fair)\n"
fi
BASELINE="$TASK_DIR/drift-baseline.json"
if [[ -f "$BASELINE" ]]; then
drift_score=$(grep -o '"total_score"[[:space:]]*:[[:space:]]*[0-9]*' "$BASELINE" | head -1 | sed 's/.*://' || echo "")
drift_grade=$(grep -o '"grade"[[:space:]]*:[[:space:]]*"[^"]*"' "$BASELINE" | head -1 | sed 's/.*:"//;s/"$//' || echo "")
if [[ -n "$drift_grade" ]] && [[ "$drift_grade" == "D" ]]; then
health="degraded" && issues="$issues 漂移扫描等级 D (${drift_score:-?}/100)\n"
fi
fi
if [[ -f "$TRACE_FILE" ]]; then
trace_errors=$(grep -ci 'error\|failed' "$TRACE_FILE" 2>/dev/null || echo "0")
[[ "$trace_errors" -gt 10 ]] && health="degraded" && issues="$issues CLI 错误过多 ($trace_errors)\n"
fi
echo "系统状态: $health"
if [[ -n "$issues" ]]; then
echo "问题:"
echo -e "$issues"
else
echo " 无问题"
fi
echo "数据目录: $TASK_DIR"
echo "日志文件: ${LOG_FILE:-未启用}"
sns_skill_end "success"