Manus에서 모든 스킬 실행
원클릭으로
원클릭으로
원클릭으로 Manus에서 모든 스킬 실행
시작하기prediction-accuracy
Review calibration metrics, accuracy trends, and model confidence levels
스타0
포크0
업데이트2026년 3월 23일 07:31
SKILL.md
readonly메뉴
Review calibration metrics, accuracy trends, and model confidence levels
Deep analysis of a specific segment including roles, topics, and past performance
Display high-level summary of audience segments, sizes, topics, and activity
Score 3 content variations and rank them with comparative analysis
Core skill for scoring a draft post with per-segment predictions and reasoning
| field | value |
| --- | --- |
| name | prediction-accuracy |
| description | Review calibration metrics, accuracy trends, and model confidence levels |
| version | 1.0.0 |
Purpose: Show how accurate predictions have become and identify where the model needs more data.
{
"min_predictions_for_calibration": "integer (optional, default 10)"
}
{
"overall_status": {
"total_scored_posts": 18,
"total_tracked_results": 4,
"overall_mae": 4.2,
"overall_rmse": 5.1,
"global_calibration": {
"slope": 0.98,
"intercept": 0.3
},
"confidence_level": "low",
"next_milestone": "Need 10 predictions per segment for medium confidence"
},
"confidence_by_segment": [
{
"segment": "Technical Leaders",
"predictions_tracked": 2,
"confidence": "low",
"mae": 3.1,
"rmse": 3.8,
"calibration_slope": 1.02,
"calibration_intercept": -0.5,
"interpretation": "Early stage. Have 2, need 8 more."
}
],
"topic_calibration_details": [
{
"topic": "AI/ML",
"predictions_tracked": 3,
"mae": 2.8,
"rmse": 3.1,
"segments_with_data": ["Technical Leaders", "Growth & Sales"],
"status": "building_confidence"
}
],
"calibration_matrix": {
"Technical Leaders": {
"AI/ML": {
"count": 2,
"mae": 3.1,
"status": "need_8_more"
}
}
},
"accuracy_trends": {
"posts_1_to_5": {
"mae": 5.2,
"improvement": "baseline"
},
"posts_6_to_10": {
"mae": 4.1,
"improvement": "+21%"
}
},
"build_plan": [
{
"priority": "high",
"recommendation": "Post 5+ more about Leadership + Executives",
"rationale": "0 tracked predictions. High affinity topic but no calibration."
}
],
"recommendations": [
"You're on track. After 10-15 total posts with tracked results, medium confidence kicks in.",
"Post about: Leadership (0 data), Growth Hacking (0 data), Founder Stories (0 data)"
]
}
def get_prediction_accuracy(min_predictions=10):
    """Review prediction calibration and accuracy.

    Opens a connection with ``get_db_connection()``, queries the
    ``predictions`` and ``prediction_accuracy`` tables, and assembles a
    report. Rows are read by column name, so the connection must return
    name-indexable rows.

    Args:
        min_predictions: Number of tracked predictions at which a segment,
            topic, or segment/topic pair is reported as confident.

    Returns:
        A dict with the keys ``overall_status``, ``confidence_by_segment``,
        ``topic_calibration_details``, ``calibration_matrix``,
        ``accuracy_trends``, ``build_plan`` and ``recommendations``. If any
        step raises, ``{'error': 'Error: <message>'}`` is returned instead.
        The connection is closed in both cases.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        # Step 1: Overall status
        cursor.execute('SELECT COUNT(*) as count FROM predictions')
        total_scored = cursor.fetchone()['count']
        cursor.execute(
            'SELECT COUNT(*) as count FROM predictions '
            'WHERE actual_reactions IS NOT NULL'
        )
        total_tracked = cursor.fetchone()['count']

        # Calculate overall MAE/RMSE. Only the mean squared error is computed
        # in SQL; the square root is taken in Python because SQL SQRT() is not
        # available in every SQLite build.
        cursor.execute('''
            SELECT
                AVG(ABS(predicted_reactions - actual_reactions)) as mae,
                AVG((predicted_reactions - actual_reactions)
                    * (predicted_reactions - actual_reactions)) as mse
            FROM predictions
            WHERE actual_reactions IS NOT NULL
        ''')
        accuracy_row = cursor.fetchone()
        # AVG() yields NULL when no prediction has a tracked result yet.
        overall_mae = accuracy_row['mae'] or 0.0
        overall_rmse = (accuracy_row['mse'] or 0.0) ** 0.5

        # Global calibration
        cursor.execute('''
            SELECT
                AVG(calibration_slope) as avg_slope,
                AVG(calibration_intercept) as avg_intercept
            FROM prediction_accuracy
            WHERE prediction_count >= ?
        ''', (min_predictions,))
        calib_row = cursor.fetchone()
        # Fall back to the identity calibration (slope 1, intercept 0) only
        # when there is no value at all; a computed slope of 0.0 is kept.
        global_slope = 1.0
        global_intercept = 0.0
        if calib_row:
            if calib_row['avg_slope'] is not None:
                global_slope = calib_row['avg_slope']
            if calib_row['avg_intercept'] is not None:
                global_intercept = calib_row['avg_intercept']

        # Overall confidence
        # NOTE(review): the divisors of 4 look like an assumed segment
        # count -- confirm against the segment configuration.
        if total_scored > 0:
            avg_predictions = total_tracked / max(4, total_scored / 4)
        else:
            avg_predictions = 0
        if avg_predictions < min_predictions / 4:
            confidence_level = 'low'
        elif avg_predictions < min_predictions / 2:
            confidence_level = 'medium'
        else:
            confidence_level = 'high'

        # Clamp at zero so the milestone never reports a negative count.
        remaining = max(0, min_predictions - (total_tracked // 4))
        overall_status = {
            'total_scored_posts': total_scored,
            'total_tracked_results': total_tracked,
            'overall_mae': round(overall_mae, 1),
            'overall_rmse': round(overall_rmse, 1),
            'global_calibration': {
                'slope': round(global_slope, 2),
                'intercept': round(global_intercept, 2)
            },
            'confidence_level': confidence_level,
            'next_milestone': f"Need {remaining} more tracked predictions per segment"
        }

        # Step 2: Confidence by segment
        # NOTE(review): the selected columns are not aggregated, so with
        # several rows per segment the values come from an arbitrary row of
        # the group -- confirm one row per segment or aggregate explicitly.
        cursor.execute('''
            SELECT segment_name, prediction_count, mean_absolute_error, rmse,
                   calibration_slope, calibration_intercept
            FROM prediction_accuracy
            GROUP BY segment_name
            ORDER BY prediction_count DESC
        ''')
        confidence_by_segment = []
        for row in cursor.fetchall():
            if row['prediction_count'] == 0:
                confidence = 'no_data'
            elif row['prediction_count'] < min_predictions / 2:
                confidence = 'low'
            elif row['prediction_count'] < min_predictions:
                confidence = 'medium'
            else:
                confidence = 'high'
            interpretation = get_segment_interpretation(
                row['prediction_count'], min_predictions
            )
            slope = row['calibration_slope']
            if slope is None:
                slope = 1.0
            confidence_by_segment.append({
                'segment': row['segment_name'],
                'predictions_tracked': row['prediction_count'],
                'confidence': confidence,
                'mae': round(row['mean_absolute_error'] or 0.0, 1),
                'rmse': round(row['rmse'] or 0.0, 1),
                'calibration_slope': round(slope, 2),
                'calibration_intercept': round(row['calibration_intercept'] or 0.0, 2),
                'interpretation': interpretation
            })

        # Step 3: Topic calibration details
        cursor.execute('''
            SELECT topic_name, prediction_count, mean_absolute_error, rmse
            FROM prediction_accuracy
            WHERE topic_name IS NOT NULL
            ORDER BY prediction_count DESC
            LIMIT 10
        ''')
        topic_calibration = []
        # Materialize the rows first: get_segments_for_topic() runs another
        # query on this same cursor.
        for row in cursor.fetchall():
            segments_with_data = get_segments_for_topic(cursor, row['topic_name'])
            status = 'need_data'
            if row['prediction_count'] >= min_predictions:
                status = 'high_confidence'
            elif row['prediction_count'] >= min_predictions / 2:
                status = 'medium_confidence'
            elif row['prediction_count'] > 0:
                status = 'building_confidence'
            topic_calibration.append({
                'topic': row['topic_name'],
                'predictions_tracked': row['prediction_count'],
                'mae': round(row['mean_absolute_error'] or 0.0, 1),
                'rmse': round(row['rmse'] or 0.0, 1),
                'segments_with_data': segments_with_data,
                'status': status
            })

        # Step 4: Calibration matrix
        cursor.execute('''
            SELECT segment_name, topic_name, prediction_count, mean_absolute_error
            FROM prediction_accuracy
            WHERE segment_name IS NOT NULL AND topic_name IS NOT NULL
            ORDER BY segment_name, prediction_count DESC
        ''')
        calibration_matrix = {}
        for row in cursor.fetchall():
            seg = row['segment_name']
            topic = row['topic_name']
            need_more = max(0, min_predictions - row['prediction_count'])
            calibration_matrix.setdefault(seg, {})[topic] = {
                'count': row['prediction_count'],
                'mae': round(row['mean_absolute_error'] or 0.0, 1),
                'status': f"need_{need_more}_more" if need_more > 0 else 'confident'
            }

        # Step 5: Accuracy trends
        # Order buckets by their earliest rowid. Sorting on the label itself
        # is lexicographic and would put 'posts_11_to_15' ahead of
        # 'posts_1_to_5', making a later bucket the baseline.
        cursor.execute('''
            SELECT
                CASE
                    WHEN rowid <= 5 THEN 'posts_1_to_5'
                    WHEN rowid <= 10 THEN 'posts_6_to_10'
                    WHEN rowid <= 15 THEN 'posts_11_to_15'
                    ELSE 'posts_16+'
                END as period,
                AVG(ABS(predicted_reactions - actual_reactions)) as mae
            FROM predictions
            WHERE actual_reactions IS NOT NULL
            GROUP BY period
            ORDER BY MIN(rowid)
        ''')
        trends = {}
        baseline_mae = None
        for row in cursor.fetchall():
            mae = round(row['mae'] or 0.0, 1)
            if baseline_mae is None:
                # The first (earliest) bucket is the reference point.
                baseline_mae = mae
                improvement = 'baseline'
            elif baseline_mae == 0:
                # A relative change against a perfect baseline is undefined.
                improvement = '+0%' if mae == 0 else 'n/a'
            else:
                pct = int((baseline_mae - mae) / baseline_mae * 100)
                # Signed format gives '+21%' for gains and '-5%' for regressions.
                improvement = f"{pct:+d}%"
            trends[row['period']] = {
                'mae': mae,
                'improvement': improvement
            }

        # Step 6: Build plan
        build_plan = generate_build_plan(cursor, min_predictions)

        return {
            'overall_status': overall_status,
            'confidence_by_segment': confidence_by_segment,
            'topic_calibration_details': topic_calibration,
            'calibration_matrix': calibration_matrix,
            'accuracy_trends': trends,
            'build_plan': build_plan,
            'recommendations': generate_recommendations(
                confidence_by_segment, total_tracked, min_predictions
            )
        }
    except Exception as e:
        # Skill boundary: report the failure as data instead of raising.
        return {'error': f'Error: {str(e)}'}
    finally:
        conn.close()
def get_segment_interpretation(count, min_needed):
    """Describe how far a segment is from the tracked-prediction target.

    Args:
        count: Number of tracked predictions the segment has so far.
        min_needed: Number of tracked predictions treated as high confidence.

    Returns:
        A one-sentence, human-readable status message.
    """
    if count == 0:
        return f"No data yet. Need {min_needed} predictions to build calibration."

    remaining = min_needed - count
    # Below half of the target counts as early stage.
    if count < min_needed / 2:
        return f"Early stage. Have {count}, need {remaining} more."
    if count < min_needed:
        return f"Getting there. Have {count}, need {remaining} more for high confidence."
    return f"High confidence! Predictions are reliable ({count} predictions tracked)."
def get_segments_for_topic(cursor, topic_name):
    """List the segments that have tracked predictions for a topic.

    Args:
        cursor: Open database cursor using ``?`` placeholders; its rows must
            be indexable by column name.
        topic_name: Topic to look up in ``prediction_accuracy``.

    Returns:
        The distinct ``segment_name`` values whose ``prediction_count`` for
        the topic is greater than zero.
    """
    query = (
        "\n"
        "SELECT DISTINCT segment_name\n"
        "FROM prediction_accuracy\n"
        "WHERE topic_name = ? AND prediction_count > 0\n"
    )
    cursor.execute(query, (topic_name,))

    segments = []
    for record in cursor.fetchall():
        segments.append(record['segment_name'])
    return segments
def generate_build_plan(cursor, min_predictions):
    """Generate a prioritized list of what to post next.

    Selects up to five ``prediction_accuracy`` rows whose
    ``prediction_count`` is below ``min_predictions``, least-covered first.

    Args:
        cursor: Open database cursor using ``?`` placeholders; its rows must
            be indexable by column name.
        min_predictions: Target number of tracked predictions per row.

    Returns:
        A list of dicts with the keys ``priority`` ('high', 'medium' or
        'low'), ``segment``, ``topic``, ``recommendation``, ``need_more`` and
        ``rationale``. Rows that name neither a segment nor a topic are
        skipped because they give nothing to act on.
    """
    cursor.execute('''
        SELECT segment_name, topic_name, prediction_count
        FROM prediction_accuracy
        WHERE prediction_count < ?
        ORDER BY prediction_count ASC
        LIMIT 5
    ''', (min_predictions,))

    plan = []
    for row in cursor.fetchall():
        segment = row['segment_name']
        topic = row['topic_name']
        tracked = row['prediction_count']
        need_more = min_predictions - tracked

        # Either name may be NULL; never render "None" into the advice text.
        if segment is not None and topic is not None:
            recommendation = f"Post about {topic} for {segment}"
        elif topic is not None:
            recommendation = f"Post about {topic}"
        elif segment is not None:
            recommendation = f"Post content targeting {segment}"
        else:
            continue

        # Thresholds scale with the target: at least 80% missing is high and
        # at least 50% missing is medium. With the default target of 10 these
        # are the previous fixed cut-offs of 8 and 5. Integer arithmetic
        # avoids float rounding at the boundaries.
        if need_more * 5 >= min_predictions * 4:
            priority = 'high'
            rationale = f"{tracked} tracked. High affinity topic but no calibration."
        elif need_more * 2 >= min_predictions:
            priority = 'medium'
            rationale = f"{tracked} tracked. Need more data for confidence."
        else:
            priority = 'low'
            rationale = f"{tracked} tracked. Almost there."

        plan.append({
            'priority': priority,
            'segment': segment,
            'topic': topic,
            'recommendation': recommendation,
            'need_more': need_more,
            'rationale': rationale
        })
    return plan
def generate_recommendations(confidence_by_segment, total_tracked, min_predictions):
    """Build the list of plain-language recommendations for the report.

    Args:
        confidence_by_segment: Per-segment dicts carrying at least the keys
            ``segment`` and ``predictions_tracked``.
        total_tracked: Number of predictions that have a tracked result.
        min_predictions: Tracked-result target used for the progress message.

    Returns:
        A list of strings: a progress headline, an optional note naming the
        segments with zero tracked predictions, and a timeline note.
    """
    # Headline depends on overall progress toward the target.
    if total_tracked < 5:
        headline = "You're in cold-start phase. Post 5+ more times and track results. Calibration will improve prediction accuracy."
    else:
        outstanding = min_predictions - total_tracked
        if outstanding > 0:
            headline = f"Good progress! After {outstanding} more tracked results, medium confidence kicks in."
        else:
            headline = "High confidence across most segments. Predictions are reliable."
    messages = [headline]

    # Identify segments that have no tracked predictions at all.
    uncovered = []
    for entry in confidence_by_segment:
        if entry['predictions_tracked'] == 0:
            uncovered.append(entry['segment'])
    if uncovered:
        names = ', '.join(uncovered)
        messages.append(f"Build calibration for: {names}. Post content targeting these segments.")

    messages.append("Timeline: 2 posts/week = medium confidence in 4-5 weeks, high confidence in 8+ weeks.")
    return messages