with one click
word-analysis
Word (.docx/.doc) 文档全量解析。覆盖:正文/段落文本提取、表格数据提取、高亮/颜色格式读取、多文件汇总对比、嵌入图片转 caption。
Menu
Word (.docx/.doc) 文档全量解析。覆盖:正文/段落文本提取、表格数据提取、高亮/颜色格式读取、多文件汇总对比、嵌入图片转 caption。
Standard and fast PPT pipeline. All LLM / VLM / T2I calls are wrapped in a single CLI entry (scripts/run_stage.py). The main agent's job is simple: emit ONE shell command per stage, never write loops, never write prompts. Standard mode plans thoroughly with a style preview checkpoint, web research, and image search for polished, delivery-ready presentations. Fast mode builds a complete draft immediately with autonomous decisions, then provides structured refinement suggestions so the user can iterate quickly. Supports AI-generated infographics (U1) for diagrams and flowcharts, web image search (Serper) for real photos, and ECharts for data charts.
PDF 文档解析。自动区分文字型 PDF 与扫描型 PDF,覆盖:文本/表格提取、多页全量扫描、嵌入图表 caption、单位感知数值计算。
PPT (.pptx/.ppt) 全量解析。覆盖:所有 slide 文本/表格/图表提取、嵌入图片 caption、纯图片 slide 渲染识别、数据标签提取。
Word / PDF / PPT 文档解析与数据分析引擎。覆盖三类文件格式的全量提取、表格数值化、图表理解与跨文档汇总分析。**遇到以下任一情况就主动使用本 skill**:①用户上传或指定了 .docx / .doc / .pdf / .pptx / .ppt 文件并要求分析、提取或统计其中内容;②用户出现触发词:Word分析 / PDF解析 / PPT提取 / 文档分析 / 报告解析 / 幻灯片分析 / 发票提取 / 合同分析 / 文档统计 / 错别字 / 语病 / 字号检查 / 简历分析 / 多文档对比;③任务涉及从文档中提取表格、数值、图表、格式(颜色/高亮/字号)、组织架构、时间线等结构化信息。仅不用于:Excel/CSV 数据分析(使用 sn-da-excel-workflow)、纯图片分析(使用 sn-da-image-caption)。
Creative-mode PPT pipeline. One full-page 16:9 PNG per slide. LLM / VLM calls go through sn-ppt-standard/lib/model_client.py (shared thin client). Text-to-image (the actual png rendering) goes through sn-image-base/scripts/sn_agent_runner.py. Falls back to web image search when T2I generation fails. Expects task_pack.json + info_pack.json already written by sn-ppt-entry.
Entry point for PPT generation. Asks the user to choose a mode (fast, standard, or creative), then collects role / audience / scene / page_count as needed. For standard mode, also asks how images should be sourced (AI generation, web search, or none) and whether charts should use AI-generated infographics or ECharts. Parses uploaded pdf/docx/md/txt files, produces task_pack.json + info_pack.json in a new deck_dir, then dispatches to sn-ppt-creative or sn-ppt-standard. Fast mode skips optional questions and gets straight to building. Use when the user asks to make a PPT / presentation / 演示 / PPT.
| name | word-analysis |
| description | Word (.docx/.doc) 文档全量解析。覆盖:正文/段落文本提取、表格数据提取、高亮/颜色格式读取、多文件汇总对比、嵌入图片转 caption。 |
from docx import Document
import os
# python-docx is available; for .doc (old format) convert via libreoffice first
def load_doc(path):
    """Open a Word document, converting a legacy ``.doc`` file to ``.docx`` first.

    Args:
        path: Path to a ``.docx`` or ``.doc`` file.

    Returns:
        The ``Document`` object built from the (possibly converted) file.

    Raises:
        subprocess.CalledProcessError: If the LibreOffice conversion exits
            with a non-zero status.
    """
    if path.lower().endswith('.doc'):
        import subprocess

        # dirname() is '' for a bare filename; resolve to an absolute directory
        # so LibreOffice never receives an empty --outdir argument.
        out_dir = os.path.dirname(os.path.abspath(path))
        subprocess.run(
            ['libreoffice', '--headless', '--convert-to', 'docx', '--outdir', out_dir, path],
            check=True, capture_output=True
        )
        # The converted file is written to out_dir under the same stem.
        stem = os.path.splitext(os.path.basename(path))[0]
        path = os.path.join(out_dir, stem + '.docx')
    return Document(path)
def extract_full_text(doc_path):
    """Extract all text (paragraphs and table cells) in document body order.

    Args:
        doc_path: Path to a ``.docx`` or ``.doc`` file (opened via ``load_doc``).

    Returns:
        A single string with one line per non-empty paragraph and one line per
        non-empty table row; cells within a row are joined with tabs.
    """
    # Imported once up front instead of on every loop iteration.
    from docx.table import Table
    from docx.text.paragraph import Paragraph

    doc = load_doc(doc_path)
    lines = []
    # Walk the body's direct children so paragraphs and tables keep their
    # relative order (doc.paragraphs alone would skip the tables).
    for block in doc.element.body:
        tag = block.tag.split('}')[-1]  # drop the XML namespace prefix
        if tag == 'p':
            text = Paragraph(block, doc).text.strip()
            if text:
                lines.append(text)
        elif tag == 'tbl':
            for row in Table(block, doc).rows:
                row_text = '\t'.join(cell.text.strip() for cell in row.cells)
                # A row of empty cells is only tabs; skip it.
                if row_text.strip():
                    lines.append(row_text)
    return '\n'.join(lines)
# Usage: extract the full text of one document and print a short preview
text = extract_full_text("/mnt/data/doc.docx")
print(text[:2000]) # preview first 2000 chars
import pandas as pd
def extract_all_tables(doc_path):
    """Extract every table of a Word document as a pandas DataFrame.

    The first row of each table is always used as the column header. Rows
    with fewer cells than the widest row are padded so that irregular tables
    do not make the DataFrame constructor fail.

    Args:
        doc_path: Path to a ``.docx`` or ``.doc`` file (opened via ``load_doc``).

    Returns:
        A list of ``(table_index, DataFrame)`` tuples; tables without any
        rows are skipped. A shape summary and the first three rows of each
        table are printed as a side effect.
    """
    doc = load_doc(doc_path)
    tables = []
    for i, tbl in enumerate(doc.tables):
        rows = [[cell.text.strip() for cell in row.cells] for row in tbl.rows]
        if not rows:
            continue
        width = max(len(r) for r in rows)
        # Give padded header positions a positional name; pad data with ''.
        header = rows[0] + [f"col_{j}" for j in range(len(rows[0]), width)]
        body = [r + [''] * (width - len(r)) for r in rows[1:]]
        df = pd.DataFrame(body, columns=header)
        tables.append((i, df))
        print(f"Table {i}: {df.shape[0]} rows × {df.shape[1]} cols")
        print(df.head(3))
    return tables
# Usage: collect the (table_index, DataFrame) pairs of one document
tables = extract_all_tables("/mnt/data/doc.docx")
Some questions require reading cell background color or text highlight color (e.g., "标黄的行", "红色文字"). Use XML-level access:
from docx import Document
from docx.oxml.ns import qn
from lxml import etree
def get_paragraph_highlight(para):
    """Return the ``w:highlight`` value of the first run that has one, else None."""
    for current_run in para.runs:
        run_props = current_run._r.find(qn('w:rPr'))
        if run_props is None:
            continue
        highlight = run_props.find(qn('w:highlight'))
        if highlight is None:
            continue
        # Value is the highlight colour name, e.g. 'yellow', 'cyan', 'red'
        return highlight.get(qn('w:val'))
    return None
def get_table_cell_shading(cell):
    """Return the ``w:fill`` attribute of the cell's ``w:shd`` element, or None.

    None is returned when the cell has no ``w:tcPr`` or no ``w:shd`` child.
    """
    cell_props = cell._tc.find(qn('w:tcPr'))
    shading = None if cell_props is None else cell_props.find(qn('w:shd'))
    if shading is None:
        return None
    # Fill is a hex colour string, e.g. 'FFFF00'
    return shading.get(qn('w:fill'))
# Example: find all highlighted paragraphs
def find_highlighted_rows(doc_path, color='yellow'):
    """Return ``(paragraph_index, text)`` for paragraphs highlighted in *color*.

    When *color* is 'yellow', the value 'FFFF00' is accepted as well.
    """
    document = load_doc(doc_path)
    yellow_aliases = ('yellow', 'FFFF00')

    def _is_wanted(found):
        if found == color:
            return True
        return color == 'yellow' and found in yellow_aliases

    return [
        (position, paragraph.text)
        for position, paragraph in enumerate(document.paragraphs)
        if _is_wanted(get_paragraph_highlight(paragraph))
    ]
# For table cells with yellow background:
def find_highlighted_table_cells(doc_path, fill_colors=('FFFF00', 'FFD700')):
    """Find table cells whose background fill matches one of *fill_colors*.

    Matching is case-insensitive on both sides, and a leading '#' in the
    requested colours is ignored, so 'ffff00' and '#FFFF00' both match a
    cell filled with 'FFFF00'.

    Args:
        doc_path: Path to a ``.docx`` or ``.doc`` file (opened via ``load_doc``).
        fill_colors: Iterable of hex colour strings to look for.

    Returns:
        A list of dicts with keys 'table', 'row', 'col' (zero-based indices),
        'color' (the fill value as stored in the document) and 'text'.
    """
    doc = load_doc(doc_path)
    # Normalise once, outside the loops; a set gives O(1) membership tests.
    wanted = {c.lstrip('#').upper() for c in fill_colors}
    results = []
    for t_idx, tbl in enumerate(doc.tables):
        for r_idx, row in enumerate(tbl.rows):
            for c_idx, cell in enumerate(row.cells):
                color = get_table_cell_shading(cell)
                if color and color.upper() in wanted:
                    results.append({
                        'table': t_idx, 'row': r_idx, 'col': c_idx,
                        'color': color, 'text': cell.text.strip()
                    })
    return results
When the user asks about "these files" or the input is a directory:
def process_all_docs(file_list, extractor_fn):
    """Run *extractor_fn* on each path and collect the successful results.

    A file whose extraction raises is reported on stdout and left out of the
    returned list; processing continues with the next file.

    Returns:
        A list of ``{'file': <basename>, 'data': <extractor result>}`` dicts,
        in input order.
    """
    collected = []
    for doc_path in file_list:
        name = os.path.basename(doc_path)
        print(f"\n=== Processing: {name} ===")
        try:
            data = extractor_fn(doc_path)
        except Exception as e:
            # Best effort: one unreadable file must not stop the batch.
            print(f" ERROR: {e}")
            continue
        collected.append({'file': name, 'data': data})
    return collected
# Example: extract text from every Word file (.docx / .doc) in a list of paths
# NOTE(review): `all_files` is not defined in this snippet; presumably a list of
# file paths built by the caller (e.g. from a directory listing). Confirm.
doc_files = [f for f in all_files if f.lower().endswith(('.docx', '.doc'))]
results = process_all_docs(doc_files, extract_full_text)
When a Word doc contains embedded images (charts, screenshots):
import zipfile, io, subprocess, json
# Path of the caption script that is run once per extracted image.
# NOTE(review): "/path/to/skills" looks like a placeholder; point it at the real install location.
CAPTION = "/path/to/skills/sn-da-image-caption/scripts/caption.py"
def extract_and_caption_images(doc_path, prompt=None):
    """Extract the images embedded in a .docx file and caption each one.

    A .docx file is a ZIP archive whose images live under ``word/media/``.
    Each image is written to a private temporary directory (removed on
    return) and passed to the caption script at ``CAPTION``. A failure on one
    image (non-zero exit, timeout, unparsable output) is reported on stdout
    and does not stop the remaining images.

    Args:
        doc_path: Path to a ``.docx`` file.
        prompt: Optional prompt forwarded to the caption script via ``--prompt``.

    Returns:
        A list of ``{'image': <archive member name>, 'caption': <text>}``
        dicts for the images that were captioned successfully.
    """
    import tempfile

    image_exts = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.wmf', '.emf'}
    results = []
    # A per-call temp directory avoids name clashes between documents that
    # both contain e.g. image1.png, and guarantees cleanup.
    with zipfile.ZipFile(doc_path, 'r') as z, tempfile.TemporaryDirectory() as tmp_dir:
        for media in z.namelist():
            if not media.startswith('word/media/'):
                continue
            ext = os.path.splitext(media)[-1].lower()
            if ext not in image_exts:
                continue
            tmp_path = os.path.join(tmp_dir, os.path.basename(media))
            with z.open(media) as src, open(tmp_path, 'wb') as dst:
                dst.write(src.read())
            cmd = ["python3", CAPTION, tmp_path, "--json"]
            if prompt:
                cmd += ["--prompt", prompt]
            try:
                r = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            except subprocess.TimeoutExpired:
                print(f" {media}: caption failed — timed out")
                continue
            if r.returncode != 0:
                print(f" {media}: caption failed — {r.stderr[:80]}")
                continue
            try:
                desc = json.loads(r.stdout).get("description", "") or ""
            except (json.JSONDecodeError, AttributeError):
                # Output was not a JSON object; treat as a failed caption.
                print(f" {media}: caption failed — {r.stdout[:80]}")
                continue
            results.append({'image': media, 'caption': desc})
            print(f" {media}: {desc[:100]}...")
    return results
from docx.shared import Pt
def _style_font_size_pt(style):
    """Return the first explicit font size (pt) on *style* or its base styles, else None."""
    while style is not None:
        size = style.font.size
        if size is not None:
            return size.pt
        style = style.base_style
    return None


def check_font_sizes(doc_path):
    """Report the font size of every run in the document's body paragraphs.

    The size is resolved in this order: the run's own formatting, the run's
    character style (including its base styles), then the paragraph's style
    (including its base styles). Document-wide defaults are not consulted,
    so ``size_pt`` may still be None.

    Args:
        doc_path: Path to a ``.docx`` or ``.doc`` file (opened via ``load_doc``).

    Returns:
        A list with one ``{'para', 'text', 'size_pt'}`` dict per run: the
        paragraph index, the first 30 characters of the run text, and the
        resolved size in points (or None).
    """
    doc = load_doc(doc_path)
    issues = []
    for i, para in enumerate(doc.paragraphs):
        # Resolved once per paragraph; shared by all runs that lack a size.
        para_size_pt = _style_font_size_pt(para.style)
        for run in para.runs:
            size = run.font.size
            if size is not None:
                size_pt = size.pt
            else:
                size_pt = _style_font_size_pt(run.style)
                if size_pt is None:
                    size_pt = para_size_pt
            issues.append({'para': i, 'text': run.text[:30], 'size_pt': size_pt})
    return issues
def find_keyword(doc_path, keyword):
    """Print the first occurrence of *keyword* in the document with context.

    On a hit, prints the position plus 100 characters before and 200 after
    the start of the match. On a miss, prints a hint and then each
    whitespace-separated word of *keyword* that does occur in the text
    (exact, case-sensitive substring check). Nothing is returned.
    """
    full_text = extract_full_text(doc_path)
    position = full_text.find(keyword)
    if position < 0:
        print(f"'{keyword}' not found. Try broader search.")
        # Fall back to the individual words of the keyword
        for kw in (word for word in keyword.split() if word in full_text):
            print(f" Partial match for '{kw}'")
        return
    window_start = max(0, position - 100)
    context = full_text[window_start:position + 200]
    print(f"Found '{keyword}' at pos {position}:\n{context}")
| Pitfall | Fix |
|---|---|
| Only read doc.paragraphs, miss tables | Use the body-order iterator in Method 1 |
| Single file when input is multi-file | Check os.path.isdir(), iterate all |
| Highlighted cells not detected | Use XML-level w:shd / w:highlight (Method 3) |
| .doc format fails to open | Convert to .docx via libreoffice (Method 0) |
| Embedded charts look empty | Extract images from ZIP, caption each (Method 5) |
| Font size is None | Check both run-level and style-level (Method for font check) |