with one click
spreadsheet-source-validated
Execute Python scripts for spreadsheet operations with mandatory source data validation and fallback protocols
Menu
Execute Python scripts for spreadsheet operations with mandatory source data validation and fallback protocols
Delegate tasks to OpenSpace — a full-stack autonomous worker for coding, DevOps, web research, and desktop automation, backed by an extensive MCP tool and skill library. Skills auto-improve through use, reducing token consumption over time. A cloud community lets agents share and collectively evolve reusable skills.
Incremental audio production with duration mismatch handling, adaptive stem extension, and pre-mix alignment verification
Audio production with diagnostic analysis, timecode parsing from documents, and verified export workflow
Incremental audio production with duration alignment handling, per-stem verification, and adaptive extension strategies
Step-by-step audio production with per-stem verification, timing alignment, and incremental quality gates
End-to-end audio production workflow with stems, effects, archiving, and verification
| name | spreadsheet-source-validated |
| description | Execute Python scripts for spreadsheet operations with mandatory source data validation and fallback protocols |
CRITICAL: Before attempting any spreadsheet operations, validate that your source data is accessible and complete.
import os
import sys
from pathlib import Path
def validate_source_data(source_paths):
"""Validate all required data sources before processing."""
missing = []
inaccessible = []
for path in source_paths:
p = Path(path)
if not p.exists():
missing.append(str(path))
elif not os.access(path, os.R_OK):
inaccessible.append(str(path))
if missing:
print(f"ERROR: Missing data sources: {', '.join(missing)}", file=sys.stderr)
return False
if inaccessible:
print(f"ERROR: Inaccessible data sources: {', '.join(inaccessible)}", file=sys.stderr)
return False
print(f"VALIDATED: {len(source_paths)} source(s) available")
return True
# Usage
sources = ['input.xlsx', 'reference_data.csv']
if not validate_source_data(sources):
sys.exit(1)
import pandas as pd
from openpyxl import load_workbook
def validate_spreadsheet_integrity(file_path, required_sheets=None, required_columns=None):
"""Check spreadsheet structure before processing."""
try:
# Check file is readable
wb = load_workbook(file_path, read_only=True)
# Verify required sheets exist
if required_sheets:
missing_sheets = [s for s in required_sheets if s not in wb.sheetnames]
if missing_sheets:
print(f"ERROR: Missing sheets: {missing_sheets}", file=sys.stderr)
wb.close()
return False
# Verify required columns (sample first sheet)
if required_columns:
ws = wb.active
headers = [cell.value for cell in ws[1]]
missing_cols = [c for c in required_columns if c not in headers]
if missing_cols:
print(f"ERROR: Missing columns: {missing_cols}", file=sys.stderr)
wb.close()
return False
wb.close()
print(f"VALIDATED: Spreadsheet structure OK")
return True
except Exception as e:
print(f"ERROR: Cannot validate spreadsheet: {str(e)}", file=sys.stderr)
return False
For data retrieved from APIs, websites, or external services:
import requests
from urllib3.exceptions import SSLError, MaxRetryError
def validate_external_source(url, timeout=30, max_retries=2):
"""Verify external data source is accessible."""
for attempt in range(max_retries + 1):
try:
response = requests.head(url, timeout=timeout, allow_redirects=True)
if response.status_code == 200:
print(f"VALIDATED: External source accessible ({url})")
return True
else:
print(f"WARNING: External source returned {response.status_code}", file=sys.stderr)
except (SSLError, MaxRetryError, requests.ConnectionError) as e:
if attempt == max_retries:
print(f"ERROR: External source inaccessible after {max_retries + 1} attempts: {url}", file=sys.stderr)
return False
print(f"RETRY {attempt + 1}/{max_retries}: {str(e)}", file=sys.stderr)
return False
Use validated Python execution for spreadsheet operations when:
openpyxl, pandas, or similar librariesWhen primary data sources are inaccessible:
ALTERNATIVE_SOURCES = {
'epa_water_data': [
'https://dataservices.epa.illinois.gov/swap', # Primary
'https://www.epa.gov/safewater/data-and-reports', # Federal fallback
'https://waterdata.usgs.gov/nwis', # USGS fallback
],
'financial_data': [
'internal_database.xlsx', # Primary
'backup_financial_data.csv', # Local backup
'request_from_stakeholder', # Manual acquisition
]
}
def try_alternative_sources(source_key):
"""Iterate through alternative sources until one succeeds."""
alternatives = ALTERNATIVE_SOURCES.get(source_key, [])
for i, source in enumerate(alternatives):
print(f"Attempting alternative {i + 1}/{len(alternatives)}: {source}")
if source.startswith('http'):
if validate_external_source(source):
return source
else:
if Path(source).exists():
print(f"SUCCESS: Alternative source found: {source}")
return source
print(f"ERROR: All alternatives exhausted for {source_key}", file=sys.stderr)
return None
def report_data_access_failure(source, error_type, alternatives_tried=0):
"""Standardized error reporting for data access failures."""
error_report = {
'timestamp': datetime.now().isoformat(),
'source': source,
'error_type': error_type,
'alternatives_tried': alternatives_tried,
'action_required': 'Manual data acquisition or source configuration update'
}
print("=" * 60, file=sys.stderr)
print("DATA ACCESS FAILURE REPORT", file=sys.stderr)
print("=" * 60, file=sys.stderr)
for key, value in error_report.items():
print(f"{key}: {value}", file=sys.stderr)
print("=" * 60, file=sys.stderr)
return error_report
The shell_agent tool can:
Direct run_shell with Python is more reliable because it:
.py files first avoids shell_agent parsing issues with heredocs# Step 1: Write validation script
cat > validate_sources.py << 'EOF'
import sys
from pathlib import Path
sources = ['input.xlsx', 'config.json']
missing = [s for s in sources if not Path(s).exists()]
if missing:
print(f"BLOCKED: Missing sources: {missing}", file=sys.stderr)
sys.exit(1)
print("VALIDATED: All sources available")
EOF
# Step 2: Run validation
python3 validate_sources.py || exit 1
# Step 3: Write processing script
cat > process_spreadsheet.py << 'EOF'
import openpyxl
# Your spreadsheet code here
EOF
# Step 4: Execute processing
python3 process_spreadsheet.py
# Step 5: Clean up (optional)
rm validate_sources.py
#!/usr/bin/env python3
"""
Complete workflow: Validate -> Process -> Report
"""
import sys
from pathlib import Path
from datetime import datetime
from openpyxl import load_workbook
def main():
# PHASE 1: Validation
print(f"[{datetime.now().isoformat()}] Starting validation...")
source_file = 'input_data.xlsx'
if not Path(source_file).exists():
print(f"ERROR: Source file not found: {source_file}", file=sys.stderr)
# Check for alternatives
for alt in ['backup_input.xlsx', 'data_backup.csv']:
if Path(alt).exists():
print(f"FALLBACK: Using alternative: {alt}")
source_file = alt
break
else:
print("ERROR: No alternative sources available", file=sys.stderr)
sys.exit(1)
# PHASE 2: Processing
print(f"[{datetime.now().isoformat()}] Processing {source_file}...")
try:
wb = load_workbook(source_file)
ws = wb.active
# Your operations here
for row in ws.iter_rows(min_row=2):
pass # Process data
wb.save('output.xlsx')
print(f"SUCCESS: Processed {ws.max_row - 1} rows")
except Exception as e:
print(f"ERROR: Processing failed: {str(e)}", file=sys.stderr)
sys.exit(1)
# PHASE 3: Cleanup
print(f"[{datetime.now().isoformat()}] Complete")
if __name__ == '__main__':
main()
.py file first, then execute via run_shell| Library | Best For |
|---|---|
openpyxl | Reading/writing .xlsx files, formatting, formulas |
pandas | Data manipulation, analysis, merging datasets |
xlrd | Reading older .xls files (read-only) |
xlsxwriter | Creating new .xlsx files with advanced formatting |
requests | Validating external API/data sources |
pathlib | Cross-platform file path validation |
Issue: Heredoc syntax fails with 'unknown error' when using shell_agent
.py file first, then execute it with python3 script.py. This pattern is significantly more reliable than inline heredoc execution when shell_agent is the executor.Issue: Source data validation fails
ls -laIssue: External data source inaccessible (SSL/proxy errors)
requests.get(url, verify=False)Issue: FileNotFoundError
Issue: PermissionError
Issue: MemoryError on large files
chunksize parameterIssue: Formatting not applying
.copy() for style objectsIssue: Data validation passes but processing fails