This adds the full Atlas (总助 Claw / 老板视角项目执行雷达) scaffolding as a sibling profile to the existing Vega general-purpose assistant. All Atlas content lives under atlas/ to keep the existing top-level skeleton intact.

What's included:
- atlas/IDENTITY.md, SOUL.md, USER.md, AGENTS.md, MEMORY.md, BOOTSTRAP.md, HEARTBEAT.md, TOOLS.md (+ zh-CN mirrors) — full OpenClaw 8-piece set matching the zero-cca convention
- atlas/skills/ — 6 sub-skills with frontmatter: claw-email-parser / claw-project-tracker / claw-people-observer / claw-customer-radar / claw-boss-distiller / claw-report-writer
- atlas/skills/claw-boss-distiller/ — adapter notes for nuwa-skill, 5-layer boss_skill seed template (23 rules across Expression DNA / Mental Models / Decision Heuristics / Anti-Patterns / Honest Boundaries), and a complete synthetic distillation demo (10 input emails -> validated 5-layer output)
- atlas/mcp-tools/email-extractor/ — Python implementation of stages 1-3 (fetch + decode + dequote), 7 pytest tests passing, CLI: atlas-extract
- atlas/state-schemas/ — formal JSON schemas for project / person / customer cards with the no-employee-rating hard constraint baked in
- atlas/client-deck/ — 2-page client-facing pitch document
- autopilots/atlas-*.yaml — 5 autopilot configs (daily / weekly / monthly / quarterly + andon event-triggered) for a future Multica-side scheduler

Notes:
- nuwa-skill (MIT, https://github.com/alchaincyf/nuwa-skill) NOT vendored; fetch at deploy time via instructions in atlas/skills/claw-boss-distiller/upstream/README.md
- Vega-side prompts/skills/tools/autopilots/docs scaffold left untouched
- Top-level README.md updated with a brief Atlas pointer; rest preserved
113 lines
3.9 KiB
Python
113 lines
3.9 KiB
Python
"""Command-line entry point.

Three modes:

    atlas-extract imap --host imap.gmail.com --user X --password Y --state-dir ./state
    atlas-extract eml --input ./fixtures/sample.eml --state-dir ./state
    atlas-extract dir --input-dir ./test-emails --state-dir ./state

Suitable for V0 dev + the demo flow. Production wraps this in an MCP server.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
from typing import Iterator
|
|
|
|
import click
|
|
|
|
from .fetch import FetchedRaw, fetch_imap
|
|
from .pipeline import run_on_raws
|
|
|
|
|
|
# Root click group for the CLI. The body is intentionally empty: the group only
# exists so subcommands can attach to it. The docstring is what click prints as
# the group's help text, so it is kept verbatim.
@click.group()
def main() -> None:
    """Atlas / 总助 Claw — email extractor V0 (Stages 1-3)."""
|
|
|
|
|
|
@main.command("imap")
@click.option("--host", required=True)
@click.option("--port", default=993, type=int)
@click.option("--user", "username", required=True, envvar="ATLAS_IMAP_USER")
@click.option("--password", required=True, envvar="ATLAS_IMAP_PASSWORD")
@click.option("--folder", "folders", multiple=True, default=["INBOX", "Sent"])
@click.option("--state-dir", required=True, type=click.Path(path_type=Path))
@click.option(
    "--since-days",
    default=365,
    type=int,
    help="On cold start, only pull messages newer than N days.",
)
@click.option("--max-per-run", default=5000, type=int)
def imap_cmd(host, port, username, password, folders, state_dir, since_days, max_per_run) -> None:
    """Pull from a real IMAP account, run stages 1-3, write JSON."""
    # The docstring above is the click --help text, so extra notes live here.
    #
    # --user / --password may also be supplied through the ATLAS_IMAP_USER and
    # ATLAS_IMAP_PASSWORD environment variables. --folder may be repeated and
    # defaults to INBOX plus Sent.

    # Timezone-aware cutoff: "now" in UTC minus the requested look-back window.
    cutoff = datetime.now(tz=timezone.utc) - timedelta(days=since_days)

    # Work with an absolute path and make sure it exists before fetching.
    state_root = state_dir.resolve()
    state_root.mkdir(parents=True, exist_ok=True)

    fetch_options = {
        "host": host,
        "port": port,
        "username": username,
        "password": password,
        # click hands over a tuple for multiple=True options; pass a list on.
        "folders": list(folders),
        "state_dir": state_root,
        "since": cutoff,
        "max_per_run": max_per_run,
    }
    fetched = fetch_imap(**fetch_options)

    # Run the pipeline over the fetched messages and print its summary as
    # pretty-printed JSON (non-ASCII characters are emitted unescaped).
    report = run_on_raws(fetched, state_root)
    click.echo(json.dumps(report, ensure_ascii=False, indent=2))
|
|
|
|
|
|
@main.command("eml")
@click.option(
    "--input",
    "eml_path",
    required=True,
    # dir_okay=False: a directory would pass exists=True and then blow up with
    # a traceback when the file is read; reject it as a usage error instead.
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option("--state-dir", required=True, type=click.Path(path_type=Path))
def eml_cmd(eml_path: Path, state_dir: Path) -> None:
    """Single .eml file → run stages 1-3."""
    # The docstring above is the click --help text, so extra notes live here.

    # Resolve to an absolute path and create the directory up front, the same
    # way the imap command does, so a fresh --state-dir works in every mode.
    state_dir = state_dir.resolve()
    state_dir.mkdir(parents=True, exist_ok=True)

    # Feed the single file through the shared local-file iterator, then print
    # the pipeline summary as pretty-printed JSON.
    raws = _eml_iter([eml_path])
    summary = run_on_raws(raws, state_dir)
    click.echo(json.dumps(summary, ensure_ascii=False, indent=2))
|
|
|
|
|
|
@main.command("dir")
@click.option("--input-dir", required=True, type=click.Path(exists=True, file_okay=False, path_type=Path))
@click.option("--state-dir", required=True, type=click.Path(path_type=Path))
def dir_cmd(input_dir: Path, state_dir: Path) -> None:
    """Directory of .eml/.txt files → run stages 1-3."""
    # The docstring above is the click --help text, so extra notes live here.

    # Resolve to an absolute path and create the directory up front, the same
    # way the imap command does, so a fresh --state-dir works in every mode.
    state_dir = state_dir.resolve()
    state_dir.mkdir(parents=True, exist_ok=True)

    # Recursive scan; suffix match is case-insensitive. Sorting makes the
    # processing order (and the 1-based uid assigned per file) deterministic.
    wanted_suffixes = {".eml", ".txt"}
    paths = sorted(
        p
        for p in input_dir.rglob("*")
        if p.is_file() and p.suffix.lower() in wanted_suffixes
    )

    raws = _eml_iter(paths)
    summary = run_on_raws(raws, state_dir)
    click.echo(json.dumps(summary, ensure_ascii=False, indent=2))
|
|
|
|
|
|
def _eml_iter(paths: list[Path]) -> Iterator[FetchedRaw]:
    """Yield one ``FetchedRaw`` per local file, in the order given.

    Each file is read as bytes. A ``.txt`` file whose content (ignoring
    leading whitespace) does not begin with a ``From:`` header gets a minimal
    synthetic header block prepended: From, To, a Subject taken from the file
    stem, and a Message-ID built from the file's 1-based position in *paths*.

    Every record uses folder ``"local"``, the 1-based position as ``uid``, the
    file's modification time (UTC) as ``internal_date``, and the account label
    from the ``ATLAS_LOCAL_ACCOUNT`` environment variable (default
    ``"local"``).

    Raises:
        OSError: if a file cannot be read or stat'ed.
    """
    # The account label is identical for every file, so look it up once.
    account = os.environ.get("ATLAS_LOCAL_ACCOUNT", "local")

    for seq, path in enumerate(paths, start=1):
        payload = path.read_bytes()

        # Header field names are case-insensitive, so "from:" / "FROM:" count
        # as real headers too; wrapping those again would bury the genuine
        # headers inside the message body.
        has_from_header = payload.lstrip()[:5].lower() == b"from:"

        if path.suffix.lower() == ".txt" and not has_from_header:
            # A CR or LF inside the file name would end the Subject line early
            # and inject arbitrary header lines; flatten them to spaces.
            subject = (
                path.stem.encode("utf-8", errors="replace")
                .replace(b"\r", b" ")
                .replace(b"\n", b" ")
            )
            # NOTE(review): the Message-ID depends only on the position within
            # this run, so unrelated files from different runs can share one.
            # Confirm the downstream stages do not de-duplicate on it.
            payload = b"".join(
                (
                    b"From: unknown@local\r\nTo: unknown@local\r\nSubject: ",
                    subject,
                    b"\r\nMessage-ID: <local-",
                    str(seq).encode(),
                    b"@atlas-eml-cli>\r\n\r\n",
                    payload,
                )
            )

        yield FetchedRaw(
            account=account,
            folder="local",
            uid=str(seq),
            internal_date=datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc),
            raw_mime=payload,
        )
|
|
|
|
|
|
# Allow running the module directly; this invokes the click group defined in
# this file.
if __name__ == "__main__":
    main()
|