assistant-claw/mcp-tools/email-extractor/atlas_extractor/cli.py
Atlas refactor bd0be97630 Refactor: drop Vega framing, promote Atlas to repo root
This repo IS Atlas (总助 Claw / 老板视角项目执行雷达). The earlier
two-profile framing (Atlas + Vega placeholder) was a misread — Vega is
the agent persona answering Multica issues, not the product. Vega has
no relationship to assistant-claw the product.

Changes:
- Move atlas/* to top-level (git mv preserves history)
- Remove empty Vega placeholders prompts/.gitkeep, tools/.gitkeep
- Delete atlas/ wrapper directory (now empty)
- Update path references in INTEGRATION-hermes.md, scripts/mirror-...sh,
  docs/decisions/0001-mirror-nuwa-skill.md
- Rewrite README.md as Atlas-only, remove dual-profile language

After this commit:
- Top-level OpenClaw 8 files (IDENTITY/SOUL/USER/AGENTS/TOOLS/MEMORY/
  BOOTSTRAP/HEARTBEAT + CLAUDE symlink + zh-CN mirrors)
- skills/{6 sub-skills + DESCRIPTION + README}
- mcp-tools/{spec + Python implementation}
- state-schemas/{project, person, customer + README}
- autopilots/{5 atlas-*.yaml}
- client-deck/, docs/decisions/, scripts/

The ~/.hermes/skills/atlas/ destination convention preserved (atlas as
a skill namespace on the operator's machine, distinct from source path).
2026-05-09 17:54:18 +08:00

113 lines
3.9 KiB
Python

"""Command-line entry point.
Three modes:
atlas-extract imap --host imap.gmail.com --user X --password Y --state-dir ./state
atlas-extract eml --input ./fixtures/sample.eml --state-dir ./state
atlas-extract dir --input-dir ./test-emails --state-dir ./state
Suitable for V0 dev + the demo flow. Production wraps this in an MCP server.
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Iterator
import click
from .fetch import FetchedRaw, fetch_imap
from .pipeline import run_on_raws
@click.group()
def main() -> None:
    """Atlas / 总助 Claw — email extractor V0 (Stages 1-3)."""
    # Root command group with no logic of its own. The docstring above is
    # what click prints as the top-level help text, and the subcommands in
    # this module attach themselves through ``@main.command(...)``.
@main.command("imap")
@click.option("--host", required=True)
@click.option("--port", default=993, type=int)
@click.option("--user", "username", required=True, envvar="ATLAS_IMAP_USER")
@click.option("--password", required=True, envvar="ATLAS_IMAP_PASSWORD")
@click.option("--folder", "folders", multiple=True, default=["INBOX", "Sent"])
@click.option("--state-dir", required=True, type=click.Path(path_type=Path))
@click.option(
    "--since-days",
    default=365,
    type=int,
    help="On cold start, only pull messages newer than N days.",
)
@click.option("--max-per-run", default=5000, type=int)
def imap_cmd(
    host: str,
    port: int,
    username: str,
    password: str,
    folders: tuple[str, ...],
    state_dir: Path,
    since_days: int,
    max_per_run: int,
) -> None:
    """Pull from a real IMAP account, run stages 1-3, write JSON."""
    # NOTE: the docstring above doubles as the click help text for this
    # subcommand, so it is kept short; details live in these comments.
    #
    # --user / --password may also be supplied through ATLAS_IMAP_USER and
    # ATLAS_IMAP_PASSWORD. --folder may be repeated; without it, "INBOX" and
    # "Sent" are used.

    # Lower bound on message age handed to the fetcher, as an aware UTC time.
    cutoff = datetime.now(timezone.utc) - timedelta(days=since_days)

    # Work with an absolute path and make sure it exists before fetching.
    state_root = state_dir.resolve()
    state_root.mkdir(parents=True, exist_ok=True)

    fetched = fetch_imap(
        host=host,
        port=port,
        username=username,
        password=password,
        folders=list(folders),
        state_dir=state_root,
        since=cutoff,
        max_per_run=max_per_run,
    )

    # The pipeline returns a summary, which is printed as pretty JSON with
    # non-ASCII characters left unescaped.
    click.echo(json.dumps(run_on_raws(fetched, state_root), ensure_ascii=False, indent=2))
@main.command("eml")
@click.option(
    "--input",
    "eml_path",
    required=True,
    # dir_okay=False: a directory here would otherwise only fail later, with
    # a raw IsADirectoryError, when the file is read.
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option("--state-dir", required=True, type=click.Path(path_type=Path))
def eml_cmd(eml_path: Path, state_dir: Path) -> None:
    """Single .eml file → run stages 1-3."""
    # NOTE: the docstring above doubles as the click help text for this
    # subcommand, so it is kept short; details live in these comments.
    state_dir = state_dir.resolve()
    # Create the state directory up front, as the `imap` subcommand does, so
    # a fresh --state-dir behaves the same in every mode.
    state_dir.mkdir(parents=True, exist_ok=True)

    # _eml_iter is lazy: the file is read only when the pipeline consumes it.
    raws = _eml_iter([eml_path])
    summary = run_on_raws(raws, state_dir)
    # Print the pipeline summary as pretty JSON, non-ASCII left unescaped.
    click.echo(json.dumps(summary, ensure_ascii=False, indent=2))
@main.command("dir")
@click.option(
    "--input-dir",
    required=True,
    type=click.Path(exists=True, file_okay=False, path_type=Path),
)
@click.option("--state-dir", required=True, type=click.Path(path_type=Path))
def dir_cmd(input_dir: Path, state_dir: Path) -> None:
    """Directory of .eml/.txt files → run stages 1-3."""
    # NOTE: the docstring above doubles as the click help text for this
    # subcommand, so it is kept short; details live in these comments.
    state_dir = state_dir.resolve()
    # Create the state directory up front, as the `imap` subcommand does, so
    # a fresh --state-dir behaves the same in every mode.
    state_dir.mkdir(parents=True, exist_ok=True)

    # Recursive scan; the suffix match is case-insensitive. Sorting makes the
    # processing order (and the 1-based index assigned by _eml_iter)
    # deterministic for a given directory tree.
    wanted_suffixes = {".eml", ".txt"}
    paths = sorted(
        p
        for p in input_dir.rglob("*")
        if p.is_file() and p.suffix.lower() in wanted_suffixes
    )

    raws = _eml_iter(paths)
    summary = run_on_raws(raws, state_dir)
    # Print the pipeline summary as pretty JSON, non-ASCII left unescaped.
    click.echo(json.dumps(summary, ensure_ascii=False, indent=2))
def _subject_header_value(stem: str) -> bytes:
    """Return *stem* as ASCII bytes usable as a ``Subject:`` header value.

    ASCII stems are passed through unchanged. Anything else is emitted as
    RFC 2047 encoded-words, because raw 8-bit bytes are not valid in a
    message header.
    """
    if stem.isascii():
        return stem.encode("ascii")

    # Function-scope import: only needed on this (rarer) non-ASCII path.
    from email.header import Header

    # Path names may carry lone surrogates (undecodable file-name bytes);
    # fold those to "?" first so the UTF-8 encoding below cannot fail.
    printable = stem.encode("utf-8", errors="replace").decode("utf-8")
    encoded = Header(printable, "utf-8", header_name="Subject").encode(linesep="\r\n")
    return encoded.encode("ascii")


def _eml_iter(paths: list[Path]) -> Iterator[FetchedRaw]:
    """Lazily turn local files into ``FetchedRaw`` records.

    Each path is read in full. A ``.txt`` file that does not begin with a
    ``From:`` header (matched case-insensitively, after leading whitespace)
    is prefixed with synthetic From/To/Subject/Message-ID headers; the
    subject is the file stem and the Message-ID embeds the 1-based position
    of the file in *paths*.

    Args:
        paths: Files to read, in the order they should be yielded.

    Yields:
        One ``FetchedRaw`` per path, with ``folder="local"``, ``uid`` set to
        the 1-based position, ``internal_date`` taken from the file's mtime
        (UTC) and ``account`` taken from ``ATLAS_LOCAL_ACCOUNT`` (default
        ``"local"``).

    Raises:
        OSError: If a file cannot be read or stat'ed.
    """
    account = os.environ.get("ATLAS_LOCAL_ACCOUNT", "local")
    for index, path in enumerate(paths, start=1):
        raw_bytes = path.read_bytes()
        is_txt = path.suffix.lower() == ".txt"
        # Header field names are case-insensitive, so "from:" / "FROM:" must
        # also count as "already has headers".
        has_from_header = raw_bytes.lstrip()[:5].lower() == b"from:"
        if is_txt and not has_from_header:
            # Wrap minimally so the downstream MIME decode doesn't choke.
            raw_bytes = b"".join(
                (
                    b"From: unknown@local\r\nTo: unknown@local\r\nSubject: ",
                    _subject_header_value(path.stem),
                    b"\r\nMessage-ID: <local-",
                    str(index).encode(),
                    b"@atlas-eml-cli>\r\n\r\n",
                    raw_bytes,
                )
            )
        yield FetchedRaw(
            account=account,
            folder="local",
            uid=str(index),
            internal_date=datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc),
            raw_mime=raw_bytes,
        )
# Allow running this module directly as a script; this simply invokes the
# click command group.
if __name__ == "__main__":
    main()