This adds the full Atlas (总助 Claw / 老板视角项目执行雷达) scaffolding as a sibling profile to the existing Vega general-purpose assistant. All Atlas content lives under atlas/ to keep the existing top-level skeleton intact. What's included: - atlas/IDENTITY.md, SOUL.md, USER.md, AGENTS.md, MEMORY.md, BOOTSTRAP.md, HEARTBEAT.md, TOOLS.md (+ zh-CN mirrors) — full OpenClaw 8-piece set matching the zero-cca convention - atlas/skills/ — 6 sub-skills with frontmatter: claw-email-parser / claw-project-tracker / claw-people-observer / claw-customer-radar / claw-boss-distiller / claw-report-writer - atlas/skills/claw-boss-distiller/ — adapter notes for nuwa-skill, 5-layer boss_skill seed template (23 rules across Expression DNA / Mental Models / Decision Heuristics / Anti-Patterns / Honest Boundaries), and a complete synthetic distillation demo (10 input emails -> validated 5-layer output) - atlas/mcp-tools/email-extractor/ — Python implementation of stages 1-3 (fetch + decode + dequote), 7 pytest tests passing, CLI: atlas-extract - atlas/state-schemas/ — formal JSON schemas for project / person / customer cards with the no-employee-rating hard constraint baked in - atlas/client-deck/ — 2-page client-facing pitch document - autopilots/atlas-*.yaml — 5 autopilot configs (daily / weekly / monthly / quarterly + andon event-triggered) for a future Multica-side scheduler Notes: - nuwa-skill (MIT, https://github.com/alchaincyf/nuwa-skill) NOT vendored; fetch at deploy time via instructions in atlas/skills/claw-boss-distiller/upstream/README.md - Vega-side prompts/skills/tools/autopilots/docs scaffold left untouched - Top-level README.md updated with a brief Atlas pointer; rest preserved
120 lines
3.6 KiB
Python
120 lines
3.6 KiB
Python
"""Stage 1: Fetch.
|
|
|
|
IMAP-based incremental fetcher. Persists `last_uid` per (account, folder)
|
|
in a JSON sidecar so re-runs only pull new messages.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Iterator
|
|
|
|
from imap_tools import MailBox, AND
|
|
|
|
|
|
@dataclass
class FetchedRaw:
    """A single message exactly as fetched, before any decoding or parsing."""

    # Account the message belongs to (fetch_imap passes the login username).
    account: str
    # Folder the message was read from.
    folder: str
    # Message UID within the folder, carried as a string.
    uid: str
    # Timestamp the fetcher attached to the message.
    internal_date: datetime
    # Complete, undecoded MIME bytes of the message.
    raw_mime: bytes
|
|
|
|
|
|
def _sync_state_path(state_dir: Path, account: str, folder: str) -> Path:
|
|
safe = f"{account}__{folder}".replace("/", "_").replace("@", "_at_")
|
|
return state_dir / f".sync__{safe}.json"
|
|
|
|
|
|
def _load_last_uid(state_dir: Path, account: str, folder: str) -> int | None:
    """Read the persisted highest UID for (account, folder).

    Args:
        state_dir: Directory holding the sync sidecar files.
        account: Account identifier used to build the sidecar file name.
        folder: Folder name used to build the sidecar file name.

    Returns:
        The stored ``last_uid`` as an int, or ``None`` when the sidecar is
        missing or its content cannot be interpreted (bad encoding, invalid
        JSON, not a JSON object, missing or non-numeric ``last_uid``).
        ``None`` makes the caller fall back to a cold start.

    Raises:
        OSError: for read failures other than a missing file (for example
            permission errors); these are not treated as "no state".
    """
    p = _sync_state_path(state_dir, account, folder)
    try:
        # Read directly instead of exists()+read so a file removed in between
        # cannot raise.
        text = p.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return None
    except UnicodeDecodeError:
        return None

    try:
        return int(json.loads(text)["last_uid"])
    except (KeyError, TypeError, ValueError):
        # TypeError: top-level JSON is not an object, or last_uid is null.
        # ValueError also covers json.JSONDecodeError.
        return None
|
|
|
|
|
|
def _save_last_uid(state_dir: Path, account: str, folder: str, last_uid: int) -> None:
    """Persist the highest UID seen for (account, folder).

    The sidecar is a JSON object with ``account``, ``folder``, ``last_uid``
    and an ISO-8601 UTC ``updated_at`` timestamp. Missing parent directories
    are created.

    Args:
        state_dir: Directory holding the sync sidecar files.
        account: Account identifier recorded in the sidecar.
        folder: Folder name recorded in the sidecar.
        last_uid: Highest UID to record.
    """
    p = _sync_state_path(state_dir, account, folder)
    p.parent.mkdir(parents=True, exist_ok=True)

    payload = json.dumps(
        {
            "account": account,
            "folder": folder,
            "last_uid": last_uid,
            "updated_at": datetime.now(timezone.utc).isoformat(),
        },
        ensure_ascii=False,
        indent=2,
    )

    # Write to a sibling temp file and rename over the target, so an
    # interrupted write cannot leave a truncated sidecar (which would be read
    # back as "no state" and trigger a full cold-start fetch).
    tmp = p.with_name(p.name + ".tmp")
    tmp.write_text(payload, encoding="utf-8")
    tmp.replace(p)
|
|
|
|
|
|
def fetch_imap(
    *,
    host: str,
    port: int,
    username: str,
    password: str,
    folders: list[str],
    state_dir: Path,
    since: datetime | None = None,
    max_per_run: int = 5000,
) -> Iterator[FetchedRaw]:
    """Yield raw MIME messages incrementally per folder.

    Sync model: per (username, folder) the highest UID seen is remembered in
    a sidecar under *state_dir*. On re-run only UIDs strictly greater than
    that value are yielded. On the first run for a folder, `since` (if given)
    bounds the cold-start window by date; otherwise everything is fetched.

    Args:
        host: IMAP server host.
        port: IMAP server port.
        username: Login name; also used as the account key for sync state.
        password: Login password.
        folders: Folder names to fetch, processed in order.
        state_dir: Directory for the per-folder sync sidecars.
        since: Optional lower date bound, used only when a folder has no
            stored state.
        max_per_run: Per-folder cap passed to the fetch call as ``limit``.

    Yields:
        One FetchedRaw per message, with messages never marked as seen.

    Notes:
        * The stored cursor is written when a folder finishes *and* when
          iteration stops early (consumer closes the generator, or an error
          interrupts the fetch). Only messages the consumer has already moved
          past are counted, so the message in flight is retried next run.
        * NOTE(review): UIDVALIDITY is not tracked; if the server resets UIDs
          for a folder the stored cursor is stale — confirm whether callers
          need this handled.
        * NOTE(review): ``internal_date`` is filled from ``m.date``, which
          looks like the parsed Date header rather than the server
          INTERNALDATE — confirm.
    """
    with MailBox(host, port).login(username, password) as mailbox:
        for folder in folders:
            mailbox.folder.set(folder)
            last_uid = _load_last_uid(state_dir, username, folder)

            if last_uid is None:
                # Cold start: optionally bounded by date.
                criteria = AND(date_gte=since.date()) if since else "ALL"
            else:
                # Incremental: ask for UID > last_uid.
                criteria = f"UID {last_uid + 1}:*"

            msgs = mailbox.fetch(
                criteria=criteria,
                bulk=True,
                headers_only=False,
                limit=max_per_run,
                mark_seen=False,
            )

            highest_seen = last_uid or 0
            try:
                for m in msgs:
                    try:
                        uid = int(m.uid)
                    except (TypeError, ValueError):
                        # Unparseable UID: still hand the message over, but it
                        # cannot advance the cursor.
                        uid = None

                    # In IMAP, "n:*" always matches the newest message even
                    # when n is above every existing UID, so a run with
                    # nothing new returns the already-processed last message.
                    # Drop it.
                    if uid is not None and last_uid is not None and uid <= last_uid:
                        continue

                    yield FetchedRaw(
                        account=username,
                        folder=folder,
                        uid=str(m.uid),
                        internal_date=m.date or datetime.now(timezone.utc),
                        raw_mime=m.obj.as_bytes(),  # full MIME bytes
                    )

                    # Advance only after the consumer has come back for the
                    # next message.
                    if uid is not None and uid > highest_seen:
                        highest_seen = uid
            finally:
                # Persist progress even on early exit so consumed messages are
                # not re-fetched on the next run.
                if highest_seen and (last_uid is None or highest_seen > last_uid):
                    _save_last_uid(state_dir, username, folder, highest_seen)
|