This repo IS Atlas (总助 Claw / 老板视角项目执行雷达). The earlier
two-profile framing (Atlas + Vega placeholder) was a misread — Vega is
the agent persona answering Multica issues, not the product. Vega has
no relationship to assistant-claw the product.
Changes:
- Move atlas/* to top-level (git mv preserves history)
- Remove empty Vega placeholders prompts/.gitkeep, tools/.gitkeep
- Delete atlas/ wrapper directory (now empty)
- Update path references in INTEGRATION-hermes.md, scripts/mirror-...sh,
docs/decisions/0001-mirror-nuwa-skill.md
- Rewrite README.md as Atlas-only, remove dual-profile language
After this commit:
- Top-level OpenClaw 8 files (IDENTITY/SOUL/USER/AGENTS/TOOLS/MEMORY/
BOOTSTRAP/HEARTBEAT + CLAUDE symlink + zh-CN mirrors)
- skills/{6 sub-skills + DESCRIPTION + README}
- mcp-tools/{spec + Python implementation}
- state-schemas/{project, person, customer + README}
- autopilots/{5 atlas-*.yaml}
- client-deck/, docs/decisions/, scripts/
The ~/.hermes/skills/atlas/ destination convention is preserved (atlas as
a skill namespace on the operator's machine, distinct from the source path).
82 lines
2.6 KiB
Python
82 lines
2.6 KiB
Python
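"""Tests for Stage 3 dequoting logic.

Also includes a basic MIME decode check and an end-to-end run through
``stage123``; every test reads ``fixtures/sample_thread.eml``.

Run: pytest -q
"""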
|
|
|
|
from pathlib import Path
|
|
|
|
from atlas_extractor.decode import decode_mime
|
|
from atlas_extractor.dequote import dequote
|
|
from atlas_extractor.pipeline import stage123
|
|
from atlas_extractor.fetch import FetchedRaw
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
# Sample .eml thread stored next to this test module; read by _load_fixture().
FIXTURE = Path(__file__).parent.joinpath("fixtures", "sample_thread.eml")
|
|
|
|
|
|
def _load_fixture() -> bytes:
    """Return the raw bytes of the sample email stored at ``FIXTURE``."""
    with FIXTURE.open("rb") as handle:
        return handle.read()
|
|
|
|
|
|
def test_decode_basic():
    """decode_mime exposes the message id, sender address and full body text."""
    message = decode_mime(_load_fixture())
    assert message.msg_id == "demo-001@us-saas.cn"
    assert message.from_addr[1] == "wang@us-saas.cn"
    body = message.body_text
    assert "张三" in body
    # No dequoting has run yet, so the quoted history is still in the body.
    assert "On Mon, Apr 16" in body
|
|
|
|
|
|
def test_dequote_strips_english_marker():
    """The English "On Mon, Apr 16" reply marker is removed by dequote.

    Also checks that at least one reported strategy name contains "marker".
    """
    body = decode_mime(_load_fixture()).body_text
    outcome = dequote(body)
    assert "On Mon, Apr 16" not in outcome.text_clean
    assert any(map(lambda name: "marker" in name, outcome.strategies_used))
|
|
|
|
|
|
def test_dequote_strips_signature_separator():
    """Signature contents are dropped and a "signature" strategy is reported."""
    outcome = dequote(decode_mime(_load_fixture()).body_text)
    # The signature block ("-- \n王\nCEO, US-SaaS...") must not survive cleanup.
    for fragment in ("+86-138", "CEO, US-SaaS"):
        assert fragment not in outcome.text_clean
    assert any("signature" in used for used in outcome.strategies_used)
|
|
|
|
|
|
def test_dequote_strips_disclaimer():
    """The confidentiality disclaimer text is absent from the cleaned output.

    Only the outcome is asserted: the removal may be credited to a
    "disclaimer" strategy, or absorbed by the signature stripper when the
    disclaimer sits inside the signature block. Either is acceptable.
    """
    body = decode_mime(_load_fixture()).body_text
    cleaned = dequote(body).text_clean
    assert "保密信息" not in cleaned
|
|
|
|
|
|
def test_dequote_keeps_real_content():
    """Genuine message content is still present after dequoting."""
    outcome = dequote(decode_mime(_load_fixture()).body_text)
    must_survive = (
        "PRJ-001",
        "我上次问已经过去 6 天了",
        "不要等我再问第四次",
    )
    # Checked in order so the first missing phrase is the one that fails.
    for phrase in must_survive:
        assert phrase in outcome.text_clean
|
|
|
|
|
|
def test_dequote_chars_stripped_meaningful():
    """dequote reports more than 50 stripped characters for the fixture."""
    body = decode_mime(_load_fixture()).body_text
    removed = dequote(body).chars_stripped
    assert removed > 50, "Expected non-trivial cleanup"
|
|
|
|
|
|
def test_pipeline_e2e_via_fetched_raw():
    """Run stage123 on a FetchedRaw built from the fixture and check the text.

    The cleaned text must keep the project reference and must contain
    neither the disclaimer nor the English quote marker.
    """
    fetched = FetchedRaw(
        account="test",
        folder="local",
        uid="1",
        internal_date=datetime.now(timezone.utc),
        raw_mime=_load_fixture(),
    )
    cleaned = stage123(fetched).dequoted.text_clean
    assert "PRJ-001" in cleaned
    assert "保密信息" not in cleaned
    assert "On Mon" not in cleaned
|