# FFCardGame/tools/ai_card_reviewer.py

#!/usr/bin/env python3
"""
AI Card Reviewer - Uses Claude's vision to validate and correct card data.

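Usage:
    python tools/ai_card_reviewer.py                    # Review all unreviewed cards
    python tools/ai_card_reviewer.py --set 1            # Review only Opus 1 cards
    python tools/ai_card_reviewer.py --card 1-001H      # Review a specific card
    python tools/ai_card_reviewer.py --limit 10         # Review only 10 cards
    python tools/ai_card_reviewer.py --dry-run          # Don't save changes, just show what would change
    python tools/ai_card_reviewer.py --include-reviewed # Also re-review cards already marked as reviewed
    python tools/ai_card_reviewer.py --auto-mark-reviewed  # Record each successfully reviewed card in data/reviewed.json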

Requires:
    pip install anthropic

Set your API key:
    export ANTHROPIC_API_KEY=your-key-here
"""

import argparse
import base64
import json
import os
import sys
import time
from pathlib import Path

try:
    import anthropic
except ImportError:
    print("Error: anthropic package not installed. Run: pip install anthropic")
    sys.exit(1)

# Parent of the directory that contains this script.
PROJECT_ROOT = Path(__file__).parent.parent
# Card database read by load_cards() and rewritten by save_cards().
CARDS_FILE = PROJECT_ROOT / "data" / "cards.json"
# Holds the IDs of cards already reviewed (see load_reviewed/save_reviewed).
REVIEWED_FILE = PROJECT_ROOT / "data" / "reviewed.json"
# Directory that each card's "image" field is resolved against.
SOURCE_CARDS_DIR = PROJECT_ROOT / "source-cards"

# Rate limiting
# Request budget per minute from which the inter-request delay is derived.
REQUESTS_PER_MINUTE = 30
# Seconds slept between consecutive API requests (60 / 30 = 2.0).
REQUEST_DELAY = 60 / REQUESTS_PER_MINUTE


def load_cards():
    """Load and return the parsed contents of ``CARDS_FILE``.

    The file is always decoded as UTF-8: ``save_cards`` writes non-ASCII
    characters verbatim, so relying on the platform's default encoding
    would corrupt or reject them on non-UTF-8 locales.

    Returns:
        The decoded JSON document (``main`` expects a dict with a
        ``"cards"`` list).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(CARDS_FILE, "r", encoding="utf-8") as f:
        return json.load(f)


def save_cards(data):
    """Write *data* to ``CARDS_FILE`` as indented UTF-8 JSON.

    Args:
        data: JSON-serializable card database.

    Raises:
        TypeError / ValueError: if *data* is not JSON-serializable.
        OSError: if the file cannot be written.
    """
    # Serialize before opening the file: opening in "w" mode truncates it,
    # so a serialization failure afterwards would destroy the database.
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned; the platform default (e.g. cp1252) may be unable to encode them.
    with open(CARDS_FILE, "w", encoding="utf-8") as f:
        f.write(serialized)


def load_reviewed():
    """Return the set of reviewed card IDs stored in ``REVIEWED_FILE``.

    An empty set is returned when the file does not exist or when it has
    no ``"reviewed"`` entry.
    """
    if not REVIEWED_FILE.exists():
        return set()

    with open(REVIEWED_FILE, "r") as handle:
        payload = json.load(handle)

    reviewed_ids = payload.get("reviewed", [])
    return set(reviewed_ids)


def save_reviewed(reviewed_set):
    """Persist the reviewed card IDs to ``REVIEWED_FILE``.

    Args:
        reviewed_set: iterable of card IDs that have been reviewed.

    The IDs are written in sorted order so the file content is stable from
    run to run (set iteration order is arbitrary, which would otherwise
    reshuffle the file on every save).
    """
    # key=str keeps the sort total even if a non-string ID slipped in.
    ordered_ids = sorted(reviewed_set, key=str)
    with open(REVIEWED_FILE, "w", encoding="utf-8") as f:
        json.dump({"reviewed": ordered_ids}, f, indent=2)


def encode_image(image_path: Path) -> tuple[str, str]:
    """Read an image file and return ``(base64_text, media_type)``.

    The media type is chosen from the file extension (case-insensitive);
    any extension that is not recognised is reported as ``image/jpeg``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes).decode("ascii")

    known_media_types = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }
    # Unknown extensions fall back to JPEG.
    suffix = image_path.suffix.lower()
    return encoded, known_media_types.get(suffix, "image/jpeg")


# System prompt sent with every review request (passed as `system=` in
# review_card). It describes the card schema and pins the JSON response
# envelope -- changes_made / confidence / notes / corrected_data -- that
# review_card parses and main() consumes.
SYSTEM_PROMPT = """You are a data validator for Final Fantasy Trading Card Game (FFTCG) cards.
You will be shown a card image and its current JSON data. Your job is to:

1. Verify all fields match what's visible on the card
2. Correct any errors you find
3. Fill in any missing data
4. Ensure abilities are accurately transcribed

FFTCG Card Structure:
- id: Card number (e.g., "1-001H" = Opus 1, card 001, Hero rarity)
- name: Character/card name
- type: Forward, Backup, Summon, or Monster
- element: Fire, Ice, Wind, Earth, Lightning, Water, Light, or Dark (can be array for multi-element)
- cost: Crystal Point cost (number in top-left)
- power: Power value for Forwards (number at bottom, in thousands like 7000). Backups/Summons have null power.
- job: Job class (e.g., "Warrior", "Knight")
- category: Game title (e.g., "VII", "X", "TACTICS")
- is_generic: true if card has no category (generic cards)
- has_ex_burst: true if card has EX BURST ability (lightning bolt icon)
- has_haste: true if the card has the Haste keyword ability (can attack/use abilities the turn it enters)

Ability Types:
- field: Passive abilities always active (includes keyword abilities like Haste, Brave, First Strike)
- auto: Triggered abilities (start with "When..." or have a trigger condition)
- action: Activated abilities (have a cost, often require dulling with S symbol)
- special: Special abilities (usually named abilities with S symbol cost)

Important Keywords to Identify:
- Haste: "This card can attack and use abilities the turn it enters the field" - set has_haste=true
- Brave: Card doesn't dull when attacking
- First Strike: Deals damage before opponent in combat

Respond ONLY with valid JSON in this exact format:
{
  "changes_made": true/false,
  "confidence": "high"/"medium"/"low",
  "notes": "Brief explanation of changes or issues",
  "corrected_data": {
    // Complete card JSON with all fields
  }
}

If the data looks correct, set changes_made to false and return the original data in corrected_data.
Always include ALL fields in corrected_data, even if unchanged."""


def _extract_json_text(response_text: str) -> str:
    """Return the part of *response_text* most likely to be the JSON object."""
    if "```json" in response_text:
        candidate = response_text.split("```json", 1)[1].split("```", 1)[0]
    elif "```" in response_text:
        candidate = response_text.split("```", 1)[1].split("```", 1)[0]
    else:
        candidate = response_text
    candidate = candidate.strip()

    # Tolerate a stray language tag after the fence or prose around the
    # payload by keeping only the outermost {...} span when one exists.
    start = candidate.find("{")
    end = candidate.rfind("}")
    if start != -1 and end > start:
        candidate = candidate[start:end + 1]
    return candidate


def review_card(client: anthropic.Anthropic, card: dict, image_path: Path) -> "dict | None":
    """Review a single card using Claude's vision.

    Args:
        client: Anthropic API client used to send the request.
        card: Current JSON data of the card; embedded in the prompt.
        image_path: Path of the card image sent alongside the prompt.

    Returns:
        The parsed response object (a dict following the envelope described
        in ``SYSTEM_PROMPT``), or ``None`` when the API call fails, the
        response contains no text, the text is not valid JSON, or the JSON
        is not an object. Failures are reported on stdout.

    Raises:
        OSError: if the image file cannot be read.
    """

    image_data, media_type = encode_image(image_path)

    user_message = f"""Please review this FFTCG card image and verify/correct the following JSON data:

```json
{json.dumps(card, indent=2)}
```

Look carefully at:
1. Card name spelling
2. Element (color of the crystal/card border)
3. Cost (number in the crystal)
4. Power (number at bottom for Forwards, should be null for Backups/Summons)
5. Job and Category text
6. All abilities - check type, name, trigger, effect text
7. EX BURST indicator (lightning bolt symbol)
8. Haste keyword - if the card mentions attacking or using abilities the turn it enters, has_haste should be true

Return the corrected JSON."""

    # Keep the try body limited to the network call so unrelated errors are
    # not mistaken for API failures.
    try:
        response = client.messages.create(
            model="claude-opus-4-5-20251101",
            max_tokens=4096,
            system=SYSTEM_PROMPT,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": image_data,
                            },
                        },
                        {
                            "type": "text",
                            "text": user_message,
                        },
                    ],
                }
            ],
        )
    except anthropic.APIError as e:
        print(f"  API error: {e}")
        return None

    # The content list may be empty or start with a non-text block; indexing
    # [0].text blindly would raise and abort the whole batch.
    response_text = next(
        (
            block.text
            for block in response.content
            if getattr(block, "type", None) == "text"
        ),
        None,
    )
    if not response_text:
        print("  Empty response: no text content returned")
        return None

    try:
        parsed = json.loads(_extract_json_text(response_text))
    except json.JSONDecodeError as e:
        print(f"  JSON parse error: {e}")
        print(f"  Response: {response_text[:500]}...")
        return None

    # Callers use dict methods on the result; reject arrays/scalars here.
    if not isinstance(parsed, dict):
        print(f"  Unexpected response: expected a JSON object, got {type(parsed).__name__}")
        return None

    return parsed


def print_diff(original: dict, corrected: dict, card_id: str):
    """Print differences between original and corrected card data.

    Args:
        original: Card data before the review.
        corrected: Card data proposed by the review.
        card_id: Identifier shown in the printed header.

    Top-level fields are compared by value and listed in sorted key order
    so the output is reproducible. ``image`` is ignored, and ``abilities``
    is summarised separately (count change, or which indexes were modified).
    A missing or ``None`` ``abilities`` value is treated as an empty list.
    """
    changes = []

    # Compare top-level fields (sorted: plain set iteration order varies
    # between runs).
    for key in sorted(set(original) | set(corrected)):
        if key in ("abilities", "image"):
            continue
        orig_val = original.get(key)
        corr_val = corrected.get(key)
        if orig_val != corr_val:
            changes.append(f"  {key}: {orig_val!r} -> {corr_val!r}")

    # Compare abilities (simplified); `or []` also covers an explicit null.
    orig_abilities = original.get("abilities") or []
    corr_abilities = corrected.get("abilities") or []

    if len(orig_abilities) != len(corr_abilities):
        changes.append(f"  abilities: {len(orig_abilities)} -> {len(corr_abilities)} abilities")
    else:
        changes.extend(
            f"  abilities[{i}]: modified"
            for i, (orig_ab, corr_ab) in enumerate(zip(orig_abilities, corr_abilities))
            if orig_ab != corr_ab
        )

    if changes:
        print(f"\n[{card_id}] Changes:")
        for change in changes:
            print(change)
    else:
        print(f"[{card_id}] No changes needed")


def _select_cards(all_cards: list, reviewed_set: set, args: argparse.Namespace) -> list:
    """Return ``(index, card, image_path)`` for each card passing the CLI filters."""
    selected = []
    for index, card in enumerate(all_cards):
        card_id = card["id"]

        # Skip if already reviewed (unless --include-reviewed)
        if not args.include_reviewed and card_id in reviewed_set:
            continue

        # Filter by set if specified
        if args.set and not card_id.startswith(args.set + "-"):
            continue

        # Filter by specific card if specified
        if args.card and card_id != args.card:
            continue

        # A missing/empty "image" would resolve to the source directory
        # itself, which exists but cannot be read as a file -- require a
        # real file instead.
        image_name = card.get("image")
        image_path = SOURCE_CARDS_DIR / image_name if image_name else None
        if image_path is None or not image_path.is_file():
            shown = image_path if image_path is not None else "(no image field)"
            print(f"Warning: Image not found for {card_id}: {shown}")
            continue

        selected.append((index, card, image_path))
    return selected


def _print_summary(stats: dict, dry_run: bool) -> None:
    """Print the end-of-run statistics block."""
    print(f"\n{'='*50}")
    print("Summary:")
    print(f"  Cards reviewed: {stats['reviewed']}")
    print(f"  Cards changed: {stats['changed']}")
    print(f"  Errors: {stats['errors']}")
    print(f"  High confidence: {stats['high_confidence']}")
    print(f"  Medium confidence: {stats['medium_confidence']}")
    print(f"  Low confidence: {stats['low_confidence']}")

    if dry_run and stats["changed"] > 0:
        print(f"\n(Dry run - {stats['changed']} changes NOT saved)")


def main():
    """Command-line entry point: review cards and optionally save corrections.

    Parses the CLI flags, selects the cards to review, sends each one to
    ``review_card``, applies the corrections in memory, and finally writes
    the card database and reviewed list (unless ``--dry-run`` is given).
    Exits with status 1 when ``ANTHROPIC_API_KEY`` is not set. Pressing
    Ctrl-C stops the batch but still saves what was gathered so far.
    """
    parser = argparse.ArgumentParser(description="AI Card Reviewer using Claude Vision")
    parser.add_argument("--set", type=str, help="Only review cards from this set/opus (e.g., '1' for Opus 1)")
    parser.add_argument("--card", type=str, help="Review a specific card by ID")
    parser.add_argument("--limit", type=int, help="Maximum number of cards to review")
    parser.add_argument("--dry-run", action="store_true", help="Don't save changes, just show what would change")
    parser.add_argument("--include-reviewed", action="store_true", help="Re-review already reviewed cards")
    parser.add_argument("--auto-mark-reviewed", action="store_true", help="Automatically mark cards as reviewed after AI review")
    args = parser.parse_args()

    # Check API key
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        print("Error: ANTHROPIC_API_KEY environment variable not set")
        print("Set it with: export ANTHROPIC_API_KEY=your-key-here")
        sys.exit(1)

    client = anthropic.Anthropic(api_key=api_key)

    # Load data
    cards_data = load_cards()
    reviewed_set = load_reviewed()
    all_cards = cards_data["cards"]

    print(f"Loaded {len(all_cards)} cards, {len(reviewed_set)} already reviewed")

    cards_to_review = _select_cards(all_cards, reviewed_set, args)

    # Apply limit (`is not None` so that --limit 0 really means zero cards;
    # negative values are clamped instead of slicing from the end).
    if args.limit is not None:
        cards_to_review = cards_to_review[:max(args.limit, 0)]

    if not cards_to_review:
        print("No cards to review matching criteria")
        return

    total = len(cards_to_review)
    print(f"\nReviewing {total} cards...")
    if args.dry_run:
        print("(DRY RUN - no changes will be saved)")
    print()

    # Track statistics
    stats = {
        "reviewed": 0,
        "changed": 0,
        "errors": 0,
        "high_confidence": 0,
        "medium_confidence": 0,
        "low_confidence": 0,
    }

    try:
        for position, (index, card, image_path) in enumerate(cards_to_review, start=1):
            # Rate limiting: space out consecutive API requests.
            if position > 1:
                time.sleep(REQUEST_DELAY)

            print(f"[{position}/{total}] Reviewing {card['id']}: {card.get('name', 'Unknown')}...", end="", flush=True)

            result = review_card(client, card, image_path)

            # None signals a reported failure; anything else that is not a
            # dict cannot be interpreted either.
            if not isinstance(result, dict):
                print(" ERROR")
                stats["errors"] += 1
                continue

            changed = bool(result.get("changes_made"))
            corrected = result.get("corrected_data")

            # Never replace a card with missing/empty data: that would wipe
            # the existing record.
            if changed and (not isinstance(corrected, dict) or not corrected):
                print(" ERROR")
                print("  Response reported changes but contained no usable corrected_data; card left untouched")
                stats["errors"] += 1
                continue

            stats["reviewed"] += 1
            confidence = result.get("confidence", "unknown")
            stats[f"{confidence}_confidence"] = stats.get(f"{confidence}_confidence", 0) + 1

            if changed:
                stats["changed"] += 1
                print(f" CHANGED ({confidence} confidence)")
                print(f"  Notes: {result.get('notes', 'No notes')}")

                print_diff(card, corrected, card["id"])

                if not args.dry_run:
                    # Preserve image path; keep the ID if the response omitted it.
                    corrected["image"] = card.get("image", "")
                    corrected.setdefault("id", card["id"])
                    # `index` is the card's position recorded at selection
                    # time, so no rescan of the list is needed.
                    all_cards[index] = corrected
            else:
                print(f" OK ({confidence} confidence)")

            # Mark as reviewed if auto-mark enabled
            if args.auto_mark_reviewed and not args.dry_run:
                reviewed_set.add(card["id"])
    except KeyboardInterrupt:
        # Fall through to the save step so finished reviews are not lost.
        print("\nInterrupted - keeping the results gathered so far")

    # Save changes
    if not args.dry_run and stats["changed"] > 0:
        print(f"\nSaving changes to {CARDS_FILE}...")
        save_cards(cards_data)

    if not args.dry_run and args.auto_mark_reviewed:
        print(f"Saving reviewed status to {REVIEWED_FILE}...")
        save_reviewed(reviewed_set)

    _print_summary(stats, args.dry_run)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()