#!/usr/bin/env python3
"""
FFTCG Card Scanner - Extracts card data from images using Claude's vision API.

Usage:
    python tools/scan_cards.py              # Scan all cards (resumes from progress)
    python tools/scan_cards.py --stats      # Show scan progress stats
    python tools/scan_cards.py --finalize   # Generate final cards.json from progress
    python tools/scan_cards.py --validate   # Validate scanned data
    python tools/scan_cards.py --limit N    # Scan only N cards (for testing)
"""

import argparse
import base64
import json
import os
import sys
import time
from pathlib import Path

import anthropic

# Paths
PROJECT_ROOT = Path(__file__).parent.parent
SOURCE_CARDS_DIR = PROJECT_ROOT / "source-cards"
DATA_DIR = PROJECT_ROOT / "data"
PROGRESS_FILE = DATA_DIR / "cards_progress.json"
FINAL_FILE = DATA_DIR / "cards.json"
ERROR_LOG = DATA_DIR / "scan_errors.log"

# API settings
MODEL = "claude-haiku-4-5-20251001"
MAX_RETRIES = 3
RETRY_DELAY = 2.0  # seconds, doubles on each retry
RATE_LIMIT_DELAY = 0.5  # seconds between requests

# The prompt is assembled from adjacent literals purely for source readability;
# the pieces join into one single-line string.
SCAN_PROMPT = (
    "Analyze this FFTCG card image. Extract ALL data into JSON. "
    "CRITICAL ELEMENT DETECTION RULES — The element is determined ONLY by visual color cues. "
    "DO NOT guess element from the character name. "
    '"Warrior of Light" can be Fire element. "Garland" can be Fire element. '
    'A card named "Shiva" can be Fire element. '
    "ONLY use these visual indicators: "
    "Look at the CARD BORDER COLOR and the COST CRYSTAL (top-left gem with a number): "
    '- "Fire" = RED/WARM RED border, RED crystal '
    '- "Ice" = LIGHT BLUE/CYAN border, LIGHT BLUE crystal '
    '- "Wind" = GREEN border, GREEN crystal '
    '- "Earth" = YELLOW/BROWN/ORANGE border, YELLOW-BROWN crystal '
    '- "Lightning" = PURPLE/MAGENTA border, PURPLE crystal '
    '- "Water" = DARK BLUE/NAVY border, DARK BLUE crystal '
    '- "Light" = WHITE/VERY PALE border, WHITE/PALE GOLD crystal '
    "(extremely rare, very distinctive pale/white look) "
    '- "Dark" = VERY DARK/BLACK border, DARK PURPLE/BLACK crystal '
    "(extremely rare, very distinctive dark look) "
    "IMPORTANT: Light and Dark elements are RARE (only a few cards per set). "
    "Most cards are Fire/Ice/Wind/Earth/Lightning/Water. "
    "If the border has any clear hue (red, blue, green, etc.), it is NOT Light or Dark. "
    'For multi-element cards (split-color border/gem), return an array like ["Fire", "Ice"]. '
    "Extract these fields: "
    '1. **id**: Serial number at very bottom of card (e.g., "1-001H", "10-055R"). '
    "2. **name**: Card name at top center. "
    '3. **type**: "Forward", "Backup", "Summon", or "Monster" — from left side of middle bar. '
    "4. **element**: Use the VISUAL COLOR rules above. Do NOT infer from character name. "
    "5. **cost**: Number inside top-left crystal (integer). "
    "6. **power**: Number at bottom-right (integer), or null if absent "
    "(Backups/Summons have none). "
    '7. **job**: Center of middle bar (e.g., "Guardian", "Standard Unit"). '
    "Empty string if none. "
    "8. **category**: Right side of middle bar — Roman numeral or abbreviation "
    '(e.g., "X", "VII", "FFT", "DFF-I"). Empty string if none. '
    "9. **is_generic**: true for generic units (Standard Unit, Ranger, Black Mage, etc.), "
    "false otherwise. "
    '10. **has_ex_burst**: true if "EX" marker in top-right area. '
    "11. **abilities**: Array of objects with: "
    '- **type**: "field" (passive), "auto" (triggered, "When..."), '
    '"action" (activated, has cost), "special" (S-ability) '
    '- **name**: Ability name if present, else "". '
    '- **trigger**: Trigger condition for auto abilities, else "". '
    "- **effect**: Full effect text as printed. "
    '- **is_ex_burst**: true if preceded by "EX BURST". '
    '- **cost**: Activation cost object if present (e.g., {"fire": 1, "dull": true}). '
    "Return ONLY valid JSON. No markdown. No explanation. "
    '{"id":"1-001H","name":"Auron","type":"Forward","element":"Fire","cost":6,"power":9000,'
    '"job":"Guardian","category":"X","is_generic":false,"has_ex_burst":false,'
    '"abilities":[{"type":"auto","name":"","trigger":"When Auron deals damage to your opponent",'
    '"effect":"You may play 1 Fire Backup from your hand onto the field dull.",'
    '"is_ex_burst":false}]}'
)


def load_progress() -> dict:
    """Load existing scan progress.

    Returns:
        The parsed contents of PROGRESS_FILE, or an empty dict when the file
        does not exist yet.
    """
    if not PROGRESS_FILE.exists():
        return {}
    # Progress is written with ensure_ascii=False, so read it back as UTF-8
    # instead of relying on the platform's default encoding.
    with open(PROGRESS_FILE, "r", encoding="utf-8") as f:
        return json.load(f)


def save_progress(progress: dict) -> None:
    """Save scan progress incrementally.

    The data is written to a temporary sibling file first and then moved over
    PROGRESS_FILE, so an interruption in the middle of a write cannot leave a
    truncated progress file behind.

    Args:
        progress: Mapping to serialise as JSON into PROGRESS_FILE.
    """
    PROGRESS_FILE.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = PROGRESS_FILE.with_name(PROGRESS_FILE.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(progress, f, indent=2, ensure_ascii=False)
    os.replace(tmp_path, PROGRESS_FILE)


def log_error(card_id: str, error: str) -> None:
    """Append a timestamped error line to ERROR_LOG.

    Args:
        card_id: Identifier of the card the error relates to.
        error: Human-readable description of the problem.
    """
    ERROR_LOG.parent.mkdir(parents=True, exist_ok=True)
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    # Error text can embed raw model output, so force UTF-8 here as well.
    with open(ERROR_LOG, "a", encoding="utf-8") as f:
        f.write(f"{timestamp} | {card_id} | {error}\n")


def get_card_images() -> list[Path]:
    """Get all card image files sorted by ID, skipping duplicates like _eg files.

    Returns:
        Paths in SOURCE_CARDS_DIR with a .jpg/.jpeg/.png suffix whose stem does
        not contain "_eg" (e.g. 1-003C_eg.jpg is skipped), sorted by stem.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")
    images = [
        path
        for path in SOURCE_CARDS_DIR.iterdir()
        if path.suffix.lower() in image_suffixes and "_eg" not in path.stem
    ]
    images.sort(key=lambda p: p.stem)
    return images


def encode_image(image_path: Path) -> str:
    """Read an image file and return its contents base64-encoded as text."""
    return base64.standard_b64encode(image_path.read_bytes()).decode("utf-8")


def get_media_type(image_path: Path) -> str:
    """Get the MIME type for an image path.

    Returns:
        "image/png" for a .png suffix (case-insensitive); "image/jpeg" for
        everything else, including unrecognised suffixes.
    """
    if image_path.suffix.lower() == ".png":
        return "image/png"
    return "image/jpeg"


def _strip_code_fence(text: str) -> str:
    """Remove a surrounding Markdown code fence (```json ... ```) if present."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    lines = text.split("\n")[1:]  # drop the opening ``` / ```json line
    # Only drop the last line when it really is a closing fence; otherwise an
    # unterminated fence would cost us the final line of the JSON payload.
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines).strip()


def scan_card(client: anthropic.Anthropic, image_path: Path) -> dict | None:
    """Scan a single card image and return parsed card data.

    Sends the image together with SCAN_PROMPT to the model and parses the
    reply as JSON. JSON, rate-limit and API errors are retried up to
    MAX_RETRIES times with exponential back-off; the final failure is written
    to the error log. Any other exception is logged and aborts immediately.

    Args:
        client: Anthropic client used to issue the request.
        image_path: Path of the card image; its stem is used as the card ID in
            error-log entries.

    Returns:
        The decoded JSON object, or None if no usable reply was obtained.
    """
    image_data = encode_image(image_path)
    media_type = get_media_type(image_path)
    card_id = image_path.stem

    for attempt in range(MAX_RETRIES):
        backoff = RETRY_DELAY * (2 ** attempt)
        text = ""
        try:
            response = client.messages.create(
                model=MODEL,
                max_tokens=1024,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": media_type,
                                    "data": image_data,
                                },
                            },
                            {
                                "type": "text",
                                "text": SCAN_PROMPT,
                            },
                        ],
                    }
                ],
            )
            # Extract JSON from the response, tolerating Markdown wrapping.
            text = _strip_code_fence(response.content[0].text)
            card_data = json.loads(text)
            if isinstance(card_data, dict):
                return card_data
            # Valid JSON that is not an object (list, string, ...) is unusable
            # downstream, so treat it like any other parse failure.
            failure = (
                f"JSON parse error: expected an object, got "
                f"{type(card_data).__name__} | Raw: {text[:200]}"
            )
        except json.JSONDecodeError as e:
            failure = f"JSON parse error: {e} | Raw: {text[:200]}"
        except anthropic.RateLimitError as e:
            failure = f"Rate limited on every attempt: {e}"
            backoff *= 2  # rate limits wait one back-off step longer
            if attempt < MAX_RETRIES - 1:
                print(f" Rate limited, waiting {backoff}s...")
        except anthropic.APIError as e:
            failure = f"API error: {e}"
        except Exception as e:
            # Unknown failures are not retried; record them and give up.
            log_error(card_id, f"Unexpected error: {e}")
            return None

        if attempt == MAX_RETRIES - 1:
            # Out of retries: always leave a trace in the error log.
            log_error(card_id, failure)
            return None
        time.sleep(backoff)

    return None


# FFTCG Opus sets group cards by element in sequential order.
# Each set has a known element ordering by card number ranges.
# This maps set_number -> list of (max_card_number, element) tuples.
# Cards are numbered within each set: Fire first, then Ice, Wind, Earth, Lightning, Water, Light, Dark.
# These ranges are approximate but cover the vast majority of cards.
OPUS_ELEMENT_RANGES = { # Sets 1-9 verified from FFTCG Fandom wiki, sets 10-27 from Square Enix official API 1: [(30, "Fire"), (60, "Ice"), (90, "Wind"), (120, "Earth"), (150, "Lightning"), (180, "Water"), (183, "Light"), (186, "Dark")], 2: [(24, "Fire"), (48, "Ice"), (72, "Wind"), (96, "Earth"), (120, "Lightning"), (144, "Water"), (146, "Light"), (148, "Dark")], 3: [(24, "Fire"), (48, "Ice"), (72, "Wind"), (96, "Earth"), (120, "Lightning"), (144, "Water"), (146, "Light"), (148, "Dark")], 4: [(24, "Fire"), (48, "Ice"), (72, "Wind"), (96, "Earth"), (120, "Lightning"), (144, "Water"), (146, "Light"), (148, "Dark")], 5: [(24, "Fire"), (48, "Ice"), (72, "Wind"), (96, "Earth"), (120, "Lightning"), (144, "Water"), (146, "Light"), (148, "Dark")], 6: [(21, "Fire"), (42, "Ice"), (64, "Wind"), (84, "Earth"), (106, "Lightning"), (127, "Water"), (128, "Light"), (130, "Dark")], 7: [(21, "Fire"), (42, "Ice"), (64, "Wind"), (84, "Earth"), (106, "Lightning"), (127, "Water"), (128, "Light"), (130, "Dark")], 8: [(22, "Fire"), (44, "Ice"), (67, "Wind"), (88, "Earth"), (110, "Lightning"), (133, "Water"), (135, "Light"), (137, "Dark")], 9: [(20, "Fire"), (40, "Ice"), (60, "Wind"), (80, "Earth"), (100, "Lightning"), (120, "Water"), (122, "Light"), (124, "Dark")], 10: [(21, "Fire"), (42, "Ice"), (63, "Wind"), (84, "Earth"), (105, "Lightning"), (126, "Water"), (128, "Light"), (130, "Dark"), (132, "Fire"), (134, "Wind"), (136, "Earth"), (138, "Lightning"), (139, "Light"), (140, "Dark")], 11: [(21, "Fire"), (42, "Ice"), (63, "Wind"), (84, "Earth"), (105, "Lightning"), (126, "Water"), (128, "Light"), (130, "Dark"), (132, "Fire"), (134, "Ice"), (136, "Earth"), (138, "Lightning"), (139, "Light"), (140, "Dark")], 12: [(18, "Fire"), (36, "Ice"), (54, "Wind"), (72, "Earth"), (90, "Lightning"), (108, "Water"), (109, "Light"), (110, "Dark")], 13: [(17, "Fire"), (34, "Ice"), (51, "Wind"), (68, "Earth"), (85, "Lightning"), (102, "Water"), (103, "Light"), (128, "Dark"), (130, "Fire"), (132, "Ice"), 
(134, "Earth"), (136, "Lightning"), (138, "Light")], 14: [(19, "Fire"), (38, "Ice"), (57, "Wind"), (76, "Earth"), (95, "Lightning"), (114, "Water"), (116, "Light"), (118, "Dark")], 15: [(21, "Fire"), (43, "Ice"), (63, "Wind"), (84, "Earth"), (105, "Lightning"), (126, "Water"), (128, "Light"), (130, "Dark"), (134, "Earth"), (138, "Lightning")], 16: [(21, "Fire"), (42, "Ice"), (63, "Wind"), (84, "Earth"), (105, "Lightning"), (126, "Water"), (128, "Light"), (130, "Dark"), (133, "Fire"), (135, "Wind"), (138, "Water"), (139, "Light"), (140, "Dark")], 17: [(21, "Fire"), (42, "Ice"), (63, "Wind"), (84, "Earth"), (105, "Lightning"), (126, "Water"), (128, "Light"), (130, "Dark")], 18: [(17, "Fire"), (34, "Ice"), (51, "Wind"), (68, "Earth"), (85, "Lightning"), (102, "Water"), (104, "Light"), (130, "Dark"), (132, "Fire"), (134, "Wind"), (136, "Earth"), (138, "Lightning")], 19: [(17, "Fire"), (34, "Ice"), (51, "Wind"), (68, "Earth"), (85, "Lightning"), (102, "Water"), (104, "Light"), (128, "Dark"), (131, "Fire"), (134, "Ice"), (137, "Lightning"), (138, "Light")], 20: [(21, "Fire"), (42, "Ice"), (63, "Wind"), (84, "Earth"), (105, "Lightning"), (126, "Water"), (128, "Light"), (130, "Dark")], 21: [(20, "Fire"), (40, "Ice"), (60, "Wind"), (80, "Earth"), (100, "Lightning"), (120, "Water"), (122, "Light"), (124, "Dark"), (127, "Fire"), (130, "Ice"), (133, "Wind"), (134, "Light")], 22: [(18, "Fire"), (36, "Ice"), (54, "Wind"), (72, "Earth"), (90, "Lightning"), (108, "Water"), (110, "Light"), (111, "Dark"), (113, "Fire"), (115, "Ice"), (117, "Wind"), (119, "Earth"), (121, "Lightning"), (123, "Water"), (124, "Dark")], 23: [(19, "Fire"), (38, "Ice"), (57, "Wind"), (76, "Earth"), (95, "Lightning"), (114, "Water"), (115, "Light"), (117, "Dark"), (119, "Fire"), (121, "Ice"), (123, "Wind"), (125, "Earth"), (127, "Lightning"), (129, "Water"), (130, "Light")], 24: [(18, "Fire"), (36, "Ice"), (54, "Wind"), (72, "Earth"), (90, "Lightning"), (108, "Water"), (111, "Light"), (114, "Dark"), (115, 
"Fire"), (116, "Ice"), (117, "Wind"), (118, "Earth"), (119, "Lightning"), (120, "Water")], 25: [(17, "Fire"), (34, "Ice"), (51, "Wind"), (68, "Earth"), (85, "Lightning"), (102, "Water"), (104, "Light"), (105, "Dark"), (107, "Fire"), (109, "Ice"), (111, "Wind"), (113, "Earth"), (115, "Lightning"), (117, "Water"), (118, "Dark")], 26: [(20, "Fire"), (40, "Ice"), (60, "Wind"), (80, "Earth"), (100, "Lightning"), (120, "Water"), (121, "Light"), (123, "Dark"), (124, "Fire"), (125, "Ice"), (126, "Wind"), (127, "Earth"), (128, "Lightning"), (129, "Water"), (130, "Light")], 27: [(18, "Fire"), (36, "Ice"), (54, "Wind"), (72, "Earth"), (90, "Lightning"), (108, "Water"), (110, "Light"), (112, "Dark")], } def get_expected_element(card_id: str) -> str | None: """Get the expected element for a card based on its set and number in FFTCG ordering. Returns None if we can't determine it (non-standard set, promo, etc.).""" try: # Parse card ID: "1-001H" -> set=1, number=1 parts = card_id.split("-") if len(parts) != 2: return None set_num = int(parts[0]) # Extract number portion (strip rarity suffix: H, R, C, L, S) num_str = "" for ch in parts[1]: if ch.isdigit(): num_str += ch else: break if not num_str: return None card_num = int(num_str) ranges = OPUS_ELEMENT_RANGES.get(set_num) if not ranges: return None for max_num, element in ranges: if card_num <= max_num: return element return None except (ValueError, IndexError): return None def normalize_card(card_data: dict, filename: str, card_id: str) -> dict: """Normalize card data to match the expected schema.""" # Use the filename-based ID as canonical (matches image lookup) card_data["id"] = card_id # Normalize card type — AI sometimes returns "Summoner" instead of "Summon" TYPE_FIXES = { "Summoner": "Summon", "Companion": "Monster", "Weapon": "Monster", } raw_type = card_data.get("type", "") if raw_type in TYPE_FIXES: card_data["type"] = TYPE_FIXES[raw_type] # Normalize element — AI sometimes returns wrong names ELEMENT_FIXES = { 
"Chaos": "Dark", "Purple": "Lightning", } raw_element = card_data.get("element", "") if isinstance(raw_element, str) and raw_element in ELEMENT_FIXES: card_data["element"] = ELEMENT_FIXES[raw_element] # Element cross-check using card numbering element = card_data.get("element", "") expected = get_expected_element(card_id) if expected and isinstance(element, str) and element != expected: # The AI got it wrong — use the expected element from card numbering print(f"\n ELEMENT FIX: AI said '{element}', expected '{expected}' for {card_id}", end="") card_data["element"] = expected elif isinstance(element, list): # Multi-element: keep as array card_data["element"] = element elif isinstance(element, str): card_data["element"] = element # Ensure power is int or null power = card_data.get("power") if power is not None: try: card_data["power"] = int(power) except (ValueError, TypeError): card_data["power"] = None # Ensure cost is int try: card_data["cost"] = int(card_data.get("cost", 0)) except (ValueError, TypeError): card_data["cost"] = 0 # Set image path card_data["image"] = filename # Ensure boolean fields card_data["is_generic"] = bool(card_data.get("is_generic", False)) card_data["has_ex_burst"] = bool(card_data.get("has_ex_burst", False)) # Ensure abilities is a list if not isinstance(card_data.get("abilities"), list): card_data["abilities"] = [] # Clean up abilities cleaned_abilities = [] for ability in card_data["abilities"]: cleaned = { "type": ability.get("type", "field"), "effect": ability.get("effect", ""), } # Only include non-empty optional fields if ability.get("name"): cleaned["name"] = ability["name"] if ability.get("trigger"): cleaned["trigger"] = ability["trigger"] if ability.get("is_ex_burst"): cleaned["is_ex_burst"] = True if ability.get("cost") and isinstance(ability["cost"], dict): cleaned["cost"] = ability["cost"] cleaned_abilities.append(cleaned) card_data["abilities"] = cleaned_abilities return card_data def validate_card(card_data: dict, 
expected_id: str) -> list[str]: """Validate card data and return list of warnings.""" warnings = [] # Check ID match (case-insensitive since card printing varies) scanned_id = card_data.get("id", "") if scanned_id.upper() != expected_id.upper(): warnings.append(f"ID mismatch: scanned '{scanned_id}' vs filename '{expected_id}'") # Required fields for field in ["name", "type", "element", "cost"]: if not card_data.get(field): warnings.append(f"Missing required field: {field}") # Type-specific checks card_type = card_data.get("type", "") power = card_data.get("power") if card_type == "Forward" and power is None: warnings.append("Forward has no power value") if card_type in ("Backup", "Summon") and power is not None and power > 0: warnings.append(f"{card_type} has power value: {power}") # Valid type if card_type not in ("Forward", "Backup", "Summon", "Monster", ""): warnings.append(f"Unknown card type: {card_type}") # Valid element valid_elements = {"Fire", "Ice", "Wind", "Earth", "Lightning", "Water", "Light", "Dark"} element = card_data.get("element", "") if isinstance(element, str) and element not in valid_elements: warnings.append(f"Unknown element: {element}") elif isinstance(element, list): for e in element: if e not in valid_elements: warnings.append(f"Unknown element in multi-element: {e}") return warnings def run_scan(limit: int = 0) -> None: """Main scan loop.""" client = anthropic.Anthropic() images = get_card_images() progress = load_progress() total = len(images) if limit > 0: images = images[:limit] print(f"Limiting scan to {limit} cards") already_done = sum(1 for img in images if img.stem in progress) remaining = len(images) - already_done print(f"Found {total} card images total") print(f"Already scanned: {already_done}") print(f"Remaining: {remaining}") print() if remaining == 0: print("All cards already scanned! 
Use --finalize to generate cards.json") return scanned = 0 failed = 0 warnings_count = 0 for i, image_path in enumerate(images): card_id = image_path.stem # Skip already scanned if card_id in progress: continue print(f"[{already_done + scanned + 1}/{len(images)}] Scanning {card_id}...", end=" ", flush=True) card_data = scan_card(client, image_path) if card_data: card_data = normalize_card(card_data, image_path.name, card_id) warnings = validate_card(card_data, card_id) if warnings: warnings_count += len(warnings) for w in warnings: print(f"\n WARNING: {w}", end="") log_error(card_id, f"VALIDATION: {w}") progress[card_id] = card_data save_progress(progress) scanned += 1 print(f"OK - {card_data.get('name', '?')} ({card_data.get('type', '?')})") else: failed += 1 print("FAILED") # Rate limiting time.sleep(RATE_LIMIT_DELAY) print() print(f"Scan complete: {scanned} scanned, {failed} failed, {warnings_count} warnings") print(f"Total in progress file: {len(progress)} cards") if failed > 0: print(f"Check {ERROR_LOG} for error details") def show_stats() -> None: """Show scan progress statistics.""" progress = load_progress() images = get_card_images() total_images = len(images) total_scanned = len(progress) remaining = total_images - total_scanned print(f"Total card images: {total_images}") print(f"Scanned: {total_scanned}") print(f"Remaining: {remaining}") print(f"Progress: {total_scanned / total_images * 100:.1f}%") if total_scanned > 0: # Type breakdown types = {} elements = {} for card in progress.values(): t = card.get("type", "Unknown") types[t] = types.get(t, 0) + 1 e = card.get("element", "Unknown") if isinstance(e, list): e = "/".join(e) elements[e] = elements.get(e, 0) + 1 print("\nBy type:") for t, count in sorted(types.items()): print(f" {t}: {count}") print("\nBy element:") for e, count in sorted(elements.items()): print(f" {e}: {count}") def run_validate() -> None: """Validate all scanned data.""" progress = load_progress() if not progress: print("No scanned 
data found. Run scan first.") return total = len(progress) cards_with_warnings = 0 total_warnings = 0 for card_id, card_data in sorted(progress.items()): warnings = validate_card(card_data, card_id) if warnings: cards_with_warnings += 1 total_warnings += len(warnings) print(f"{card_id} ({card_data.get('name', '?')}):") for w in warnings: print(f" - {w}") print(f"\nValidation: {total} cards, {cards_with_warnings} with warnings, {total_warnings} total warnings") def finalize() -> None: """Generate final cards.json from progress data.""" progress = load_progress() if not progress: print("No scanned data found. Run scan first.") return # Convert to array format matching existing schema cards = [] for card_id in sorted(progress.keys()): card = progress[card_id] # Build output card matching existing schema output = { "id": card.get("id", card_id), "name": card.get("name", ""), "type": card.get("type", ""), } # Handle element — CardDatabase.gd expects "element" key (string or array) element = card.get("element", "") if isinstance(element, list) and len(element) == 1: output["element"] = element[0] elif isinstance(element, list): output["element"] = element # GDScript parser handles arrays else: output["element"] = element output["cost"] = card.get("cost", 0) output["power"] = card.get("power") output["job"] = card.get("job", "") output["category"] = card.get("category", "") output["is_generic"] = card.get("is_generic", False) output["has_ex_burst"] = card.get("has_ex_burst", False) output["abilities"] = card.get("abilities", []) output["image"] = card.get("image", f"{card_id}.jpg") cards.append(output) final = { "version": "2.0", "cards": cards, } with open(FINAL_FILE, "w") as f: json.dump(final, f, indent=2, ensure_ascii=False) print(f"Written {len(cards)} cards to {FINAL_FILE}") def main(): parser = argparse.ArgumentParser(description="FFTCG Card Scanner") parser.add_argument("--stats", action="store_true", help="Show scan progress stats") 
parser.add_argument("--finalize", action="store_true", help="Generate final cards.json") parser.add_argument("--validate", action="store_true", help="Validate scanned data") parser.add_argument("--limit", type=int, default=0, help="Limit number of cards to scan") args = parser.parse_args() if args.stats: show_stats() elif args.finalize: finalize() elif args.validate: run_validate() else: run_scan(limit=args.limit) if __name__ == "__main__": main()