import json
import re
from pathlib import Path

import requests


# ---------------------------------------------------------------------
# CONFIG
# ---------------------------------------------------------------------

# Base URL of the GeoNetwork catalogue; service paths are appended to it.
GEONETWORK_BASE = "https://metadata.imas.utas.edu.au/geonetwork"

# Used as both:
# 1. the free-text search value
# 2. the exact UUID to exclude from the results
#    (it is also written to the output file as "parentUuid")
SEARCH_TEXT = "570c389c-6c00-4edf-8aed-8c49a5b4547d"

# Output is written to the current working directory
# (resolved once, when this module is loaded, not when main() runs)
OUTPUT_JSON = Path.cwd() / "children.json"

# Number of records requested per search page (size of the from..to window)
PAGE_SIZE = 100


# ---------------------------------------------------------------------
# LABEL CLEANING
# ---------------------------------------------------------------------

def make_display_label(title):
    """
    Build a short display label from a record title.

    Runs of whitespace are collapsed to single spaces first. A title of
    the form

      IND412 - Healthcheck: Safety at sea

    is then shortened to

      IND412: Safety at sea

    (the indicator code is upper-cased; matching is case-insensitive).
    Any other title is returned whitespace-normalised but otherwise
    unchanged. A falsy title (None, "") yields "".
    """
    normalized = " ".join((title or "").split())

    found = re.match(
        r"^(IND\d+)\s*-\s*Healthcheck:\s*(.+)$",
        normalized,
        flags=re.IGNORECASE,
    )

    # Not an indicator healthcheck title: keep it as-is.
    if found is None:
        return normalized

    code, description = found.groups()
    return f"{code.upper()}: {description.strip()}"


# ---------------------------------------------------------------------
# GEONETWORK SEARCH
# ---------------------------------------------------------------------

def as_list(value):
    """
    Normalise a value that may be missing, a single item, or a list.

    Returns the same list object when *value* is already a list, an empty
    list for None, and a one-element list wrapping anything else.
    """
    if isinstance(value, list):
        return value
    return [] if value is None else [value]


def search_geonetwork_freetext(search_text):
    """
    Run a free-text search against the GeoNetwork "q" service and return
    all matching records.

    Results are fetched in windows of PAGE_SIZE records using the "from"
    and "to" query parameters, starting at 1, until one of the following
    holds:

    * a page contains no "metadata" entries;
    * the number of collected records reaches the total reported in
      summary["@count"];
    * the total is not available and a page comes back shorter than
      PAGE_SIZE (i.e. it was the last page);
    * a page is identical to the previous one (the server ignored the
      paging parameters), which would otherwise never terminate.

    Args:
        search_text: value sent as the "any" (free-text) parameter.

    Returns:
        A list of the raw record objects taken from the "metadata" field
        of each response page, in the order received.

    Raises:
        requests.HTTPError: if a page request returns an error status
            (via ``raise_for_status``).
        Other ``requests`` exceptions / ValueError may propagate on
        network failure or if the body is not valid JSON.
    """
    url = f"{GEONETWORK_BASE}/srv/eng/q"
    records = []
    previous_batch = None
    start = 1

    while True:
        params = {
            "_content_type": "json",
            "fast": "index",
            "from": start,
            "to": start + PAGE_SIZE - 1,
            "any": search_text,
        }

        response = requests.get(url, params=params, timeout=60)
        response.raise_for_status()

        data = response.json()
        # "metadata" is absent for no hits and may be a single object
        # rather than a list, hence as_list().
        batch = as_list(data.get("metadata"))

        # Empty page: nothing more to fetch. Repeated page: paging is not
        # being honoured, so stop instead of looping / duplicating.
        if not batch or batch == previous_batch:
            break

        records.extend(batch)
        previous_batch = batch

        # The reported total may be missing, null, or malformed; in that
        # case treat it as unknown instead of assuming we already have
        # everything (which would silently truncate to the first page).
        summary = data.get("summary")
        try:
            total = int(summary["@count"])
        except (KeyError, TypeError, ValueError):
            total = None

        if total is None:
            # Without a total, a short page is the only end-of-results signal.
            if len(batch) < PAGE_SIZE:
                break
        elif len(records) >= total:
            break

        start += PAGE_SIZE

    return records


# ---------------------------------------------------------------------
# RESULT EXTRACTION
# ---------------------------------------------------------------------

def get_first_string(value):
    """
    Return the first non-empty, stripped string found inside *value*.

    * A string is returned stripped (possibly empty).
    * A list is searched item by item, recursively.
    * A dict is searched, recursively, under the keys "#text", "value",
      "default", "eng" and "title", in that order.
    * Anything else (None, numbers, ...) yields "".

    Returns "" when no non-empty string is found.
    """
    if isinstance(value, str):
        return value.strip()

    if isinstance(value, list):
        candidates = value
    elif isinstance(value, dict):
        # Lazily look up the known text-bearing keys in priority order.
        candidates = (
            value.get(key)
            for key in ("#text", "value", "default", "eng", "title")
        )
    else:
        return ""

    # First candidate that resolves to a non-empty string wins.
    return next(
        (text for text in map(get_first_string, candidates) if text),
        "",
    )


def extract_uuid(record):
    """
    Return the UUID of a search result record, or "" if none is found.

    The following locations are tried in order and the first one that
    yields a non-empty string (via get_first_string) wins:

      record["uuid"], record["@uuid"], record["geonet:uuid"],
      record["id"], record["geonet:info"]["uuid"], record["info"]["uuid"]

    A location holding a value with no usable text (e.g. a blank string
    or a number) does not block the later fallbacks, and a
    "geonet:info" / "info" entry that is not a dict is skipped rather
    than raising.
    """
    candidates = [
        record.get(key)
        for key in ("uuid", "@uuid", "geonet:uuid", "id")
    ]

    for container_key in ("geonet:info", "info"):
        container = record.get(container_key)
        # The key may be present but null or not an object; only dicts
        # can carry a nested "uuid".
        if isinstance(container, dict):
            candidates.append(container.get("uuid"))

    for candidate in candidates:
        text = get_first_string(candidate)
        if text:
            return text

    return ""

def extract_title(record):
    """
    Return the title of a search result record, or "" if none is found.

    The following locations are tried in order and the first one that
    yields a non-empty string (via get_first_string) wins:

      record["title"], record["defaultTitle"], record["resourceTitle"],
      record["geonet:info"]["title"], record["info"]["title"]

    A location holding a value with no usable text (e.g. a blank string)
    does not block the later fallbacks, and a "geonet:info" / "info"
    entry that is not a dict is skipped rather than raising.
    """
    candidates = [
        record.get(key)
        for key in ("title", "defaultTitle", "resourceTitle")
    ]

    for container_key in ("geonet:info", "info"):
        container = record.get(container_key)
        # The key may be present but null or not an object; only dicts
        # can carry a nested "title".
        if isinstance(container, dict):
            candidates.append(container.get("title"))

    for candidate in candidates:
        text = get_first_string(candidate)
        if text:
            return text

    return ""

def build_children(records, excluded_uuid):
    """
    Turn raw search records into a sorted list of child entries.

    Records are skipped when they have no UUID, when their UUID equals
    *excluded_uuid*, or when the UUID was already seen earlier in
    *records* (all UUID comparisons are case-insensitive; the first
    occurrence is kept with its original casing).

    Each kept record becomes {"uuid": ..., "label": ...} where the label
    is the cleaned-up title, or the UUID itself when the record has no
    title. The result is sorted by label, ignoring case.
    """
    # Keyed by lower-cased UUID; insertion order is the encounter order,
    # which keeps the sort below stable in the same way as a list would.
    kept = {}

    for record in records:
        uuid = extract_uuid(record)
        key = uuid.lower()

        if not uuid or key == excluded_uuid.lower() or key in kept:
            continue

        title = extract_title(record)
        kept[key] = {
            "uuid": uuid,
            "label": make_display_label(title) if title else uuid,
        }

    return sorted(kept.values(), key=lambda child: child["label"].casefold())


# ---------------------------------------------------------------------
# MAIN
# ---------------------------------------------------------------------

def main():
    """
    Search the catalogue for SEARCH_TEXT, derive the child list, and
    write it as JSON to OUTPUT_JSON.

    The output object has the form
    {"parentUuid": SEARCH_TEXT, "children": [...]}; the record matching
    SEARCH_TEXT itself is excluded from the children. A short summary is
    printed once the file has been written.
    """
    found = search_geonetwork_freetext(SEARCH_TEXT)
    kids = build_children(found, excluded_uuid=SEARCH_TEXT)

    payload = json.dumps(
        {"parentUuid": SEARCH_TEXT, "children": kids},
        indent=2,
        ensure_ascii=False,
    )
    OUTPUT_JSON.write_text(payload, encoding="utf-8")

    print(f"Search returned {len(found)} records")
    print(f"Wrote {len(kids)} child records to: {OUTPUT_JSON}")


# Run the export only when this file is executed as a script, so that
# importing the module does not trigger a network search or a file write.
if __name__ == "__main__":
    main()