#!/usr/bin/env python3
"""Flatten raw WooCommerce JSON pages into a single CSV.

Extracts: order_id, date_created, date_paid, customer_name, email, status,
total, payment_method, payment_method_title, billing_city, billing_country,
shipping_city, traffic_source (from order meta - utm/referrer keys), customer_ip,
new_or_repeat, product_summary.
"""
import os, json, csv, glob, re
from collections import defaultdict

RAW = "/home/sol1/Desktop/MARKETING/nitrous_3mo/raw"
OUT = "/home/sol1/Desktop/MARKETING/nitrous_3mo/orders_3mo.csv"

# Keys we'll search in meta_data for attribution
TRAFFIC_KEYS_PRIORITY = [
    "_wc_order_attribution_utm_source",
    "_wc_order_attribution_source_type",
    "_wc_order_attribution_referrer",
    "_wc_order_attribution_utm_medium",
    "_wc_order_attribution_utm_campaign",
    "_source",
    "_referrer",
    "utm_source",
    "_wc_order_attribution_session_pages",
]

def meta_get(meta, *keys):
    """Return first matching meta value."""
    by_key = {}
    for m in meta or []:
        k = m.get("key")
        if k:
            by_key[k] = m.get("value")
    for k in keys:
        if k in by_key and by_key[k] not in (None, "", "(none)", "(direct)"):
            return by_key[k]
    return ""

def get_traffic_source(meta):
    by_key = {}
    for m in meta or []:
        k = m.get("key")
        if k:
            by_key[k] = m.get("value")
    # Priority 1: utm_source
    utm = by_key.get("_wc_order_attribution_utm_source") or by_key.get("utm_source")
    if utm and utm not in ("(none)", "(direct)", ""):
        return utm
    # Priority 2: source_type (organic, direct, referral, typein, utm, admin)
    stype = by_key.get("_wc_order_attribution_source_type")
    if stype == "typein" or stype == "direct":
        return "typein"
    # Priority 3: referrer
    ref = by_key.get("_wc_order_attribution_referrer") or by_key.get("_referrer")
    if ref and ref not in ("(none)", "(direct)", ""):
        return ref
    # Priority 4: stype fallback
    if stype:
        return stype
    return "(direct)"

def main():
    files = sorted(glob.glob(os.path.join(RAW, "*.json")))
    print(f"Found {len(files)} day-files")
    seen_ids = set()
    rows = 0
    customer_first_order = {}
    # First pass: determine first-order date per email for new/repeat tag
    print("Pass 1: tagging new vs repeat customers...")
    for fp in files:
        with open(fp) as f:
            data = json.load(f)
        for o in data:
            email = (o.get("billing", {}).get("email") or "").lower().strip()
            d = o.get("date_created") or ""
            if not email:
                continue
            if email not in customer_first_order or d < customer_first_order[email]:
                customer_first_order[email] = d

    print("Pass 2: writing CSV...")
    with open(OUT, "w", newline="", encoding="utf-8") as cf:
        w = csv.writer(cf)
        w.writerow([
            "order_id","date_created","date_paid","status","total",
            "customer_name","email","payment_method","payment_method_title",
            "billing_city","billing_country","shipping_city",
            "traffic_source","source_type","utm_medium","utm_campaign","referrer",
            "customer_ip","customer_user_agent","new_or_repeat","items_count","product_summary"
        ])
        for fp in files:
            with open(fp) as f:
                data = json.load(f)
            for o in data:
                oid = o.get("id")
                if oid in seen_ids:
                    continue
                seen_ids.add(oid)
                meta = o.get("meta_data", [])
                billing = o.get("billing", {})
                shipping = o.get("shipping", {})
                email = (billing.get("email") or "").lower().strip()
                name = (billing.get("first_name","") + " " + billing.get("last_name","")).strip()
                date_created = o.get("date_created") or ""
                first = customer_first_order.get(email, date_created)
                tag = "new" if date_created == first else "repeat"
                items = o.get("line_items", [])
                items_count = sum((li.get("quantity") or 0) for li in items)
                product_summary = "; ".join(
                    f"{li.get('name','')[:60]} x{li.get('quantity','')}" for li in items[:3]
                )
                w.writerow([
                    oid,
                    date_created,
                    o.get("date_paid") or "",
                    o.get("status",""),
                    o.get("total",""),
                    name,
                    email,
                    o.get("payment_method",""),
                    o.get("payment_method_title",""),
                    billing.get("city",""),
                    billing.get("country",""),
                    shipping.get("city",""),
                    get_traffic_source(meta),
                    meta_get(meta, "_wc_order_attribution_source_type"),
                    meta_get(meta, "_wc_order_attribution_utm_medium"),
                    meta_get(meta, "_wc_order_attribution_utm_campaign"),
                    meta_get(meta, "_wc_order_attribution_referrer", "_referrer"),
                    o.get("customer_ip_address",""),
                    (o.get("customer_user_agent","") or "")[:200],
                    tag,
                    items_count,
                    product_summary,
                ])
                rows += 1
    print(f"Wrote {rows} unique orders to {OUT}")

if __name__ == "__main__":
    main()
