#!/usr/bin/env python3
"""Extract per-competition performance from raw WooCommerce JSON.

Outputs:
- competitions.csv: one row per competition (product_id) with revenue, orders, tickets, unique buyers, draw date, days on sale
- best_competitions.md: top performers + drop-date analysis (when each comp ends)
- new_customers.csv: deduped first-purchase per email, signups per day
- email_send_times.csv: hourly conversion for email-attributed orders
- monthly_pattern.csv: revenue by day-of-month
"""
import os, json, glob, re, csv
from collections import defaultdict, Counter
from datetime import datetime

RAW = "/home/sol1/Desktop/MARKETING/nitrous_3mo/raw"
OUT = "/home/sol1/Desktop/MARKETING/nitrous_3mo"

DRAW_RE = re.compile(r"\[Draw\s+([A-Za-z]+)\s+(\d+)(?:st|nd|rd|th)?\s*\]", re.I)
MONTHS = {m: i for i, m in enumerate(
    ["January","February","March","April","May","June","July","August","September","October","November","December"], start=1)}

def parse_draw_date(name, order_year=2026):
    if not name:
        return None
    m = DRAW_RE.search(name)
    if not m:
        return None
    month_name = m.group(1).strip().title()
    if month_name not in MONTHS:
        return None
    day = int(m.group(2))
    try:
        return datetime(order_year, MONTHS[month_name], day).date()
    except ValueError:
        return None

def main():
    files = sorted(glob.glob(os.path.join(RAW, "*.json")))

    # competitions[(product_id)] = dict of stats
    comp = defaultdict(lambda: {
        "name": "", "product_id": 0,
        "orders": 0, "tickets": 0, "revenue": 0.0,
        "buyers": set(), "first_sold": None, "last_sold": None,
        "draw_date": None, "channels": Counter(), "hours": Counter(),
    })

    # New customers
    first_purchase = {}  # email -> date_iso

    # Email-channel orders by hour & dow
    email_hour = Counter(); email_dow = Counter(); email_rev_hr = defaultdict(float); email_rev_dow = defaultdict(float)

    # Day-of-month
    dom_orders = Counter(); dom_revenue = defaultdict(float)

    # Daily new signups (deduped)
    new_signups_per_day = Counter()

    total_orders = 0
    total_rev = 0.0

    for fp in files:
        with open(fp) as f:
            data = json.load(f)
        for o in data:
            try:
                total = float(o.get("total") or 0)
            except Exception:
                total = 0.0
            d = o.get("date_created","")
            if not d:
                continue
            dt = datetime.fromisoformat(d.replace("Z",""))
            email = (o.get("billing",{}).get("email") or "").lower().strip()

            # detect channel
            meta_by_key = {m.get("key"): m.get("value") for m in (o.get("meta_data") or []) if m.get("key")}
            utm = (meta_by_key.get("_wc_order_attribution_utm_source") or "").lower()
            stype = (meta_by_key.get("_wc_order_attribution_source_type") or "").lower()
            ref = (meta_by_key.get("_wc_order_attribution_referrer") or "").lower()
            is_email = ("klaviyo" in utm) or ("email" in utm) or (stype == "utm" and "klaviyo" in ref)

            total_orders += 1
            total_rev += total

            # First purchase per email
            if email and (email not in first_purchase or d < first_purchase[email]):
                first_purchase[email] = d

            # Day-of-month
            dom = dt.day
            dom_orders[dom] += 1
            dom_revenue[dom] += total

            if is_email:
                email_hour[dt.hour] += 1
                email_rev_hr[dt.hour] += total
                email_dow[dt.weekday()] += 1
                email_rev_dow[dt.weekday()] += total

            # Per-line-item competition allocation
            line_items = o.get("line_items", []) or []
            n_items = len(line_items)
            for li in line_items:
                pid = li.get("product_id") or 0
                name = li.get("name") or ""
                try:
                    li_total = float(li.get("total") or 0)
                except Exception:
                    li_total = 0.0
                qty = int(li.get("quantity") or 0)
                c = comp[pid]
                c["product_id"] = pid
                c["name"] = name or c["name"]
                c["orders"] += 1
                c["tickets"] += qty
                c["revenue"] += li_total
                if email:
                    c["buyers"].add(email)
                if c["first_sold"] is None or d < c["first_sold"]:
                    c["first_sold"] = d
                if c["last_sold"] is None or d > c["last_sold"]:
                    c["last_sold"] = d
                if c["draw_date"] is None:
                    c["draw_date"] = parse_draw_date(name)
                if utm:
                    c["channels"][utm] += 1
                c["hours"][dt.hour] += 1

    # Calculate signups per day
    for email, fd in first_purchase.items():
        day = fd[:10]
        new_signups_per_day[day] += 1

    # ---- Write competitions CSV ----
    rows = []
    for pid, c in comp.items():
        if not c["name"]:
            continue
        first = c["first_sold"][:10] if c["first_sold"] else ""
        last  = c["last_sold"][:10]  if c["last_sold"] else ""
        days_on_sale = 0
        try:
            if first and last:
                days_on_sale = (datetime.fromisoformat(last) - datetime.fromisoformat(first)).days + 1
        except Exception:
            days_on_sale = 0
        rev_per_day = c["revenue"]/days_on_sale if days_on_sale else 0.0
        rows.append({
            "product_id": pid,
            "name": c["name"],
            "draw_date": c["draw_date"].isoformat() if c["draw_date"] else "",
            "first_sold": first,
            "last_sold": last,
            "days_on_sale": days_on_sale,
            "orders": c["orders"],
            "tickets": c["tickets"],
            "revenue": round(c["revenue"], 2),
            "rev_per_day": round(rev_per_day, 2),
            "unique_buyers": len(c["buyers"]),
            "aov": round(c["revenue"]/c["orders"], 2) if c["orders"] else 0,
            "tickets_per_order": round(c["tickets"]/c["orders"], 1) if c["orders"] else 0,
            "top_channel": c["channels"].most_common(1)[0][0] if c["channels"] else "",
        })
    rows.sort(key=lambda r: -r["revenue"])
    with open(os.path.join(OUT,"competitions.csv"),"w",newline="") as f:
        w = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        w.writeheader()
        for r in rows:
            w.writerow(r)
    print(f"competitions.csv: {len(rows)} unique competitions")

    # ---- new_customers.csv ----
    with open(os.path.join(OUT,"new_customers.csv"),"w",newline="") as f:
        w = csv.writer(f)
        w.writerow(["date","new_customers_first_ever_purchase"])
        for d in sorted(new_signups_per_day):
            w.writerow([d, new_signups_per_day[d]])
    total_new = sum(new_signups_per_day.values())
    print(f"new_customers.csv: {total_new:,} unique new buyers across {len(new_signups_per_day)} days (deduped — each email counted once)")

    # ---- email send time CSV ----
    with open(os.path.join(OUT,"email_send_times.csv"),"w",newline="") as f:
        w = csv.writer(f)
        w.writerow(["hour","email_orders","email_revenue"])
        for h in range(24):
            w.writerow([h, email_hour[h], f"{email_rev_hr[h]:.2f}"])
        w.writerow([])
        w.writerow(["dow","email_orders","email_revenue"])
        for i in range(7):
            w.writerow([["Mon","Tue","Wed","Thu","Fri","Sat","Sun"][i], email_dow[i], f"{email_rev_dow[i]:.2f}"])
    total_email_orders = sum(email_hour.values())
    print(f"email_send_times.csv: {total_email_orders:,} email-attributed orders")

    # ---- monthly pattern (day of month) ----
    with open(os.path.join(OUT,"monthly_pattern.csv"),"w",newline="") as f:
        w = csv.writer(f)
        w.writerow(["day_of_month","orders","revenue"])
        for d in range(1,32):
            w.writerow([d, dom_orders[d], f"{dom_revenue[d]:.2f}"])

    # ---- best_competitions.md narrative ----
    md = []
    md.append("# Competitions Performance — 90 days")
    md.append("")
    md.append(f"Across the window we sold **{len(rows):,} distinct competitions** generating **£{total_rev:,.2f}** from **{total_orders:,} orders**.")
    md.append("")

    md.append("## Top 25 competitions by revenue")
    md.append("")
    md.append("| # | Competition | Draw date | Revenue | Orders | Tickets | Unique buyers | Days on sale | £/day |")
    md.append("|---|---|---|--:|--:|--:|--:|--:|--:|")
    for i, r in enumerate(rows[:25], 1):
        nm = r['name'][:60] + ("…" if len(r['name'])>60 else "")
        md.append(f"| {i} | {nm} | {r['draw_date'] or '—'} | £{r['revenue']:,.0f} | {r['orders']:,} | {r['tickets']:,} | {r['unique_buyers']:,} | {r['days_on_sale']} | £{r['rev_per_day']:,.0f} |")
    md.append("")

    md.append("## Top 25 competitions by £ per day on sale (velocity)")
    md.append("")
    md.append("This is the metric that matters for stocking decisions — keep these formats running.")
    md.append("")
    md.append("| # | Competition | Draw date | £/day | Days on sale | Total revenue |")
    md.append("|---|---|---|--:|--:|--:|")
    by_velocity = sorted([r for r in rows if r["days_on_sale"]>=3], key=lambda r:-r["rev_per_day"])[:25]
    for i, r in enumerate(by_velocity, 1):
        nm = r['name'][:60] + ("…" if len(r['name'])>60 else "")
        md.append(f"| {i} | {nm} | {r['draw_date'] or '—'} | £{r['rev_per_day']:,.0f} | {r['days_on_sale']} | £{r['revenue']:,.0f} |")
    md.append("")

    # Underperformers — ran 7+ days, made <£500
    flops = [r for r in rows if r["days_on_sale"]>=7 and r["revenue"]<500]
    flops.sort(key=lambda r: r["rev_per_day"])
    md.append(f"## Underperformers — {len(flops)} competitions ran ≥7 days but made <£500")
    md.append("")
    md.append("Drop these formats. They take up draw slots and dilute focus.")
    md.append("")
    md.append("| Competition | Draw date | Revenue | Days on sale | £/day |")
    md.append("|---|---|--:|--:|--:|")
    for r in flops[:25]:
        nm = r['name'][:60] + ("…" if len(r['name'])>60 else "")
        md.append(f"| {nm} | {r['draw_date'] or '—'} | £{r['revenue']:,.0f} | {r['days_on_sale']} | £{r['rev_per_day']:,.0f} |")
    md.append("")

    # Drop-day analysis: which weekday should comps end on?
    drop_dow_rev = defaultdict(float)
    drop_dow_count = Counter()
    for r in rows:
        if not r["draw_date"]:
            continue
        try:
            dt = datetime.fromisoformat(r["draw_date"])
            dow = dt.strftime("%A")
            drop_dow_rev[dow] += r["revenue"]
            drop_dow_count[dow] += 1
        except Exception:
            continue
    md.append("## Best draw-end day-of-week (which day to schedule the live draw)")
    md.append("")
    md.append("| Day | # comps | Revenue (sum) | Avg revenue per comp |")
    md.append("|---|--:|--:|--:|")
    for d in ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]:
        n = drop_dow_count[d]; rev = drop_dow_rev[d]
        avg = rev/n if n else 0
        md.append(f"| {d} | {n} | £{rev:,.0f} | £{avg:,.0f} |")
    md.append("")

    # Email send-time recommendation
    md.append("## When to send email/SMS — peak conversion windows for email-attributed orders")
    md.append("")
    md.append("Filtered to orders where `_wc_order_attribution_utm_source` contains 'klaviyo' or 'email'.")
    md.append("")
    md.append(f"Total email-attributed orders in 90 days: **{total_email_orders:,}** generating £{sum(email_rev_hr.values()):,.0f}.")
    md.append("")
    md.append("**By hour:**")
    md.append("")
    md.append("| Hour | Orders | Revenue | Intensity |")
    md.append("|---|--:|--:|---|")
    if email_rev_hr:
        mx = max(email_rev_hr.values()) or 1
        for h in range(24):
            n = email_hour[h]; rev = email_rev_hr[h]
            bar = "█"*int(round(10*rev/mx))
            md.append(f"| {h:02d}:00 | {n:,} | £{rev:,.0f} | `{bar:<10}` {100*rev/mx:.0f}% |")
    md.append("")
    md.append("**By day of week:**")
    md.append("")
    md.append("| Day | Orders | Revenue |")
    md.append("|---|--:|--:|")
    for i,d in enumerate(["Mon","Tue","Wed","Thu","Fri","Sat","Sun"]):
        md.append(f"| {d} | {email_dow[i]:,} | £{email_rev_dow[i]:,.0f} |")
    md.append("")

    # Day of month
    md.append("## Best time of month — revenue by day-of-month (90-day rollup)")
    md.append("")
    md.append("| Day-of-month | Orders | Revenue | Intensity |")
    md.append("|---|--:|--:|---|")
    mx = max(dom_revenue.values()) or 1
    for d in range(1,32):
        n = dom_orders[d]; rev = dom_revenue[d]
        bar = "█"*int(round(10*rev/mx))
        md.append(f"| {d} | {n:,} | £{rev:,.0f} | `{bar:<10}` {100*rev/mx:.0f}% |")
    md.append("")

    # New customers per day
    md.append("## New unique customers per day (deduped — each email counted once)")
    md.append("")
    md.append(f"Total brand-new buyers in 90 days: **{total_new:,}**")
    md.append("")
    md.append("| Date | New first-ever buyers |")
    md.append("|---|--:|")
    for day in sorted(new_signups_per_day):
        md.append(f"| {day} | {new_signups_per_day[day]:,} |")
    md.append("")

    md_path = os.path.join(OUT,"COMPETITIONS_REPORT.md")
    with open(md_path,"w") as f:
        f.write("\n".join(md))
    print(f"Wrote {md_path}")

if __name__ == "__main__":
    main()
