#!/usr/bin/env python3
"""Zap (Meta agency) performance audit — every campaign analysed.

Key findings to surface:
- Total campaigns Zap ran (and the bloat)
- Top revenue campaigns
- ZERO-revenue / near-zero waste campaigns
- Malformed UTM scope
- AOV per campaign (where it differs from site AOV)
- Likely spend via reverse-ROAS modelling
"""
import json, glob, re, csv
from collections import Counter, defaultdict
from datetime import datetime

# Working directory of the audit: the order exports are globbed from
# ROOT/raw, and the JSON and CSV reports are written directly into ROOT.
ROOT = "/home/sol1/Desktop/MARKETING/nitrous_3mo"

print("Building Zap (Meta agency) full audit...")

# Per-campaign deep stats
class Campaign:
    """Mutable accumulator for the running statistics of one campaign.

    The constructor takes no arguments, which lets the class serve directly
    as a ``defaultdict`` factory. Every counter starts at zero, every
    collection starts empty, and both date bounds start as ``None``.
    """

    def __init__(self):
        # Identity and classification flags.
        self.name: str = ""
        self.is_zap: bool = False
        self.is_advantage: bool = False

        # Order volume and money.
        self.orders: int = 0
        self.revenue: float = 0.0
        self.aov = 0  # initialised only; nothing in this class recomputes it

        # Buyer breakdown.
        self.unique_buyers: set = set()
        self.new_customers: int = 0
        self.repeat_customers: int = 0

        # Earliest / latest date on which the campaign was observed.
        self.first_seen = None
        self.last_seen = None

        # Distinct adset and ad labels observed for the campaign.
        self.adsets: set = set()
        self.ads: set = set()

        # Tracking-quality counters.
        self.fbclid_present: int = 0
        self.malformed: int = 0

        # Per-day order counts and revenue, keyed by day.
        self.day_orders: Counter = Counter()
        self.day_rev: defaultdict = defaultdict(float)

# Campaign name -> Campaign accumulator, created on first access.
camps = defaultdict(Campaign)

# Track first-purchase emails for new vs repeat:
# lower-cased billing email -> earliest date_created string seen for it.
first_purchase = {}
print("Pass 1: first-purchase tagging...")
for fp in sorted(glob.glob(f"{ROOT}/raw/2026-0*.json")):
    # A context manager closes each export as soon as it is parsed; the
    # explicit encoding keeps decoding independent of the machine's locale.
    with open(fp, encoding="utf-8") as fh:
        data = json.load(fh)
    for o in data:
        # `billing` may be present but null, so fall back to an empty dict
        # instead of calling .get() on None.
        e = ((o.get("billing") or {}).get("email") or "").lower()
        d = o.get("date_created") or ""
        if not (e and d):
            continue
        # Timestamps are compared as strings, which orders them correctly as
        # long as every export uses the same ISO-style format.
        earliest = first_purchase.get(e)
        if earliest is None or d < earliest:
            first_purchase[e] = d

print("Pass 2: campaign extraction...")
for fp in sorted(glob.glob(f"{ROOT}/raw/2026-0*.json")):
    # A context manager closes each export as soon as it is parsed; the
    # explicit encoding keeps decoding independent of the machine's locale.
    with open(fp, encoding="utf-8") as fh:
        data = json.load(fh)
    for o in data:
        # An unparseable total is counted as £0 so one bad record cannot
        # abort the audit; only conversion errors are swallowed.
        try:
            total = float(o.get("total") or 0)
        except (TypeError, ValueError):
            total = 0.0
        d = o.get("date_created") or ""
        if not d:
            continue
        day = d[:10]

        # Flatten the order's meta_data list into a key -> value mapping.
        meta = {
            m.get("key"): m.get("value")
            for m in (o.get("meta_data") or [])
            if m.get("key")
        }
        utm_src = (meta.get("_wc_order_attribution_utm_source") or "").lower()
        utm_camp = meta.get("_wc_order_attribution_utm_campaign") or ""
        utm_content = meta.get("_wc_order_attribution_utm_content") or ""
        utm_term = meta.get("_wc_order_attribution_utm_term") or ""
        ref = (meta.get("_wc_order_attribution_referrer") or "").lower()

        # Keep only orders attributed to Meta: a Facebook UTM source, or a
        # referrer that mentions facebook / carries an fbclid.
        is_fb = (
            "facebook" in utm_src
            or utm_src == "fb"
            or "facebook" in ref
            or "fbclid" in ref
        )
        if not is_fb:
            continue

        cname = utm_camp or "(no campaign tag)"
        c = camps[cname]
        c.name = cname
        c.orders += 1
        c.revenue += total

        # `billing` may be present but null, so fall back to an empty dict.
        e = ((o.get("billing") or {}).get("email") or "").lower()
        if e:
            c.unique_buyers.add(e)
            # "New" means this order carries the earliest timestamp recorded
            # for the email in first_purchase; anything else is a repeat.
            if first_purchase.get(e) == d:
                c.new_customers += 1
            else:
                c.repeat_customers += 1

        # Compare day against day rather than a full timestamp against a
        # stored 10-character date.
        if c.first_seen is None or day < c.first_seen:
            c.first_seen = day
        if c.last_seen is None or day > c.last_seen:
            c.last_seen = day

        # Blank tags are not real adsets/ads, so they must not inflate the
        # distinct counts reported as n_adsets / n_ads.
        if utm_term:
            c.adsets.add(utm_term[:60])
        if utm_content:
            c.ads.add(utm_content[:60])

        lowered = cname.lower()
        if "zap" in lowered:
            c.is_zap = True
        if "advantage" in lowered:
            c.is_advantage = True
        if "fbclid" in ref:
            c.fbclid_present += 1
        # A bare "/" as content or a missing term is counted as malformed
        # UTM tagging.
        if utm_content == "/" or not utm_term:
            c.malformed += 1
        c.day_orders[day] += 1
        c.day_rev[day] += total

# Headline numbers for the whole Meta channel and for the Zap-tagged subset.
print(f"\nTotal Meta-attributed campaigns: {len(camps)}")
zap_camps = list(filter(lambda camp: camp.is_zap, camps.values()))
print(f"Zap-tagged campaigns: {len(zap_camps)}")
print(f"Total Zap orders: {sum(c.orders for c in zap_camps):,}")
print(f"Total Zap revenue: £{sum(c.revenue for c in zap_camps):,.0f}")

# Rank every campaign, highest revenue first (the sort is stable, so ties
# keep their first-seen order).
all_camps = sorted(camps.values(), key=lambda camp: camp.revenue, reverse=True)

# Bucket the ranked campaigns into revenue tiers in a single pass; each tier
# therefore keeps the ranking order.
tier_top, tier_mid, tier_low, tier_zero = [], [], [], []
for camp in all_camps:
    earned = camp.revenue
    if earned >= 1000:
        tier_top.append(camp)
    elif earned >= 200:
        tier_mid.append(camp)
    elif earned > 0:
        tier_low.append(camp)
    elif earned == 0:
        tier_zero.append(camp)
    # A negative total matches none of the tiers.

print(f"\n  {len(tier_top)} campaigns earned £1,000+ (£{sum(c.revenue for c in tier_top):,.0f})")
print(f"  {len(tier_mid)} campaigns earned £200-£1,000 (£{sum(c.revenue for c in tier_mid):,.0f})")
print(f"  {len(tier_low)} campaigns earned £0-£200 (£{sum(c.revenue for c in tier_low):,.0f}) — likely loss-makers")
print(f"  {len(tier_zero)} campaigns earned £0 — pure waste candidates")

# Save for HTML: assemble the report payload in two parts, then dump it.
meta_camps = list(camps.values())

# Channel-wide totals plus the count and revenue of each tier.
summary = {
    "total_campaigns": len(camps),
    "zap_campaigns": len(zap_camps),
    "total_meta_orders": sum(camp.orders for camp in meta_camps),
    "total_meta_revenue": sum(camp.revenue for camp in meta_camps),
    "zap_orders": sum(camp.orders for camp in zap_camps),
    "zap_revenue": sum(camp.revenue for camp in zap_camps),
    "advantage_camps": sum(1 for camp in meta_camps if camp.is_advantage),
    "tier_top_count": len(tier_top),
    "tier_top_revenue": sum(camp.revenue for camp in tier_top),
    "tier_mid_count": len(tier_mid),
    "tier_mid_revenue": sum(camp.revenue for camp in tier_mid),
    "tier_low_count": len(tier_low),
    "tier_low_revenue": sum(camp.revenue for camp in tier_low),
    "tier_zero_count": len(tier_zero),
}

# One flat record per campaign, in ranking order. Sets are reduced to their
# sizes so the record is plain JSON.
campaign_rows = []
for camp in all_camps:
    campaign_rows.append({
        "name": camp.name,
        "orders": camp.orders,
        "revenue": camp.revenue,
        "unique_buyers": len(camp.unique_buyers),
        "new_customers": camp.new_customers,
        "repeat_customers": camp.repeat_customers,
        # max(1, ...) guards the division for a campaign with no orders.
        "aov": camp.revenue / max(1, camp.orders),
        "first_seen": camp.first_seen,
        "last_seen": camp.last_seen,
        "n_adsets": len(camp.adsets),
        "n_ads": len(camp.ads),
        "is_zap": camp.is_zap,
        "is_advantage": camp.is_advantage,
        "fbclid_present": camp.fbclid_present,
        "malformed": camp.malformed,
    })

out_data = {"summary": summary, "all_campaigns": campaign_rows}

with open(f"{ROOT}/zap_audit.json","w") as json_file:
    json.dump(out_data, json_file, indent=2, default=str)
print(f"\nSaved zap_audit.json")

# CSV for finance review
def _csv_safe(text):
    """Return *text* made inert for spreadsheet applications.

    Campaign names come from UTM parameters, which anyone can set in a URL.
    A value starting with ``=``, ``+``, ``-``, ``@``, a tab or a carriage
    return can be interpreted as a formula when the file is opened in a
    spreadsheet, so such values are prefixed with an apostrophe to force
    them to be read as plain text. All other values are returned unchanged.
    """
    if text.startswith(("=", "+", "-", "@", "\t", "\r")):
        return "'" + text
    return text


# newline="" lets the csv module control line endings; the explicit encoding
# keeps non-ASCII campaign names from depending on the machine's locale.
with open(f"{ROOT}/zap_campaigns.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["campaign","orders","revenue","unique_buyers","new","repeat","aov","first_seen","last_seen","n_adsets","n_ads","is_zap","fbclid","malformed_url"])
    # One row per campaign, in ranking order; columns match the header above.
    for c in all_camps:
        w.writerow([
            _csv_safe(c.name),
            c.orders,
            f"{c.revenue:.2f}",
            len(c.unique_buyers),
            c.new_customers,
            c.repeat_customers,
            # max(1, ...) guards the division for a campaign with no orders.
            f"{c.revenue/max(1,c.orders):.2f}",
            c.first_seen,
            c.last_seen,
            len(c.adsets),
            len(c.ads),
            "Y" if c.is_zap else "",
            c.fbclid_present,
            c.malformed,
        ])
print("Saved zap_campaigns.csv")

# Print findings: low- and zero-revenue campaigns, lowest revenue first.
print("\n" + "="*70)
print("THE ZAP-WASTE LIST — campaigns that ran but earned <£200")
print("="*70)
for c in sorted(tier_low + tier_zero, key=lambda x: x.revenue):
    # Inclusive number of days between the first and last observed order;
    # stays 0 when either date is missing or cannot be parsed.
    days = 0
    if c.first_seen and c.last_seen:
        try:
            span = datetime.fromisoformat(c.last_seen) - datetime.fromisoformat(c.first_seen)
        except (TypeError, ValueError):
            # Only date-parsing failures are tolerated here; anything else
            # (including Ctrl-C) propagates instead of being swallowed.
            pass
        else:
            days = span.days + 1
    # List a campaign if it ran for at least a week, or if it earned nothing.
    if days >= 7 or c.revenue == 0:
        print(f"  £{c.revenue:>8,.0f}  {c.orders:>4} orders  ran {days:>2}d  ads:{len(c.ads):>3}  ZAP:{'Y' if c.is_zap else 'N'}  {c.name[:65]}")
