#!/usr/bin/env python3
"""Full 90-day Meta deep-dive from order attribution metadata.

What this tells us:
- Day-by-day Meta-attributed orders + revenue
- Campaign-level breakdown (utm_campaign)
- Ad-level breakdown (utm_content)
- Ad-set level breakdown (utm_term)
- Malformed UTM detection
- fbclid capture rate (proxy for tracking health)
- Cross-device leakage estimate
- Estimated Meta-driven 'direct' revenue
"""
import json, glob, csv, re
from collections import Counter, defaultdict
from datetime import datetime

# Base directory of the export; raw daily order dumps live under ROOT/raw and
# the aggregated report is written next to them.
ROOT = "/home/sol1/Desktop/MARKETING/nitrous_3mo"
OUT_JSON = f"{ROOT}/meta_intel.json"

print("Scanning all 90 days of raw orders for Meta attribution...")

# --- Scalar accumulators ----------------------------------------------------
# Ints and floats are immutable, so chained assignment cannot alias state.

# Order counts per attribution bucket.
total_orders = fb_orders_n = ig_orders_n = direct_orders_n = 0
fbclid_orders_n = gclid_orders_n = 0

# Revenue per attribution bucket.
fb_orders_revenue = ig_orders_revenue = direct_orders_revenue = 0.0
fbclid_orders_revenue = gclid_orders_revenue = 0.0

# UTM-quality diagnostics for Facebook-attributed orders.
malformed_utm = no_utm_at_all = referrer_only = fb_referrer_no_utm = 0
malformed_with_revenue = fb_referrer_no_utm_rev = 0.0

# Time-of-fbclid peaks
fb_orders_with_fbclid_rev = 0.0

# Zap / voodoo coupon usage.
zap_coupon_total = zap_credit_orders = 0
zap_coupon_value = 0.0

# --- Keyed accumulators -----------------------------------------------------
# Every container must be its own object, hence one constructor call each.

# Per-day series, keyed by the YYYY-MM-DD prefix of date_created.
day_fb_orders, day_fbclid = Counter(), Counter()
day_fb_rev, day_ig_rev, day_direct_rev, day_total_rev = (
    defaultdict(float) for _ in range(4)
)

# Per-hour series for Facebook-attributed orders.
hour_fb_orders = Counter()
hour_fb_rev = defaultdict(float)

# Campaign (utm_campaign), ad-set (utm_term) and ad (utm_content) breakdowns.
campaigns, adsets, ads = Counter(), Counter(), Counter()
campaign_rev, adset_rev, ad_rev = (defaultdict(float) for _ in range(3))
campaign_orders, adset_orders, ad_orders = (defaultdict(int) for _ in range(3))
campaign_unique = defaultdict(set)  # campaign -> distinct billing e-mails

# Landing-page path frequencies.
landing_paths = Counter()

# ---------------------------------------------------------------------------
# Main pass: read every raw daily export under ROOT/raw and fold each order
# into the module-level accumulators (totals, per-day/per-hour series,
# campaign / ad-set / ad breakdowns, UTM diagnostics, landing paths, coupons).
#
# Each input file is expected to hold a JSON array of order objects carrying
# "total", "date_created", "meta_data", "billing" and "coupon_lines".
# ---------------------------------------------------------------------------
from urllib.parse import urlparse  # imported once here instead of once per order

# Orders dropped because date_created was present but not parseable.
skipped_bad_date = 0

for fp in sorted(glob.glob(f"{ROOT}/raw/2026-0*.json")):
    # Context manager so the handle is closed as soon as the file is parsed.
    with open(fp, encoding="utf-8") as fh:
        data = json.load(fh)

    for o in data:
        # A missing or non-numeric total is treated as zero revenue.
        try:
            total = float(o.get("total") or 0)
        except (TypeError, ValueError):
            total = 0.0

        d = o.get("date_created", "")
        if not d:
            continue
        try:
            dt = datetime.fromisoformat(d.replace("Z", ""))
        except (AttributeError, TypeError, ValueError):
            # One corrupt timestamp must not abort the whole scan.
            skipped_bad_date += 1
            continue
        day = d[:10]
        h = dt.hour
        total_orders += 1
        day_total_rev[day] += total

        # Flatten the meta_data list into a key -> value mapping.
        meta = {m.get("key"): m.get("value") for m in (o.get("meta_data") or []) if m.get("key")}
        ref = (meta.get("_wc_order_attribution_referrer") or "").lower()
        utm_src = (meta.get("_wc_order_attribution_utm_source") or "").lower()
        utm_med = (meta.get("_wc_order_attribution_utm_medium") or "").lower()
        utm_camp = meta.get("_wc_order_attribution_utm_campaign") or ""
        utm_content = meta.get("_wc_order_attribution_utm_content") or ""
        utm_term = meta.get("_wc_order_attribution_utm_term") or ""
        landing = meta.get("_wc_order_attribution_session_entry") or ""
        stype = (meta.get("_wc_order_attribution_source_type") or "").lower()
        # `billing` can be present but null, so guard before the nested lookup.
        email = ((o.get("billing") or {}).get("email") or "").lower()

        # fbclid / gclid presence
        # NOTE(review): click IDs are searched for in the referrer only; they
        # may instead appear on the landing URL (session_entry) — confirm that
        # the referrer is the intended field for this tracking-health proxy.
        if "fbclid" in ref:
            fbclid_orders_n += 1
            fbclid_orders_revenue += total
            day_fbclid[day] += 1
        if "gclid" in ref:
            gclid_orders_n += 1
            gclid_orders_revenue += total

        # Source classification. Precedence is Facebook > Instagram > direct;
        # orders matching none of the three are counted in the totals only.
        # "fb" / "ig" are the short source names Meta's dynamic URL parameter
        # emits; "m.facebook" needs no separate test, "facebook" matches it.
        is_fb = "facebook" in utm_src or utm_src == "fb" or "facebook" in ref or "fbclid" in ref
        is_ig = "instagram" in utm_src or utm_src == "ig" or "instagram" in ref
        # Any order with an empty utm_source lands here regardless of source_type.
        is_direct = (stype == "typein" or utm_src in ("", "(none)", "(direct)", "direct"))

        if is_fb:
            fb_orders_n += 1
            fb_orders_revenue += total
            day_fb_orders[day] += 1
            day_fb_rev[day] += total
            hour_fb_orders[h] += 1
            hour_fb_rev[h] += total

            # Campaign/adset/ad breakdown. Keys are capped at 80 characters.
            if utm_camp:
                camp_key = utm_camp[:80]
                campaigns[camp_key] += 1
                campaign_rev[camp_key] += total
                campaign_orders[camp_key] += 1
                if email:
                    campaign_unique[camp_key].add(email)
            if utm_term:
                adset_key = utm_term[:80]
                adsets[adset_key] += 1
                adset_rev[adset_key] += total
                adset_orders[adset_key] += 1
            if utm_content:
                ad_key = utm_content[:80]
                ads[ad_key] += 1
                ad_rev[ad_key] += total
                ad_orders[ad_key] += 1

            # Malformed UTM detection: a bare "/" as content, or a missing
            # term or campaign.
            if utm_content == "/" or utm_term == "" or utm_camp == "":
                malformed_utm += 1
                malformed_with_revenue += total
            if not utm_camp and not utm_content and not utm_term:
                no_utm_at_all += 1

            # FB referrer but no UTM at all
            if ("facebook" in ref or "fbclid" in ref) and not utm_src:
                fb_referrer_no_utm += 1
                fb_referrer_no_utm_rev += total

        elif is_ig:
            ig_orders_n += 1
            ig_orders_revenue += total
            day_ig_rev[day] += total
        elif is_direct:
            direct_orders_n += 1
            direct_orders_revenue += total
            day_direct_rev[day] += total

        # Landing page tracking (path only, capped at 60 characters).
        if landing:
            try:
                landing_path = urlparse(landing).path
            except (AttributeError, TypeError, ValueError):
                # Best effort: an unparseable landing URL is simply not counted.
                pass
            else:
                landing_paths[landing_path[:60]] += 1

        # Zap coupon analysis (account credits used)
        order_used_credit = False
        for cl in (o.get("coupon_lines") or []):
            code = (cl.get("code") or "").lower()
            if "zap" in code or "voodoo" in code:
                zap_coupon_total += 1
                try:
                    zap_coupon_value += float(cl.get("discount") or 0)
                except (TypeError, ValueError):
                    # A non-numeric discount still counts as a redemption.
                    pass
                if "_credit" in code:
                    order_used_credit = True
        # Count the order once even if it carries several credit coupons.
        if order_used_credit:
            zap_credit_orders += 1

if skipped_bad_date:
    print(f"Skipped {skipped_bad_date:,} orders with unparseable date_created")

# Cross-device leakage estimate.
# Rule of thumb used here: between 30% and 50% of "direct" revenue is assumed
# to really come from paid social on mobile. The two bounds are added to the
# Facebook-tagged revenue to give a low/high "true Meta" range.
# NOTE(review): Instagram-tagged revenue (ig_orders_revenue) is not part of
# these figures — confirm whether that is intentional.
est_meta_in_direct_low, est_meta_in_direct_high = (
    direct_orders_revenue * share for share in (0.30, 0.50)
)
true_meta_revenue_low, true_meta_revenue_high = (
    fb_orders_revenue + leaked
    for leaked in (est_meta_in_direct_low, est_meta_in_direct_high)
)

# Output: assemble the report section by section (insertion order is the key
# order in the JSON file), then write it to OUT_JSON.

# Top-20 tables, most frequent first. Campaign rows additionally carry the
# number of distinct billing e-mails seen for that campaign.
campaign_rows = [
    (name, count, campaign_rev[name], len(campaign_unique[name]))
    for name, count in campaigns.most_common(20)
]
adset_rows = [(name, count, adset_rev[name]) for name, count in adsets.most_common(20)]
ad_rows = [(name, count, ad_rev[name]) for name, count in ads.most_common(20)]

# Headline order counts and revenue per attribution bucket.
intel = {
    "total_orders_90d": total_orders,
    "fb_tagged_orders": fb_orders_n,
    "fb_tagged_revenue": fb_orders_revenue,
    "ig_tagged_orders": ig_orders_n,
    "ig_tagged_revenue": ig_orders_revenue,
    "direct_orders": direct_orders_n,
    "direct_revenue": direct_orders_revenue,
    "fbclid_orders": fbclid_orders_n,
    "fbclid_revenue": fbclid_orders_revenue,
    "gclid_orders": gclid_orders_n,
    "gclid_revenue": gclid_orders_revenue,
}

# UTM-quality diagnostics.
intel.update({
    "malformed_utm_orders": malformed_utm,
    "malformed_utm_revenue": malformed_with_revenue,
    "no_utm_at_all": no_utm_at_all,
    "fb_referrer_no_utm": fb_referrer_no_utm,
    "fb_referrer_no_utm_rev": fb_referrer_no_utm_rev,
})

# Campaign / ad-set / ad breakdowns.
intel.update({
    "campaigns_count": len(campaigns),
    "adsets_count": len(adsets),
    "ads_count": len(ads),
    "top_campaigns": campaign_rows,
    "top_adsets": adset_rows,
    "top_ads": ad_rows,
})

# Time series, converted to plain dicts for serialisation.
intel.update({
    "day_fb_orders": dict(day_fb_orders),
    "day_fb_rev": dict(day_fb_rev),
    "day_total_rev": dict(day_total_rev),
    "day_fbclid": dict(day_fbclid),
    "hour_fb_rev": dict(hour_fb_rev),
})

# Leakage-adjusted range, landing pages and coupon usage.
intel.update({
    "true_meta_revenue_low": true_meta_revenue_low,
    "true_meta_revenue_high": true_meta_revenue_high,
    "top_landing_paths": landing_paths.most_common(20),
    "zap_coupon_redemptions": zap_coupon_total,
    "zap_coupon_credit_value": zap_coupon_value,
    "zap_credit_orders": zap_credit_orders,
})

# default=str stringifies any value the JSON encoder cannot handle natively.
with open(OUT_JSON, "w") as report_file:
    json.dump(intel, report_file, indent=2, default=str)

# Human-readable summary. All lines are collected first and emitted with a
# single print; an empty string stands for a blank separator line.
report = [
    f"\n{'='*70}",
    "META 90-DAY DEEP DIVE — FINDINGS",
    '='*70,
    f"Total orders:                 {total_orders:>10,}",
    f"FB-tagged orders:             {fb_orders_n:>10,}  £{fb_orders_revenue:>12,.0f}",
    f"IG-tagged orders:             {ig_orders_n:>10,}  £{ig_orders_revenue:>12,.0f}",
    f"Direct/typein orders:         {direct_orders_n:>10,}  £{direct_orders_revenue:>12,.0f}",
    f"fbclid present:               {fbclid_orders_n:>10,}  £{fbclid_orders_revenue:>12,.0f}",
    f"gclid present:                {gclid_orders_n:>10,}  £{gclid_orders_revenue:>12,.0f}",
    "",
    f"❗ MALFORMED UTM Meta orders:  {malformed_utm:>10,}  £{malformed_with_revenue:>12,.0f}",
    f"❗ FB referrer with NO UTM:    {fb_referrer_no_utm:>10,}  £{fb_referrer_no_utm_rev:>12,.0f}",
    f"   No UTM at all on FB:       {no_utm_at_all:>10,}",
    "",
    f"Distinct campaigns seen:      {len(campaigns):>10,}",
    f"Distinct ad-sets seen:        {len(adsets):>10,}",
    f"Distinct ads seen:            {len(ads):>10,}",
    "",
    f"True Meta revenue (low est):  £{true_meta_revenue_low:>12,.0f}",
    f"True Meta revenue (high est): £{true_meta_revenue_high:>12,.0f}",
    "",
    f"Zap-cashier coupon redemptions: {zap_coupon_total:>8,}",
    f"Zap-cashier discount £ given:   £{zap_coupon_value:>10,.0f}",
    f"Orders using zap credit:        {zap_credit_orders:>8,}",
    "",
]

# Ten most frequent campaigns with their order count and revenue.
report.append("TOP 10 META CAMPAIGNS (utm_campaign):")
report.extend(
    f"  {n:>7,} orders  £{campaign_rev[c]:>10,.0f}  {c}"
    for c, n in campaigns.most_common(10)
)
report.append("")

# Ten most frequent ads with their order count and revenue.
report.append("TOP 10 META ADS (utm_content):")
report.extend(
    f"  {n:>7,} orders  £{ad_rev[c]:>10,.0f}  {c}"
    for c, n in ads.most_common(10)
)
report.append("")

print("\n".join(report))
print("Saved deep-dive to:", OUT_JSON)
