#!/usr/bin/env python3
"""Deep analytics for the £100k/day plan.

Outputs JSON intel pack: deep_intel.json with everything build_blueprint.py needs.
"""
import os, json, csv, glob, re
from collections import defaultdict, Counter
from datetime import datetime, timedelta, date

ROOT = "/home/sol1/Desktop/MARKETING/nitrous_3mo"
RAW = f"{ROOT}/raw"
OUT = f"{ROOT}/deep_intel.json"

print("Loading orders CSV...")
orders = []
with open(f"{ROOT}/orders_3mo.csv", newline="", encoding="utf-8") as f:
    for r in csv.DictReader(f):
        try: r["_total"] = float(r.get("total") or 0)
        except: r["_total"] = 0.0
        try: r["_dt"] = datetime.fromisoformat((r.get("date_created") or "").replace("Z",""))
        except: r["_dt"] = None
        if r["_dt"]:
            orders.append(r)

print(f"  {len(orders):,} orders loaded")

# ---- 1. Daily revenue & new-customer mix ----
day_rev = defaultdict(float)
day_orders = Counter()
day_new_orders = Counter()
day_new_emails = defaultdict(set)
all_emails_seen = set()
first_purchase_date = {}
for o in sorted(orders, key=lambda r: r["_dt"]):
    e = o["email"].lower().strip()
    d = o["_dt"].date().isoformat()
    if e and e not in first_purchase_date:
        first_purchase_date[e] = d
        day_new_emails[d].add(e)
    day_rev[d] += o["_total"]
    day_orders[d] += 1
    if o.get("new_or_repeat") == "new":
        day_new_orders[d] += 1
    if e: all_emails_seen.add(e)

# ---- 2. Top revenue days vs avg ----
sorted_days = sorted(day_rev.items(), key=lambda x:-x[1])
top10 = sorted_days[:10]
bot10 = sorted_days[-10:]

# ---- 3. Hour by new vs repeat ----
new_hour = Counter(); rep_hour = Counter()
new_hour_rev = defaultdict(float); rep_hour_rev = defaultdict(float)
for o in orders:
    h = o["_dt"].hour
    if o.get("new_or_repeat") == "new":
        new_hour[h] += 1; new_hour_rev[h] += o["_total"]
    else:
        rep_hour[h] += 1; rep_hour_rev[h] += o["_total"]

# ---- 4. Customer LTV deciles (for the 58,985 buyers) ----
cust_rev = defaultdict(float)
cust_orders = Counter()
for o in orders:
    e = o["email"].lower().strip()
    if not e: continue
    cust_rev[e] += o["_total"]
    cust_orders[e] += 1

revs = sorted(cust_rev.values(), reverse=True)
n = len(revs)
deciles = []
for i in range(10):
    a = int(n * i/10); b = int(n * (i+1)/10)
    chunk = revs[a:b]
    deciles.append({
        "decile": i+1,
        "customers": len(chunk),
        "revenue": sum(chunk),
        "min": min(chunk) if chunk else 0,
        "max": max(chunk) if chunk else 0,
        "avg": sum(chunk)/len(chunk) if chunk else 0,
    })

# ---- 5. Subject line analysis (Klaviyo) ----
print("Analyzing email subject patterns...")
em_camp = json.load(open(f"{ROOT}/klaviyo/email_campaigns.json"))
em_metrics = json.load(open(f"{ROOT}/klaviyo/email_campaign_metrics.json"))
sm_camp = json.load(open(f"{ROOT}/klaviyo/sms_campaigns.json"))
sm_metrics = json.load(open(f"{ROOT}/klaviyo/sms_campaign_metrics.json"))

def flatten(b):
    out = {}
    for x in b:
        for r in (x.get("data",{}).get("attributes",{}).get("results",[]) or []):
            cid = r.get("groupings",{}).get("campaign_id")
            if cid: out[cid] = r.get("statistics",{}) or {}
    return out
em_perf = flatten(em_metrics)
sm_perf = flatten(sm_metrics)

# Word frequency in winning vs losing subject lines
def words(s):
    return re.findall(r"[A-Za-z£0-9]+", (s or "").lower())

em_with_perf = []
for c in em_camp:
    cid = c["id"]; a = c.get("attributes",{}) or {}
    p = em_perf.get(cid,{}) or {}
    if int(p.get("recipients",0) or 0) < 5000: continue
    em_with_perf.append({
        "name": a.get("name",""),
        "rev_per_recipient": float(p.get("conversion_value",0) or 0) / max(1,int(p.get("recipients",0) or 0)),
        "open_rate": float(p.get("open_rate",0) or 0),
        "revenue": float(p.get("conversion_value",0) or 0),
        "recipients": int(p.get("recipients",0) or 0),
    })
em_with_perf.sort(key=lambda x:-x["rev_per_recipient"])
top20 = em_with_perf[:20]
bot20 = em_with_perf[-20:]
top_words = Counter()
bot_words = Counter()
for c in top20:
    for w in words(c["name"]):
        if len(w) >= 3: top_words[w] += 1
for c in bot20:
    for w in words(c["name"]):
        if len(w) >= 3: bot_words[w] += 1

word_lift = {}
for w in set(list(top_words.keys()) + list(bot_words.keys())):
    if top_words[w] + bot_words[w] >= 3:
        word_lift[w] = top_words[w] - bot_words[w]
winning_words = sorted(word_lift.items(), key=lambda x:-x[1])[:15]
losing_words = sorted(word_lift.items(), key=lambda x: x[1])[:15]

# ---- 6. Live competitions analysis ----
print("Analyzing live products...")
live_products = []
for fp in sorted(glob.glob("/tmp/live_comps/p*.json")):
    live_products.extend(json.load(open(fp)))
DRAW_RE = re.compile(r"\[Draw\s+([A-Za-z]+)\s+(\d+)(?:st|nd|rd|th)?\s*\]", re.I)
MO = {m:i for i,m in enumerate(["January","February","March","April","May","June","July","August","September","October","November","December"],1)}
TODAY = date(2026,5,6)
upcoming30 = []
for p in live_products:
    nm = p.get("name","")
    m = DRAW_RE.search(nm)
    if not m: continue
    mo = MO.get(m.group(1).title());
    if not mo: continue
    try: dd = date(2026, mo, int(m.group(2)))
    except: continue
    if not (TODAY <= dd <= TODAY+timedelta(days=30)): continue
    try: price = float(p.get("price") or 0)
    except: price = 0.0
    upcoming30.append({
        "id": p.get("id"),
        "name": nm,
        "draw_date": dd.isoformat(),
        "draw_dow": dd.strftime("%A"),
        "days_until_draw": (dd - TODAY).days,
        "price": price,
        "stock": p.get("stock_quantity") or 0,
        "total_sales": p.get("total_sales") or 0,
    })
upcoming30.sort(key=lambda x: (x["draw_date"], -x["total_sales"]))

# Predict winners/losers for next 30 days using historical performance of similar formats
# Use historical competitions.csv to assign expected revenue
hist_rev_by_keyword = defaultdict(list)
with open(f"{ROOT}/competitions.csv", newline="") as f:
    for r in csv.DictReader(f):
        nm = (r.get("name","") or "").upper()
        rev = float(r.get("revenue") or 0)
        days = int(r.get("days_on_sale") or 0)
        if days < 3: continue
        rev_per_day = rev/days
        # Categorize
        cats = []
        if "INSTANT WIN" in nm: cats.append("instant_wins")
        if "FLIP" in nm: cats.append("flip_and_win")
        if "SCRATCH" in nm: cats.append("scratch_and_win")
        if "CASH" in nm or "TAX FREE" in nm: cats.append("cash")
        if "GOLD" in nm or "WATCH" in nm or "ROLEX" in nm: cats.append("gold_watch")
        if "FORD" in nm or "AUDI" in nm or "BMW" in nm or "VW" in nm or "VOLKSWAGEN" in nm or "MERCEDES" in nm or "PORSCHE" in nm or "RANGE ROVER" in nm or "TRANSIT" in nm or "CADDY" in nm or "GTI" in nm or "GOLF" in nm or "SUV" in nm or "AMAROK" in nm or "TIGUAN" in nm: cats.append("car")
        if "BIKE" in nm or "MX" in nm or "MOTOCROSS" in nm or "SUR-RON" in nm or "ULTRA BEE" in nm or "REVVI" in nm or "HAIBIKE" in nm: cats.append("bike")
        if "PS5" in nm or "META QUEST" in nm or "NINJA" in nm or "SHARK" in nm or "DJI" in nm or "TRAXXAS" in nm or "PLAYSTATION" in nm or "TV" in nm or "MACBOOK" in nm or "NINTENDO" in nm or "PHILIPS" in nm or "REVVI" in nm or "LAY-Z-SPA" in nm or "INSTA360" in nm: cats.append("tech_appliance")
        if "BUNDLE" in nm or "PICK YOUR PRIZE" in nm: cats.append("bundle")
        if "QUICK DRAW" in nm: cats.append("quick_draw")
        for c in cats:
            hist_rev_by_keyword[c].append(rev_per_day)

cat_avg = {k: (sum(v)/len(v), len(v)) for k,v in hist_rev_by_keyword.items() if len(v)>=3}

def predict(n):
    nu = n.upper()
    cats = []
    if "INSTANT WIN" in nu: cats.append("instant_wins")
    if "CASH" in nu or "TAX FREE" in nu: cats.append("cash")
    if "GOLD" in nu or "WATCH" in nu or "ROLEX" in nu: cats.append("gold_watch")
    if any(x in nu for x in ["FORD","AUDI","BMW","VW","VOLKSWAGEN","MERCEDES","PORSCHE","RANGE ROVER","TRANSIT","CADDY","GTI","GOLF","SUV","AMAROK","TIGUAN","M4","SIERRA","RENAULT","COSWORTH"]): cats.append("car")
    if any(x in nu for x in ["BIKE","MX","SUR-RON","HAIBIKE","REVVI","MOTOCROSS"]): cats.append("bike")
    if any(x in nu for x in ["PS5","META QUEST","NINJA","SHARK","DJI","TRAXXAS","PLAYSTATION","TV","MACBOOK","NINTENDO","PHILIPS","LAY-Z-SPA","INSTA360"]): cats.append("tech_appliance")
    if "BUNDLE" in nu or "PICK YOUR" in nu: cats.append("bundle")
    if "QUICK DRAW" in nu: cats.append("quick_draw")
    if not cats:
        return ("unknown", 0, 0)
    # Use the strongest matching category
    best = max(cats, key=lambda c: cat_avg.get(c, (0,0))[0])
    avg, n_hist = cat_avg.get(best, (0,0))
    return (best, avg, n_hist)

for u in upcoming30:
    cat, pred_per_day, hist_n = predict(u["name"])
    u["predicted_category"] = cat
    u["predicted_rev_per_day"] = pred_per_day
    u["hist_n_for_pred"] = hist_n
    u["expected_total_revenue"] = pred_per_day * max(3, u["days_until_draw"])

# ---- 7. Acquisition cost scenarios for Meta ----
fb_orders = sum(1 for o in orders if "facebook" in (o.get("traffic_source") or "").lower() or "facebook" in (o.get("referrer") or "").lower())
fb_revenue = sum(o["_total"] for o in orders if "facebook" in (o.get("traffic_source") or "").lower() or "facebook" in (o.get("referrer") or "").lower())
fb_unique = len({o["email"].lower() for o in orders if o["email"] and ("facebook" in (o.get("traffic_source") or "").lower() or "facebook" in (o.get("referrer") or "").lower())})
fb_new = len({o["email"].lower() for o in orders if o["email"] and o.get("new_or_repeat")=="new" and ("facebook" in (o.get("traffic_source") or "").lower() or "facebook" in (o.get("referrer") or "").lower())})

# ---- 8. Send-day vs no-send-day rev comparison (refine) ----
em_send_dates = set()
sm_send_dates = set()
for c in em_camp:
    a = c.get("attributes",{}) or {}
    if a.get("status") not in ("Sent","Sending","Sent (Recovery Run)"): continue
    st = a.get("send_time") or a.get("scheduled_at") or ""
    if st:
        em_send_dates.add(st[:10])
for c in sm_camp:
    a = c.get("attributes",{}) or {}
    if a.get("status") not in ("Sent","Sending","Sent (Recovery Run)"): continue
    st = a.get("send_time") or a.get("scheduled_at") or ""
    if st:
        sm_send_dates.add(st[:10])

# ---- Bundle output ----
intel = {
    "as_of": datetime.utcnow().isoformat(),
    "summary": {
        "total_orders_90d": len(orders),
        "total_revenue_90d": sum(o["_total"] for o in orders),
        "unique_buyers_90d": len(all_emails_seen),
        "avg_revenue_per_day": sum(o["_total"] for o in orders) / 90,
        "best_day": top10[0],
        "worst_day": bot10[0],
        "customer_db_total": 442229,
        "customer_db_paying": 407853,
        "activation_rate_pct": round(100 * len(all_emails_seen) / 407853, 2),
    },
    "top_10_days": top10,
    "bottom_10_days": bot10,
    "hour_split": {
        "new": {h: {"orders": new_hour[h], "revenue": new_hour_rev[h]} for h in range(24)},
        "repeat": {h: {"orders": rep_hour[h], "revenue": rep_hour_rev[h]} for h in range(24)},
    },
    "ltv_deciles": deciles,
    "winning_subject_words": winning_words,
    "losing_subject_words": losing_words,
    "top20_email_campaigns": top20,
    "bottom20_email_campaigns": bot20,
    "live_competitions_total": len(live_products),
    "upcoming_30day_draws": upcoming30,
    "category_avg_rev_per_day": cat_avg,
    "facebook_attribution": {
        "orders": fb_orders,
        "revenue": fb_revenue,
        "unique_buyers": fb_unique,
        "new_customers_acquired": fb_new,
    },
    "send_dates": {
        "email_send_days": len(em_send_dates),
        "sms_send_days": len(sm_send_dates),
        "either_send_days": len(em_send_dates | sm_send_dates),
        "no_send_days": 90 - len(em_send_dates | sm_send_dates),
    },
    "daily_new_customers": {d: len(s) for d,s in day_new_emails.items()},
}

with open(OUT,"w") as f:
    json.dump(intel, f, indent=2, default=str)
print(f"Wrote {OUT}")
print(f"Top 3 days: {[(d, round(r)) for d,r in top10[:3]]}")
print(f"Activation rate: {intel['summary']['activation_rate_pct']}% of paying customer DB")
print(f"Send-days: {intel['send_dates']['either_send_days']}/90  No-send: {intel['send_dates']['no_send_days']}")
print(f"Winning subject words: {[w for w,_ in winning_words[:10]]}")
print(f"Losing subject words: {[w for w,_ in losing_words[:10]]}")
print(f"Upcoming 30d draws: {len(upcoming30)}")