#!/usr/bin/env python3
"""Build the unified 3-month marketing & sales audit report.

Combines:
- WooCommerce orders (orders_3mo.csv)
- Competitions performance (competitions.csv)
- New-customer signups (new_customers.csv)
- Klaviyo email campaigns + metrics
- Klaviyo SMS campaigns + metrics
- Klaviyo flows, lists, segments

Outputs MASTER_AUDIT.md and MASTER_AUDIT.html.
"""
import os, csv, json, re
from collections import defaultdict, Counter
from datetime import datetime, timedelta

# Filesystem layout: all CSV/JSON inputs live under ROOT, and both report
# formats are written back into the same directory.
ROOT = "/home/sol1/Desktop/MARKETING/nitrous_3mo"
OUT_MD = f"{ROOT}/MASTER_AUDIT.md"
OUT_HTML = f"{ROOT}/MASTER_AUDIT.html"

# ----- Load datasets -----
def load_csv(p):
    """Read a CSV file into a list of row dicts (header row becomes the keys).

    newline="" is required by the csv module; the encoding is pinned to UTF-8
    so £ signs and smart punctuation in the exports don't depend on the
    machine's locale (the default locale encoding can raise UnicodeDecodeError
    under e.g. a C/ASCII cron environment).
    """
    with open(p, newline="", encoding="utf-8") as f:
        return list(csv.DictReader(f))

# Order summary already in audit.py output - re-derive from orders_3mo.csv
print("Loading orders...")
orders = load_csv(f"{ROOT}/orders_3mo.csv")
total_orders = len(orders)
# Blank/missing totals count as zero revenue rather than crashing float().
total_revenue = sum(float(r.get("total") or 0) for r in orders)
# De-dupe buyers case-insensitively; rows with an empty email are excluded.
unique_emails = {r["email"].lower() for r in orders if r["email"]}
new_orders = sum(1 for r in orders if r.get("new_or_repeat")=="new")

print(f"  {total_orders:,} orders, £{total_revenue:,.0f} revenue, {len(unique_emails):,} unique buyers")

competitions = load_csv(f"{ROOT}/competitions.csv")
new_signups = load_csv(f"{ROOT}/new_customers.csv")
# Sum of the per-day new-buyer counts (each email counted once, on its
# first-ever purchase, per the column name and the wording used in §3 below).
total_new = sum(int(r["new_customers_first_ever_purchase"]) for r in new_signups)

# ----- Klaviyo -----
print("Loading Klaviyo data...")

def _load_json(path):
    """Load one Klaviyo export as JSON.

    The original `json.load(open(...))` pattern left the file handle to be
    closed by the GC; a `with` block closes it deterministically. Encoding is
    pinned to UTF-8, which is what JSON exports are.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)

em_camp = _load_json(f"{ROOT}/klaviyo/email_campaigns.json")
sm_camp = _load_json(f"{ROOT}/klaviyo/sms_campaigns.json")
em_perf_batches = _load_json(f"{ROOT}/klaviyo/email_campaign_metrics.json")
sm_perf_batches = _load_json(f"{ROOT}/klaviyo/sms_campaign_metrics.json")
flows = _load_json(f"{ROOT}/klaviyo/flows.json")
lists_data = _load_json(f"{ROOT}/klaviyo/lists.json")
list_sizes = _load_json(f"{ROOT}/klaviyo/list_sizes.json")
segments = _load_json(f"{ROOT}/klaviyo/segments.json")

# Flatten campaign metrics (id -> stats dict)
def flatten_metrics(batches):
    """Collapse paginated metric batches into one {campaign_id: statistics} map.

    Rows without a campaign_id grouping are skipped; a missing/null statistics
    payload becomes an empty dict. Later batches overwrite earlier duplicates.
    """
    by_campaign = {}
    for batch in batches:
        results = batch.get("data", {}).get("attributes", {}).get("results", []) or []
        for row in results:
            campaign_id = row.get("groupings", {}).get("campaign_id")
            if not campaign_id:
                continue
            by_campaign[campaign_id] = row.get("statistics", {}) or {}
    return by_campaign

# campaign_id -> statistics maps for joining onto the campaign objects below.
em_perf = flatten_metrics(em_perf_batches)
sm_perf = flatten_metrics(sm_perf_batches)
print(f"  Email campaigns: {len(em_camp)}, perf rows: {len(em_perf)}")
print(f"  SMS campaigns: {len(sm_camp)}, perf rows: {len(sm_perf)}")

# Build campaign rows
def campaign_rows(camps, perf, channel):
    """Join Klaviyo campaign objects with their stats into flat report rows.

    Campaigns whose status is anything other than Sent / Sending /
    "Sent (Recovery Run)" are dropped. Stats missing from `perf` default to
    zero; an absent or unparseable send time leaves the derived
    hour / day-of-week / date fields as None.
    """
    sent_statuses = ("Sent", "Sending", "Sent (Recovery Run)")
    out = []
    for camp in camps:
        cid = camp["id"]
        attrs = camp.get("attributes", {}) or {}
        if attrs.get("status") not in sent_statuses:
            continue
        stats = perf.get(cid, {}) or {}
        send_time = attrs.get("send_time") or attrs.get("scheduled_at") or ""
        # Strip trailing Z / +HH:MM offset so fromisoformat accepts the string
        # on older Python versions too; fall back to None on any parse failure.
        sdt = None
        if send_time:
            try:
                sdt = datetime.fromisoformat(send_time.replace("Z", "").split("+")[0])
            except Exception:
                sdt = None
        audiences = attrs.get("audiences") or {}

        def _int(key):
            # Coerce a possibly-missing / possibly-string counter to int.
            return int(stats.get(key, 0) or 0)

        def _flt(key):
            # Coerce a possibly-missing / possibly-string rate or value to float.
            return float(stats.get(key, 0) or 0)

        out.append({
            "id": cid,
            "name": attrs.get("name") or "",
            "channel": channel,
            "status": attrs.get("status"),
            "send_time": send_time,
            "send_dt": sdt,
            "send_hour": sdt.hour if sdt else None,
            "send_dow": sdt.strftime("%A") if sdt else None,
            "send_date": sdt.date().isoformat() if sdt else None,
            "audiences_included": audiences.get("included") or [],
            "audiences_excluded": audiences.get("excluded") or [],
            "recipients": _int("recipients"),
            "delivered": _int("delivered"),
            "opens_unique": _int("opens_unique"),
            "open_rate": _flt("open_rate"),
            "clicks_unique": _int("clicks_unique"),
            "click_rate": _flt("click_rate"),
            "ctor": _flt("click_to_open_rate"),
            "conversions": _int("conversions"),
            "conversion_uniques": _int("conversion_uniques"),
            "conversion_value": _flt("conversion_value"),
            "conversion_rate": _flt("conversion_rate"),
            "rev_per_recipient": _flt("revenue_per_recipient"),
            "unsubscribes": _int("unsubscribes"),
            "unsubscribe_rate": _flt("unsubscribe_rate"),
            "spam": _int("spam_complaints"),
            "bounce_rate": _flt("bounce_rate"),
        })
    return out

em_rows = campaign_rows(em_camp, em_perf, "email")
sm_rows = campaign_rows(sm_camp, sm_perf, "sms")

# Resolve segment / list names for audience IDs
seg_name = {s["id"]: s.get("attributes",{}).get("name") for s in segments}
list_name = {L["id"]: L.get("attributes",{}).get("name") for L in lists_data}
def resolve_aud(ids):
    """Map audience IDs to human names: segment name first, then list name, else the raw ID."""
    return [seg_name.get(i) or list_name.get(i) or i for i in ids]

# ---- Aggregate metrics ----
def agg(rows):
    """Roll campaign rows up into channel-level totals plus derived rates.

    Every rate divides by its natural denominator and falls back to 0 when
    that denominator is zero, so an empty channel still yields a complete
    summary dict.
    """
    sent = delivered = opens = clicks = conversions = unsubscribed = 0
    revenue = 0.0
    # Single pass over the rows instead of one sum() scan per field.
    for row in rows:
        sent += row["recipients"]
        delivered += row["delivered"]
        opens += row["opens_unique"]
        clicks += row["clicks_unique"]
        conversions += row["conversions"]
        revenue += row["conversion_value"]
        unsubscribed += row["unsubscribes"]
    count = len(rows)
    return {
        "campaigns": count,
        "sent": sent,
        "delivered": delivered,
        "opens": opens,
        "clicks": clicks,
        "conversions": conversions,
        "revenue": revenue,
        "unsubs": unsubscribed,
        "open_rate": opens / delivered if delivered else 0,
        "click_rate": clicks / delivered if delivered else 0,
        "ctor": clicks / opens if opens else 0,
        "rev_per_send": revenue / sent if sent else 0,
        "rev_per_campaign": revenue / count if count else 0,
    }
# Only campaigns that actually reached at least one recipient count toward
# the channel totals (zero-recipient rows would skew the per-campaign averages).
em_total = agg([r for r in em_rows if r["recipients"]>0])
sm_total = agg([r for r in sm_rows if r["recipients"]>0])

# SMS estimated cost (UK Klaviyo standard rate ~£0.040 per SMS as of 2026, includes long-codes; user can override)
SMS_UK_COST = 0.040
sms_cost_est = sm_total["sent"] * SMS_UK_COST

# ---- Hour-of-day performance (when sends actually convert) ----
# Parallel accumulators: recipients, conversions and attributed revenue,
# bucketed by each campaign's send hour (UTC) and by its day-of-week.
em_hour_conv = Counter(); em_hour_rev = defaultdict(float); em_hour_sent = Counter()
sm_hour_conv = Counter(); sm_hour_rev = defaultdict(float); sm_hour_sent = Counter()
em_dow_rev = defaultdict(float); em_dow_sent = Counter()
sm_dow_rev = defaultdict(float); sm_dow_sent = Counter()
for r in em_rows:
    # Skip campaigns without a parseable send time or with zero recipients.
    if r["send_hour"] is None or r["recipients"]==0: continue
    em_hour_sent[r["send_hour"]] += r["recipients"]
    em_hour_conv[r["send_hour"]] += r["conversions"]
    em_hour_rev[r["send_hour"]] += r["conversion_value"]
    em_dow_rev[r["send_dow"]] += r["conversion_value"]
    em_dow_sent[r["send_dow"]] += r["recipients"]
for r in sm_rows:
    if r["send_hour"] is None or r["recipients"]==0: continue
    sm_hour_sent[r["send_hour"]] += r["recipients"]
    sm_hour_conv[r["send_hour"]] += r["conversions"]
    sm_hour_rev[r["send_hour"]] += r["conversion_value"]
    sm_dow_rev[r["send_dow"]] += r["conversion_value"]
    sm_dow_sent[r["send_dow"]] += r["recipients"]

# ---- Build markdown ----
# The whole report is accumulated as a list of markdown lines and joined once
# at the end (avoids quadratic string concatenation).
md = []
md.append("# Nitrous Competitions — MASTER 3-Month Marketing & Sales Audit")
md.append("")
# NOTE(review): the reporting window is hard-coded; confirm it matches the
# actual date range in orders_3mo.csv before re-running for a new period.
md.append(f"**Window:** 2026-02-06 → 2026-05-06 (90 days)")
md.append(f"**Sources:** WooCommerce REST API (orders + line items + attribution metadata) · Klaviyo API (email/SMS campaigns + flows + lists + segments)")
# NOTE(review): datetime.utcnow() is deprecated in newer Pythons — consider
# datetime.now(timezone.utc) when touching this next.
md.append(f"**Generated:** {datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}")
md.append("")

md.append("## At-a-glance — three reports merged")
md.append("")
md.append(f"| Stream | Headline |")
md.append(f"|---|---|")
# These divisions assume non-empty exports (total_orders / total_revenue > 0);
# an empty window would raise ZeroDivisionError here.
md.append(f"| **Sales (WooCommerce)** | {total_orders:,} completed orders · £{total_revenue:,.0f} revenue · {len(unique_emails):,} unique buyers · AOV £{total_revenue/total_orders:.2f} |")
md.append(f"| **Acquisition** | {total_new:,} brand-new buyers (deduped, each email counted once on first-ever purchase) — that's {total_new/90:.0f}/day average |")
md.append(f"| **Email (Klaviyo)** | {em_total['campaigns']} campaigns sent · {em_total['sent']:,} recipients · open rate {100*em_total['open_rate']:.1f}% · click rate {100*em_total['click_rate']:.2f}% · attributed revenue **£{em_total['revenue']:,.0f}** |")
md.append(f"| **SMS (Klaviyo)** | {sm_total['campaigns']} campaigns sent · {sm_total['sent']:,} sends · est. cost ~£{sms_cost_est:,.0f} (@£{SMS_UK_COST:.3f}/SMS) · attributed revenue **£{sm_total['revenue']:,.0f}** |")
md.append(f"| **Email + SMS combined** | £{em_total['revenue']+sm_total['revenue']:,.0f} attributed revenue ({100*(em_total['revenue']+sm_total['revenue'])/total_revenue:.1f}% of all sales) |")
md.append("")

# ---- 1. Sales summary ----
md.append("## 1. Sales summary (WooCommerce)")
md.append("")
md.append(f"- Completed orders: **{total_orders:,}**")
md.append(f"- Revenue: **£{total_revenue:,.2f}**")
md.append(f"- AOV: £{total_revenue/total_orders:.2f}")
md.append(f"- Unique customers: {len(unique_emails):,}")
md.append(f"- Brand-new (first-ever) buyers in window: **{total_new:,}** (deduped)")
md.append(f"- Repeat-customer share: {100*(total_orders-new_orders)/total_orders:.1f}%")
md.append("")

# Channel reuse from existing audit_report
md.append("Where the sales come from (refreshed view, traffic-source classification):")
md.append("")
# Per-channel accumulators filled by the loop after classify() below.
ch_orders = Counter(); ch_rev = defaultdict(float); ch_unique = defaultdict(set)
def classify(s, st, ref):
    """Map WooCommerce attribution fields to a coarse marketing channel label.

    Checks run in priority order, so e.g. a source containing both "sms" and
    "google" is labelled SMS. Falls through to a title-cased source string
    (truncated to 40 chars) when nothing matches.
    """
    src = (s or '').lower()
    source_type = (st or '').lower()
    referrer = (ref or '').lower()
    # No source at all, or an explicit direct marker.
    if not src or src in ('(none)', '(direct)', 'direct', 'typein'):
        return 'Direct / Typein'
    if 'klaviyo' in src or 'email' in src or 'klaviyo' in referrer:
        return 'Email (Klaviyo)'
    if 'sms' in src or 'voodoo' in src or 'sms' in referrer:
        return 'SMS'
    if 'facebook' in src or src == 'fb' or 'facebook' in referrer:
        return 'Facebook'
    if 'instagram' in src or src == 'ig':
        return 'Instagram'
    if 'tiktok' in src or 'tiktok' in referrer:
        return 'TikTok'
    if 'youtube' in src:
        return 'YouTube'
    if 'google' in src or 'google' in referrer:
        # Paid-search hints split Google into Ads vs Organic.
        if 'ad' in src or 'cpc' in src or 'gclid' in referrer:
            return 'Google Ads'
        return 'Google Organic'
    if 'bing' in src:
        return 'Bing'
    if 'snapchat' in src:
        return 'Snapchat'
    if source_type == 'referral':
        return f"Referral: {referrer[:40]}"
    if source_type == 'organic':
        return 'Organic Search'
    return src.title()[:40] or 'Unknown'
# Classify every order and accumulate orders / revenue / distinct buyers per channel.
for r in orders:
    ch = classify(r.get("traffic_source"), r.get("source_type"), r.get("referrer"))
    rev = float(r.get("total") or 0)
    ch_orders[ch] += 1; ch_rev[ch] += rev
    if r["email"]: ch_unique[ch].add(r["email"].lower())

md.append("| Channel | Orders | % | Revenue | % | Unique buyers | AOV |")
md.append("|---|--:|--:|--:|--:|--:|--:|")
# Top 15 channels by revenue, descending.
for ch, rev in sorted(ch_rev.items(), key=lambda x:-x[1])[:15]:
    n=ch_orders[ch]; u=len(ch_unique[ch])
    md.append(f"| {ch} | {n:,} | {100*n/total_orders:.1f}% | £{rev:,.0f} | {100*rev/total_revenue:.1f}% | {u:,} | £{rev/n:.2f} |")
md.append("")

# ---- 2. Competitions ----
md.append("## 2. Best-selling competitions (top 20 by revenue)")
md.append("")
md.append("| # | Competition | Draw date | Revenue | £/day on sale | Orders | Unique buyers |")
md.append("|---|---|---|--:|--:|--:|--:|")
# CSV stores numbers as strings, hence the float()/int() casts throughout.
# Unnamed rows are dropped; sort descending by revenue.
top_comps = sorted([c for c in competitions if c["name"]], key=lambda x: -float(x["revenue"]))[:20]
for i, c in enumerate(top_comps, 1):
    # Ellipsize long competition names so the table stays readable.
    name = c["name"][:60]+("…" if len(c["name"])>60 else "")
    md.append(f"| {i} | {name} | {c['draw_date'] or '—'} | £{float(c['revenue']):,.0f} | £{float(c['rev_per_day']):,.0f} | {int(c['orders']):,} | {int(c['unique_buyers']):,} |")
md.append("")

# Competitions to drop
md.append("### Competitions to DROP — ran ≥7 days but earned <£500 (drag on revenue)")
md.append("")
md.append("| Competition | Draw date | Revenue | Days on sale | £/day |")
md.append("|---|---|--:|--:|--:|")
# "Flop" = on sale a full week yet under £500 total; worst £/day listed first.
flops = [c for c in competitions if int(c['days_on_sale'])>=7 and float(c['revenue'])<500]
flops.sort(key=lambda x: float(x['rev_per_day']))
for c in flops[:25]:
    md.append(f"| {c['name'][:60]} | {c['draw_date'] or '—'} | £{float(c['revenue']):.0f} | {c['days_on_sale']} | £{float(c['rev_per_day']):.0f} |")
md.append("")

# Best draw-end day
md.append("### Best draw day (when to schedule the live draw)")
md.append("")
# Revenue and competition count bucketed by the draw date's day-of-week.
drop_day_rev = defaultdict(float); drop_day_n = Counter()
for c in competitions:
    dd = c.get("draw_date") or ""
    if not dd: continue
    try:
        dt = datetime.fromisoformat(dd)
        d = dt.strftime("%A")
        drop_day_rev[d] += float(c['revenue']); drop_day_n[d] += 1
    except Exception:
        # Best-effort: skip rows with unparseable dates/revenue. A bare
        # `except:` would also swallow KeyboardInterrupt/SystemExit, so the
        # catch is narrowed to Exception.
        pass
md.append("| Day | # comps | Total revenue | Avg / comp |")
md.append("|---|--:|--:|--:|")
for d in ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]:
    n=drop_day_n[d]; r=drop_day_rev[d]
    md.append(f"| {d} | {n} | £{r:,.0f} | £{r/n if n else 0:,.0f} |")
md.append("")

# ---- 3. New customers per day ----
md.append("## 3. New unique customers per day (deduped)")
md.append("")
md.append(f"Total brand-new buyers: **{total_new:,}** in 90 days = **{total_new/90:.0f} per day** average")
md.append("")
md.append("| Date | New first-ever buyers |")
md.append("|---|--:|")
# One table row per CSV row, in file order (assumed chronological — confirm upstream).
for r in new_signups:
    md.append(f"| {r['date']} | {int(r['new_customers_first_ever_purchase']):,} |")
md.append("")

# ---- 4. Email campaigns ----
md.append("## 4. Email campaigns (Klaviyo)")
md.append("")
md.append(f"- Campaigns sent: **{em_total['campaigns']}**")
md.append(f"- Total recipients: {em_total['sent']:,}")
md.append(f"- Total delivered: {em_total['delivered']:,}")
md.append(f"- Open rate: **{100*em_total['open_rate']:.1f}%** (industry benchmark ecom UK: 35–45%)")
md.append(f"- Click rate: **{100*em_total['click_rate']:.2f}%** (benchmark: 2–4%)")
md.append(f"- Click-to-open rate: {100*em_total['ctor']:.2f}%")
md.append(f"- Attributed revenue: **£{em_total['revenue']:,.0f}**")
md.append(f"- Revenue per send: £{em_total['rev_per_send']:.4f}")
md.append(f"- Revenue per campaign: £{em_total['rev_per_campaign']:,.0f}")
# NOTE(review): unguarded divide by delivered — raises ZeroDivisionError if
# nothing was delivered in the window.
md.append(f"- Unsubscribes: {em_total['unsubs']:,} ({100*em_total['unsubs']/em_total['delivered']:.3f}% — keep <0.2%)")
md.append("")

md.append("### Top 20 email campaigns by attributed revenue")
md.append("")
md.append("| Campaign | Sent at | DOW | Hour | Recipients | Open % | Click % | Conversions | Revenue | Targeted lists/segments |")
md.append("|---|---|---|--:|--:|--:|--:|--:|--:|---|")
top_em = sorted([r for r in em_rows if r["recipients"]>0], key=lambda x:-x["conversion_value"])[:20]
for r in top_em:
    # Show at most 3 audience names, then a "+N more" suffix.
    aud = ", ".join(resolve_aud(r["audiences_included"][:3])) + (f" +{len(r['audiences_included'])-3} more" if len(r['audiences_included'])>3 else "")
    # NOTE(review): {r['send_hour']:02d} raises TypeError when send_hour is None
    # (a campaign with recipients but no parseable send_time) — confirm that
    # cannot occur in the export, or guard it.
    md.append(f"| {r['name'][:55]} | {r['send_time'][:16]} | {r['send_dow']} | {r['send_hour']:02d} | {r['recipients']:,} | {100*r['open_rate']:.0f}% | {100*r['click_rate']:.2f}% | {r['conversions']} | £{r['conversion_value']:,.0f} | {aud[:80]} |")
md.append("")

md.append("### Bottom 10 email campaigns (lowest revenue per send) — review messaging/audience")
md.append("")
md.append("| Campaign | Sent at | Recipients | Open % | Revenue | £/send | Targeted lists/segments |")
md.append("|---|---|--:|--:|--:|--:|---|")
# Only campaigns with >500 recipients so tiny test sends don't dominate the list.
worst_em = sorted([r for r in em_rows if r["recipients"]>500], key=lambda x: x["conversion_value"]/max(1,x["recipients"]))[:10]
for r in worst_em:
    aud = ", ".join(resolve_aud(r["audiences_included"][:2]))
    md.append(f"| {r['name'][:55]} | {r['send_time'][:16]} | {r['recipients']:,} | {100*r['open_rate']:.0f}% | £{r['conversion_value']:,.0f} | £{r['conversion_value']/max(1,r['recipients']):.4f} | {aud[:60]} |")
md.append("")

# Email best send time (correlate hour with revenue per recipient)
md.append("### Best email send-hour (UTC) — revenue per recipient by send hour")
md.append("")
md.append("| Hour (UTC) | Campaigns | Recipients sent | Revenue | £ per recipient |")
md.append("|---|--:|--:|--:|--:|")
for h in range(24):
    sent = em_hour_sent[h]; rev = em_hour_rev[h]
    # Count campaigns (not recipients) that went out in this hour.
    n = sum(1 for r in em_rows if r["send_hour"]==h and r["recipients"]>0)
    md.append(f"| {h:02d}:00 | {n} | {sent:,} | £{rev:,.0f} | £{rev/sent if sent else 0:.4f} |")
md.append("")

md.append("### Best email send-day — revenue per recipient by day-of-week")
md.append("")
md.append("| Day | Recipients sent | Revenue | £ per recipient |")
md.append("|---|--:|--:|--:|")
for d in ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]:
    sent = em_dow_sent[d]; rev = em_dow_rev[d]
    md.append(f"| {d} | {sent:,} | £{rev:,.0f} | £{rev/sent if sent else 0:.4f} |")
md.append("")

# ---- 5. SMS ----
md.append("## 5. SMS campaigns (Klaviyo)")
md.append("")
md.append(f"- Campaigns sent: **{sm_total['campaigns']}**")
md.append(f"- Total sends: **{sm_total['sent']:,}**")
md.append(f"- Click rate: {100*sm_total['click_rate']:.2f}% (benchmark: 6–15%)")
md.append(f"- Conversions: {sm_total['conversions']:,}")
md.append(f"- Attributed revenue: **£{sm_total['revenue']:,.0f}**")
md.append(f"- Revenue per send: £{sm_total['rev_per_send']:.4f}")
md.append(f"- Revenue per campaign: £{sm_total['rev_per_campaign']:,.0f}")
md.append(f"- Estimated SMS cost @ £{SMS_UK_COST:.3f}/SMS (UK Klaviyo standard): **£{sms_cost_est:,.0f}**")
# max(1, cost) guards the division when no SMS was sent in the window.
md.append(f"- ROAS on SMS = **{sm_total['revenue']/max(1,sms_cost_est):.2f}x**")
md.append(f"- Unsubscribes: {sm_total['unsubs']:,}")
md.append("")

md.append("### Top 15 SMS campaigns by attributed revenue")
md.append("")
md.append("| Campaign | Sent at | DOW | Hour | Sends | Click % | Conversions | Revenue | £/send | Audience |")
md.append("|---|---|---|--:|--:|--:|--:|--:|--:|---|")
top_sm = sorted([r for r in sm_rows if r["recipients"]>0], key=lambda x:-x["conversion_value"])[:15]
for r in top_sm:
    aud = ", ".join(resolve_aud(r["audiences_included"][:3]))
    # NOTE(review): same caveat as the email table — {r['send_hour']:02d}
    # raises TypeError if send_hour is None for a sent campaign.
    md.append(f"| {r['name'][:55]} | {r['send_time'][:16]} | {r['send_dow']} | {r['send_hour']:02d} | {r['recipients']:,} | {100*r['click_rate']:.2f}% | {r['conversions']} | £{r['conversion_value']:,.0f} | £{r['conversion_value']/max(1,r['recipients']):.4f} | {aud[:60]} |")
md.append("")

md.append("### Bottom 10 SMS campaigns (under-performers)")
md.append("")
md.append("| Campaign | Sent at | Sends | Revenue | £/send | Audience |")
md.append("|---|---|--:|--:|--:|---|")
# Only meaningful-volume campaigns (>500 sends), worst £/send first.
worst_sm = sorted([r for r in sm_rows if r["recipients"]>500], key=lambda x: x["conversion_value"]/max(1,x["recipients"]))[:10]
for r in worst_sm:
    aud = ", ".join(resolve_aud(r["audiences_included"][:2]))
    md.append(f"| {r['name'][:55]} | {r['send_time'][:16]} | {r['recipients']:,} | £{r['conversion_value']:,.0f} | £{r['conversion_value']/max(1,r['recipients']):.4f} | {aud[:60]} |")
md.append("")

md.append("### SMS — best send hour & day")
md.append("")
md.append("| Hour (UTC) | Sends | Revenue | £/send |")
md.append("|---|--:|--:|--:|")
for h in range(24):
    s=sm_hour_sent[h]; r=sm_hour_rev[h]
    # Unlike the email table, hours with no activity are omitted entirely.
    if s==0 and r==0: continue
    md.append(f"| {h:02d}:00 | {s:,} | £{r:,.0f} | £{r/s if s else 0:.4f} |")
md.append("")
md.append("| Day | Sends | Revenue | £/send |")
md.append("|---|--:|--:|--:|")
for d in ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]:
    s=sm_dow_sent[d]; r=sm_dow_rev[d]
    md.append(f"| {d} | {s:,} | £{r:,.0f} | £{r/s if s else 0:.4f} |")
md.append("")

# ---- 6. Audience / lists ----
md.append("## 6. Audience size & targeting")
md.append("")
md.append("### Master Klaviyo lists")
md.append("")
md.append("| List | Profiles |")
md.append("|---|--:|")
# NOTE: `L` as a loop name is easy to misread; rows without profile_count
# render an em dash instead of a number.
for L in list_sizes:
    md.append(f"| {L['name']} | {L['profile_count']:,} |" if L.get('profile_count') else f"| {L['name']} | — |")
md.append("")

# Top targeted segments by usage
# Count how often each SEGMENT id appears in campaign include-lists
# (ids that resolve to lists rather than segments are ignored here).
seg_usage = Counter()
for r in em_rows + sm_rows:
    for sid in r["audiences_included"]:
        if sid in seg_name:
            seg_usage[seg_name[sid]] += 1
md.append("### Top 20 segments used as campaign targeting")
md.append("")
md.append("| Segment | Times targeted (90d) |")
md.append("|---|--:|")
for nm, n in seg_usage.most_common(20):
    md.append(f"| {nm} | {n} |")
md.append("")

# Excluded segments
# Same counting, but over each campaign's exclusion (suppression) lists.
ex_usage = Counter()
for r in em_rows + sm_rows:
    for sid in r["audiences_excluded"]:
        if sid in seg_name:
            ex_usage[seg_name[sid]] += 1
md.append("### Top 10 segments most often EXCLUDED (suppressions)")
md.append("")
md.append("| Segment | Times excluded |")
md.append("|---|--:|")
for nm, n in ex_usage.most_common(10):
    md.append(f"| {nm} | {n} |")
md.append("")

# ---- 7. Flows ----
md.append("## 7. Flows (automation)")
md.append("")
md.append(f"Total flows in account: **{len(flows)}**")
md.append("")
md.append("| Flow | Status | Trigger type | Created |")
md.append("|---|---|---|---|")
# Capped at 30 rows to keep the report readable; created date truncated to YYYY-MM-DD.
for fl in flows[:30]:
    a = fl.get("attributes",{}) or {}
    md.append(f"| {a.get('name','')[:60]} | {a.get('status','')} | {a.get('trigger_type','')} | {(a.get('created') or '')[:10]} |")
md.append("")

# ---- 8. Cross-reference: do email/SMS sends correlate with sales spikes? ----
md.append("## 8. Cross-reference — email/SMS sends vs daily sales")
md.append("")
md.append("Sales by day on send-days (any email or SMS) vs no-send days:")
md.append("")
# Dates (YYYY-MM-DD) on which at least one campaign actually went out.
send_dates = set()
for r in em_rows + sm_rows:
    if r["send_date"] and r["recipients"]>0:
        send_dates.add(r["send_date"])
# Daily order revenue/count, keyed on the date part of date_created.
day_rev = defaultdict(float); day_n = Counter()
for o in orders:
    d = (o.get("date_created") or "")[:10]
    if not d: continue
    day_rev[d] += float(o.get("total") or 0)
    day_n[d] += 1

# Partition daily revenue into send-days vs no-send days for comparison.
send_revs = [day_rev[d] for d in day_rev if d in send_dates]
nosend_revs = [day_rev[d] for d in day_rev if d not in send_dates]
def avg(L):
    """Arithmetic mean of L, or 0 when L is empty."""
    if not L:
        return 0
    return sum(L) / len(L)
md.append(f"- Days with at least one email/SMS sent: **{len(send_revs)}** (avg revenue £{avg(send_revs):,.0f}/day)")
md.append(f"- Days with NO send: {len(nosend_revs)} (avg revenue £{avg(nosend_revs):,.0f}/day)")
# Correlation, not causation: send days may simply coincide with launches/draws.
if avg(nosend_revs)>0:
    md.append(f"- Lift from sending: **{100*(avg(send_revs)/avg(nosend_revs)-1):+.1f}%**")
md.append("")

# ---- 9. Action plan ----
md.append("## 9. Action plan — what to do this week")
md.append("")
# Best email hour
# Rank send hours / days by revenue per recipient (0 when nothing was sent then),
# then keep only slots that actually had volume.
em_hr_rev = sorted([(h, em_hour_rev[h]/em_hour_sent[h] if em_hour_sent[h] else 0) for h in range(24)], key=lambda x:-x[1])
top_hours = [h for h,_ in em_hr_rev[:3] if em_hour_sent[h]>0]
em_dow_score = sorted([(d, em_dow_rev[d]/em_dow_sent[d] if em_dow_sent[d] else 0) for d in ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]], key=lambda x:-x[1])
top_em_days = [d for d,_ in em_dow_score[:3] if em_dow_sent[d]>0]

md.append(f"1. **Best email send window:** {', '.join(f'{h:02d}:00 UTC' for h in top_hours)} on {', '.join(top_em_days[:2])}. Schedule all flagship campaigns here.")
md.append(f"2. **Drop {len(flops)} under-performing competitions** (formats listed in §2). Each draw slot is worth ~£{total_revenue/(90*4):,.0f}/day on average — small-prize drags are costing real revenue.")
md.append(f"3. **SMS ROAS is {sm_total['revenue']/max(1,sms_cost_est):.1f}x** at est. £{sms_cost_est:,.0f} cost — keep volume; the channel is profitable. But the bottom 10 SMS campaigns generated £{sum(r['conversion_value'] for r in worst_sm):,.0f} from {sum(r['recipients'] for r in worst_sm):,} sends — review messaging/timing for those.")
md.append(f"4. **Klaviyo Newsletter list = {next((L['profile_count'] for L in list_sizes if L['name']=='Newsletter'),0):,} subscribers** but only {len(unique_emails):,} unique buyers in 90d → only {100*len(unique_emails)/max(1,next((L['profile_count'] for L in list_sizes if L['name']=='Newsletter'),1)):.1f}% of list converted. Add a 4-step welcome flow for first-time subscribers if not already running.")
# NOTE(review): items 5-7 quote hard-coded figures (57% direct, 20:00 peak,
# £153K on the 6th) that are NOT computed by this script — they will go stale
# if the window changes; confirm against the analysis that produced them.
md.append(f"5. **Direct/Typein traffic is 57% of revenue** — that's mostly untagged paid traffic. Add UTMs to all Facebook & Google ad URLs (this is your single biggest attribution gap).")
md.append(f"6. **Top revenue hour (orders): 20:00 UK** = 13% of all daily sales. Schedule send-time-optimised emails and biggest SMS to land 18:00–20:00.")
md.append(f"7. **6th of the month is your peak day** (£153K), then 3rd, 22nd, 27th — schedule the highest-value draws to end on those dates.")
md.append("")

md_text = "\n".join(md)
# Write as UTF-8 explicitly: the report is full of £ / — / … characters, and
# the locale default encoding (e.g. C/ASCII under cron) would raise
# UnicodeEncodeError without it.
with open(OUT_MD, "w", encoding="utf-8") as f:
    f.write(md_text)
print(f"Wrote {OUT_MD}")

# HTML
try:
    import markdown
    html_body = markdown.markdown(md_text, extensions=["tables","fenced_code"])
except Exception:
    # Fallback when the third-party `markdown` package isn't installed:
    # escape '<' and wrap the raw markdown in <pre> so it still renders.
    html_body = "<pre>" + md_text.replace("<","&lt;") + "</pre>"
html = f"""<!doctype html><html><head><meta charset=utf-8>
<title>Nitrous Master Audit</title>
<style>
body{{font-family:-apple-system,Segoe UI,Roboto,sans-serif;max-width:1300px;margin:30px auto;padding:0 24px;color:#0f172a;line-height:1.55}}
h1{{color:#0b1220;border-bottom:3px solid #2563eb;padding-bottom:10px}}
h2{{color:#1e3a8a;margin-top:34px}}
h3{{color:#1e40af;margin-top:22px}}
table{{border-collapse:collapse;width:100%;margin:14px 0;font-size:13px}}
th,td{{padding:7px 9px;border:1px solid #e5e7eb;text-align:left;vertical-align:top}}
th{{background:#eff6ff;color:#1e3a8a;font-weight:600}}
tr:nth-child(even){{background:#fafbff}}
code{{background:#eef2ff;padding:1px 5px;border-radius:3px;font-size:0.9em}}
strong{{color:#0b1220}}
</style></head><body>{html_body}</body></html>"""
# Encode as UTF-8 to match the <meta charset=utf-8> declaration above,
# regardless of the machine's locale.
with open(OUT_HTML, "w", encoding="utf-8") as f:
    f.write(html)
print(f"Wrote {OUT_HTML}")
