from customer_retention.analysis.notebook_progress import track_and_export_previous

# Progress-tracking hook from customer_retention.analysis.notebook_progress,
# invoked with the filename "09_business_alignment.ipynb" as the first
# statement of this script, before the remaining imports.
# NOTE(review): the helper's name suggests it exports the *previous* notebook;
# its actual side effects are not visible here — confirm in notebook_progress.
track_and_export_previous("09_business_alignment.ipynb")

import pandas as pd

from customer_retention.analysis.auto_explorer import ExplorationFindings
from customer_retention.core.config.experiments import (
    FINDINGS_DIR,
)

from customer_retention.analysis.auto_explorer import load_notebook_findings

# Ask the project helper which findings file belongs to this notebook. It
# yields three values; only the path is used below (the second is kept as
# `_namespace`, the third is discarded).
_resolved = load_notebook_findings("09_business_alignment.ipynb")
FINDINGS_PATH, _namespace, _ = _resolved
print(f"Using: {FINDINGS_PATH}")

# Load the stored exploration findings and report how many columns they cover.
findings = ExplorationFindings.load(FINDINGS_PATH)
_n_columns = findings.column_count
print(f"\nLoaded findings for {_n_columns} columns")

Using: /Users/Vital/python/CustomerRetention/experiments/runs/email-6301db6c/datasets/customer_emails/findings/customer_emails_aggregated_findings.yaml

Loaded findings for 217 columns

# Project-level framing for the churn model: objective, stakeholders,
# timeline and budget. Keys are declared in the order they are printed.
BUSINESS_CONTEXT = dict(
    project_name="Customer Churn Prediction",
    business_objective="Reduce customer churn by 20% through proactive retention campaigns",
    stakeholders=["Marketing Team", "Customer Success", "Data Science"],
    timeline="Q1 2025",
    budget_constraints="$50k for retention campaigns per month",
)

print("Business Context:")
# One indented "key: value" line per entry, in declaration order.
print(
    *(f"  {name}: {setting}" for name, setting in BUSINESS_CONTEXT.items()),
    sep="\n",
)

Business Context:
  project_name: Customer Churn Prediction
  business_objective: Reduce customer churn by 20% through proactive retention campaigns
  stakeholders: ['Marketing Team', 'Customer Success', 'Data Science']
  timeline: Q1 2025
  budget_constraints: $50k for retention campaigns per month

# Success criteria for the project, one record per metric. Each row below is
# paired positionally with _METRIC_FIELDS to build a plain dict, so the
# resulting list of dicts has the same keys (and key order) for every metric.
_METRIC_FIELDS = ("Metric", "Target", "Priority", "Rationale")
_METRIC_ROWS = (
    (
        "Model AUC",
        ">= 0.80",
        "High",
        "Need strong discrimination to prioritize high-risk customers",
    ),
    (
        "Precision at 20%",
        ">= 0.60",
        "High",
        "Limited budget means we can only target top 20% of predictions",
    ),
    (
        "Churn Rate Reduction",
        "20%",
        "High",
        "Primary business objective",
    ),
    (
        "Model Latency",
        "< 100ms",
        "Medium",
        "Required for real-time scoring",
    ),
    (
        "Fairness (Demographic Parity)",
        "Ratio >= 0.8",
        "Medium",
        "Ensure equitable treatment across segments",
    ),
)
SUCCESS_METRICS = [dict(zip(_METRIC_FIELDS, row)) for row in _METRIC_ROWS]

# Tabular view of the same records, used for display.
metrics_df = pd.DataFrame(SUCCESS_METRICS)
# Show the success-metrics table under a heading.
# NOTE(review): `display` is not imported anywhere in this file; presumably it
# is the built-in injected by IPython/Jupyter — confirm before running this as
# a plain Python script.
print("Success Metrics:")
display(metrics_df)

Success Metrics:

# How the model is expected to be served and maintained. Keys are declared in
# the order they are printed.
DEPLOYMENT_REQUIREMENTS = dict(
    scoring_mode="Both batch and real-time",
    batch_frequency="Daily",
    real_time_latency="< 100ms p99",
    infrastructure="Databricks",
    model_registry="MLflow",
    monitoring="Required - drift detection and performance tracking",
    retraining="Monthly or on significant drift",
)

print("Deployment Requirements:")
# One indented "key: value" line per requirement, in declaration order.
print(
    *(f"  {name}: {setting}" for name, setting in DEPLOYMENT_REQUIREMENTS.items()),
    sep="\n",
)

Deployment Requirements:
  scoring_mode: Both batch and real-time
  batch_frequency: Daily
  real_time_latency: < 100ms p99
  infrastructure: Databricks
  model_registry: MLflow
  monitoring: Required - drift detection and performance tracking
  retraining: Monthly or on significant drift

# Data-governance constraints the feature set must satisfy. Every constraint
# starts out with the same status, so the records are generated from a
# label -> requirement mapping plus that shared status value.
_INITIAL_STATUS = "To verify"
_CONSTRAINT_REQUIREMENTS = {
    "PII Handling": "No direct PII in features (names, SSN, etc.)",
    "Data Freshness": "Features must be available within 24 hours",
    "Historical Depth": "Minimum 12 months of history for training",
    "Protected Attributes": "Age, gender, race should not be direct features",
}
DATA_CONSTRAINTS = [
    {"Constraint": label, "Requirement": requirement, "Status": _INITIAL_STATUS}
    for label, requirement in _CONSTRAINT_REQUIREMENTS.items()
]

# Tabular view of the same records, used for display.
constraints_df = pd.DataFrame(DATA_CONSTRAINTS)
# Show the data-constraints table under a heading.
# NOTE(review): `display` is not imported anywhere in this file; presumably it
# is the built-in injected by IPython/Jupyter — confirm before running this as
# a plain Python script.
print("Data Constraints:")
display(constraints_df)

Data Constraints:

# Retention playbook: one record per risk tier, giving the action, its
# per-customer cost and the expected effectiveness. Each row is paired
# positionally with _INTERVENTION_FIELDS to build a plain dict.
_INTERVENTION_FIELDS = ("Risk Level", "Intervention", "Cost", "Expected Effectiveness")
_INTERVENTION_ROWS = (
    ("High (>0.8)", "Personal call from account manager", "$50/customer", "40% retention"),
    ("Medium (0.5-0.8)", "Personalized email + discount offer", "$10/customer", "20% retention"),
    ("Low (<0.5)", "Automated engagement email", "$0.50/customer", "5% retention"),
)
INTERVENTIONS = [dict(zip(_INTERVENTION_FIELDS, row)) for row in _INTERVENTION_ROWS]

# Tabular view of the same records, used for display.
interventions_df = pd.DataFrame(INTERVENTIONS)
# Show the intervention-strategy table under a heading.
# NOTE(review): `display` is not imported anywhere in this file; presumably it
# is the built-in injected by IPython/Jupyter — confirm before running this as
# a plain Python script.
print("Intervention Strategy:")
display(interventions_df)

Intervention Strategy:

# Record the business framing on the findings object and write it back to the
# same file it was loaded from. A falsy `metadata` is replaced with a fresh
# dict first so the entries below always have somewhere to go.
# NOTE(review): DATA_CONSTRAINTS and INTERVENTIONS are defined and displayed
# earlier in this file but are not stored here — confirm that is intentional.
findings.metadata = findings.metadata or {}
findings.metadata.update(
    {
        "business_context": BUSINESS_CONTEXT,
        "success_metrics": SUCCESS_METRICS,
        "deployment_requirements": DEPLOYMENT_REQUIREMENTS,
    }
)

findings.save(FINDINGS_PATH)
print(f"Business context saved to: {FINDINGS_PATH}")

Business context saved to: /Users/Vital/python/CustomerRetention/experiments/runs/email-6301db6c/datasets/customer_emails/findings/customer_emails_aggregated_findings.yaml

Chapter 9: Business Alignment

9.1 Setup

9.2 Business Context

9.3 Success Metrics

9.4 Deployment Requirements

9.5 Data Constraints

9.6 Intervention Strategy

9.7 Save Business Context to Findings

Next Steps

	Metric	Target	Priority	Rationale
0	Model AUC	>= 0.80	High	Need strong discrimination to prioritize high-...
1	Precision at 20%	>= 0.60	High	Limited budget means we can only target top 20...
2	Churn Rate Reduction	20%	High	Primary business objective
3	Model Latency	< 100ms	Medium	Required for real-time scoring
4	Fairness (Demographic Parity)	Ratio >= 0.8	Medium	Ensure equitable treatment across segments

	Constraint	Requirement	Status
0	PII Handling	No direct PII in features (names, SSN, etc.)	To verify
1	Data Freshness	Features must be available within 24 hours	To verify
2	Historical Depth	Minimum 12 months of history for training	To verify
3	Protected Attributes	Age, gender, race should not be direct features	To verify

	Risk Level	Intervention	Cost	Expected Effectiveness
0	High (>0.8)	Personal call from account manager	$50/customer	40% retention
1	Medium (0.5-0.8)	Personalized email + discount offer	$10/customer	20% retention
2	Low (<0.5)	Automated engagement email	$0.50/customer	5% retention