Spaces:
Running
Running
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import time | |
| import xgboost as xgb | |
| from ddgs import DDGS | |
| from textblob import TextBlob | |
| import pathlib | |
# FastAPI application instance for the service.
app = FastAPI(
    title="FairValue Strategic AI API",
    description="Investor-ready Player Valuation Engine",
)

# CORS policy. Credentials stay disabled because combining
# allow_credentials=True with a wildcard origin violates the CORS spec:
# browsers silently reject credentialed requests to wildcard origins.
_CORS_OPTIONS = {
    "allow_origins": ["*"],  # Tighten to Vercel domain after first deploy
    "allow_credentials": False,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# The access-code CSV sits in the same directory as this module.
ACCESS_CODES_PATH = pathlib.Path(__file__).with_name("access_codes.csv")
# Request body accepted by validate_code.
class CodeRequest(BaseModel):
    # Access code supplied by the client; validate_code compares it as text
    # against the master code and the 'code' column of the access-code CSV.
    code: str
async def validate_code(req: CodeRequest):
    """
    Validate a secret access code against the local CSV database.

    Each code may be redeemed at most 15 times. A successful validation
    consumes one use and writes the CSV back to disk.

    Args:
        req: Request body carrying the access code to check.

    Returns:
        ``{"status": "success", "message": ...}`` for the master code,
        otherwise ``{"status": "success", "uses_remaining": <int>}``.

    Raises:
        HTTPException: 403 when the code is unknown or exhausted; 500 when
            the CSV is missing or cannot be read or written.
    """
    # NOTE(review): no route decorator is visible on this function in the
    # reviewed text — confirm it is registered on `app`.
    max_uses = 15

    # SECURITY NOTE(review): the master bypass code is hard-coded in source,
    # so anyone with repository access has unlimited entry. Consider moving
    # it to configuration. Behaviour is intentionally left unchanged here.
    if req.code == "FairValue-103":
        return {"status": "success", "message": "Master bypass active"}

    if not os.path.exists(ACCESS_CODES_PATH):
        raise HTTPException(status_code=500, detail="Access database unavailable")

    try:
        # Read the code column as text. Letting pandas infer the type and
        # casting afterwards strips leading zeros ("00123" -> "123"), renders
        # int columns containing blanks as "123.0", and turns blank cells
        # into the literal string "nan" (which would then validate).
        df = pd.read_csv(ACCESS_CODES_PATH, dtype={'code': str})

        # Blank cells stay NaN and never compare equal to a submitted code.
        matches = df.index[df['code'] == req.code]
        if len(matches) == 0:
            raise HTTPException(status_code=403, detail="Invalid access code")

        row_idx = matches[0]  # first occurrence wins if a code is duplicated
        current_uses = int(df.at[row_idx, 'uses'])
        if current_uses >= max_uses:
            raise HTTPException(
                status_code=403,
                detail="Access code expired (max 15 uses reached)",
            )

        # Increment and persist
        df.at[row_idx, 'uses'] = current_uses + 1
        df.to_csv(ACCESS_CODES_PATH, index=False)
        return {
            "status": "success",
            "uses_remaining": max_uses - (current_uses + 1),
        }
    except HTTPException:
        # Deliberate 403s raised above must not be rewritten as 500s.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Database error: {str(e)}"
        ) from e
def health_check():
    """Liveness probe (required by Render): reports whether the model and data are loaded."""
    readiness = {
        "model_loaded": model_global is not None,
        "data_loaded": df_global is not None,
    }
    return {"status": "healthy", **readiness}
def get_players(q: str = ""):
    """
    Return unique player names from the loaded dataset.

    Accepts an optional ?q= filter for autocomplete (case-insensitive
    substring match). The React frontend uses this to power the player
    search input.

    Args:
        q: Optional search fragment; an empty string returns all names.

    Returns:
        ``{"players": [...]}`` with at most 100 names in sorted order. The
        list is empty when no dataset is loaded or no name column exists.
    """
    if df_global is None:
        return {"players": []}

    name_col = next(
        (c for c in ("name", "name_x", "Player_Name", "Name") if c in df_global.columns),
        None,
    )
    if not name_col:
        return {"players": []}

    # Drop missing names BEFORE casting to str: astype(str) turns NaN/None
    # into the strings "nan"/"None", which dropna() can no longer remove and
    # which would then be offered as player names.
    names = df_global[name_col].dropna().astype(str).unique()

    if q:
        needle = q.lower()  # hoisted: invariant across the scan
        names = [n for n in names if needle in n.lower()]

    return {"players": sorted(names)[:100]}
async def scout_player(player: str, club: str = "", interested_club: str = ""):
    """
    Standalone NLP-only intelligence endpoint — used by the Live Intel page.

    Does NOT run the ML model; it only returns the three DDGS sentiment
    scores produced by ``_fetch_nlp_intelligence`` (and therefore shares
    that function's cache with the full evaluation endpoint).

    Args:
        player: Player name; must contain non-whitespace characters.
        club: Optional current club used to focus the searches.
        interested_club: Optional buying club used in the transfer query.

    Returns:
        Dict with the submitted ``player`` value, the ``durability``,
        ``recency`` and ``agent`` scores, plus ``logs``, ``links``,
        ``from_cache`` and ``nlp_found``.

    Raises:
        HTTPException: 422 when ``player`` is empty or whitespace only.
    """
    # NOTE(review): no route decorator is visible on this function in the
    # reviewed text — confirm it is registered on `app`.
    # Local import: fastapi is already a dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    player_name = player.strip()
    if not player_name:
        raise HTTPException(status_code=422, detail="player query param is required")

    # The lookup performs synchronous web searches. Calling it directly from
    # a coroutine would stall the event loop (and every other request) until
    # the searches return, so it is executed in the worker threadpool.
    nlp = await run_in_threadpool(
        _fetch_nlp_intelligence,
        player_name,
        club.strip(),
        interested_club.strip(),
    )

    return {
        "player": player,  # echoed exactly as submitted
        "durability": nlp["durability"],
        "recency": nlp["recency"],
        "agent": nlp["agent"],
        "logs": nlp.get("_logs", []),
        "links": nlp.get("_links", []),
        "from_cache": nlp.get("_from_cache", False),
        "nlp_found": nlp.get("_found_any", False),
    }
# ── Currency Config ────────────────────────────────────────────────────────────
# Approximate EUR -> GBP rate — review quarterly.
EUR_TO_GBP = 0.85

# ── Path resolution: works locally AND inside the Docker container ─────────────
# Paths are anchored two directory levels above this file (via __file__),
# so they never depend on the current working directory.
import pathlib

_ROOT = pathlib.Path(__file__).parent.parent.resolve()
DATA_PATH = str(_ROOT.joinpath("data", "processed", "app_features.csv"))
MODEL_PATH = str(_ROOT.joinpath("fairvalue_xgboost.json"))

# ── Data / Model Globals ───────────────────────────────────────────────────────
# All three stay None until startup_event() populates them.
df_global = model_global = expected_cols_global = None
def startup_event():
    """
    Populate the module-level dataset and model globals from disk.

    Reads DATA_PATH into ``df_global`` (normalising market-value column
    names) and loads the XGBoost regressor at MODEL_PATH into
    ``model_global``, recording its feature names in
    ``expected_cols_global``. A file that does not exist is skipped and
    the matching globals keep their previous value.
    """
    # NOTE(review): no startup-hook decorator is visible on this function in
    # the reviewed text — confirm how it is invoked.
    global df_global, model_global, expected_cols_global

    if os.path.exists(DATA_PATH):
        df_global = pd.read_csv(DATA_PATH)

        # Every column whose name mentions both "market" and "value" is
        # renamed to the canonical 'market_value_in_eur'.
        # NOTE(review): this also matches names such as
        # 'Highest_Market_Value_In_Eur'; when several columns match, only the
        # first survives the de-duplication below — confirm that is intended.
        renames = {}
        for column in df_global.columns:
            lowered = column.lower()
            if 'market' in lowered and 'value' in lowered:
                renames[column] = 'market_value_in_eur'

        if renames:
            df_global.rename(columns=renames, inplace=True)
            # Keep only the first column for each (now possibly repeated) name.
            first_occurrence = ~df_global.columns.duplicated()
            df_global = df_global.loc[:, first_occurrence].copy()

    if os.path.exists(MODEL_PATH):
        model_global = xgb.XGBRegressor()
        model_global.load_model(MODEL_PATH)
        # Column order the model expects at inference time.
        expected_cols_global = model_global.feature_names_in_
# Hand-written display names for known model features.
_FEATURE_LABELS = {
    'Highest_Market_Value_In_Eur': 'Peak Historical Valuation',
    'Highest MarketValue In Eur': 'Peak Historical Valuation',
    'Contract_Years_Left': 'Contractual Duration',
    'Contract YearsLeft': 'Contractual Duration',
    'Injury_Days_Total_24m': 'Physical Availability Risk',
    'Injury Days Total 24M': 'Physical Availability Risk',
    'League_Index': 'League Quality Index',
    'height_in_cm': 'Aerial/Physical Profile',
    'Height In Cm': 'Aerial/Physical Profile',
    'international_caps': 'International Experience',
    'market_value_in_eur': 'Baseline Market Valuation',
}


def _format_feature_label(f: str) -> str:
    """Translate a raw model feature name into a presentation-ready label.

    Known features use the curated table above; anything else has its
    underscores replaced by spaces and is title-cased.
    """
    try:
        return _FEATURE_LABELS[f]
    except KeyError:
        words = f.split('_')
        return ' '.join(words).title()
# ── NLP Intelligence Cache (TTL = 1 hour) ────────────────────────────────────
# Running three live DDGS searches on every API call caused rate-limiting
# errors and high latency, so results are cached per player+club for 1 hour.
_nlp_cache: dict = {}
_NLP_CACHE_TTL = 3600  # seconds


def _fetch_nlp_intelligence(
    player_name: str, current_club: str, interested_club: str
) -> dict:
    """
    Return DDGS sentiment scores for the durability, recency and agent axes.

    Results are cached per player+club combination for 1 hour to prevent
    rate-limiting and reduce latency. A cached result that found no news at
    all is only reused for 5 minutes, after which the search is retried.

    Args:
        player_name: Player to search for.
        current_club: Player's current club (may be empty).
        interested_club: Prospective buyer, used in the transfer query
            (may be empty).

    Returns:
        Dict with float scores under ``durability``, ``recency`` and
        ``agent`` (0.0 when an axis produced nothing) plus the bookkeeping
        keys ``_ts``, ``_logs``, ``_links`` (at most 10), ``_from_cache``
        and ``_found_any``. Search failures are recorded in ``_logs``
        rather than raised.
    """
    # NOTE(review): interested_club influences the 'agent' query but is not
    # part of the cache key, so a cached agent score can come from a search
    # for a different buying club — confirm this trade-off is intended.
    cache_key = f"v2|{player_name.lower()}|{current_club.lower()}"
    cached = _nlp_cache.get(cache_key)

    # Real data is kept for the full TTL; an "empty" result (no news found)
    # is only served for 5 minutes so that a retry can happen sooner.
    if cached:
        age = time.time() - cached.get('_ts', 0)
        has_signals = cached.get('_found_any', False)
        if age < _NLP_CACHE_TTL and (has_signals or age < 300):  # 300s = 5 mins
            return {**cached, '_from_cache': True}

    ddgs = DDGS()
    axes = {
        'durability': f"{player_name} {current_club} injury status games missed medical",
        'recency': f"{player_name} {current_club} recent form impact stats",
        'agent': f"{player_name} {current_club} transfer rumors {interested_club} fee",
    }
    scores = {'durability': 0.0, 'recency': 0.0, 'agent': 0.0}
    logs = []
    scraped_links = []
    seen_urls = set()  # O(1) duplicate check instead of rebuilding a URL list per snippet
    found_any = False

    for axis, query in axes.items():
        try:
            # 10 results give a better sentiment spread
            snippets = list(ddgs.text(query.strip(), max_results=10))
            # Fallback: if no results, try a broader search without the clubs
            if not snippets:
                fallback_query = f"{player_name} {axis} news"
                snippets = list(ddgs.text(fallback_query, max_results=5))

            if not snippets:
                logs.append(f"No results for {axis} (Primary & Fallback)")
                continue

            found_any = True
            sentiments = []
            for r in snippets:
                # `or ''` also covers keys present with a None value, which
                # would otherwise break the concatenation and fail the axis.
                title = r.get('title') or ''
                href = r.get('href') or ''
                body = r.get('body') or ''
                sentiments.append(TextBlob(body + ' ' + title).sentiment.polarity)
                if href and href not in seen_urls:
                    seen_urls.add(href)
                    scraped_links.append({"title": title, "url": href})

            avg_pol = sum(sentiments) / len(sentiments)
            scores[axis] = float(avg_pol)
            logs.append(f"Scraped {axis}: Polarity {avg_pol:.2f} ({len(snippets)} results)")
        except Exception as e:
            # Best effort: one failing axis must not sink the others.
            logs.append(f"Failed {axis}: {str(e)}")

    # Links were de-duplicated while collecting; keep the first 10.
    scraped_links = scraped_links[:10]

    now = time.time()
    # Evict entries past the TTL. They can never be served again, and without
    # this the cache would retain every name ever queried.
    for key, entry in list(_nlp_cache.items()):
        if now - entry.get('_ts', 0) >= _NLP_CACHE_TTL:
            _nlp_cache.pop(key, None)

    result = {
        **scores,
        '_ts': now,
        '_logs': logs,
        '_links': scraped_links,
        '_from_cache': False,
        '_found_any': found_any,
    }
    _nlp_cache[cache_key] = result
    return result
# ── Request Schema ────────────────────────────────────────────────────────────
# Request body consumed by evaluate_player. Every field except selected_name
# has a default.
class PlayerEvaluateRequest(BaseModel):
    # Player name; matched exactly against the dataset's name column and
    # used as the NLP search subject.
    selected_name: str
    # Free-text position; evaluate_player looks for substrings such as
    # "forward", "defender" or "gk" to choose an age curve.
    position: str = "Midfielder"
    # Current club, passed to the NLP searches.
    current_club: str = ""
    # Prospective buying club, passed to the transfer-rumour search.
    interested_club: str = ""
    # Written to the 'Contract_Years_Left' feature; also drives the
    # contract multiplier.
    contract_years: float = 2.0
    # Written to the 'Age' feature; also drives the age multiplier.
    age: int = 24
    # Written to the 'Injury_Days_Total_24m' feature when that column exists.
    injuries_24m: int = 10
    # Divided by the amortization period to get the annual cost.
    # NOTE(review): presumably in millions — confirm the unit with the caller.
    asking_price: float = 45.0
    # Multiplied by 1_000_000 and divided by EUR_TO_GBP before being written
    # to the 'market_value_in_eur' feature.
    market_value_estimation: float = 20.0
def _structural_multiplier(position: str, age: int, contract_years: float) -> float:
    """Return the position-specific age multiplier times the contract multiplier."""
    pos = position.lower()

    # ── Position-Specific Career Pathing (Dynamic Aging Curves) ───────────────
    age_multiplier = 1.0
    if any(tag in pos for tag in ("forward", "striker", "winger", "attacker")):
        # Attackers peak early (24-27), decline steeply after 30
        if age <= 23:
            age_multiplier = 1.25
        elif age >= 30:
            age_multiplier = 0.75
    elif any(tag in pos for tag in ("defender", "goalkeeper", "gk", "cb")):
        # Defenders/GKs peak late (28-32), sustain longer
        if age <= 23:
            age_multiplier = 1.05
        elif age >= 32:
            age_multiplier = 0.85
    else:
        # Midfielders peak 25-29
        if age <= 23:
            age_multiplier = 1.15
        elif age >= 31:
            age_multiplier = 0.80

    # Contract Security Premium
    contract_multiplier = 1.0
    if contract_years >= 4.0:
        contract_multiplier = 1.20
    elif contract_years <= 1.0:
        contract_multiplier = 0.70

    return age_multiplier * contract_multiplier


def _valuation_tier(value_m: float) -> tuple:
    """Return (tier name, recency ceiling, scarcity premium) for a value in millions."""
    if value_m > 80.0:
        return "Generational Superstar (>£80m)", 0.35, 0.40
    if value_m > 40.0:
        return "Elite Tier (>£40m)", 0.25, 0.15
    if value_m >= 10.0:
        return "Core Tier (£10m–£40m)", 0.10, 0.05
    return "Base Tier (<£10m)", 0.05, 0.0


async def evaluate_player(req: PlayerEvaluateRequest):
    """
    Produce the full valuation report for one player.

    Combines the XGBoost prediction, position/contract multipliers and
    NLP sentiment adjustments into a Market Transaction Price (MTP)
    range, and attaches amortization figures and the top SHAP
    contributions.

    Args:
        req: Player identity plus the user-adjustable valuation inputs.

    Returns:
        Dict with the keys ``ledger``, ``cfo_dashboard``, ``nlp_results``,
        ``nlp_cached``, ``nlp_found``, ``logs``, ``links`` and
        ``shap_data`` (top 10 features by absolute impact).

    Raises:
        HTTPException: 500 when the dataset or model was not loaded.
    """
    # NOTE(review): no route decorator is visible on this function in the
    # reviewed text — confirm it is registered on `app`.
    # Local import: fastapi is already a dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    if df_global is None or model_global is None:
        raise HTTPException(
            status_code=500,
            detail="Model or data not loaded on startup. Check server logs."
        )

    name_col = next(
        (c for c in ('name', 'name_x', 'Player_Name', 'Name') if c in df_global.columns),
        None
    )

    # Use the player's own row when the name is known; otherwise fall back to
    # the dataset's numeric medians. The name column is scanned once and the
    # median is only computed when actually needed.
    player_data = None
    if name_col:
        matches = df_global[df_global[name_col].astype(str) == req.selected_name]
        if not matches.empty:
            player_data = matches.iloc[0:1].copy()
    if player_data is None:
        player_data = df_global.median(numeric_only=True).to_frame().T

    # Overlay the user-supplied inputs on the feature row.
    player_data['Contract_Years_Left'] = req.contract_years
    player_data['Age'] = req.age
    if 'Injury_Days_Total_24m' in player_data.columns:
        player_data['Injury_Days_Total_24m'] = req.injuries_24m
    if 'market_value_in_eur' in player_data.columns:
        # NOTE(review): the input is scaled to units and divided by
        # EUR_TO_GBP (presumably GBP -> EUR), yet the model output below is
        # reported against £ tier labels without a reverse conversion —
        # confirm the intended currency of the outputs.
        player_data['market_value_in_eur'] = (
            req.market_value_estimation * 1_000_000
        ) / EUR_TO_GBP

    # Align to the model's training columns; absent features become 0.
    X_infer = player_data.reindex(columns=expected_cols_global, fill_value=0)

    # The prediction is inverted with expm1, i.e. treated as log1p(value).
    log_pv = float(model_global.predict(X_infer)[0])
    baseline_pv = max(float(np.expm1(log_pv)), 0.0)
    baseline_pv_m = baseline_pv / 1_000_000

    # ── Extract SHAP Values for UI Chart ──────────────────────────────────────
    dmatrix = xgb.DMatrix(X_infer)
    shap_contribs = model_global.get_booster().predict(dmatrix, pred_contribs=True)[0]
    feature_shaps = shap_contribs[:-1]  # Last element is the SHAP base value

    # ── Re-evaluating Intrinsic vs Baseline ──────────────────────────────────
    # Structural multipliers are applied to the raw ML baseline to correct
    # the "Youth Penalty" bias in the data.
    structural_multiplier = _structural_multiplier(
        req.position, req.age, req.contract_years
    )
    adjusted_baseline_pv_m = baseline_pv_m * structural_multiplier
    # Talent is the baseline WITHOUT the age/contract multipliers
    talent_pv_m = baseline_pv_m
    # Positive = Appreciation (added value). Negative = Depreciation (lost value).
    status_impact_m = adjusted_baseline_pv_m - talent_pv_m

    # ── External NLP Intelligence (1-hour TTL cache) ──────────────────────────
    # The lookup performs synchronous web searches; running it in the worker
    # threadpool keeps the event loop free for other requests.
    nlp = await run_in_threadpool(
        _fetch_nlp_intelligence,
        req.selected_name,
        req.current_club,
        req.interested_club,
    )
    dur = nlp['durability']
    rec = nlp['recency']
    agnt = nlp['agent']
    logs = nlp.get('_logs', [])
    links = nlp.get('_links', [])

    # Tier-aware hype ceiling prevents NLP from distorting low-value players;
    # the same tier also sets the scarcity premium (elite players command
    # the largest buyer's premium).
    tier_name, rec_ceiling_pct, scarcity_premium = _valuation_tier(
        adjusted_baseline_pv_m
    )

    dur_adj = min(0.0, dur) * 0.15             # Injury news can only discount
    rec_adj = max(0.0, rec) * rec_ceiling_pct  # Form can only add premium
    agt_adj = min(0.0, agnt) * 0.05            # Agent leverage only discounts
    external_multiplier = 1.0 + rec_adj + dur_adj + agt_adj

    # ── MTP range ─────────────────────────────────────────────────────────────
    # A Market Transaction Price range replaces a flat discount: 10% below
    # the adjusted value up to the scarcity-boosted ceiling.
    mtp_base = adjusted_baseline_pv_m * external_multiplier
    mtp_lower = mtp_base * 0.90
    mtp_upper = mtp_base * (1.0 + scarcity_premium)

    # ── CFO Dashboard (PSR Integration) ───────────────────────────────────────
    # Amortization is capped at 5 years under UEFA/Premier League rules, and
    # a standard 5-year new contract is assumed for the incoming transfer.
    amortization_years = 5.0
    annual_amortization_cost = req.asking_price / amortization_years

    # ── SHAP Feature Contribution Table ──────────────────────────────────────
    shap_data = sorted(
        (
            {"feature": _format_feature_label(f), "impact": float(s)}
            for f, s in zip(expected_cols_global, feature_shaps)
        ),
        key=lambda x: abs(x['impact']),
        reverse=True,
    )[:10]

    return {
        "ledger": {
            "fiv": talent_pv_m,
            "category": tier_name,
            "depreciation": status_impact_m,
            "baseline_value": adjusted_baseline_pv_m,
            "external_multiplier": external_multiplier,
            "mtp_lower": mtp_lower,
            "mtp_upper": mtp_upper,
            "scarcity_premium": scarcity_premium,
        },
        "cfo_dashboard": {
            "asking_price": req.asking_price,
            "amortization_years": amortization_years,
            "annual_amortization_cost": annual_amortization_cost,
        },
        "nlp_results": {"durability": dur, "recency": rec, "agent": agnt},
        "nlp_cached": nlp.get('_from_cache', False),
        "nlp_found": nlp.get('_found_any', False),
        "logs": logs,
        "links": links,
        "shap_data": shap_data,
    }