#!/usr/bin/env python3
import asyncio
import requests
from datetime import datetime
from lxml import etree
from playwright.async_api import async_playwright

# NSE "Integrated Filing - Financials" results endpoint, hardcoded to the
# quarter ended 31-Mar-2025. size=5000 requests the full result set in one
# page so no pagination handling is needed downstream.
QUARTERLY_API = (
    "https://www.nseindia.com/api/integrated-filing-results?"
    "index=equities&period_ended=31-Mar-2025&type=Integrated%20Filing-%20Financials&size=5000"
)

def safe_print(*args, **kwargs):
    """Pass-through to builtin print().

    Kept as a single output seam so encoding fallbacks or log redirection
    can later be added in one place.
    """
    return print(*args, **kwargs)

def get_first(result):
    """Return the stripped text of the first node in *result*.

    Returns None when *result* is empty or the first node has no (or empty)
    text content.
    """
    if not result:
        return None
    text = result[0].text
    if not text:
        return None
    return text.strip()

async def fetch_api_with_cookies(context, user_agent):
    """Call QUARTERLY_API with the browser context's cookies.

    Args:
        context: playwright BrowserContext whose cookies carry the Akamai
            anti-bot clearance obtained by the page visits in main().
        user_agent: UA string to send, matching the browser's own, so the
            cookie/UA pair stays consistent.

    Returns:
        (json_or_None, response) on a completed HTTP exchange (json is None
        when the body is not valid JSON), or (None, exception) when the
        request itself failed.
    """
    cookies = await context.cookies()
    cookie_dict = {c["name"]: c["value"] for c in cookies}
    headers = {
        "User-Agent": user_agent,
        "Referer": "https://www.nseindia.com/",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.9",
        "X-Requested-With": "XMLHttpRequest",
    }

    def _blocking_get():
        # requests is synchronous; the `with` block guarantees the session's
        # connection pool is released (the original leaked it).
        with requests.Session() as session:
            return session.get(
                QUARTERLY_API, headers=headers, cookies=cookie_dict, timeout=30
            )

    try:
        # Run the blocking HTTP call in a worker thread so the event loop
        # (and playwright) is not stalled for up to the 30 s timeout.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(None, _blocking_get)
        try:
            j = resp.json()
        except ValueError:
            # Non-JSON body (e.g. an Akamai block page): signal with None
            # but still hand back the response for status/text inspection.
            j = None
        return j, resp
    except Exception as e:
        return None, e

async def main():
    """Diagnostic driver.

    Fetches the NSE integrated-filing record list, downloads every record's
    XBRL instance document through the browser (so Akamai cookies apply),
    reads the 'OneD' context's reporting period, and classifies each filing
    as quarterly (~90 days), half-yearly (~180 days), or invalid.
    Progress and a final summary are printed to stdout.
    """
    safe_print("📦 Starting diagnostic: checking which XBRLs are half-yearly instead of quarterly")
    safe_print(f"📡 Using API: {QUARTERLY_API}\n")

    async with async_playwright() as p:
        # A real headless browser session is required: NSE's Akamai bot
        # protection only issues usable cookies to JS-executing clients.
        browser = await p.firefox.launch(headless=True)
        context = await browser.new_context(user_agent="Mozilla/5.0 (X11; Linux x86_64)")
        page = await context.new_page()

        # Prepare cookies for API call
        safe_print("🌐 Visiting NSE homepage to solve Akamai...")
        try:
            await page.goto("https://www.nseindia.com", timeout=60000)
            # Give the anti-bot JavaScript time to set its cookies.
            await page.wait_for_timeout(4000)
        except Exception as e:
            # Best-effort: a failed visit may still have left usable cookies.
            safe_print(f"   ⚠️ Warning: homepage visit failed: {e}")

        safe_print("📦 Opening Integrated Filing page...")
        try:
            await page.goto("https://www.nseindia.com/companies-listing/corporate-integrated-filing", timeout=60000)
            await page.wait_for_timeout(8000)
        except Exception as e:
            safe_print(f"   ⚠️ Warning: integrated-filing page failed: {e}")

        # Optional warm-up
        # Hitting a lightweight API endpoint from inside the browser appears
        # to prime session state; failures here are deliberately ignored.
        try:
            await page.goto("https://www.nseindia.com/api/allindices", timeout=60000)
            await page.wait_for_timeout(1500)
        except Exception:
            pass

        # Reuse the browser's exact UA for the requests call so the
        # cookie/User-Agent pair stays consistent; fall back to the UA the
        # context was created with.
        try:
            user_agent = await page.evaluate("() => navigator.userAgent")
        except Exception:
            user_agent = "Mozilla/5.0 (X11; Linux x86_64)"

        safe_print("🔍 Fetching API using browser cookies...")
        # Contract: (json_or_None, response) on HTTP completion, or
        # (None, exception) when the request itself failed.
        json_data, resp_or_err = await fetch_api_with_cookies(context, user_agent)

        if isinstance(resp_or_err, Exception):
            safe_print(f"❌ HTTP request failed: {resp_or_err}")
            await browser.close()
            return
        if resp_or_err is None:
            safe_print("❌ No response object returned.")
            await browser.close()
            return

        if json_data is None:
            # HTTP exchange completed but the body was not JSON (likely an
            # Akamai block page); show a snippet for debugging.
            safe_print(f"⚠️ No JSON. HTTP status: {resp_or_err.status_code}")
            safe_print(resp_or_err.text[:800].replace("\n", " "))
            await browser.close()
            return

        records = json_data.get("data", []) if isinstance(json_data, dict) else []
        safe_print(f"✅ API returned {len(records)} records.\n")

        # XBRL 2.1 instance namespace, used for the period xpath below.
        ns = {"xbrli": "http://www.xbrl.org/2003/instance"}
        total = 0          # records with a non-empty XBRL URL actually checked
        halfyearly = []    # (symbol, start, end, days)
        quarterly = []     # (symbol, start, end, days)
        invalid = []       # (symbol, url, reason)

        for idx, item in enumerate(records, 1):
            symbol = item.get("symbol") or item.get("isin") or f"rec{idx}"
            xbrl_field = item.get("xbrl", "")
            if not xbrl_field:
                continue

            # The field sometimes embeds multiple links separated by <br>;
            # only the first URL is relevant.
            xbrl_url = xbrl_field.split("<br>")[0].strip()
            if not xbrl_url:
                continue

            total += 1
            safe_print(f"[{idx}/{len(records)}] 🔎 {symbol} — {xbrl_url}")

            try:
                # Download through the browser so the Akamai cookies apply.
                resp = await page.goto(xbrl_url, timeout=90000)
                if not resp or not resp.ok:
                    status = resp.status if resp else "no response"
                    safe_print(f"   ❌ Failed to load XBRL ({status})")
                    invalid.append((symbol, xbrl_url, f"HTTP {status}"))
                    continue

                xml_text = await resp.text()
                if not xml_text.strip():
                    safe_print("   ⚠️ Empty XML content")
                    invalid.append((symbol, xbrl_url, "empty xml"))
                    continue

                try:
                    root = etree.fromstring(xml_text.encode("utf-8"))
                except Exception as e:
                    safe_print(f"   ❌ XML parse error: {e}")
                    invalid.append((symbol, xbrl_url, "xml parse error"))
                    continue

                # 'OneD' is the context id NSE filings use for the reporting
                # period (duration) — presumably stable across filings;
                # TODO(review): confirm against more filing samples.
                start_nodes = root.xpath("//xbrli:context[@id='OneD']/xbrli:period/xbrli:startDate/text()", namespaces=ns)
                end_nodes   = root.xpath("//xbrli:context[@id='OneD']/xbrli:period/xbrli:endDate/text()", namespaces=ns)

                if not start_nodes or not end_nodes:
                    safe_print("   ⚠️ No OneD context found — invalid or missing XBRL")
                    invalid.append((symbol, xbrl_url, "no OneD"))
                    continue

                # NOTE(review): 'e' here is the end-date string; the same
                # name is rebound as the exception in the handler below.
                s = start_nodes[0].strip()
                e = end_nodes[0].strip()

                try:
                    d1 = datetime.fromisoformat(s)
                    d2 = datetime.fromisoformat(e)
                    diff = (d2 - d1).days
                except Exception:
                    # Fallback for date strings fromisoformat rejects.
                    try:
                        d1 = datetime.strptime(s, "%Y-%m-%d")
                        d2 = datetime.strptime(e, "%Y-%m-%d")
                        diff = (d2 - d1).days
                    except Exception:
                        safe_print(f"   ⚠️ Date parse error: '{s}' -> '{e}'")
                        invalid.append((symbol, xbrl_url, f"date parse '{s}' '{e}'"))
                        continue

                # ✅ Fixed logic
                # Classify by period length with tolerance bands:
                # ~90 days => quarterly, ~180 days => half-yearly.
                if 80 <= diff <= 100:
                    safe_print(f"   ✅ Quarterly ({s} → {e}, {diff} days)")
                    quarterly.append((symbol, s, e, diff))
                elif 170 <= diff <= 190:
                    safe_print(f"   ⚠️ Half-yearly ({s} → {e}, {diff} days)")
                    halfyearly.append((symbol, s, e, diff))
                else:
                    safe_print(f"   ⚠️ Invalid / unusual period ({s} → {e}, {diff} days)")
                    invalid.append((symbol, xbrl_url, f"{diff} days"))

            except Exception as e:
                # Catch-all so one bad record never aborts the whole sweep.
                safe_print(f"   ❌ Unexpected error: {e}")
                invalid.append((symbol, xbrl_url, str(e)))
                continue

        # Summary
        safe_print("\n--- SUMMARY ---")
        safe_print(f"Total XBRLs checked: {total}")
        safe_print(f"✅ Quarterly count: {len(quarterly)}")
        safe_print(f"⚠️ Half-yearly count: {len(halfyearly)}")
        safe_print(f"❌ Invalid / other: {len(invalid)}\n")

        if halfyearly:
            safe_print("Half-yearly entries:")
            for sym, s, e, diff in halfyearly:
                safe_print(f" - {sym}: {s} → {e} ({diff} days)")

        if invalid:
            safe_print("\nInvalid / failed entries:")
            # Cap the failure listing at 50 entries to keep output readable.
            for sym, url, reason in invalid[:50]:
                safe_print(f" - {sym}: {reason}")

        await browser.close()
        safe_print("\n🏁 Diagnostic finished.")

if __name__ == "__main__":
    # Script entry point: drive the async diagnostic to completion.
    asyncio.run(main())
