#!/usr/bin/env python3
import asyncio
import requests
from datetime import datetime
from lxml import etree
from playwright.async_api import async_playwright

# NSE "Integrated Filing - Financials" results endpoint, hardcoded to the
# quarter ended 31-Mar-2025. size=5000 requests the full result set in one
# page so no pagination handling is needed downstream.
QUARTERLY_API = (
    "https://www.nseindia.com/api/integrated-filing-results?"
    "index=equities&period_ended=31-Mar-2025&type=Integrated%20Filing-%20Financials&size=5000"
)

def safe_print(*args, **kwargs):
    """Pass-through to builtin print().

    Kept as a single output seam so encoding fallbacks or log redirection
    can later be added in one place.
    """
    return print(*args, **kwargs)

def get_first(result):
    """Return the stripped text of the first node in *result*.

    Returns None when *result* is empty or the first node has no (or empty)
    text content.
    """
    if not result:
        return None
    text = result[0].text
    if not text:
        return None
    return text.strip()

async def fetch_api_with_cookies(context, user_agent):
    """Call QUARTERLY_API with the browser context's cookies.

    Args:
        context: playwright BrowserContext whose cookies carry the Akamai
            anti-bot clearance obtained by the page visits in main().
        user_agent: UA string to send, matching the browser's own, so the
            cookie/UA pair stays consistent.

    Returns:
        (json_or_None, response) on a completed HTTP exchange (json is None
        when the body is not valid JSON), or (None, exception) when the
        request itself failed.
    """
    cookies = await context.cookies()
    cookie_dict = {c["name"]: c["value"] for c in cookies}
    headers = {
        "User-Agent": user_agent,
        "Referer": "https://www.nseindia.com/",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.9",
        "X-Requested-With": "XMLHttpRequest",
    }

    def _blocking_get():
        # requests is synchronous; the `with` block guarantees the session's
        # connection pool is released (the original leaked it).
        with requests.Session() as session:
            return session.get(
                QUARTERLY_API, headers=headers, cookies=cookie_dict, timeout=30
            )

    try:
        # Run the blocking HTTP call in a worker thread so the event loop
        # (and playwright) is not stalled for up to the 30 s timeout.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(None, _blocking_get)
        try:
            j = resp.json()
        except ValueError:
            # Non-JSON body (e.g. an Akamai block page): signal with None
            # but still hand back the response for status/text inspection.
            j = None
        return j, resp
    except Exception as e:
        return None, e

async def main():
    """Diagnostic driver.

    Fetches the NSE integrated-filing record list, downloads every record's
    XBRL instance document through the browser (so Akamai cookies apply),
    reads the 'OneD' context's reporting period, and classifies each filing
    as quarterly (~90 days), half-yearly (~180 days), or invalid.
    Progress and a final summary are printed to stdout.
    """
    safe_print("📦 Starting diagnostic: checking which XBRLs are half-yearly instead of quarterly")
    safe_print(f"📡 Using API: {QUARTERLY_API}\n")

    async with async_playwright() as p:
        # A real headless browser session is required: NSE's Akamai bot
        # protection only issues usable cookies to JS-executing clients.
        browser = await p.firefox.launch(headless=True)
        context = await browser.new_context(user_agent="Mozilla/5.0 (X11; Linux x86_64)")
        page = await context.new_page()

        # Prepare cookies for API call
        safe_print("🌐 Visiting NSE homepage to solve Akamai...")
        try:
            await page.goto("https://www.nseindia.com", timeout=60000)
            # Give the anti-bot JavaScript time to set its cookies.
            await page.wait_for_timeout(4000)
        except Exception as e:
            # Best-effort: a failed visit may still have left usable cookies.
            safe_print(f"   ⚠️ Warning: homepage visit failed: {e}")

        safe_print("📦 Opening Integrated Filing page...")
        try:
            await page.goto("https://www.nseindia.com/companies-listing/corporate-integrated-filing", timeout=60000)
            await page.wait_for_timeout(8000)
        except Exception as e:
            safe_print(f"   ⚠️ Warning: integrated-filing page failed: {e}")

        # Optional warm-up
        # Hitting a lightweight API endpoint from inside the browser appears
        # to prime session state; failures here are deliberately ignored.
        try:
            await page.goto("https://www.nseindia.com/api/allindices", timeout=60000)
            await page.wait_for_timeout(1500)
        except Exception:
            pass

        # Reuse the browser's exact UA for the requests call so the
        # cookie/User-Agent pair stays consistent; fall back to the UA the
        # context was created with.
        try:
            user_agent = await page.evaluate("() => navigator.userAgent")
        except Exception:
            user_agent = "Mozilla/5.0 (X11; Linux x86_64)"

        safe_print("🔍 Fetching API using browser cookies...")
        # Contract: (json_or_None, response) on HTTP completion, or
        # (None, exception) when the request itself failed.
        json_data, resp_or_err = await fetch_api_with_cookies(context, user_agent)

        if isinstance(resp_or_err, Exception):
            safe_print(f"❌ HTTP request failed: {resp_or_err}")
            await browser.close()
            return
        if resp_or_err is None:
            safe_print("❌ No response object returned.")
            await browser.close()
            return

        if json_data is None:
            # HTTP exchange completed but the body was not JSON (likely an
            # Akamai block page); show a snippet for debugging.
            safe_print(f"⚠️ No JSON. HTTP status: {resp_or_err.status_code}")
            safe_print(resp_or_err.text[:800].replace("\n", " "))
            await browser.close()
            return

        records = json_data.get("data", []) if isinstance(json_data, dict) else []
        safe_print(f"✅ API returned {len(records)} records.\n")

        # XBRL 2.1 instance namespace, used for the period xpath below.
        ns = {"xbrli": "http://www.xbrl.org/2003/instance"}
        total = 0          # records with a non-empty XBRL URL actually checked
        halfyearly = []    # (symbol, start, end, days)
        quarterly = []     # (symbol, start, end, days)
        invalid = []       # (symbol, url, reason)

        for idx, item in enumerate(records, 1):
            symbol = item.get("symbol") or item.get("isin") or f"rec{idx}"
            xbrl_field = item.get("xbrl", "")
            if not xbrl_field:
                continue

            # The field sometimes embeds multiple links separated by <br>;
            # only the first URL is relevant.
            xbrl_url = xbrl_field.split("<br>")[0].strip()
            if not xbrl_url:
                continue

            total += 1
            safe_print(f"[{idx}/{len(records)}] 🔎 {symbol} — {xbrl_url}")

            try:
                # Download through the browser so the Akamai cookies apply.
                resp = await page.goto(xbrl_url, timeout=90000)
                if not resp or not resp.ok:
                    status = resp.status if resp else "no response"
                    safe_print(f"   ❌ Failed to load XBRL ({status})")
                    invalid.append((symbol, xbrl_url, f"HTTP {status}"))
                    continue

                xml_text = await resp.text()
                if not xml_text.strip():
                    safe_print("   ⚠️ Empty XML content")
                    invalid.append((symbol, xbrl_url, "empty xml"))
                    continue

                try:
                    root = etree.fromstring(xml_text.encode("utf-8"))
                except Exception as e:
                    safe_print(f"   ❌ XML parse error: {e}")
                    invalid.append((symbol, xbrl_url, "xml parse error"))
                    continue

                # 'OneD' is the context id NSE filings use for the reporting
                # period (duration) — presumably stable across filings;
                # TODO(review): confirm against more filing samples.
                start_nodes = root.xpath("//xbrli:context[@id='OneD']/xbrli:period/xbrli:startDate/text()", namespaces=ns)
                end_nodes   = root.xpath("//xbrli:context[@id='OneD']/xbrli:period/xbrli:endDate/text()", namespaces=ns)

                if not start_nodes or not end_nodes:
                    safe_print("   ⚠️ No OneD context found — invalid or missing XBRL")
                    invalid.append((symbol, xbrl_url, "no OneD"))
                    continue

                # NOTE(review): 'e' here is the end-date string; the same
                # name is rebound as the exception in the handler below.
                s = start_nodes[0].strip()
                e = end_nodes[0].strip()

                try:
                    d1 = datetime.fromisoformat(s)
                    d2 = datetime.fromisoformat(e)
                    diff = (d2 - d1).days
                except Exception:
                    # Fallback for date strings fromisoformat rejects.
                    try:
                        d1 = datetime.strptime(s, "%Y-%m-%d")
                        d2 = datetime.strptime(e, "%Y-%m-%d")
                        diff = (d2 - d1).days
                    except Exception:
                        safe_print(f"   ⚠️ Date parse error: '{s}' -> '{e}'")
                        invalid.append((symbol, xbrl_url, f"date parse '{s}' '{e}'"))
                        continue

                # ✅ Fixed logic
                # Classify by period length with tolerance bands:
                # ~90 days => quarterly, ~180 days => half-yearly.
                if 80 <= diff <= 100:
                    safe_print(f"   ✅ Quarterly ({s} → {e}, {diff} days)")
                    quarterly.append((symbol, s, e, diff))
                elif 170 <= diff <= 190:
                    safe_print(f"   ⚠️ Half-yearly ({s} → {e}, {diff} days)")
                    halfyearly.append((symbol, s, e, diff))
                else:
                    safe_print(f"   ⚠️ Invalid / unusual period ({s} → {e}, {diff} days)")
                    invalid.append((symbol, xbrl_url, f"{diff} days"))

            except Exception as e:
                # Catch-all so one bad record never aborts the whole sweep.
                safe_print(f"   ❌ Unexpected error: {e}")
                invalid.append((symbol, xbrl_url, str(e)))
                continue

        # Summary
        safe_print("\n--- SUMMARY ---")
        safe_print(f"Total XBRLs checked: {total}")
        safe_print(f"✅ Quarterly count: {len(quarterly)}")
        safe_print(f"⚠️ Half-yearly count: {len(halfyearly)}")
        safe_print(f"❌ Invalid / other: {len(invalid)}\n")

        if halfyearly:
            safe_print("Half-yearly entries:")
            for sym, s, e, diff in halfyearly:
                safe_print(f" - {sym}: {s} → {e} ({diff} days)")

        if invalid:
            safe_print("\nInvalid / failed entries:")
            # Cap the failure listing at 50 entries to keep output readable.
            for sym, url, reason in invalid[:50]:
                safe_print(f" - {sym}: {reason}")

        await browser.close()
        safe_print("\n🏁 Diagnostic finished.")

if __name__ == "__main__":
    # Script entry point: drive the async diagnostic to completion.
    asyncio.run(main())
