import asyncio
import json
from lxml import etree
from playwright.async_api import async_playwright

# URL of the XBRL filing (XML) that main() downloads and parses.
XML_URL = "https://nsearchives.nseindia.com/corporate/xbrl/INTEGRATED_FILING_BANKING_1420280_19042025082141_WEB.xml"

def get_first(result):
    """Return the stripped ``.text`` of the first item in *result*.

    An empty string is returned when *result* is empty/falsy or when the
    first item's ``.text`` is ``None`` or empty.
    """
    if not result:
        return ""
    text = result[0].text
    if not text:
        return ""
    return text.strip()

def to_million(val):
    """Convert a numeric string to millions, rounded to two decimals.

    Thousands separators (``,``) are removed before parsing.

    Args:
        val: The value to convert, normally a string such as ``"1,234,567"``.

    Returns:
        The value divided by 1,000,000 and rounded to 2 places, as a string
        (e.g. ``"1.23"``), or ``""`` when *val* is not a parseable string.
    """
    try:
        amount = float(val.replace(",", ""))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: val has no .replace (None, int, ...);
        # TypeError: val is bytes-like; ValueError: text is not a number.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return ""
    return str(round(amount / 1_000_000, 2))

def extract_banking_data(xml_bytes):
    """Extract banking financial-result fields from an XBRL XML document.

    Each output key is looked up with an XPath in the ``in-capmkt`` namespace,
    restricted to elements whose ``contextRef`` attribute is ``'OneD'``. Only
    the first matching element is used.

    Args:
        xml_bytes: The XML document, in a form accepted by
            ``lxml.etree.fromstring``.

    Returns:
        A ``(data, missing_keys)`` tuple. ``data`` maps every key whose
        element was found with non-empty text to a string: ``"num"`` fields
        are converted to millions via ``to_million`` (which yields ``""`` for
        non-numeric text), ``"str"`` fields are the stripped raw text.
        ``missing_keys`` lists the keys whose element was absent or empty, in
        table order.
    """
    root = etree.fromstring(xml_bytes)
    # NOTE(review): the namespace URI is pinned to one taxonomy date; documents
    # that bind in-capmkt to a different URI would report every key as
    # missing -- confirm whether other taxonomy versions must be supported.
    ns = {'in-capmkt': "http://www.sebi.gov.in/xbrl/2025-01-31/in-capmkt"}

    # Output key -> (XPath, kind). "num" values are scaled to millions;
    # "str" values are kept verbatim. Ratios, percentages, per-share figures
    # and face value are "str" because dividing them by 1,000,000 would
    # collapse them to ~0.
    fields = {
        "bsecode": ("//in-capmkt:ScripCode[@contextRef='OneD']", "str"),
        "isin_number": ("//in-capmkt:ISIN[@contextRef='OneD']", "str"),
        "company_name": ("//in-capmkt:NameOfBank[@contextRef='OneD']", "str"),
        "Interest Earned/Net Income from sales/services": ("//in-capmkt:InterestEarned[@contextRef='OneD']", "num"),
        "Interest / discount on Advances / bills": ("//in-capmkt:InterestOrDiscountOnAdvancesOrBills[@contextRef='OneD']", "num"),
        "Income on Investments": ("//in-capmkt:RevenueOnInvestments[@contextRef='OneD']", "num"),
        "Interest on Balances with RBI & other inter bank funds": ("//in-capmkt:InterestOnBalancesWithReserveBankOfIndiaAndOtherInterBankFunds[@contextRef='OneD']", "num"),
        "Others": ("//in-capmkt:OtherInterest[@contextRef='OneD']", "num"),
        "Other Income": ("//in-capmkt:OtherIncome[@contextRef='OneD']", "num"),
        "Total Income": ("//in-capmkt:Income[@contextRef='OneD']", "num"),
        "Interest Expended": ("//in-capmkt:InterestExpended[@contextRef='OneD']", "num"),
        "Operating Expenses": ("//in-capmkt:OperatingExpenses[@contextRef='OneD']", "num"),
        "Employee Cost": ("//in-capmkt:EmployeesCost[@contextRef='OneD']", "num"),
        "Total Expenditure for Banks": ("//in-capmkt:ExpenditureExcludingProvisionsAndContingencies[@contextRef='OneD']", "num"),
        "Operating Profit Before Provisions and Contingencies": ("//in-capmkt:OperatingProfitBeforeProvisionAndContingencies[@contextRef='OneD']", "num"),
        "Provisions (other than tax) and Contingencies": ("//in-capmkt:ProvisionsOtherThanTaxAndContingencies[@contextRef='OneD']", "num"),
        "Exceptional Item": ("//in-capmkt:ExceptionalItems[@contextRef='OneD']", "num"),
        "Profit (+)/ Loss (-) from Ordinary Activities before Tax": ("//in-capmkt:ProfitLossFromOrdinaryActivitiesBeforeTax[@contextRef='OneD']", "num"),
        "Tax": ("//in-capmkt:TaxExpense[@contextRef='OneD']", "num"),
        "Net Profit (+)/ Loss (-) from Ordinary Activities after Tax": ("//in-capmkt:ProfitLossFromOrdinaryActivitiesAfterTax[@contextRef='OneD']", "num"),
        "Extraordinary Items": ("//in-capmkt:ExtraordinaryItems[@contextRef='OneD']", "num"),
        "Net Profit": ("//in-capmkt:ProfitLossForThePeriod[@contextRef='OneD']", "num"),
        "Minority Interest": ("//in-capmkt:ProfitLossOfMinorityInterest[@contextRef='OneD']", "num"),
        "Share of Profit & Loss of Asso": ("//in-capmkt:ShareOfProfitLossOfAssociates[@contextRef='OneD']", "num"),
        "Net Profit after Mino Inter & Share of P & L": ("//in-capmkt:ProfitLossAfterTaxesMinorityInterestAndShareOfProfitLossOfAssociates[@contextRef='OneD']", "num"),
        "Income Attributable to Consolidated Group": ("//in-capmkt:ProfitLossAfterTaxesMinorityInterestAndShareOfProfitLossOfAssociates[@contextRef='OneD']", "num"),
        "Equity Capital": ("//in-capmkt:PaidUpValueOfEquityShareCapital[@contextRef='OneD']", "num"),
        # A ratio, not a currency amount: must not be scaled to millions.
        "Capital Adequacy Ratio": ("//in-capmkt:CET1Ratio[@contextRef='OneD']", "str"),
        "Face Value (in Rs)": ("//in-capmkt:FaceValueOfEquityShareCapital[@contextRef='OneD']", "str"),
        "Basic EPS before Extraordinary items": ("//in-capmkt:BasicEarningsPerShareBeforeExtraordinaryItems[@contextRef='OneD']", "str"),
        "Diluted EPS before Extraordinary items": ("//in-capmkt:DilutedEarningsPerShareBeforeExtraordinaryItems[@contextRef='OneD']", "str"),
        "Basic EPS after Extraordinary items": ("//in-capmkt:BasicEarningsPerShareAfterExtraordinaryItems[@contextRef='OneD']", "str"),
        "Diluted EPS after Extraordinary items": ("//in-capmkt:DilutedEarningsPerShareAfterExtraordinaryItems[@contextRef='OneD']", "str"),
        "Gross/Net NPA": ("//in-capmkt:GrossNonPerformingAssets[@contextRef='OneD']", "num"),
        # A percentage, not a currency amount: must not be scaled to millions.
        "Percentage of Gross/Net NPA": ("//in-capmkt:PercentageOfGrossNpa[@contextRef='OneD']", "str"),
        "Data Type": ("//in-capmkt:NatureOfReportStandaloneConsolidated[@contextRef='OneD']", "str"),
    }

    data = {}
    missing_keys = []

    for key, (xpath, xtype) in fields.items():
        val = get_first(root.xpath(xpath, namespaces=ns))
        if not val:
            # Element absent, or present with no text.
            missing_keys.append(key)
            continue
        data[key] = to_million(val) if xtype == "num" else val

    return data, missing_keys

async def main():
    """Download the filing at ``XML_URL`` and print its extracted fields.

    Launches headless Firefox through Playwright, visits the NSE homepage
    first, then navigates to ``XML_URL``, parses the response with
    ``extract_banking_data`` and prints the result as JSON followed by any
    keys that were not found. Returns early (after printing an error) when
    the XML request yields no response or a non-OK status.
    """
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)
        try:
            context = await browser.new_context(user_agent="Mozilla/5.0")
            page = await context.new_page()

            print("🌐 Visiting NSE homepage to bypass Akamai...")
            await page.goto("https://www.nseindia.com", timeout=60000)
            # Fixed pause after the homepage load before requesting the XML.
            await page.wait_for_timeout(3000)

            print(f"📥 Fetching XML data from {XML_URL} ...")
            resp = await page.goto(XML_URL, timeout=60000)
            # page.goto may return None, so guard before touching .ok.
            if resp is None or not resp.ok:
                print("❌ Failed to fetch XML.")
                return

            # Keep the raw bytes: the XML parser then honours the encoding
            # declared in the document instead of a forced UTF-8 round trip.
            xml_bytes = await resp.body()
            print("📄 Raw XML Content Snippet:")
            # Decoded leniently; this snippet is for debugging only.
            print(xml_bytes.decode("utf-8", errors="replace")[:300])

            print("🔍 Parsing XML content into tree...")
            result, missing = extract_banking_data(xml_bytes)

            print("✅ Final Extracted JSON:")
            print(json.dumps(result, indent=2))

            if missing:
                print("\n⚠️ Missing keys (not found in XML):")
                for key in missing:
                    print(f" - {key}")
        finally:
            # Always release the browser, including on early return or error.
            await browser.close()

# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
