import os, json, asyncio
from datetime import datetime
from lxml import etree
from playwright.async_api import async_playwright

# Config
# NSE endpoint listing quarterly financial-result filings for equities; it is
# requested from inside the browser context in fetch_nse_quarterly().
QUARTERLY_API = "https://www.nseindia.com/api/corporates-financial-results?index=equities&period=Quarterly"
# JSON list of pending ISIN records, looked up by their 'isin_number' key.
# Read at start-up and rewritten at the end of fetch_nse_quarterly().
ISIN_JSON_PATH = "124.json"
# Stored in every output record under "qtr_code".
QUARTER_CODE = 124
# Each extracted record is written once into every directory listed here.
OUTPUT_DIRS = [
    './json/'
]

# Import-time side effect: make sure all output directories exist.
for path in OUTPUT_DIRS:
    os.makedirs(path, exist_ok=True)

def get_first(result):
    """Return the stripped text of the first node in *result*, or None.

    None is returned when *result* is empty (or otherwise falsy) and when the
    first node's ``text`` attribute is None or an empty string.
    """
    if not result:
        return None
    text = result[0].text
    if not text:
        return None
    return text.strip()
def sanitize(name):
    """Strip corporate suffixes and punctuation from a company name.

    Removes the standalone words "limited" and "ltd" (case-insensitive) and
    every "." and ",", then trims surrounding whitespace.  The suffix words
    are matched on word boundaries so that names which merely contain them
    (e.g. "Unlimited") are left intact.

    Args:
        name: Company name; ``None`` or an empty value is treated as "".

    Returns:
        The cleaned name, possibly an empty string.
    """
    import re  # local import: ``re`` is not imported at file level

    return re.sub(r"\b(?:limited|ltd)\b|[.,]", "", name or "", flags=re.I).strip()

def parse_nse_date(raw_date):
    """Normalise a date string to the ``DD-Mon-YY`` form.

    Two input layouts are accepted, tried in this order: ``DD-Mon-YYYY`` and
    ``YYYY-MM-DD``.

    Raises:
        ValueError: if *raw_date* matches neither layout.
    """
    accepted_layouts = ("%d-%b-%Y", "%Y-%m-%d")
    for layout in accepted_layouts:
        try:
            moment = datetime.strptime(raw_date, layout)
            return moment.strftime("%d-%b-%y")
        except ValueError:
            # Wrong layout for this input; fall through to the next one.
            pass
    raise ValueError(f"Unrecognized date format: {raw_date}")


def extract_data(xml_bytes):
    """Extract quarterly financial figures from an XBRL result document.

    Every fact is looked up in the ``in-bse-fin`` namespace.  Headline items
    are read from the ``OneD`` context.  "Other Expenses" is read from the
    first ``OneOperatingExpenses<NN>D`` context whose description is a generic
    "other expenses" label, falling back to the ``OneD`` value when no such
    context exists.

    Args:
        xml_bytes: Serialized XBRL document, passed to ``etree.fromstring``.

    Returns:
        dict mapping output labels to strings.  Monetary values are divided
        by 1,000,000, rounded to two decimals and rendered with ``str``
        ("0" when the fact is missing or empty); text values are stripped
        ("" when missing).

    Raises:
        ValueError: if a monetary fact contains non-numeric text.
        Whatever ``etree.fromstring`` raises for malformed XML also
        propagates to the caller.
    """
    root = etree.fromstring(xml_bytes)
    ns = {'in-bse-fin': "http://www.bseindia.com/xbrl/fin/2020-03-31/in-bse-fin"}

    def fact(tag, ctx="OneD"):
        """Return the stripped text of the first *tag* fact in *ctx*, or None."""
        nodes = root.xpath(f"//in-bse-fin:{tag}[@contextRef='{ctx}']", namespaces=ns)
        return get_first(nodes)

    def text(tag, ctx="OneD"):
        """Return the fact as text, "" when it is missing."""
        return fact(tag, ctx) or ""

    def millions(tag, ctx="OneD"):
        """Return the fact scaled to millions as a string, "0" when missing."""
        raw = fact(tag, ctx)
        if not raw:
            return "0"
        # Thousands separators are dropped before conversion.
        return str(round(float(raw.replace(',', '')) / 1_000_000, 2))

    # Insertion order is preserved in the JSON output, so keep this layout.
    data = {
        'bsecode': text("ScripCode"),
        'company_name': text("NameOfTheCompany"),
        'Net Sales': millions("RevenueFromOperations"),
        'Other Income': millions("OtherIncome"),
        'Total Income': millions("Income"),
        'Expenditure': millions("Expenses"),
        'Cost of Materials Consumed': millions("CostOfMaterialsConsumed"),
        'Finance Costs': millions("FinanceCosts"),
        'Changes in inventories of finished goods, work-in-progress and stock-in-trade': millions("ChangesInInventoriesOfFinishedGoodsWorkInProgressAndStockInTrade"),
        'Purchases of stock-in-trade': millions("PurchasesOfStockInTrade"),
        'Employee benefit expense': millions("EmployeeBenefitExpense"),
        'Depreciation and amortisation expense': millions("DepreciationDepletionAndAmortisationExpense"),
        'Profit after Interest but before Exceptional Items': millions("ProfitBeforeExceptionalItemsAndTax"),
        'Exceptional Item': millions("ExceptionalItemsBeforeTax"),
        'Profit (+)/ Loss (-) from Ordinary Activities before Tax': millions("ProfitBeforeTax"),
        'Tax': millions("TaxExpense"),
        'Net Profit (+)/ Loss (-) from Ordinary Activities after Tax': millions("ProfitLossForPeriodFromContinuingOperations"),
        'Net Profit': millions("ProfitLossForPeriod"),
        'Current tax': millions("CurrentTax"),
        'Deferred tax': millions("DeferredTax"),
        'Other Comprehensive Income Net of Taxes': millions("OtherComprehensiveIncomeNetOfTaxes"),
        'Total Comprehensive Income for the Period': millions("ComprehensiveIncomeForThePeriod"),
        'Share of profit(loss) of associates and joint ventures': millions("ShareOfProfitLossOfAssociatesAndJointVenturesAccountedForUsingEquityMethod"),
        'Basic EPS for continuing operation': text("BasicEarningsLossPerShareFromContinuingOperations"),
        'Diluted EPS for continuing operation': text("DilutedEarningsLossPerShareFromContinuingOperations"),
        'Basic for discontinued & continuing operation': text("BasicEarningsLossPerShareFromContinuingAndDiscontinuedOperations"),
        'Diluted for discontinued & continuing operation': text("DilutedEarningsLossPerShareFromContinuingAndDiscontinuedOperations"),
        'Data Type': text("NatureOfReportStandaloneConsolidated"),
    }

    # Candidate contexts for itemised operating expenses.  Ids were previously
    # built as "0{i}", which yields three-digit suffixes ("010") from i = 10
    # on; the two-digit form ("10") is tried as well.  For i < 10 both forms
    # are identical and dict.fromkeys drops the duplicate while keeping order.
    # NOTE(review): confirm the id scheme against the taxonomy's contexts.
    candidate_contexts = dict.fromkeys(
        ctx
        for i in range(1, 50)
        for ctx in (f"OneOperatingExpenses{i:02d}D", f"OneOperatingExpenses0{i}D")
    )
    generic_labels = {"other expenses", "other expense", "others"}
    other_ctx = next(
        (
            ctx
            for ctx in candidate_contexts
            if text("DescriptionOfOtherExpenses", ctx).lower() in generic_labels
        ),
        "OneD",  # no itemised "other expenses" line: use the headline figure
    )
    data["Other Expenses"] = millions("OtherExpenses", other_ctx)
    return data

def _write_outputs(data, symbol):
    """Write *data* as JSON into every directory in OUTPUT_DIRS.

    Files are named after the BSE scrip code, except in directories whose
    path contains "socialMedia", where the sanitized company name is used.
    *symbol* stands in when the filing carries no scrip code or company name,
    so records never collapse onto a shared ".json" file.

    Returns:
        The scrip-code based file stem (used by the caller for logging).
    """
    bse = data.get("bsecode") or symbol
    cname = sanitize(data.get("company_name") or symbol)
    for path in OUTPUT_DIRS:
        stem = cname if "socialMedia" in path else bse
        # The stem comes from the downloaded filing: neutralise path
        # separators so it cannot point outside `path` or break open().
        stem = str(stem).replace("/", "_").replace("\\", "_")
        with open(os.path.join(path, f"{stem}.json"), "w") as f:
            json.dump(data, f, indent=4)
    return bse


async def fetch_nse_quarterly():
    """Download NSE quarterly results for pending ISINs and save them as JSON.

    Steps:
      1. Load the pending ISIN records from ``ISIN_JSON_PATH``.
      2. Open the NSE homepage in headless Firefox, then request
         ``QUARTERLY_API`` through the same browser context.
      3. For every result whose ISIN is pending and whose ``bank`` flag is
         not 'B', load its XBRL file, extract the figures with
         ``extract_data`` and write them to ``OUTPUT_DIRS``.
      4. Rewrite ``ISIN_JSON_PATH`` without the ISINs that were saved, so
         filings that failed to download or parse are retried next run.

    Raises:
        RuntimeError: if the quarterly API answers with a non-success status.
    """
    with open(ISIN_JSON_PATH, encoding="utf-8") as f:
        isin_raw = json.load(f)
    isin_map = {x['isin_number']: x for x in isin_raw if 'isin_number' in x}
    print(f"✅ ISINs loaded: {len(isin_map)}")

    # Only ISINs whose record was actually written end up here.
    saved_isins = set()

    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)
        try:
            context = await browser.new_context(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120 Safari/537.36")
            page = await context.new_page()

            # The homepage is visited first so later requests reuse the
            # browser context's session state.
            print("🌐 Visiting NSE homepage to solve Akamai...")
            await page.goto("https://www.nseindia.com", timeout=60000)
            await page.wait_for_timeout(3000)

            print("📦 Fetching Quarterly Results API from inside browser context...")
            api_response = await context.request.get(QUARTERLY_API, headers={
                "Accept": "application/json",
                "Referer": "https://www.nseindia.com/"
            })
            if not api_response.ok:
                raise RuntimeError(
                    f"Quarterly API request failed: HTTP {api_response.status}"
                )
            json_data = await api_response.json()
            print(f"✅ Fetched {len(json_data)} results from API.")

            valid = [x for x in json_data if x.get("isin") in isin_map and x.get("bank") != 'B']
            print(f"✅ Valid ISIN entries to process: {len(valid)}")

            for item in valid:
                symbol = item.get("symbol")
                # The field may hold several links separated by "<br>"; only
                # the first one is used.
                xml_url = (item.get("xbrl") or "").split("<br>")[0]
                if not xml_url:
                    print(f"❌ No XBRL link for {symbol}, skipping")
                    continue
                print(f"➡️ Processing: {symbol} | {xml_url}")

                try:
                    print(f"🌐 Opening XML in browser context: {xml_url}")
                    xml_response = await page.goto(xml_url, timeout=90000)
                    if not (xml_response and xml_response.ok):
                        print(f"❌ Failed to load XML via browser tab: Status {xml_response.status if xml_response else 'No Response'}")
                        continue
                    # Raw bytes let the XML parser honour the document's own
                    # encoding declaration.
                    xml_bytes = await xml_response.body()
                except Exception as e:
                    # Best effort: a failed download only skips this filing.
                    print(f"❌ Exception fetching XML in browser context: {e}")
                    continue

                try:
                    data = extract_data(xml_bytes)
                    meta = isin_map[item['isin']]
                    data.update({
                        "isin_number": item["isin"],
                        "Type": item["audited"],
                        "Date Begin": parse_nse_date(item["fromDate"]),
                        "Date End": parse_nse_date(item["toDate"]),
                        "Description": "Amount (Rs. million)",
                        "qtr_code": QUARTER_CODE,
                        "image_id": meta.get("image_id"),
                        "image_url": meta.get("image_url", ""),
                        "BSE/NSE": "NSE"
                    })
                except (etree.XMLSyntaxError, KeyError, TypeError, ValueError) as e:
                    # One malformed filing must not abort the whole run.
                    print(f"❌ Could not parse filing for {symbol}: {e}")
                    continue

                bse = _write_outputs(data, symbol)
                saved_isins.add(item["isin"])
                print(f"✅ Saved: {bse} - {data['company_name']}")
        finally:
            await browser.close()

    # Records without an 'isin_number' are kept untouched.
    new_isin_list = [rec for rec in isin_raw if rec.get('isin_number') not in saved_isins]
    with open(ISIN_JSON_PATH, "w") as f:
        json.dump(new_isin_list, f, indent=4)
    print(f"🧹 Removed {len(saved_isins)} ISINs from {ISIN_JSON_PATH}")
    print("🏁 Done!")

# Script entry point: run the scraper coroutine to completion when this file
# is executed directly (nothing is fetched on plain import).
if __name__ == "__main__":
    asyncio.run(fetch_nse_quarterly())
