"""Playwright-driven scraper that logs in to a web portal and reads the
account balance and transaction history from the resulting pages."""

import os
import re
from datetime import date, datetime
from typing import Optional

from playwright.sync_api import sync_playwright, Playwright  # noqa: F401  (sync_playwright kept for importers of this module)

from config import load_env


class Scraper:
    """Log in to the site at ``SCRAPER_URL`` and expose balance/transaction data.

    The constructor performs the login immediately, so a constructed instance
    owns a live browser. Release it with :meth:`close`, or use the instance as
    a context manager::

        with sync_playwright() as p, Scraper(p) as scraper:
            print(scraper.get_snapshot())
    """

    # Page selectors. They are tied to the current markup of the target site.
    _LOGIN_ID_INPUT = "input#ctl00_ContentPlaceHolder1_txtLoginID"
    _PASSWORD_INPUT = "input#ctl00_ContentPlaceHolder1_txtPassword"
    _LOGIN_BUTTON = "input#ctl00_ContentPlaceHolder1_btnLogin"
    _BALANCE_XPATH = (
        "xpath=/html/body/form/div[3]/div[3]/div[2]/div[3]/div[5]/span[2]"
    )
    _ACCOUNT_LINK_XPATH = (
        "xpath=/html/body/form/div[3]/div[3]/div[2]/div[3]/div[1]/span[2]/a"
    )
    _TRANSACTIONS_BODY_XPATH = (
        "xpath=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody"
    )

    # One transaction row: "<date> <description> <amount> <balance>".
    # The description is matched lazily so the last two money-like tokens on
    # the line are always taken as amount and balance.
    _TRANSACTION_RE = re.compile(
        r'^(?P<date>\d{2}-\d{2}-\d{4})\s+(?P<desc>.*?)\s+(?P<amount>[-\$\d,\.]+)\s+(?P<balance>[-\$\d,\.]+)\s*$'
    )

    def __init__(self, playwright: Playwright, headless: bool = True):
        """Launch Firefox, open ``SCRAPER_URL`` and submit the login form.

        Args:
            playwright: An active Playwright instance (e.g. the value yielded
                by ``sync_playwright()``).
            headless: Whether to launch the browser without a visible window.

        Raises:
            ValueError: If ``SCRAPER_URL``, ``SCRAPER_USERNAME`` or
                ``SCRAPER_PASSWORD`` is unset or empty.
        """
        # NOTE(review): presumably loads the "EmoneyScraper" settings into the
        # process environment -- confirm against config.load_env.
        load_env("EmoneyScraper")
        url = self._require_env("SCRAPER_URL")
        username = self._require_env("SCRAPER_USERNAME")
        password = self._require_env("SCRAPER_PASSWORD")

        self.playwright = playwright
        # Playwright also offers ``chromium`` and ``webkit``.
        self.firefox = self.playwright.firefox
        self.browser = self.firefox.launch(headless=headless)
        try:
            self.page = self.browser.new_page()
            self.response = self.page.goto(url)
            self.page.fill(self._LOGIN_ID_INPUT, username)
            self.page.fill(self._PASSWORD_INPUT, password)
            self.page.click(self._LOGIN_BUTTON)
        except Exception:
            # Do not leak the browser process when navigation or login fails.
            self.browser.close()
            raise

    def __enter__(self) -> "Scraper":
        """Return the scraper itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the browser when the ``with`` block exits."""
        self.close()

    def get_balance(self) -> str:
        """Return the raw text of the balance element on the current page."""
        return self.page.locator(self._BALANCE_XPATH).inner_text()

    def get_snapshot(self) -> dict[str, list[dict[str, object]]]:
        """Return the current balance as a one-account snapshot.

        Returns:
            ``{"accounts": [{"date": <today>, "balance": <float>}]}``.

        Raises:
            ValueError: If the balance text cannot be parsed as a number.
        """
        balance_value = self._parse_money(self.get_balance())
        return {
            "accounts": [
                {
                    "date": date.today(),
                    "balance": balance_value,
                }
            ]
        }

    def get_transactions(self) -> str:
        """Click the account link and return the transaction table body text."""
        self.page.click(self._ACCOUNT_LINK_XPATH)
        return self.page.locator(self._TRANSACTIONS_BODY_XPATH).inner_text()

    def parse_transactions(self, raw: Optional[str] = None) -> list[dict[str, object]]:
        """
        Parse raw transactions text into a list of dicts:
          [{'date': date, 'description': str, 'amount': float, 'balance': float}, ...]
        Lines containing 'INSUFF FUNDS' (case-insensitive) are ignored.
        Lines that do not match the expected row layout, or whose date or
        money fields cannot be converted, are skipped.
        If raw is None, get_transactions() is called to fetch the data.
        """
        if raw is None:
            raw = self.get_transactions() or ""

        parsed: list[dict[str, object]] = []
        for line in raw.splitlines():
            match = self._TRANSACTION_RE.match(line.strip())
            if match is None:
                # Blank lines, headers and anything else that is not a row.
                continue

            desc = match.group('desc').strip()
            if 'INSUFF FUNDS' in desc.upper():
                continue

            try:
                amount = self._parse_money(match.group('amount'))
                balance = self._parse_money(match.group('balance'))
                # NOTE(review): dates are read as day-month-year; confirm this
                # matches the format the site actually renders.
                date_obj = datetime.strptime(match.group('date'), '%d-%m-%Y').date()
            except ValueError:
                # Best-effort parsing: drop rows with malformed numbers/dates.
                continue

            parsed.append({
                'date': date_obj,
                'description': desc,
                'amount': amount,
                'balance': balance,
            })
        return parsed

    def get_transactions_parsed(self) -> list[dict[str, object]]:
        """Fetch the transaction table text and return it parsed into dicts."""
        return self.parse_transactions(self.get_transactions())

    def close(self) -> None:
        """Close the browser launched by the constructor."""
        self.browser.close()

    @staticmethod
    def _parse_money(value: str) -> float:
        """Convert text such as ``"$1,234.56"`` to a float.

        Raises:
            ValueError: If the remaining text is not a valid number.
        """
        return float(value.replace("$", "").replace(",", "").strip())

    @staticmethod
    def _require_env(name: str) -> str:
        """Return environment variable *name*.

        Raises:
            ValueError: If the variable is unset or empty.
        """
        value = os.getenv(name)
        if not value:
            raise ValueError(f"Please set {name} in your environment.")
        return value


# Reference XPaths (same values as the selector constants on Scraper):
# xpathbody = /html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody
# xpathaccountbutton = /html/body/form/div[3]/div[3]/div[2]/div[3]/div[1]/span[2]/a
# xpath = /html/body/form/div[3]/div[3]/div[2]/div[3]/div[5]/span[2]