From 5e840c14292f199cca107d89808d63aa9c88eb15 Mon Sep 17 00:00:00 2001
From: jethro
Date: Wed, 13 May 2026 16:44:54 +1200
Subject: [PATCH] added a parser to format output transactions

---
Review notes (free text after the '---' marker; not part of the commit
message):
 * Adds Scraper.parse_transactions(), which turns the text returned by
   get_transactions() into a list of dicts with the keys 'date',
   'description', 'amount' and 'balance'. main.py now prints that list
   instead of the raw text.
 * Lines that do not match the regex, that contain 'INSUFF FUNDS', or
   whose date/amount/balance fail to convert are skipped silently.
 * The regex named groups (date, desc, amount, balance) are the names the
   method reads back through m.group(). The amount-before-balance column
   order is assumed from the docstring; confirm against real page output.
 * The From: header carries no e-mail address; `git am` may reject the
   patch until the author address is filled in.

 emoneyscraper/main.py    |  4 +++-
 emoneyscraper/scraper.py | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/emoneyscraper/main.py b/emoneyscraper/main.py
index 3be8e99..3a50f2e 100755
--- a/emoneyscraper/main.py
+++ b/emoneyscraper/main.py
@@ -4,6 +4,8 @@ from playwright.sync_api import sync_playwright, Playwright
 playwright = sync_playwright().start()
 scraper = Scraper(playwright, True)
 print(scraper.get_balance())
-print(scraper.get_transactions())
+transactions = scraper.get_transactions()
+parsed = scraper.parse_transactions(transactions)
+print(parsed)
 scraper.close()
 
diff --git a/emoneyscraper/scraper.py b/emoneyscraper/scraper.py
index d42ca8d..b351ff2 100755
--- a/emoneyscraper/scraper.py
+++ b/emoneyscraper/scraper.py
@@ -1,6 +1,8 @@
 from playwright.sync_api import sync_playwright, Playwright
 import os
 from dotenv import load_dotenv
+import re
+from datetime import datetime
 load_dotenv()
 
 class Scraper:
@@ -23,6 +25,44 @@ class Scraper:
         transaction_body = self.page.locator("xpath=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody").inner_text()
         return transaction_body
 
+    def parse_transactions(self, raw: str = None) -> list[dict[str, any]]:
+        """
+        Parse raw transactions text into a list of dicts:
+        [{'date': date, 'description': str, 'amount': float, 'balance': float}, ...]
+        Lines containing 'INSUFF FUNDS' (case-insensitive) are ignored.
+        If raw is None, get_transactions() is called to fetch the data.
+        """
+        if raw is None:
+            raw = self.get_transactions() or ""
+        lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
+        pattern = re.compile(
+            r'^(?P<date>\d{2}-\d{2}-\d{4})\s+(?P<desc>.*?)\s+(?P<amount>[-\$\d,\.]+)\s+(?P<balance>[-\$\d,\.]+)\s*$'
+        )
+        parsed: list[dict[str, any]] = []
+        for ln in lines:
+            m = pattern.match(ln)
+            if not m:
+                # skip lines that don't match the expected pattern
+                continue
+            desc = m.group('desc').strip()
+            if 'INSUFF FUNDS' in desc.upper():
+                continue
+            def _to_float(s: str) -> float:
+                return float(s.replace('$', '').replace(',', '').strip())
+            try:
+                amount = _to_float(m.group('amount'))
+                balance = _to_float(m.group('balance'))
+                date_obj = datetime.strptime(m.group('date'), '%d-%m-%Y').date()
+            except Exception:
+                continue
+            parsed.append({
+                'date': date_obj,
+                'description': desc,
+                'amount': amount,
+                'balance': balance,
+            })
+        return parsed
+
     def close(self):
         self.browser.close()
 