Added a parser to format the transaction output

This commit is contained in:
jethro 2026-05-13 16:44:54 +12:00
parent dbcc95f2de
commit 5e840c1429
2 changed files with 43 additions and 1 deletions

View file

@ -4,6 +4,8 @@ from playwright.sync_api import sync_playwright, Playwright
# Run one scraping session: print the account balance, then fetch the raw
# transaction text, parse it into structured records and print those.
#
# The context manager stops the Playwright driver on exit (the previous
# `sync_playwright().start()` call was never paired with `stop()`), and the
# try/finally guarantees the scraper is closed even if a scraping call raises.
with sync_playwright() as playwright:
    # NOTE(review): the second argument is presumably a "headless" flag —
    # confirm against Scraper.__init__.
    scraper = Scraper(playwright, True)
    try:
        print(scraper.get_balance())
        transactions = scraper.get_transactions()
        parsed = scraper.parse_transactions(transactions)
        print(parsed)
    finally:
        scraper.close()

View file

@ -1,6 +1,8 @@
from playwright.sync_api import sync_playwright, Playwright from playwright.sync_api import sync_playwright, Playwright
import os import os
from dotenv import load_dotenv from dotenv import load_dotenv
import re
from datetime import datetime
load_dotenv() load_dotenv()
class Scraper: class Scraper:
@ -23,6 +25,44 @@ class Scraper:
transaction_body = self.page.locator("xpath=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody").inner_text() transaction_body = self.page.locator("xpath=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody").inner_text()
return transaction_body return transaction_body
def parse_transactions(self, raw: "str | None" = None) -> list[dict[str, object]]:
    """Parse raw transaction text into a list of transaction dicts.

    Each usable line must have the shape
    ``DD-MM-YYYY <description> <amount> <balance>``, where the amount and
    balance may contain ``$``, ``,``, ``.`` and ``-``. Blank lines, lines
    that do not match this shape, and lines whose numbers or date cannot be
    converted are skipped silently. Lines whose description contains
    'INSUFF FUNDS' (case-insensitive) are ignored as well.

    Args:
        raw: The raw transactions text, one transaction per line. If None,
            ``self.get_transactions()`` is called to fetch it; a falsy
            result from that call is treated as empty text.

    Returns:
        One dict per accepted line, in input order, with the keys
        ``'date'`` (``datetime.date``), ``'description'`` (``str``),
        ``'amount'`` (``float``) and ``'balance'`` (``float``).
    """
    if raw is None:
        raw = self.get_transactions() or ""

    pattern = re.compile(
        r'^(?P<date>\d{2}-\d{2}-\d{4})\s+(?P<desc>.*?)\s+(?P<amount>[-\$\d,\.]+)\s+(?P<balance>[-\$\d,\.]+)\s*$'
    )

    def _to_float(s: str) -> float:
        # Drop the currency symbol and thousands separators before converting.
        return float(s.replace('$', '').replace(',', '').strip())

    parsed: list[dict[str, object]] = []
    for ln in raw.splitlines():
        ln = ln.strip()
        if not ln:
            continue
        m = pattern.match(ln)
        if not m:
            # Skip lines that don't match the expected pattern.
            continue
        desc = m.group('desc').strip()
        if 'INSUFF FUNDS' in desc.upper():
            continue
        try:
            amount = _to_float(m.group('amount'))
            balance = _to_float(m.group('balance'))
            date_obj = datetime.strptime(m.group('date'), '%d-%m-%Y').date()
        except ValueError:
            # The regex only checks the character set, so values such as
            # "1.2.3" or an impossible date like 31-02-2024 end up here;
            # such lines are dropped rather than aborting the whole parse.
            continue
        parsed.append({
            'date': date_obj,
            'description': desc,
            'amount': amount,
            'balance': balance,
        })
    return parsed
def close(self): def close(self):
self.browser.close() self.browser.close()