added a parser to format output transactions
This commit is contained in:
parent
dbcc95f2de
commit
5e840c1429
|
|
@ -4,6 +4,8 @@ from playwright.sync_api import sync_playwright, Playwright
|
|||
# Driver script: start Playwright, scrape the balance and transactions,
# print the parsed result, and always release the browser and the driver.
playwright = sync_playwright().start()
try:
    scraper = Scraper(playwright, True)
    try:
        print(scraper.get_balance())
        # Fetch the raw transaction text once and reuse it for parsing,
        # instead of scraping the page a second time just to print it.
        transactions = scraper.get_transactions()
        parsed = scraper.parse_transactions(transactions)
        print(parsed)
    finally:
        # Close the browser even if scraping or parsing raised.
        scraper.close()
finally:
    # sync_playwright().start() must be paired with stop(); otherwise the
    # Playwright driver process is left running after the script ends.
    playwright.stop()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import os
import re
from datetime import datetime
from typing import Any, Optional

from dotenv import load_dotenv
from playwright.sync_api import sync_playwright, Playwright
|
||||
|
||||
load_dotenv()
|
||||
class Scraper:
|
||||
|
|
@ -23,6 +25,44 @@ class Scraper:
|
|||
transaction_body = self.page.locator("xpath=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody").inner_text()
|
||||
return transaction_body
|
||||
|
||||
def parse_transactions(self, raw: Optional[str] = None) -> list[dict[str, Any]]:
    """Parse raw transaction text into a list of transaction records.

    Each non-blank line is expected to have the layout
    ``<date> <description> <amount> <balance>`` with whitespace between
    fields. The date is read as day-month-year (``%d-%m-%Y``); amounts may
    carry ``$``, ``,`` and ``-``, which are stripped or kept as the sign.

    A line is skipped (never raises) when it does not match that layout,
    when its description contains 'INSUFF FUNDS' (case-insensitive), or
    when its date or numbers cannot be converted.

    Args:
        raw: Text to parse. If None, ``self.get_transactions()`` is called
            to fetch it; a falsy result is treated as empty text.

    Returns:
        A list of dicts in input order, each shaped as
        ``{'date': date, 'description': str, 'amount': float, 'balance': float}``.
    """
    if raw is None:
        raw = self.get_transactions() or ""

    # NOTE(review): the date is interpreted as DD-MM-YYYY below; confirm this
    # matches the format the scraped site actually emits.
    pattern = re.compile(
        r'^(?P<date>\d{2}-\d{2}-\d{4})\s+(?P<desc>.*?)\s+(?P<amount>[-\$\d,\.]+)\s+(?P<balance>[-\$\d,\.]+)\s*$'
    )

    def _to_float(s: str) -> float:
        # Drop currency symbol and thousands separators before converting.
        return float(s.replace('$', '').replace(',', '').strip())

    parsed: list[dict[str, Any]] = []
    for ln in raw.splitlines():
        m = pattern.match(ln.strip())
        if not m:
            # Blank lines, headers and anything else not shaped like a row.
            continue

        desc = m.group('desc').strip()
        if 'INSUFF FUNDS' in desc.upper():
            continue

        try:
            amount = _to_float(m.group('amount'))
            balance = _to_float(m.group('balance'))
            date_obj = datetime.strptime(m.group('date'), '%d-%m-%Y').date()
        except ValueError:
            # The regex is permissive (e.g. a lone '-' or an impossible
            # calendar date passes it); such rows are dropped.
            continue

        parsed.append({
            'date': date_obj,
            'description': desc,
            'amount': amount,
            'balance': balance,
        })
    return parsed
|
||||
|
||||
def close(self):
    """Close the browser referenced by ``self.browser``."""
    browser = self.browser
    browser.close()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue