2026-05-09 04:39:54 +00:00
|
|
|
from playwright.sync_api import sync_playwright, Playwright
|
2026-05-09 05:24:37 +00:00
|
|
|
import os
|
2026-05-18 22:47:46 +00:00
|
|
|
from config import load_env
|
2026-05-13 04:44:54 +00:00
|
|
|
import re
|
2026-05-18 22:47:46 +00:00
|
|
|
from datetime import date, datetime
|
2026-05-09 05:24:37 +00:00
|
|
|
|
2026-05-13 04:31:12 +00:00
|
|
|
class Scraper:
|
2026-05-31 23:18:27 +00:00
|
|
|
ACCOUNT_CONFIGS = [
|
|
|
|
|
{
|
|
|
|
|
"account_key": "loan",
|
|
|
|
|
"account_name": "emoney-loan",
|
|
|
|
|
"balance_xpath": "xpath=/html/body/form/div[3]/div[3]/div[2]/div[3]/div[5]/span[2]",
|
|
|
|
|
"link_xpath": "xpath=/html/body/form/div[3]/div[3]/div[2]/div[3]/div[1]/span[2]/a",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"account_key": "forrester",
|
|
|
|
|
"account_name": "emoney-forrester",
|
|
|
|
|
"balance_xpath": "xpath=/html/body/form/div[3]/div[3]/div[2]/div[1]/div[5]/span[2]",
|
|
|
|
|
"link_xpath": "xpath=/html/body/form/div[3]/div[3]/div[2]/div[1]/div[1]/span[2]/a",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"account_key": "swift",
|
|
|
|
|
"account_name": "emoney-swift",
|
|
|
|
|
"balance_xpath": "xpath=/html/body/form/div[3]/div[3]/div[2]/div[2]/div[5]/span[2]",
|
|
|
|
|
"link_xpath": "xpath=/html/body/form/div[3]/div[3]/div[2]/div[2]/div[1]/span[2]/a",
|
|
|
|
|
},
|
|
|
|
|
]
|
|
|
|
|
TRANSACTIONS_TABLE_XPATH = "xpath=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody"
|
|
|
|
|
|
2026-05-13 04:31:12 +00:00
|
|
|
def __init__(self, playwright: Playwright, headless: bool = True):
|
2026-05-18 22:47:46 +00:00
|
|
|
load_env("EmoneyScraper")
|
|
|
|
|
self._require_env("SCRAPER_URL")
|
|
|
|
|
self._require_env("SCRAPER_USERNAME")
|
|
|
|
|
self._require_env("SCRAPER_PASSWORD")
|
2026-05-13 04:31:12 +00:00
|
|
|
self.playwright = playwright
|
|
|
|
|
self.firefox = self.playwright.firefox # or "firefox" or "webkit".
|
|
|
|
|
self.browser = self.firefox.launch(headless=headless)
|
|
|
|
|
self.page = self.browser.new_page()
|
2026-05-18 22:47:46 +00:00
|
|
|
self.response = self.page.goto(os.getenv("SCRAPER_URL"))
|
|
|
|
|
self.page.fill("input#ctl00_ContentPlaceHolder1_txtLoginID", os.getenv("SCRAPER_USERNAME"))
|
|
|
|
|
self.page.fill("input#ctl00_ContentPlaceHolder1_txtPassword", os.getenv("SCRAPER_PASSWORD"))
|
2026-05-13 04:31:12 +00:00
|
|
|
self.page.click("input#ctl00_ContentPlaceHolder1_btnLogin")
|
2026-05-09 04:39:54 +00:00
|
|
|
|
2026-05-13 04:31:12 +00:00
|
|
|
def get_balance(self):
|
2026-05-31 23:18:27 +00:00
|
|
|
balances: dict[str, str] = {}
|
|
|
|
|
for account in self.ACCOUNT_CONFIGS:
|
|
|
|
|
balance_text = self.page.locator(account["balance_xpath"]).inner_text()
|
|
|
|
|
balances[account["account_key"]] = balance_text
|
|
|
|
|
return balances
|
2026-05-09 04:39:54 +00:00
|
|
|
|
2026-05-18 22:47:46 +00:00
|
|
|
def get_snapshot(self) -> dict[str, list[dict[str, object]]]:
|
|
|
|
|
snapshot_date = date.today()
|
2026-05-31 23:18:27 +00:00
|
|
|
balances = self.get_balance()
|
|
|
|
|
accounts: list[dict[str, object]] = []
|
|
|
|
|
for account in self.ACCOUNT_CONFIGS:
|
|
|
|
|
balance_text = balances.get(account["account_key"], "")
|
|
|
|
|
if not balance_text:
|
|
|
|
|
continue
|
|
|
|
|
account_num = self.page.locator(account["link_xpath"]).inner_text().strip()
|
|
|
|
|
balance_value = self._parse_money(balance_text)
|
|
|
|
|
accounts.append({
|
|
|
|
|
"date": snapshot_date,
|
|
|
|
|
"balance": balance_value,
|
|
|
|
|
"account_name": account["account_name"],
|
|
|
|
|
"account_num": account_num,
|
|
|
|
|
"org_name": "Emoney",
|
|
|
|
|
})
|
2026-05-18 22:47:46 +00:00
|
|
|
return {
|
2026-05-31 23:18:27 +00:00
|
|
|
"accounts": accounts
|
2026-05-18 22:47:46 +00:00
|
|
|
}
|
|
|
|
|
|
2026-05-13 04:31:12 +00:00
|
|
|
def get_transactions(self):
|
2026-05-31 23:18:27 +00:00
|
|
|
raw_sets: list[dict[str, object]] = []
|
|
|
|
|
for account in self.ACCOUNT_CONFIGS:
|
|
|
|
|
account_num = self.page.locator(account["link_xpath"]).inner_text().strip()
|
|
|
|
|
self.page.click(account["link_xpath"])
|
|
|
|
|
self.page.wait_for_selector(self.TRANSACTIONS_TABLE_XPATH)
|
|
|
|
|
transaction_body = self.page.locator(self.TRANSACTIONS_TABLE_XPATH).inner_text()
|
|
|
|
|
raw_sets.append({
|
|
|
|
|
"account": {
|
|
|
|
|
**account,
|
|
|
|
|
"account_num": account_num,
|
|
|
|
|
},
|
|
|
|
|
"raw": transaction_body,
|
|
|
|
|
})
|
|
|
|
|
self.page.go_back()
|
|
|
|
|
self.page.wait_for_selector(account["link_xpath"])
|
|
|
|
|
return raw_sets
|
2026-05-13 04:01:14 +00:00
|
|
|
|
2026-05-31 23:18:27 +00:00
|
|
|
def parse_transactions(self, raw: str | list[dict[str, object]] | None = None) -> list[dict[str, object]]:
|
2026-05-13 04:44:54 +00:00
|
|
|
"""
|
|
|
|
|
Parse raw transactions text into a list of dicts:
|
|
|
|
|
[{'date': date, 'description': str, 'amount': float, 'balance': float}, ...]
|
|
|
|
|
Lines containing 'INSUFF FUNDS' (case-insensitive) are ignored.
|
|
|
|
|
If raw is None, get_transactions() is called to fetch the data.
|
|
|
|
|
"""
|
|
|
|
|
if raw is None:
|
|
|
|
|
raw = self.get_transactions() or ""
|
2026-05-31 23:18:27 +00:00
|
|
|
if isinstance(raw, list):
|
|
|
|
|
combined: list[dict[str, object]] = []
|
|
|
|
|
for entry in raw:
|
|
|
|
|
if not isinstance(entry, dict):
|
|
|
|
|
continue
|
|
|
|
|
entry_raw = entry.get("raw") or ""
|
|
|
|
|
account_info = entry.get("account") if isinstance(entry.get("account"), dict) else None
|
|
|
|
|
combined.extend(self._parse_transactions_for_account(entry_raw, account_info))
|
|
|
|
|
return combined
|
|
|
|
|
return self._parse_transactions_for_account(raw)
|
|
|
|
|
|
|
|
|
|
def _parse_transactions_for_account(
|
|
|
|
|
self,
|
|
|
|
|
raw: str,
|
|
|
|
|
account_info: dict[str, object] | None = None,
|
|
|
|
|
) -> list[dict[str, object]]:
|
2026-05-13 04:44:54 +00:00
|
|
|
lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
|
|
|
|
|
pattern = re.compile(
|
|
|
|
|
r'^(?P<date>\d{2}-\d{2}-\d{4})\s+(?P<desc>.*?)\s+(?P<amount>[-\$\d,\.]+)\s+(?P<balance>[-\$\d,\.]+)\s*$'
|
|
|
|
|
)
|
2026-05-31 23:18:27 +00:00
|
|
|
parsed: list[dict[str, object]] = []
|
2026-05-13 04:44:54 +00:00
|
|
|
for ln in lines:
|
|
|
|
|
m = pattern.match(ln)
|
|
|
|
|
if not m:
|
|
|
|
|
# skip lines that don't match the expected pattern
|
|
|
|
|
continue
|
|
|
|
|
desc = m.group('desc').strip()
|
|
|
|
|
if 'INSUFF FUNDS' in desc.upper():
|
|
|
|
|
continue
|
|
|
|
|
def _to_float(s: str) -> float:
|
|
|
|
|
return float(s.replace('$', '').replace(',', '').strip())
|
|
|
|
|
try:
|
|
|
|
|
amount = _to_float(m.group('amount'))
|
|
|
|
|
balance = _to_float(m.group('balance'))
|
|
|
|
|
date_obj = datetime.strptime(m.group('date'), '%d-%m-%Y').date()
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
2026-05-31 23:18:27 +00:00
|
|
|
entry: dict[str, object] = {
|
2026-05-13 04:44:54 +00:00
|
|
|
'date': date_obj,
|
|
|
|
|
'description': desc,
|
|
|
|
|
'amount': amount,
|
|
|
|
|
'balance': balance,
|
2026-05-31 23:18:27 +00:00
|
|
|
}
|
|
|
|
|
if account_info:
|
|
|
|
|
entry['account_name'] = account_info.get('account_name')
|
|
|
|
|
entry['account_num'] = account_info.get('account_num')
|
|
|
|
|
entry['org_name'] = "Emoney"
|
|
|
|
|
parsed.append(entry)
|
2026-05-13 04:44:54 +00:00
|
|
|
return parsed
|
|
|
|
|
|
2026-05-31 23:18:27 +00:00
|
|
|
def get_transactions_parsed(self) -> list[dict[str, object]]:
|
|
|
|
|
return self.parse_transactions()
|
2026-05-18 22:47:46 +00:00
|
|
|
|
2026-05-13 04:31:12 +00:00
|
|
|
def close(self):
|
|
|
|
|
self.browser.close()
|
2026-05-13 04:01:14 +00:00
|
|
|
|
2026-05-18 22:47:46 +00:00
|
|
|
@staticmethod
|
|
|
|
|
def _parse_money(value: str) -> float:
|
|
|
|
|
return float(value.replace("$", "").replace(",", "").strip())
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _require_env(name: str) -> str:
|
|
|
|
|
value = os.getenv(name)
|
|
|
|
|
if not value:
|
|
|
|
|
raise ValueError(f"Please set {name} in your environment.")
|
|
|
|
|
return value
|
|
|
|
|
|
2026-05-13 04:31:12 +00:00
|
|
|
#xpathbody=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody
|
|
|
|
|
#xpathaccountbutton = /html/body/form/div[3]/div[3]/div[2]/div[3]/div[1]/span[2]/a
|
2026-05-13 04:01:14 +00:00
|
|
|
#xpath = /html/body/form/div[3]/div[3]/div[2]/div[3]/div[5]/span[2]
|
|
|
|
|
|