more changes

This commit is contained in:
Jethro 2026-05-19 11:02:39 +12:00
parent c8e7ddce35
commit 969191cb5c
7 changed files with 72 additions and 44 deletions

13
.dockerignore Normal file
View file

@ -0,0 +1,13 @@
.git
.venv
__pycache__
*.pyc
*.pyo
*.pyd
*.db
*.sqlite
*.sqlite3
*.log
.env
emoney_cache.json
sql/

View file

@ -1,16 +0,0 @@
from akahuclient import AkahuClient
import os
from config import load_env
load_env()
TOKEN = os.getenv("AKAHU_API_TOKEN")
APP_ID = os.getenv("AKAHU_APP_ID")
# if not TOKEN or not APP_ID:
# print("Please set AKAHU_API_TOKEN and AKAHU_APP_ID in your environment.")
# exit(1)
# client = AkahuClient(TOKEN, APP_ID)
# accounts = client.get_accounts()
#print(accounts)

13
Dockerfile Normal file
View file

@ -0,0 +1,13 @@
# Base image: Playwright's Python image, pinned to the v1.44.0-jammy tag.
FROM mcr.microsoft.com/playwright/python:v1.44.0-jammy
# PYTHONDONTWRITEBYTECODE=1 stops Python from writing .pyc files;
# PYTHONUNBUFFERED=1 disables stdout/stderr buffering so output reaches the container logs immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
WORKDIR /app
# Copy and install requirements before the rest of the source so the dependency
# layer can be reused from the build cache when only application code changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# Copy the remaining project files (anything matched by .dockerignore is left out).
COPY . ./
# Default command: run main.py.
CMD ["python", "main.py"]

View file

@ -1,11 +0,0 @@
from scraper import Scraper
from playwright.sync_api import sync_playwright, Playwright
playwright = sync_playwright().start()
scraper = Scraper(playwright, True)
snapshot = scraper.get_snapshot()
transactions = scraper.get_transactions_parsed()
print(snapshot)
print(transactions)
scraper.close()

View file

@ -207,7 +207,7 @@ class Ingester:
def _get_scraper(self) -> Scraper:
if self.scraper is None:
self.scraper = Scraper(pw, headless=False)
self.scraper = Scraper(pw, headless=True)
return self.scraper
def _sort_emoney_snapshots_oldest_first(self, accounts: list[dict[str, object]]) -> list[dict[str, object]]:

View file

@ -8,4 +8,25 @@ make sure to install playwright (pip install playwright), then run:
```
playwright install --with-deps
```
to get the correct drivers and dependencies for the host system
to get the correct drivers and dependencies for the host system
## Runtime flags
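These environment variables let you run the Akahu, Emoney, and normalization steps independently (useful for separate k8s cron jobs). A step runs when its variable is `1`, `true`, or `yes` (case-insensitive, surrounding whitespace ignored); any other value skips that step: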
- RUN_AKAHU (default true)
- RUN_EMONEY (default true)
- RUN_NORMALIZE (default true)
Example:
```
RUN_AKAHU=true RUN_EMONEY=false RUN_NORMALIZE=true python main.py
```
## Container notes
- The Docker image uses Playwright's Python base image.
- The Emoney scraper runs headless by default.
## Normalization gotchas
- Akahu org is derived from the connection name (BNZ/Sharesies), not the source name.
- Emoney transactions are hardcoded to vendor "Finance Now".
- Akahu descriptions with "INTERNET XFR" are rewritten to include the counterparty info and meta fields.

38
main.py
View file

@ -1,5 +1,6 @@
from IngestionService.ingester import Ingester
from config import load_env
import os
load_env("IngestionService")
@ -7,21 +8,28 @@ if __name__ == "__main__":
ingester = Ingester()
ingester.test_db_connection()
akahu_accounts = ingester.fetch_akahu_snapshot_data()
print("Akahu accounts fetched:", len(akahu_accounts.get("items", [])))
ingester.write_akahu_snapshot_data(akahu_accounts)
run_akahu = os.getenv("RUN_AKAHU", "true").strip().lower() in {"1", "true", "yes"}
run_emoney = os.getenv("RUN_EMONEY", "true").strip().lower() in {"1", "true", "yes"}
run_normalize = os.getenv("RUN_NORMALIZE", "true").strip().lower() in {"1", "true", "yes"}
backfill_start = "2026-01-01"
backfill_end = "2026-12-31"
akahu_backfill = ingester.backfill_akahu_transactions(backfill_start, backfill_end)
print("Akahu backfill transactions:", len(akahu_backfill.get("items", [])))
if run_akahu:
akahu_accounts = ingester.fetch_akahu_snapshot_data()
#print("Akahu accounts fetched:", len(akahu_accounts.get("items", [])))
ingester.write_akahu_snapshot_data(akahu_accounts)
emoney_data = ingester.fetch_emoney_data()
emoney_snapshot = emoney_data.get("snapshot") or {"accounts": []}
emoney_transactions = emoney_data.get("transactions") or []
print("Emoney snapshot accounts:", len(emoney_snapshot.get("accounts", [])))
ingester.write_emoney_snapshot_data(emoney_snapshot)
print("Emoney transactions:", len(emoney_transactions))
ingester.write_emoney_transaction_data(emoney_transactions)
#backfill_start = "2026-01-01"
#backfill_end = "2026-12-31"
#akahu_backfill = ingester.backfill_akahu_transactions(backfill_start, backfill_end)
#print("Akahu backfill transactions:", len(akahu_backfill.get("items", [])))
ingester.normalize_pending_data()
if run_emoney:
emoney_data = ingester.fetch_emoney_data()
emoney_snapshot = emoney_data.get("snapshot") or {"accounts": []}
emoney_transactions = emoney_data.get("transactions") or []
#print("Emoney snapshot accounts:", len(emoney_snapshot.get("accounts", [])))
ingester.write_emoney_snapshot_data(emoney_snapshot)
#print("Emoney transactions:", len(emoney_transactions))
ingester.write_emoney_transaction_data(emoney_transactions)
if run_normalize:
ingester.normalize_pending_data()