From 969191cb5c0233b98d289d501d64010619043f1c Mon Sep 17 00:00:00 2001 From: Jethro Date: Tue, 19 May 2026 11:02:39 +1200 Subject: [PATCH] Add Docker image and RUN_* flags; run scraper headless --- .dockerignore | 13 ++++++++++++ AkahuClient/main.py | 16 --------------- Dockerfile | 13 ++++++++++++ EmoneyScraper/main.py | 11 ----------- IngestionService/ingester.py | 2 +- README.md | 23 +++++++++++++++++++++- main.py | 38 ++++++++++++++++++++++-------------- 7 files changed, 72 insertions(+), 44 deletions(-) create mode 100644 .dockerignore delete mode 100644 AkahuClient/main.py create mode 100644 Dockerfile delete mode 100755 EmoneyScraper/main.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..fd7a678 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +.git +.venv +__pycache__ +*.pyc +*.pyo +*.pyd +*.db +*.sqlite +*.sqlite3 +*.log +.env +emoney_cache.json +sql/ diff --git a/AkahuClient/main.py b/AkahuClient/main.py deleted file mode 100644 index 08cb8be..0000000 --- a/AkahuClient/main.py +++ /dev/null @@ -1,16 +0,0 @@ -from akahuclient import AkahuClient -import os -from config import load_env - - -load_env() - -TOKEN = os.getenv("AKAHU_API_TOKEN") -APP_ID = os.getenv("AKAHU_APP_ID") - -# if not TOKEN or not APP_ID: -# print("Please set AKAHU_API_TOKEN and AKAHU_APP_ID in your environment.") -# exit(1) -# client = AkahuClient(TOKEN, APP_ID) -# accounts = client.get_accounts() -#print(accounts) \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..bb55ba2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM mcr.microsoft.com/playwright/python:v1.44.0-jammy + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +COPY . 
./ + +CMD ["python", "main.py"] diff --git a/EmoneyScraper/main.py b/EmoneyScraper/main.py deleted file mode 100755 index 33fd229..0000000 --- a/EmoneyScraper/main.py +++ /dev/null @@ -1,11 +0,0 @@ -from scraper import Scraper -from playwright.sync_api import sync_playwright, Playwright - -playwright = sync_playwright().start() -scraper = Scraper(playwright, True) -snapshot = scraper.get_snapshot() -transactions = scraper.get_transactions_parsed() -print(snapshot) -print(transactions) -scraper.close() - diff --git a/IngestionService/ingester.py b/IngestionService/ingester.py index f9825d6..ff56762 100644 --- a/IngestionService/ingester.py +++ b/IngestionService/ingester.py @@ -207,7 +207,7 @@ class Ingester: def _get_scraper(self) -> Scraper: if self.scraper is None: - self.scraper = Scraper(pw, headless=False) + self.scraper = Scraper(pw, headless=True) return self.scraper def _sort_emoney_snapshots_oldest_first(self, accounts: list[dict[str, object]]) -> list[dict[str, object]]: diff --git a/README.md b/README.md index 8d9b9ac..5dc7e47 100755 --- a/README.md +++ b/README.md @@ -8,4 +8,25 @@ make sure to install playwright (pip install playwright), then run: ``` playwright install --with-deps ``` -to get the correct drivers and dependencies for the host system \ No newline at end of file +to get the correct drivers and dependencies for the host system + +## Runtime flags +These env vars let you split Akahu vs Emoney runs (useful for separate k8s cron jobs): + +- RUN_AKAHU (default true) +- RUN_EMONEY (default true) +- RUN_NORMALIZE (default true) + +Example: +``` +RUN_AKAHU=true RUN_EMONEY=false RUN_NORMALIZE=true python main.py +``` + +## Container notes +- The Docker image uses Playwright's Python base image. +- The Emoney scraper runs headless by default. + +## Normalization gotchas +- Akahu org is derived from the connection name (BNZ/Sharesies), not the source name. +- Emoney transactions are hardcoded to vendor "Finance Now". 
+- Akahu descriptions with "INTERNET XFR" are rewritten to include the counterparty info and meta fields. \ No newline at end of file diff --git a/main.py b/main.py index bd5b4ad..5ff403a 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,6 @@ from IngestionService.ingester import Ingester from config import load_env +import os load_env("IngestionService") @@ -7,21 +8,28 @@ if __name__ == "__main__": ingester = Ingester() ingester.test_db_connection() - akahu_accounts = ingester.fetch_akahu_snapshot_data() - print("Akahu accounts fetched:", len(akahu_accounts.get("items", []))) - ingester.write_akahu_snapshot_data(akahu_accounts) + run_akahu = os.getenv("RUN_AKAHU", "true").strip().lower() in {"1", "true", "yes"} + run_emoney = os.getenv("RUN_EMONEY", "true").strip().lower() in {"1", "true", "yes"} + run_normalize = os.getenv("RUN_NORMALIZE", "true").strip().lower() in {"1", "true", "yes"} - backfill_start = "2026-01-01" - backfill_end = "2026-12-31" - akahu_backfill = ingester.backfill_akahu_transactions(backfill_start, backfill_end) - print("Akahu backfill transactions:", len(akahu_backfill.get("items", []))) + if run_akahu: + akahu_accounts = ingester.fetch_akahu_snapshot_data() + #print("Akahu accounts fetched:", len(akahu_accounts.get("items", []))) + ingester.write_akahu_snapshot_data(akahu_accounts) - emoney_data = ingester.fetch_emoney_data() - emoney_snapshot = emoney_data.get("snapshot") or {"accounts": []} - emoney_transactions = emoney_data.get("transactions") or [] - print("Emoney snapshot accounts:", len(emoney_snapshot.get("accounts", []))) - ingester.write_emoney_snapshot_data(emoney_snapshot) - print("Emoney transactions:", len(emoney_transactions)) - ingester.write_emoney_transaction_data(emoney_transactions) + #backfill_start = "2026-01-01" + #backfill_end = "2026-12-31" + #akahu_backfill = ingester.backfill_akahu_transactions(backfill_start, backfill_end) + #print("Akahu backfill transactions:", len(akahu_backfill.get("items", []))) - 
ingester.normalize_pending_data() + if run_emoney: + emoney_data = ingester.fetch_emoney_data() + emoney_snapshot = emoney_data.get("snapshot") or {"accounts": []} + emoney_transactions = emoney_data.get("transactions") or [] + #print("Emoney snapshot accounts:", len(emoney_snapshot.get("accounts", []))) + ingester.write_emoney_snapshot_data(emoney_snapshot) + #print("Emoney transactions:", len(emoney_transactions)) + ingester.write_emoney_transaction_data(emoney_transactions) + + if run_normalize: + ingester.normalize_pending_data()