From 0433c6aa5237104cce9e3caa76162bae50e20c7f Mon Sep 17 00:00:00 2001 From: Jethro Date: Tue, 19 May 2026 13:07:18 +1200 Subject: [PATCH] more changes --- .dockerignore | 3 ++ Dockerfile | 2 +- IngestionService/ingester.py | 63 ++++++++++++++++++++++++++++++++++++ README.md | 43 ++++++++++++++++++++++++ config.py | 16 ++++++--- 5 files changed, 122 insertions(+), 5 deletions(-) diff --git a/.dockerignore b/.dockerignore index fd7a678..aa7ef57 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,5 +9,8 @@ __pycache__ *.sqlite3 *.log .env +AkahuClient/.env +EmoneyScraper/.env +IngestionService/.env emoney_cache.json sql/ diff --git a/Dockerfile b/Dockerfile index bb55ba2..6e2c3c3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/playwright/python:v1.44.0-jammy +FROM mcr.microsoft.com/playwright/python:v1.60.0-jammy ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 diff --git a/IngestionService/ingester.py b/IngestionService/ingester.py index ff56762..3fd7117 100644 --- a/IngestionService/ingester.py +++ b/IngestionService/ingester.py @@ -114,6 +114,13 @@ class Ingester: def write_akahu_snapshot_data(self, data): with self.dbconnection.cursor() as cursor: for account in data.get("items", []): + self._record_sync( + account_num=self._string_or_none(account.get("formatted_account") or account.get("_id") or account.get("account_id")), + account_name=self._string_or_none(account.get("name") or account.get("account_name")), + org_name=self._string_or_none( + (account.get("connection") or {}).get("name") if isinstance(account.get("connection"), dict) else None + ), + ) cursor.execute( """ INSERT INTO rawsnapshots (data, source) @@ -142,6 +149,11 @@ class Ingester: accounts = self._sort_emoney_snapshots_oldest_first(data.get("accounts", [])) with self.dbconnection.cursor() as cursor: for account in accounts: + self._record_sync( + account_num="emoney", + account_name="Emoney", + org_name="Emoney", + ) cursor.execute( """ INSERT INTO 
rawsnapshots (data, source)
@@ -232,6 +244,86 @@ class Ingester:
             return False, dt
         return sorted(items, key=key)
 
+    def _record_sync(self, account_num: str | None, account_name: str | None, org_name: str | None) -> None:
+        """Insert one row into ``syncs`` for the given account.
+
+        The organization and account rows are looked up (and created when
+        missing) first.  No commit is issued here.
+
+        Args:
+            account_num: Account identifier; a falsy value makes
+                ``_get_or_create_account`` synthesize ``"<org id>:<name>"``.
+            account_name: Account display name; falsy becomes ``"unknown"``.
+            org_name: Organization name; falsy becomes ``"unknown"``.
+        """
+        # Function-scope import: the module's import block is not part of this change.
+        from datetime import timezone
+
+        org_id = self._get_or_create_org(org_name)
+        account_id = self._get_or_create_account(account_num, account_name, org_id)
+        with self.dbconnection.cursor() as cursor:
+            cursor.execute(
+                """
+                INSERT INTO syncs (datetime, accountid, orgid)
+                VALUES (%s, %s, %s)
+                """,
+                # A full UTC timestamp rather than date.today(): a bare date
+                # drops the time of day, so same-day syncs look identical.
+                (datetime.now(timezone.utc), account_id, org_id),
+            )
+
+    def _get_or_create_org(self, org_name: str | None) -> int:
+        """Return the id of organization *org_name*, inserting it if absent.
+
+        A falsy name is stored as ``"unknown"``.
+        """
+        org_name = org_name or "unknown"
+        with self.dbconnection.cursor() as cursor:
+            cursor.execute("SELECT id FROM organizations WHERE orgname = %s", (org_name,))
+            row = cursor.fetchone()
+            if row:
+                return row[0]
+
+            # NOTE(review): select-then-insert is not atomic; assumes a single
+            # writer or a unique constraint on orgname -- confirm.
+            cursor.execute(
+                "INSERT INTO organizations (orgname) VALUES (%s) RETURNING id",
+                (org_name,),
+            )
+            return cursor.fetchone()[0]
+
+    def _get_or_create_account(self, account_num: str | None, account_name: str | None, org_id: int) -> int:
+        """Return the id of account *account_num*, inserting it if absent.
+
+        A falsy name becomes ``"unknown"``; a falsy number becomes the
+        synthetic key ``"<org_id>:<account_name>"``.
+        """
+        account_name = account_name or "unknown"
+        account_num = account_num or f"{org_id}:{account_name}"
+        with self.dbconnection.cursor() as cursor:
+            # NOTE(review): the lookup matches on accountnum only, not orgid.
+            cursor.execute("SELECT id FROM accounts WHERE accountnum = %s", (account_num,))
+            row = cursor.fetchone()
+            if row:
+                return row[0]
+
+            cursor.execute(
+                """
+                INSERT INTO accounts (accountnum, accountname, orgid)
+                VALUES (%s, %s, %s)
+                RETURNING id
+                """,
+                (account_num, account_name, org_id),
+            )
+            return cursor.fetchone()[0]
+
+    @staticmethod
+    def _string_or_none(value: object) -> str | None:
+        """Return ``str(value)``, passing ``None`` through unchanged."""
+        if value is None:
+            return None
+        return str(value)
+
     @staticmethod
     def _parse_datetime(value: object) -> datetime | None:
         if isinstance(value, datetime):
diff --git a/README.md b/README.md
index 5dc7e47..dac1ca6 100755
--- a/README.md
+++ b/README.md
@@ -26,6 +26,49 @@ RUN_AKAHU=true RUN_EMONEY=false RUN_NORMALIZE=true python main.py
 - The Docker image uses Playwright's Python base image.
- The Emoney scraper runs headless by default.
 
+## Env vars
+Required:
+- AKAHU_API_TOKEN
+- AKAHU_APP_ID
+- DB_HOST
+- DB_NAME
+- DB_USER
+- DB_PASSWORD
+- SCRAPER_URL
+- SCRAPER_USERNAME
+- SCRAPER_PASSWORD
+
+Optional:
+- EMONEY_USE_CACHE (true/false, default false)
+- EMONEY_CACHE_PATH (default emoney_cache.json)
+- RUN_AKAHU (default true)
+- RUN_EMONEY (default true)
+- RUN_NORMALIZE (default true)
+
+## Build and test the Docker image
+Build:
+```
+docker build -t akahusync:local .
+```
+
+Run (example, pass env vars):
+```
+docker run --rm \
+  -e AKAHU_API_TOKEN=... \
+  -e AKAHU_APP_ID=... \
+  -e DB_HOST=... \
+  -e DB_NAME=... \
+  -e DB_USER=... \
+  -e DB_PASSWORD=... \
+  -e SCRAPER_URL=... \
+  -e SCRAPER_USERNAME=... \
+  -e SCRAPER_PASSWORD=... \
+  -e RUN_AKAHU=true \
+  -e RUN_EMONEY=false \
+  -e RUN_NORMALIZE=true \
+  akahusync:local
+```
+
 ## Normalization gotchas
 - Akahu org is derived from the connection name (BNZ/Sharesies), not the source name.
 - Emoney transactions are hardcoded to vendor "Finance Now".
diff --git a/config.py b/config.py
index a450bcc..67318c1 100644
--- a/config.py
+++ b/config.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
 from pathlib import Path
-from dotenv import load_dotenv
+import os
+from dotenv import dotenv_values
 
 REPO_ROOT = Path(__file__).resolve().parent
 
@@ -9,11 +10,39 @@ REPO_ROOT = Path(__file__).resolve().parent
+# Keys whose current os.environ value was written by load_env() from a .env
+# file.  Later calls may refresh them; variables supplied by the real
+# environment never enter this set, so they are never overwritten.
+_FILE_SOURCED_KEYS: set[str] = set()
+
+
 def load_env(service: str | None = None) -> None:
+    """Load ``.env`` values into ``os.environ`` without clobbering real env vars.
+
+    Precedence: variables already set in the process environment, then
+    ``<service>/.env``, then the repo-root ``.env``.  Values written by an
+    earlier call are refreshed, so loading several services in one process
+    still lets each service file win over the root file.
+
+    Args:
+        service: Sub-directory of the repo root whose ``.env`` is layered over
+            the root ``.env``; ``None`` loads the root file only.
+    """
     root_env = REPO_ROOT / ".env"
 
+    root_values: dict[str, str] = {}
+    service_values: dict[str, str] = {}
+
     if root_env.exists():
-        load_dotenv(root_env)
+        root_values = {k: v for k, v in dotenv_values(root_env).items() if v is not None}
 
     if service:
         service_env = REPO_ROOT / service / ".env"
         if service_env.exists():
-            # Service-specific values should override repo defaults.
-            load_dotenv(service_env, override=True)
+            service_values = {k: v for k, v in dotenv_values(service_env).items() if v is not None}
+
+    # Precedence: explicit env > service env > root env
+    merged = {**root_values, **service_values}
+    for key, value in merged.items():
+        # setdefault() alone would freeze whatever an earlier load_env() call
+        # wrote, so a later service file could never override the root file.
+        if key in _FILE_SOURCED_KEYS or key not in os.environ:
+            os.environ[key] = value
+            _FILE_SOURCED_KEYS.add(key)