more changes

This commit is contained in:
Jethro 2026-05-19 13:07:18 +12:00
parent 969191cb5c
commit 0433c6aa52
5 changed files with 122 additions and 5 deletions

View file

@ -9,5 +9,8 @@ __pycache__
*.sqlite3 *.sqlite3
*.log *.log
.env .env
AkahuClient/.env
EmoneyScraper/.env
IngestionService/.env
emoney_cache.json emoney_cache.json
sql/ sql/

View file

@ -1,4 +1,4 @@
FROM mcr.microsoft.com/playwright/python:v1.44.0-jammy FROM mcr.microsoft.com/playwright/python:v1.60.0-jammy
ENV PYTHONDONTWRITEBYTECODE=1 \ ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 PYTHONUNBUFFERED=1

View file

@ -114,6 +114,13 @@ class Ingester:
def write_akahu_snapshot_data(self, data): def write_akahu_snapshot_data(self, data):
with self.dbconnection.cursor() as cursor: with self.dbconnection.cursor() as cursor:
for account in data.get("items", []): for account in data.get("items", []):
self._record_sync(
account_num=self._string_or_none(account.get("formatted_account") or account.get("_id") or account.get("account_id")),
account_name=self._string_or_none(account.get("name") or account.get("account_name")),
org_name=self._string_or_none(
(account.get("connection") or {}).get("name") if isinstance(account.get("connection"), dict) else None
),
)
cursor.execute( cursor.execute(
""" """
INSERT INTO rawsnapshots (data, source) INSERT INTO rawsnapshots (data, source)
@ -142,6 +149,11 @@ class Ingester:
accounts = self._sort_emoney_snapshots_oldest_first(data.get("accounts", [])) accounts = self._sort_emoney_snapshots_oldest_first(data.get("accounts", []))
with self.dbconnection.cursor() as cursor: with self.dbconnection.cursor() as cursor:
for account in accounts: for account in accounts:
self._record_sync(
account_num="emoney",
account_name="Emoney",
org_name="Emoney",
)
cursor.execute( cursor.execute(
""" """
INSERT INTO rawsnapshots (data, source) INSERT INTO rawsnapshots (data, source)
@ -232,6 +244,57 @@ class Ingester:
return False, dt return False, dt
return sorted(items, key=key) return sorted(items, key=key)
def _record_sync(self, account_num: str | None, account_name: str | None, org_name: str | None) -> None:
org_id = self._get_or_create_org(org_name)
account_id = self._get_or_create_account(account_num, account_name, org_id)
with self.dbconnection.cursor() as cursor:
cursor.execute(
"""
INSERT INTO syncs (datetime, accountid, orgid)
VALUES (%s, %s, %s)
""",
(date.today(), account_id, org_id),
)
def _get_or_create_org(self, org_name: str | None) -> int:
org_name = org_name or "unknown"
with self.dbconnection.cursor() as cursor:
cursor.execute("SELECT id FROM organizations WHERE orgname = %s", (org_name,))
row = cursor.fetchone()
if row:
return row[0]
cursor.execute(
"INSERT INTO organizations (orgname) VALUES (%s) RETURNING id",
(org_name,),
)
return cursor.fetchone()[0]
def _get_or_create_account(self, account_num: str | None, account_name: str | None, org_id: int) -> int:
account_name = account_name or "unknown"
account_num = account_num or f"{org_id}:{account_name}"
with self.dbconnection.cursor() as cursor:
cursor.execute("SELECT id FROM accounts WHERE accountnum = %s", (account_num,))
row = cursor.fetchone()
if row:
return row[0]
cursor.execute(
"""
INSERT INTO accounts (accountnum, accountname, orgid)
VALUES (%s, %s, %s)
RETURNING id
""",
(account_num, account_name, org_id),
)
return cursor.fetchone()[0]
@staticmethod
def _string_or_none(value: object) -> str | None:
if value is None:
return None
return str(value)
@staticmethod @staticmethod
def _parse_datetime(value: object) -> datetime | None: def _parse_datetime(value: object) -> datetime | None:
if isinstance(value, datetime): if isinstance(value, datetime):

View file

@ -26,6 +26,49 @@ RUN_AKAHU=true RUN_EMONEY=false RUN_NORMALIZE=true python main.py
- The Docker image uses Playwright's Python base image. - The Docker image uses Playwright's Python base image.
- The Emoney scraper runs headless by default. - The Emoney scraper runs headless by default.
## Env vars
Required:
- AKAHU_API_TOKEN
- AKAHU_APP_ID
- DB_HOST
- DB_NAME
- DB_USER
- DB_PASSWORD
- SCRAPER_URL
- SCRAPER_USERNAME
- SCRAPER_PASSWORD
Optional:
- EMONEY_USE_CACHE (true/false, default false)
- EMONEY_CACHE_PATH (default emoney_cache.json)
- RUN_AKAHU (default true)
- RUN_EMONEY (default true)
- RUN_NORMALIZE (default true)
## Build and test the Docker image
Build:
```
docker build -t akahusync:local .
```
Run (example, pass env vars):
```
docker run --rm \
-e AKAHU_API_TOKEN=... \
-e AKAHU_APP_ID=... \
-e DB_HOST=... \
-e DB_NAME=... \
-e DB_USER=... \
-e DB_PASSWORD=... \
-e SCRAPER_URL=... \
-e SCRAPER_USERNAME=... \
-e SCRAPER_PASSWORD=... \
-e RUN_AKAHU=true \
-e RUN_EMONEY=false \
-e RUN_NORMALIZE=true \
akahusync:local
```
## Normalization gotchas ## Normalization gotchas
- Akahu org is derived from the connection name (BNZ/Sharesies), not the source name. - Akahu org is derived from the connection name (BNZ/Sharesies), not the source name.
- Emoney transactions are hardcoded to vendor "Finance Now". - Emoney transactions are hardcoded to vendor "Finance Now".

View file

@ -1,7 +1,8 @@
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
from dotenv import load_dotenv import os
from dotenv import dotenv_values
REPO_ROOT = Path(__file__).resolve().parent REPO_ROOT = Path(__file__).resolve().parent
@ -9,11 +10,18 @@ REPO_ROOT = Path(__file__).resolve().parent
def load_env(service: str | None = None) -> None:
    """Populate ``os.environ`` from the repo ``.env`` and a service ``.env``.

    Values are read from ``REPO_ROOT/.env`` and, when ``service`` is given,
    from ``REPO_ROOT/<service>/.env``. Files that do not exist are skipped,
    as are entries whose parsed value is ``None``.

    Precedence: variables already present in the environment win, then the
    service file, then the repo-level file.
    """

    def _read(path: Path) -> dict[str, str]:
        # A missing file contributes nothing; valueless keys are dropped.
        if not path.exists():
            return {}
        return {name: val for name, val in dotenv_values(path).items() if val is not None}

    layered = _read(REPO_ROOT / ".env")
    if service:
        # Service-specific values replace the repo-level defaults.
        layered.update(_read(REPO_ROOT / service / ".env"))

    for name, val in layered.items():
        # setdefault never overwrites a variable that is already set.
        os.environ.setdefault(name, val)