more changes
This commit is contained in:
parent
c8e7ddce35
commit
969191cb5c
13
.dockerignore
Normal file
13
.dockerignore
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
.git
|
||||
.venv
|
||||
__pycache__
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
*.log
|
||||
.env
|
||||
emoney_cache.json
|
||||
sql/
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
from akahuclient import AkahuClient
|
||||
import os
|
||||
from config import load_env
|
||||
|
||||
|
||||
load_env()
|
||||
|
||||
TOKEN = os.getenv("AKAHU_API_TOKEN")
|
||||
APP_ID = os.getenv("AKAHU_APP_ID")
|
||||
|
||||
# if not TOKEN or not APP_ID:
|
||||
# print("Please set AKAHU_API_TOKEN and AKAHU_APP_ID in your environment.")
|
||||
# exit(1)
|
||||
# client = AkahuClient(TOKEN, APP_ID)
|
||||
# accounts = client.get_accounts()
|
||||
#print(accounts)
|
||||
13
Dockerfile
Normal file
13
Dockerfile
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
FROM mcr.microsoft.com/playwright/python:v1.44.0-jammy
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . ./
|
||||
|
||||
CMD ["python", "main.py"]
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
from scraper import Scraper
|
||||
from playwright.sync_api import sync_playwright, Playwright
|
||||
|
||||
playwright = sync_playwright().start()
|
||||
scraper = Scraper(playwright, True)
|
||||
snapshot = scraper.get_snapshot()
|
||||
transactions = scraper.get_transactions_parsed()
|
||||
print(snapshot)
|
||||
print(transactions)
|
||||
scraper.close()
|
||||
|
||||
|
|
@ -207,7 +207,7 @@ class Ingester:
|
|||
|
||||
def _get_scraper(self) -> Scraper:
|
||||
if self.scraper is None:
|
||||
self.scraper = Scraper(pw, headless=False)
|
||||
self.scraper = Scraper(pw, headless=True)
|
||||
return self.scraper
|
||||
|
||||
def _sort_emoney_snapshots_oldest_first(self, accounts: list[dict[str, object]]) -> list[dict[str, object]]:
|
||||
|
|
|
|||
21
README.md
21
README.md
|
|
@ -9,3 +9,24 @@ make sure to install playwright (pip install playwright), then run:
|
|||
playwright install --with-deps
|
||||
```
|
||||
This installs the correct browser drivers and dependencies for the host system.
|
||||
|
||||
## Runtime flags
|
||||
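These environment variables let you run the Akahu, Emoney, and normalization steps independently (useful for separate k8s cron jobs). A step runs when its variable is `1`, `true`, or `yes` (case-insensitive); any other value skips it: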
|
||||
|
||||
- RUN_AKAHU (default true)
|
||||
- RUN_EMONEY (default true)
|
||||
- RUN_NORMALIZE (default true)
|
||||
|
||||
Example:
|
||||
```
|
||||
RUN_AKAHU=true RUN_EMONEY=false RUN_NORMALIZE=true python main.py
|
||||
```
|
||||
|
||||
## Container notes
|
||||
- The Docker image uses Playwright's Python base image.
|
||||
- The Emoney scraper always runs headless (`headless=True` is hard-coded in `Ingester._get_scraper`).
|
||||
|
||||
## Normalization gotchas
|
||||
- Akahu org is derived from the connection name (BNZ/Sharesies), not the source name.
|
||||
- Emoney transactions are hardcoded to vendor "Finance Now".
|
||||
- Akahu descriptions with "INTERNET XFR" are rewritten to include the counterparty info and meta fields.
|
||||
38
main.py
38
main.py
|
|
@ -1,5 +1,6 @@
|
|||
from IngestionService.ingester import Ingester
|
||||
from config import load_env
|
||||
import os
|
||||
|
||||
load_env("IngestionService")
|
||||
|
||||
|
|
@ -7,21 +8,28 @@ if __name__ == "__main__":
|
|||
ingester = Ingester()
|
||||
ingester.test_db_connection()
|
||||
|
||||
akahu_accounts = ingester.fetch_akahu_snapshot_data()
|
||||
print("Akahu accounts fetched:", len(akahu_accounts.get("items", [])))
|
||||
ingester.write_akahu_snapshot_data(akahu_accounts)
|
||||
run_akahu = os.getenv("RUN_AKAHU", "true").strip().lower() in {"1", "true", "yes"}
|
||||
run_emoney = os.getenv("RUN_EMONEY", "true").strip().lower() in {"1", "true", "yes"}
|
||||
run_normalize = os.getenv("RUN_NORMALIZE", "true").strip().lower() in {"1", "true", "yes"}
|
||||
|
||||
backfill_start = "2026-01-01"
|
||||
backfill_end = "2026-12-31"
|
||||
akahu_backfill = ingester.backfill_akahu_transactions(backfill_start, backfill_end)
|
||||
print("Akahu backfill transactions:", len(akahu_backfill.get("items", [])))
|
||||
if run_akahu:
|
||||
akahu_accounts = ingester.fetch_akahu_snapshot_data()
|
||||
#print("Akahu accounts fetched:", len(akahu_accounts.get("items", [])))
|
||||
ingester.write_akahu_snapshot_data(akahu_accounts)
|
||||
|
||||
emoney_data = ingester.fetch_emoney_data()
|
||||
emoney_snapshot = emoney_data.get("snapshot") or {"accounts": []}
|
||||
emoney_transactions = emoney_data.get("transactions") or []
|
||||
print("Emoney snapshot accounts:", len(emoney_snapshot.get("accounts", [])))
|
||||
ingester.write_emoney_snapshot_data(emoney_snapshot)
|
||||
print("Emoney transactions:", len(emoney_transactions))
|
||||
ingester.write_emoney_transaction_data(emoney_transactions)
|
||||
#backfill_start = "2026-01-01"
|
||||
#backfill_end = "2026-12-31"
|
||||
#akahu_backfill = ingester.backfill_akahu_transactions(backfill_start, backfill_end)
|
||||
#print("Akahu backfill transactions:", len(akahu_backfill.get("items", [])))
|
||||
|
||||
ingester.normalize_pending_data()
|
||||
if run_emoney:
|
||||
emoney_data = ingester.fetch_emoney_data()
|
||||
emoney_snapshot = emoney_data.get("snapshot") or {"accounts": []}
|
||||
emoney_transactions = emoney_data.get("transactions") or []
|
||||
#print("Emoney snapshot accounts:", len(emoney_snapshot.get("accounts", [])))
|
||||
ingester.write_emoney_snapshot_data(emoney_snapshot)
|
||||
#print("Emoney transactions:", len(emoney_transactions))
|
||||
ingester.write_emoney_transaction_data(emoney_transactions)
|
||||
|
||||
if run_normalize:
|
||||
ingester.normalize_pending_data()
|
||||
|
|
|
|||
Loading…
Reference in a new issue