# Listing metadata: 389 lines, 14 KiB, Python
from __future__ import annotations
|
|
|
|
from datetime import date, datetime
|
|
from typing import Any, Iterable
|
|
|
|
|
|
class DataNormalizer:
    """Turn raw ``emoney`` / ``akahu`` payloads into normalized table rows.

    The class reads unprocessed rows from ``rawtransactions`` and
    ``rawsnapshots``, converts each payload into a flat dict, inserts it into
    ``transactions`` / ``snapshots`` (creating organization, account and
    vendor rows on demand) and flags the raw row as processed.

    ``dbconnection`` is used as a DB-API style connection: ``cursor()`` must
    work as a context manager, queries use ``%s`` placeholders and
    ``INSERT ... RETURNING id``, and ``commit()`` / ``rollback()`` are called
    at batch boundaries.
    """

    # Table names are interpolated into SQL text (identifiers cannot be bound
    # as query parameters), so only these known names are ever accepted.
    _RAW_TABLES = frozenset({"rawtransactions", "rawsnapshots"})

    # Formats tried in order by _parse_date.
    _DATE_FORMATS = (
        "%Y-%m-%d",
        "%d-%m-%Y",
        "%Y-%m-%dT%H:%M:%S%z",
        "%Y-%m-%dT%H:%M:%SZ",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S.%f%z",
    )

    def __init__(self, dbconnection):
        """Store the connection and preload akahu account metadata.

        Note that construction issues a query (see
        ``_build_akahu_account_cache``).
        """
        self.dbconnection = dbconnection
        self._akahu_account_cache = self._build_akahu_account_cache()

    # ------------------------------------------------------------------
    # Reading raw rows
    # ------------------------------------------------------------------

    def read_raw_transactions(self, source: str | None = None, limit: int | None = None) -> list[dict[str, Any]]:
        """Return unprocessed ``rawtransactions`` rows, oldest first.

        Args:
            source: If truthy, only rows with this ``source`` are returned.
            limit: If not ``None``, the maximum number of rows to return.

        Returns:
            A list of ``{"id", "data", "source"}`` dicts.
        """
        return self._read_unprocessed("rawtransactions", source, limit)

    def read_raw_snapshots(self, source: str | None = None, limit: int | None = None) -> list[dict[str, Any]]:
        """Return unprocessed ``rawsnapshots`` rows, oldest first.

        Args:
            source: If truthy, only rows with this ``source`` are returned.
            limit: If not ``None``, the maximum number of rows to return.

        Returns:
            A list of ``{"id", "data", "source"}`` dicts.
        """
        return self._read_unprocessed("rawsnapshots", source, limit)

    def _read_unprocessed(self, table: str, source: str | None, limit: int | None) -> list[dict[str, Any]]:
        """Fetch ``processed = FALSE`` rows from *table* ordered by ``received_at``."""
        table = self._require_raw_table(table)
        sql = f"SELECT id, data, source FROM {table} WHERE processed = FALSE"
        params: list[Any] = []
        if source:
            sql += " AND source = %s"
            params.append(source)
        sql += " ORDER BY received_at ASC"
        if limit is not None:
            sql += " LIMIT %s"
            params.append(limit)

        with self.dbconnection.cursor() as cursor:
            cursor.execute(sql, params)
            rows = cursor.fetchall()

        return [{"id": row[0], "data": row[1], "source": row[2]} for row in rows]

    # ------------------------------------------------------------------
    # Batch entry points
    # ------------------------------------------------------------------

    def normalize_transactions(self, records: Iterable[dict[str, Any]]) -> None:
        """Normalize, store and mark processed each raw transaction record.

        Records that cannot be normalized are skipped and stay unprocessed.
        The batch is committed once at the end; if anything raises, the
        connection is rolled back and the exception propagates.
        """
        self._process_batch(
            records,
            "rawtransactions",
            self._normalize_transaction,
            self._write_transaction,
        )

    def normalize_snapshots(self, records: Iterable[dict[str, Any]]) -> None:
        """Normalize, store and mark processed each raw snapshot record.

        Records that cannot be normalized are skipped and stay unprocessed.
        The batch is committed once at the end; if anything raises, the
        connection is rolled back and the exception propagates.
        """
        self._process_batch(
            records,
            "rawsnapshots",
            self._normalize_snapshot,
            self._write_snapshot,
        )

    def _process_batch(self, records: Iterable[dict[str, Any]], table: str, normalize, write) -> None:
        """Run ``normalize`` then ``write`` for every record and commit once.

        ``normalize`` is called as ``normalize(data, source)`` and returns a
        dict or ``None``; ``write`` is called with the normalized dict.
        """
        try:
            for record in records:
                normalized = normalize(record.get("data"), record.get("source"))
                if not normalized:
                    continue
                write(normalized)
                self._mark_processed(table, record["id"])
        except Exception:
            # Do not leave a half-applied batch open on the connection where a
            # later commit() could persist it.
            self.dbconnection.rollback()
            raise

        self.dbconnection.commit()

    # ------------------------------------------------------------------
    # Source dispatch
    # ------------------------------------------------------------------

    def _normalize_transaction(self, data: Any, source: str | None) -> dict[str, Any] | None:
        """Dispatch a raw transaction payload by source.

        Returns ``None`` when *data* is not a dict or *source* is unknown.
        """
        if not isinstance(data, dict):
            return None
        if source == "emoney":
            return self._normalize_emoney_transaction(data)
        if source == "akahu":
            return self._normalize_akahu_transaction(data)
        return None

    def _normalize_snapshot(self, data: Any, source: str | None) -> dict[str, Any] | None:
        """Dispatch a raw snapshot payload by source.

        Returns ``None`` when *data* is not a dict or *source* is unknown.
        """
        if not isinstance(data, dict):
            return None
        if source == "emoney":
            return self._normalize_emoney_snapshot(data)
        if source == "akahu":
            return self._normalize_akahu_snapshot(data)
        return None

    # ------------------------------------------------------------------
    # emoney
    # ------------------------------------------------------------------

    def _normalize_emoney_transaction(self, data: dict[str, Any]) -> dict[str, Any] | None:
        """Build a normalized transaction from an emoney payload.

        Requires ``date``, ``description`` and ``amount`` keys. Returns
        ``None`` if a key is missing, the date cannot be parsed, or the amount
        is not numeric.
        """
        required = {"date", "description", "amount"}
        if not required.issubset(data):
            return None

        parsed_date = self._parse_date(data.get("date"))
        if not parsed_date:
            return None

        # A malformed amount skips this record instead of aborting the batch.
        amount = self._parse_number(data.get("amount"))
        if amount is None:
            return None

        return {
            "datetime": parsed_date,
            "description": data.get("description"),
            "amount": amount,
            "account_name": "Emoney",
            "account_num": "emoney",
            "org_name": "Emoney",
            "vendor_name": "Finance Now",
        }

    def _normalize_emoney_snapshot(self, data: dict[str, Any]) -> dict[str, Any] | None:
        """Build a normalized snapshot from an emoney payload.

        Returns ``None`` if ``balance`` is missing or not numeric. A missing
        or unparseable ``date`` falls back to today's date.
        """
        if "balance" not in data:
            return None

        balance = self._parse_number(data.get("balance"))
        if balance is None:
            return None

        parsed_date = self._parse_date(data.get("date")) or date.today()
        return {
            "datetime": parsed_date,
            "balance": balance,
            "account_name": "Emoney",
            "account_num": "emoney",
            "org_name": "Emoney",
        }

    # ------------------------------------------------------------------
    # akahu
    # ------------------------------------------------------------------

    def _normalize_akahu_transaction(self, data: dict[str, Any]) -> dict[str, Any] | None:
        """Build a normalized transaction from an akahu payload.

        Returns ``None`` if ``amount`` or ``description`` is missing, the
        amount is not numeric, or neither ``date`` nor ``created_at`` parses.
        Account details come from the account cache when the account id is
        known there; otherwise they fall back to fields on the payload.
        """
        if "amount" not in data or "description" not in data:
            return None

        # A malformed amount skips this record instead of aborting the batch.
        amount = self._parse_number(data.get("amount"))
        if amount is None:
            return None

        parsed_date = self._parse_date(data.get("date") or data.get("created_at"))
        if not parsed_date:
            return None

        account_id = self._string_or_none(data.get("_account") or data.get("account_id") or data.get("account"))
        account_meta = self._akahu_account_cache.get(account_id or "", {})
        account_num = account_meta.get("account_num") or account_id
        account_name = account_meta.get("account_name") or self._string_or_none(
            data.get("account_name") or data.get("name")
        )
        org_name = account_meta.get("org_name") or "unknown"

        # Vendor preference order: merchant name, then payee, then description.
        merchant = data.get("merchant")
        if isinstance(merchant, dict):
            vendor_name = self._string_or_none(merchant.get("name"))
        else:
            vendor_name = self._string_or_none(merchant)
        if not vendor_name:
            vendor_name = self._string_or_none(data.get("payee"))
        if not vendor_name:
            vendor_name = self._string_or_none(data.get("description"))

        return {
            "datetime": parsed_date,
            "description": self._format_akahu_description(data),
            "amount": amount,
            "account_name": account_name,
            "account_num": account_num,
            "org_name": org_name,
            "vendor_name": vendor_name,
        }

    def _normalize_akahu_snapshot(self, data: dict[str, Any]) -> dict[str, Any] | None:
        """Build a normalized snapshot from an akahu payload.

        The balance is read from ``balance`` and, if that yields no number,
        from ``current_balance``. Returns ``None`` when neither key is present
        or neither value is numeric. A missing or unparseable date falls back
        to today's date.
        """
        if "balance" not in data and "current_balance" not in data:
            return None

        parsed_date = self._parse_date(data.get("date") or data.get("updated_at")) or date.today()

        # Parse each candidate separately rather than joining them with `or`:
        # a balance of 0 is falsy but is still a valid balance.
        balance_value = self._parse_balance_value(data.get("balance"))
        if balance_value is None:
            balance_value = self._parse_balance_value(data.get("current_balance"))
        if balance_value is None:
            return None

        account_num = self._string_or_none(
            data.get("formatted_account") or data.get("_id") or data.get("account_id")
        )
        connection = data.get("connection") if isinstance(data.get("connection"), dict) else {}
        org_name = self._string_or_none(connection.get("name")) or "unknown"
        return {
            "datetime": parsed_date,
            "balance": balance_value,
            "account_name": self._string_or_none(data.get("name") or data.get("account_name")),
            "account_num": account_num,
            "org_name": org_name,
        }

    def _build_akahu_account_cache(self) -> dict[str, dict[str, str]]:
        """Map akahu account id to its number, name and organization.

        Reads the ``data`` column of every ``rawsnapshots`` row whose source is
        ``akahu``. Rows whose payload is not a dict or has no ``_id`` / ``id``
        are ignored; when several rows share an id, the last one fetched wins.
        """
        cache: dict[str, dict[str, str]] = {}
        with self.dbconnection.cursor() as cursor:
            cursor.execute(
                "SELECT data FROM rawsnapshots WHERE source = %s",
                ("akahu",),
            )
            rows = cursor.fetchall()

        for (data,) in rows:
            if not isinstance(data, dict):
                continue
            account_id = self._string_or_none(data.get("_id") or data.get("id"))
            if not account_id:
                continue
            account_num = self._string_or_none(
                data.get("formatted_account") or data.get("_id") or data.get("account_id")
            )
            account_name = self._string_or_none(data.get("name") or data.get("account_name"))
            connection = data.get("connection") if isinstance(data.get("connection"), dict) else {}
            org_name = self._string_or_none(connection.get("name")) or "unknown"
            cache[account_id] = {
                "account_num": account_num or account_id,
                "account_name": account_name or "unknown",
                "org_name": org_name,
            }

        return cache

    def _format_akahu_description(self, data: dict[str, Any]) -> str:
        """Compose a description string from ``description`` and ``meta``.

        If the description contains ``INTERNET XFR``, that marker is replaced
        by ``-> <target>`` where the target is the first available of
        ``other_account``, ``reference``, ``particulars``, ``code``. Any
        present meta fields are then appended as ``key=value`` segments
        separated by `` | ``.
        """
        description = self._string_or_none(data.get("description")) or "unknown"
        meta = data.get("meta") if isinstance(data.get("meta"), dict) else {}

        other_account = self._string_or_none(meta.get("other_account"))
        reference = self._string_or_none(meta.get("reference"))
        particulars = self._string_or_none(meta.get("particulars"))
        code = self._string_or_none(meta.get("code"))

        if "INTERNET XFR" in description:
            target = other_account or reference or particulars or code
            if target:
                description = description.replace("INTERNET XFR", f"-> {target}")

        labelled = (
            ("ref", reference),
            ("particulars", particulars),
            ("code", code),
            ("other", other_account),
        )
        meta_bits = [f"{label}={value}" for label, value in labelled if value]
        if meta_bits:
            description = f"{description} | " + " | ".join(meta_bits)

        return description

    # ------------------------------------------------------------------
    # Writes
    # ------------------------------------------------------------------

    def _write_transaction(self, normalized: dict[str, Any]) -> None:
        """Insert one normalized transaction, creating referenced rows as needed.

        Does not commit; the caller owns the transaction boundary.
        """
        org_id = self._get_or_create_org(normalized.get("org_name"))
        account_id = self._get_or_create_account(
            normalized.get("account_num"),
            normalized.get("account_name"),
            org_id,
        )
        vendor_id = None
        vendor_name = normalized.get("vendor_name")
        if vendor_name:
            vendor_id = self._get_or_create_vendor(vendor_name, org_id)

        with self.dbconnection.cursor() as cursor:
            cursor.execute(
                """
                INSERT INTO transactions (datetime, description, amount, accountid, orgid, vendorid)
                VALUES (%s, %s, %s, %s, %s, %s)
                """,
                (
                    normalized.get("datetime"),
                    normalized.get("description"),
                    normalized.get("amount"),
                    account_id,
                    org_id,
                    vendor_id,
                ),
            )

    def _write_snapshot(self, normalized: dict[str, Any]) -> None:
        """Insert one normalized snapshot, creating referenced rows as needed.

        Does not commit; the caller owns the transaction boundary.
        """
        org_id = self._get_or_create_org(normalized.get("org_name"))
        account_id = self._get_or_create_account(
            normalized.get("account_num"),
            normalized.get("account_name"),
            org_id,
        )

        with self.dbconnection.cursor() as cursor:
            cursor.execute(
                """
                INSERT INTO snapshots (datetime, accountid, balance, orgid)
                VALUES (%s, %s, %s, %s)
                """,
                (
                    normalized.get("datetime"),
                    account_id,
                    normalized.get("balance"),
                    org_id,
                ),
            )

    def _mark_processed(self, table: str, record_id: int) -> None:
        """Set ``processed = TRUE`` on one raw row.

        Raises:
            ValueError: If *table* is not one of the known raw tables.
        """
        table = self._require_raw_table(table)
        with self.dbconnection.cursor() as cursor:
            cursor.execute(
                f"UPDATE {table} SET processed = TRUE WHERE id = %s",
                (record_id,),
            )

    @classmethod
    def _require_raw_table(cls, table: str) -> str:
        """Return *table* unchanged if it is an allowed raw table, else raise ``ValueError``."""
        if table not in cls._RAW_TABLES:
            raise ValueError(f"unsupported raw table: {table!r}")
        return table

    def _get_or_create_org(self, org_name: str | None) -> int:
        """Return the id of the organization named *org_name*, inserting it if absent.

        A falsy name is stored as ``"unknown"``.
        """
        org_name = org_name or "unknown"
        with self.dbconnection.cursor() as cursor:
            cursor.execute("SELECT id FROM organizations WHERE orgname = %s", (org_name,))
            row = cursor.fetchone()
            if row:
                return row[0]

            cursor.execute(
                "INSERT INTO organizations (orgname) VALUES (%s) RETURNING id",
                (org_name,),
            )
            return cursor.fetchone()[0]

    def _get_or_create_account(self, account_num: str | None, account_name: str | None, org_id: int) -> int:
        """Return the id of the account with *account_num*, inserting it if absent.

        A falsy name becomes ``"unknown"``; a falsy number is synthesized as
        ``"<org_id>:<account_name>"``. The lookup is by account number only.
        """
        account_name = account_name or "unknown"
        account_num = account_num or f"{org_id}:{account_name}"
        with self.dbconnection.cursor() as cursor:
            cursor.execute("SELECT id FROM accounts WHERE accountnum = %s", (account_num,))
            row = cursor.fetchone()
            if row:
                return row[0]

            cursor.execute(
                """
                INSERT INTO accounts (accountnum, accountname, orgid)
                VALUES (%s, %s, %s)
                RETURNING id
                """,
                (account_num, account_name, org_id),
            )
            return cursor.fetchone()[0]

    def _get_or_create_vendor(self, vendor_name: str, org_id: int) -> int:
        """Return the id of the vendor for this organization, inserting it if absent.

        ``"Finance Now"`` is first looked up by name alone, so an existing row
        under any organization is reused; all other vendors are matched on
        name and organization together.
        """
        if vendor_name == "Finance Now":
            with self.dbconnection.cursor() as cursor:
                cursor.execute(
                    "SELECT id FROM vendors WHERE vendorname = %s",
                    (vendor_name,),
                )
                row = cursor.fetchone()
                if row:
                    return row[0]

        with self.dbconnection.cursor() as cursor:
            cursor.execute(
                "SELECT id FROM vendors WHERE vendorname = %s AND orgid = %s",
                (vendor_name, org_id),
            )
            row = cursor.fetchone()
            if row:
                return row[0]

            cursor.execute(
                "INSERT INTO vendors (vendorname, orgid) VALUES (%s, %s) RETURNING id",
                (vendor_name, org_id),
            )
            return cursor.fetchone()[0]

    # ------------------------------------------------------------------
    # Value helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_date(value: Any) -> date | None:
        """Coerce *value* to a ``date``, or return ``None`` if it cannot be read.

        Accepts ``date`` and ``datetime`` objects and strings in any of
        ``_DATE_FORMATS``. Datetimes are truncated with ``.date()`` as-is; no
        timezone conversion is applied.
        """
        # datetime is a subclass of date, so it has to be tested first.
        if isinstance(value, datetime):
            return value.date()
        if isinstance(value, date):
            return value
        if isinstance(value, str):
            for fmt in DataNormalizer._DATE_FORMATS:
                try:
                    return datetime.strptime(value, fmt).date()
                except ValueError:
                    continue
        return None

    @staticmethod
    def _string_or_none(value: Any) -> str | None:
        """Return ``str(value)``, or ``None`` when *value* is ``None``."""
        if value is None:
            return None
        return str(value)

    @staticmethod
    def _parse_number(value: Any) -> float | None:
        """Return ``float(value)``, or ``None`` when the conversion fails."""
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return None

    @staticmethod
    def _parse_balance_value(value: Any) -> float | None:
        """Extract a float balance from a number, numeric string or dict.

        For a dict, the first of the keys ``amount``, ``value``, ``balance``
        that is present is parsed recursively. Anything else yields ``None``.
        """
        # NOTE(review): a provider may nest the figure under a different key
        # (e.g. "current"); confirm against real payloads before extending.
        if value is None:
            return None
        if isinstance(value, (int, float)):
            return float(value)
        if isinstance(value, str):
            try:
                return float(value)
            except ValueError:
                return None
        if isinstance(value, dict):
            for key in ("amount", "value", "balance"):
                if key in value:
                    return DataNormalizer._parse_balance_value(value.get(key))
        return None
|