beancount-cde-importer/beancount_cde_importer/__init__.py

185 lines
5.7 KiB
Python
Raw Normal View History

2022-02-03 00:05:59 +01:00
import csv
2022-02-05 19:39:01 +01:00
import re
from datetime import datetime, date
2022-02-03 00:05:59 +01:00
from itertools import islice
2022-02-05 19:39:01 +01:00
from typing import Any, Optional
2022-02-03 00:05:59 +01:00
2022-02-05 19:39:01 +01:00
from beancount.core import data # type: ignore
from beancount.core.amount import Amount # type: ignore
from beancount.core.number import Decimal # type: ignore
2022-02-03 00:05:59 +01:00
from beancount.ingest import cache, importer # type: ignore
# Zero-based positions of the columns in a row of the CSV export, listed in
# the order the columns appear ("Date", "Numéro d'opération", "Libellé",
# "Débit", "Crédit", "Détail").
(
    INDEX_DATE,
    INDEX_TRANSACTION_NUMBER,
    INDEX_LABEL,
    INDEX_DEBIT,
    INDEX_CREDIT,
    INDEX_DETAIL,
) = range(6)
2022-02-05 19:39:01 +01:00
# Patterns locating the download period announced in the statement; group 1
# captures the date as DD/MM/YYYY (digit ranges only, not calendar-validated).
_DOWNLOAD_DATE_GROUP = "([0-3][0-9]/[0-1][0-9]/[0-9]{4})"
END_DATE_REGEX = "Date de fin de téléchargement : " + _DOWNLOAD_DATE_GROUP
START_DATE_REGEX = "Date de début de téléchargement : " + _DOWNLOAD_DATE_GROUP
2022-02-03 00:05:59 +01:00
def is_valid_header(header: list[str]) -> bool:
    """Tell whether *header* is the column-heading row of a statement export.

    Each expected title is looked up at its ``INDEX_*`` position; any extra
    trailing columns are ignored.

    Args:
        header: One parsed CSV row (a list of cell strings).

    Returns:
        ``True`` when every expected title is found at its position,
        ``False`` otherwise -- including when the row is too short to hold
        all the expected columns.
    """
    expected_titles = (
        (INDEX_DATE, "Date"),
        (INDEX_TRANSACTION_NUMBER, "Numéro d'opération"),
        (INDEX_LABEL, "Libellé"),
        (INDEX_DEBIT, "Débit"),
        (INDEX_CREDIT, "Crédit"),
        (INDEX_DETAIL, "Détail"),
    )
    # A short row cannot be the header; answer False instead of letting the
    # lookups below raise IndexError.
    if len(header) <= max(position for position, _ in expected_titles):
        return False
    return all(header[position] == title for position, title in expected_titles)
2022-02-01 23:17:34 +01:00
2022-02-05 19:39:01 +01:00
def get_date(file: cache._FileMemo, regex: str) -> Optional[date]:
    """Search the head of *file* for a date matched by *regex*.

    Args:
        file: The file whose ``head()`` text is searched.
        regex: A pattern whose first capture group holds a date written as
            ``DD/MM/YYYY``.

    Returns:
        The captured date, or ``None`` when the pattern does not match, when
        group 1 did not participate in the match, or when the captured text
        is not a real calendar date.
    """
    found = re.search(regex, file.head())
    if found is None:
        return None
    date_str: Optional[str] = found.group(1)
    if date_str is None:
        return None
    try:
        return datetime.strptime(date_str, "%d/%m/%Y").date()
    except ValueError:
        # The module's patterns only constrain digit ranges, so impossible
        # dates such as "31/02/2022" or "00/13/2022" still match; report them
        # as "no date" in line with the Optional return contract.
        return None
def get_end_date(file: cache._FileMemo) -> Optional[date]:
    """Return the download end date found in the head of *file*, if any.

    Delegates to ``get_date`` with ``END_DATE_REGEX``; ``None`` means no such
    date was found.
    """
    end_date: Optional[date] = get_date(file, END_DATE_REGEX)
    return end_date
def get_start_date(file: cache._FileMemo) -> Optional[date]:
    """Return the download start date found in the head of *file*, if any.

    Delegates to ``get_date`` with ``START_DATE_REGEX``; ``None`` means no
    such date was found.
    """
    start_date: Optional[date] = get_date(file, START_DATE_REGEX)
    return start_date
2022-02-01 23:17:34 +01:00
class CaisseDEpargneImporter(importer.ImporterProtocol):
    """Beancount importer for Caisse d'Epargne ``;``-separated CSV statements.

    Layout the code relies on (1-based line numbers):

    * lines 1-3: miscellaneous information, skipped;
    * line 4: usually the closing balance ("Solde en fin de période");
    * line 5: the column headings checked by ``is_valid_header``;
    * line 6 onwards: one transaction per line, normally ending with the
      opening balance ("Solde en début de période").

    The download period is read separately from the head of the file with
    ``get_start_date`` / ``get_end_date``.
    """

    # Column holding the amount on the "Solde ..." balance rows.
    _BALANCE_COLUMN = 4
    # Every amount produced by this importer is expressed in this currency.
    _CURRENCY = "EUR"

    def __init__(self, account: str):
        """Create an importer.

        Args:
            account: Name of the Beancount account that receives every
                posting and balance assertion.
        """
        self.account = account

    def identify(self, file: cache._FileMemo) -> bool:
        """Tell whether *file* looks like a statement this importer handles.

        Returns:
            ``True`` when the 5th line of the file head is the expected column
            header; ``False`` otherwise, including when the head cannot be
            read or parsed.
        """
        try:
            # NOTE: beancount.ingest.cache._FileMemo handles automatic encoding
            # detection
            head_lines: list[str] = file.head().splitlines()
            csv_reader = csv.reader(
                head_lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
            )
            # header is actually on the 5th line, the previous ones contain
            # miscellaneous information; the default makes a too-short file
            # yield None instead of raising StopIteration
            header: Optional[list[str]] = next(islice(csv_reader, 4, None), None)
            if header is None:
                return False
            return is_valid_header(header)
        except Exception:
            # Identification is best effort: any read/parse failure just means
            # "not one of ours". Unlike a bare ``except``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            return False

    def file_account(self, file: cache._FileMemo) -> Optional[str]:
        """Return the account given at construction, regardless of *file*."""
        return self.account

    def file_name(self, file: cache._FileMemo) -> Optional[str]:
        """Return the fixed name ``CaisseEpargne_Statement.csv``."""
        return "CaisseEpargne_Statement.csv"

    def file_date(self, file: cache._FileMemo) -> Optional[date]:
        """Return the download end date found in *file*, or ``None``."""
        return get_end_date(file)

    def extract(self, file: cache._FileMemo, existing_entries=None) -> list[Any]:
        """Turn the statement into Beancount directives.

        Args:
            file: The statement to read.
            existing_entries: Accepted for interface compatibility; unused.

        Returns:
            The directives in file order: an optional closing ``Balance``
            (line 4), one ``Transaction`` per data row, and an opening
            ``Balance`` where the "Solde en début de période" row occurs.
            The list is empty when either download date is missing or the
            file has fewer than four lines.

        Raises:
            Malformed data rows are not skipped: errors from date parsing,
            amount parsing or missing columns propagate to the caller.
        """
        directives: list[Any] = []
        end_date: Optional[date] = get_end_date(file)
        start_date: Optional[date] = get_start_date(file)
        if end_date is None or start_date is None:
            return directives

        csv_reader = csv.reader(
            file.contents().splitlines(),
            delimiter=";",
            strict=True,
            quoting=csv.QUOTE_NONE,
        )

        # first 3 lines are useless, 4th line is usually the final balance
        preamble = list(islice(csv_reader, 4))
        if len(preamble) < 4:
            return directives
        closing_row = preamble[3]
        # ``closing_row`` is [] when line 4 is blank, hence the truthiness test
        if closing_row and closing_row[0] == "Solde en fin de période":
            # NOTE(review): Beancount checks a balance assertion at the start
            # of its date; if the export can contain transactions dated
            # end_date, this may need to be end_date + 1 day -- confirm
            # against a real statement.
            directives.append(
                self._balance(data.new_metadata(file.name, 4), end_date, closing_row)
            )

        # skip headings (and stop cleanly if the file ends here)
        if next(csv_reader, None) is None:
            return directives

        for index, row in enumerate(csv_reader):
            if not row:
                # blank line (csv yields []): nothing to import
                continue
            lineno: int = index + 6  # entries start at line 6
            meta = data.new_metadata(file.name, lineno)

            if row[0] == "Solde en début de période":
                directives.append(self._balance(meta, start_date, row))
                # should be the last line anyway
                continue

            transaction_date: date = datetime.strptime(
                row[INDEX_DATE], "%d/%m/%y"
            ).date()
            label: str = row[INDEX_LABEL]
            # Amounts keep the sign found in the file.
            # NOTE(review): presumably the "Débit" column already carries
            # negative values -- confirm against a real statement.
            postings: list[data.Posting] = [
                data.Posting(
                    self.account,
                    Amount(self._parse_amount(raw_amount), self._CURRENCY),
                    None,
                    None,
                    None,
                    None,
                )
                for raw_amount in (row[INDEX_DEBIT], row[INDEX_CREDIT])
                if raw_amount
            ]
            directives.append(
                data.Transaction(
                    meta,
                    transaction_date,
                    self.FLAG,
                    label,
                    "",
                    data.EMPTY_SET,
                    data.EMPTY_SET,
                    postings,
                )
            )
        return directives

    def _balance(self, meta: Any, when: date, row: list[str]) -> Any:
        """Build the balance assertion described by a "Solde ..." row."""
        return data.Balance(
            meta=meta,
            date=when,
            account=self.account,
            amount=Amount(
                self._parse_amount(row[self._BALANCE_COLUMN]), self._CURRENCY
            ),
            tolerance=None,
            diff_amount=None,
        )

    @staticmethod
    def _parse_amount(raw: str) -> Decimal:
        """Convert a comma-decimal number string into a ``Decimal``.

        Any whitespace (including no-break spaces, sometimes used as a
        thousands separator) is removed before the decimal comma is turned
        into a point.
        """
        return Decimal("".join(raw.split()).replace(",", "."))