diff --git a/beancount_cde_importer/__init__.py b/beancount_cde_importer/__init__.py
index d716c46..1938433 100644
--- a/beancount_cde_importer/__init__.py
+++ b/beancount_cde_importer/__init__.py
@@ -11,44 +11,21 @@ from beancount.core.number import Decimal  # type: ignore
 from beancount.ingest import cache, importer  # type: ignore
 
 
-INDEX_DATE = 0
-INDEX_TRANSACTION_NUMBER = 1
-INDEX_LABEL = 2
-INDEX_DEBIT = 3
-INDEX_CREDIT = 4
-INDEX_DETAIL = 5
+COL_DATE = "Date de comptabilisation"
+COL_LABEL = "Libelle operation"
+COL_DEBIT = "Debit"
+COL_CREDIT = "Credit"
+COL_DETAIL = "Informations complementaires"
 
 END_DATE_REGEX = "Date de fin de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"
 START_DATE_REGEX = "Date de début de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"
 
-
-def is_valid_header(header: list[str]) -> bool:
-    return (
-        header[INDEX_DATE] == "Date"
-        and header[INDEX_TRANSACTION_NUMBER] == "Numéro d'opération"
-        and header[INDEX_LABEL] == "Libellé"
-        and header[INDEX_DEBIT] == "Débit"
-        and header[INDEX_CREDIT] == "Crédit"
-        and header[INDEX_DETAIL] == "Détail"
-    )
+EXPECTED_HEADER = "Date de comptabilisation;Libelle simplifie;Libelle operation;Reference;Informations complementaires;Type operation;Categorie;Sous categorie;Debit;Credit;Date operation;Date de valeur;Pointage operation"
 
 
-def get_date(file: cache._FileMemo, regex: str) -> Optional[date]:
-    match: Optional[re.Match] = re.search(regex, file.head())
-    if match is None:
-        return None
-    date_str: Optional[str] = match.group(1)
-    if date_str is None:
-        return None
-    return datetime.strptime(date_str, "%d/%m/%Y").date()
-
-
-def get_end_date(file: cache._FileMemo) -> Optional[date]:
-    return get_date(file, END_DATE_REGEX)
-
-
-def get_start_date(file: cache._FileMemo) -> Optional[date]:
-    return get_date(file, START_DATE_REGEX)
+def is_valid_header(header: str) -> bool:
+    """Return True when *header* is exactly the expected CSV header line."""
+    return header == EXPECTED_HEADER
 
 
 class CDEImporter(importer.ImporterProtocol):
@@ -60,16 +37,7 @@ class CDEImporter(importer.ImporterProtocol):
             # NOTE: beancount.ingest.cache._FileMemo handles automatic encoding
             # detection
             lines: list[str] = file.head().splitlines()
-            csv_reader = csv.reader(
-                lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
-            )
-
-            # header is actually on the 5th line, the previous ones contain
-            # miscellaneous information
-            header: Optional[list[str]] = next(islice(csv_reader, 4, None))
-
-            if header is None:
-                return False
+            header: str = lines[0]
 
             return is_valid_header(header)
         except:
@@ -82,73 +50,39 @@ class CDEImporter(importer.ImporterProtocol):
         return "CaisseEpargne_Statement.csv"
 
     def file_date(self, file: cache._FileMemo) -> Optional[date]:
-        return get_end_date(file)
+        """Return the date of the first data row, or None if there are no rows."""
+        lines: list[str] = file.contents().splitlines()
+        csv_reader: csv.DictReader = csv.DictReader(
+            lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
+        )
+
+        row: Optional[dict[str, str]] = next(csv_reader, None)
+        if row is None:
+            return None
+
+        return datetime.strptime(
+            row[COL_DATE], "%d/%m/%Y"
+        ).date()
 
     def extract(self, file: cache._FileMemo, existing_entries=None) -> list[Any]:
         directives: list[Any] = []
 
-        end_date: Optional[date] = get_end_date(file)
-        start_date: Optional[date] = get_start_date(file)
-        if end_date is None or start_date is None:
-            return directives
-
         lines: list[str] = file.contents().splitlines()
-        csv_reader = csv.reader(
+        csv_reader = csv.DictReader(
             lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
         )
 
-        # first 3 lines are useless
-        for _ in range(3):
-            next(csv_reader)
-
-        # 4th line is usually the final balance
-        row: Optional[list[str]] = next(csv_reader)
-        if row is None:
-            return directives
-
-        if row[0] == "Solde en fin de période":
-            meta = data.new_metadata(file.name, 4)
-            balance = Decimal(row[4].replace(",", "."))
-            directives.append(
-                data.Balance(
-                    meta=meta,
-                    date=end_date,
-                    account=self.account,
-                    amount=Amount(balance, "EUR"),
-                    tolerance=None,
-                    diff_amount=None,
-                )
-            )
-
-        # skip headings
-        next(csv_reader)
-
         for index, row in enumerate(csv_reader):
-            lineno: int = index + 6  # entries start at line 6
+            lineno: int = index + 2  # entries start at line 2
             meta = data.new_metadata(file.name, lineno)
 
-            if row[0] == "Solde en début de période":
-                balance = Decimal(row[4].replace(",", "."))
-                directives.append(
-                    data.Balance(
-                        meta=meta,
-                        date=start_date,
-                        account=self.account,
-                        amount=Amount(balance, "EUR"),
-                        tolerance=None,
-                        diff_amount=None,
-                    )
-                )
-
-                # should be the last line anyway
-                continue
-
             transaction_date: date = datetime.strptime(
-                row[INDEX_DATE], "%d/%m/%y"
+                row[COL_DATE], "%d/%m/%Y"
             ).date()
-            label: str = row[INDEX_LABEL]
-            debit: str = row[INDEX_DEBIT]
-            credit: str = row[INDEX_CREDIT]
+            label: str = row[COL_LABEL]
+            debit: str = row[COL_DEBIT]
+            credit: str = row[COL_CREDIT]
+            detail: str = row[COL_DETAIL] or ""
 
             postings: list[data.Posting] = []
 
@@ -174,7 +108,7 @@ class CDEImporter(importer.ImporterProtocol):
                     transaction_date,
                     self.FLAG,
                     label,
-                    "",
+                    detail,
                     data.EMPTY_SET,
                     data.EMPTY_SET,
                     postings,