#!/usr/bin/env python3

"""
Python script for checking if all Ampersands in .csv journal abbreviation files are
unescaped. This convention is enforced to ensure that abbreviations of journal titles
can be done without error.

The script will raise a ValueError() in case escaped ampersands are found, and will
also provide the row and column in which they were found (1 -indexed). The script does
NOT automatically fix these errors. This should be done manually.

The script will automatically run whenever there is a push to the main branch of the
abbreviations repo (abbrv.jabref.org) using GitHub Actions.
"""

import os
import itertools

# Get all file names in journal folders
PATH_TO_JOURNALS = "./journals/"
fileNames = next(itertools.islice(os.walk(PATH_TO_JOURNALS), 0, None))[2]

# Store ALL locations of escaped ampersands so they can all be printed upon failure
errFileNames = []
errRows = []
errCols = []

for file in fileNames:
    if (file.endswith(".csv")):
        # For each .csv file in the folder, open in read mode
        with open(PATH_TO_JOURNALS + file, "r") as f:
            for i, line in enumerate(f):
                # For each line, if it has \&, store the fname, row and columns
                if ('\&' in line):
                    errFileNames.append(file)
                    errRows.append(i + 1)
                    errCols.append(
                        [index + 1 for index in range(len(line)) if line.startswith('\&', index)])


# In the case where we do find escaped &, the len() will be non-zero
if (len(errFileNames) > 0):
    err_msg = "["
    # For each file, append every row:col location to the error message
    for i, fname in enumerate(errFileNames):
        for col in errCols[i]:
            err_msg += "(" + fname + ", " + \
                str(errRows[i]) + ":" + str(col) + "), "
    # Format end of string and return as Value Error to 'fail' GitHub Actions process
    err_msg = err_msg[:len(err_msg) - 2]
    err_msg += "]"
    raise ValueError("Found Escaped Ampersands at: " + err_msg)