# SoftwareQuality/jabref/buildres/abbrv.jabref.org/scripts/update_ubc.py

import csv
import json
import requests
from bs4 import BeautifulSoup

# Destination CSV path handed to save_file() by main(). It is a relative path,
# so it is resolved against the working directory the script is run from.
file_out = "journals/journal_abbreviations_ubc.csv"


def fetch_data(url, timeout=30):
    """Download *url* and return the ``"html"`` field of its wrapped JSON body.

    The response text is expected to look like ``name( {...json...} )``
    (presumably a JSONP-style payload): everything between the first ``(``
    and the last ``)`` is decoded as JSON and its ``"html"`` entry returned.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        The value stored under ``"html"``, or ``None`` when the request
        fails or the body does not have the expected shape. In the failure
        case the error is printed instead of raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Drop the "callback(" prefix and the trailing ")" around the JSON.
        json_data = response.text.split("(", 1)[1].rsplit(")", 1)[0]
        return json.loads(json_data)["html"]
    except (
        requests.RequestException,  # connection problems, timeouts, HTTP errors
        IndexError,  # no "(" in the body, so there is nothing to unwrap
        ValueError,  # body is not valid JSON (JSONDecodeError subclasses this)
        KeyError,  # decoded object has no "html" entry
        TypeError,  # decoded JSON is not a mapping
    ) as e:
        print("Error:", e)
        return None


def parse_html(html_content):
    """Turn two-cell table rows of *html_content* into a key-sorted dict.

    Every ``<tr>`` that contains exactly two ``<td>`` elements contributes one
    entry: the stripped text of the second cell is the key and the stripped
    text of the first cell is the value. Rows with any other cell count are
    ignored, and a key seen again later replaces the earlier value.

    Args:
        html_content: HTML markup to scan for table rows.

    Returns:
        A dict ordered by key. save_file() in this file reads the keys as
        full journal names and the values as abbreviations.
    """
    document = BeautifulSoup(html_content, "html.parser")

    entries = {}
    for table_row in document.find_all("tr"):
        cells = table_row.find_all("td")
        if len(cells) != 2:
            continue
        value_cell, key_cell = cells
        entries[key_cell.get_text(strip=True)] = value_cell.get_text(strip=True)

    # Rebuild the mapping so that iteration follows ascending key order.
    return {key: entries[key] for key in sorted(entries)}


def save_file(data, filename):
    """Write the ``data`` mapping to *filename* as a fully quoted CSV file.

    Each key/value pair becomes one row (key first, value second). Pairs in
    which either side is empty are left out. The file is overwritten, encoded
    as UTF-8, and a confirmation message is printed once writing is done.

    Args:
        data: Mapping whose items are written as two-column rows.
        filename: Path of the CSV file to create or overwrite.
    """
    with open(filename, "w", newline="", encoding="utf-8") as handle:
        complete_rows = (
            [name, abbr] for name, abbr in data.items() if name and abbr
        )
        # QUOTE_ALL wraps every field in quotes, whatever its content.
        out = csv.writer(handle, delimiter=",", quoting=csv.QUOTE_ALL)
        out.writerows(complete_rows)
    print(f"Journal abbreviation data saved as '{filename}'")


def main():
    """Fetch the UBC abbreviation dump, parse it and store it as CSV.

    The run ends quietly, without writing anything, when the download yields
    nothing or when parsing produces no entries.
    """
    page = fetch_data("https://journal-abbreviations.library.ubc.ca/dump.php")
    if not page:
        return

    abbreviations = parse_html(page)
    if not abbreviations:
        return

    save_file(abbreviations, file_out)

# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()