38 lines
1.4 KiB
Python
Executable File

#!/usr/bin/env python3
import pandas as pd
import csv
import requests
from io import StringIO
file_in = "https://mathscinet.ams.org/msnhtml/annser.csv"
file_out = "journals/journal_abbreviations_mathematics.csv"
# set headers to mimic browser request
headers = {
'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
}
response = requests.get(file_in, headers=headers)
if response.status_code == 200:
df_new = pd.read_csv(StringIO(response.text), usecols=[0, 1]).dropna()[["Full Title", "Abbrev"]]
else:
raise Exception(f"Failed to fetch the file. Status code: {response.status_code}")
# Get our last mathematics data file
df_old = pd.read_csv(file_out, sep=",", escapechar="\\",
header=None, names=["Full Title", "Abbrev"])
# Concatenate, remove duplicates and sort by journal name
df = pd.concat([df_new, df_old], axis=0).drop_duplicates(
).sort_values(by=["Full Title", "Abbrev"])
# Remove values where journal name is equal to abbreviation
df = df[df["Full Title"].str.lower() != df["Abbrev"].str.lower()]
# Save the end file in the same path as the old one
df.to_csv(file_out, sep=",", escapechar="\\", index=False, header=False, quoting=csv.QUOTE_ALL)