38 lines
1.4 KiB
Python
Executable File
38 lines
1.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import pandas as pd
|
|
import csv
|
|
import requests
|
|
from io import StringIO
|
|
|
|
# Remote MathSciNet serials list and the local CSV of mathematics abbreviations.
file_in = "https://mathscinet.ams.org/msnhtml/annser.csv"
file_out = "journals/journal_abbreviations_mathematics.csv"

# Browser-like request headers (mimic a desktop Chrome client).
headers = {
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
}

# Always pass a timeout: requests waits indefinitely by default, so a stalled
# server would otherwise hang this script forever instead of failing.
response = requests.get(file_in, headers=headers, timeout=30)

# Anything other than a plain 200 is treated as a failed download.
if response.status_code != 200:
    raise Exception(f"Failed to fetch the file. Status code: {response.status_code}")

# Parse only the first two CSV columns, drop rows with a missing value, and
# fix the column order to (full title, abbreviation).
df_new = pd.read_csv(StringIO(response.text), usecols=[0, 1]).dropna()[["Full Title", "Abbrev"]]
|
|
|
|
# Load the previously saved mathematics list; the file has no header row, so
# the column names are supplied explicitly.
df_old = pd.read_csv(
    file_out,
    sep=",",
    escapechar="\\",
    header=None,
    names=["Full Title", "Abbrev"],
)

# Stack the fresh rows on top of the stored ones, drop exact duplicate rows,
# then order by journal title (abbreviation breaks ties).
df = pd.concat([df_new, df_old], axis=0)
df = df.drop_duplicates()
df = df.sort_values(by=["Full Title", "Abbrev"])

# Keep only rows whose abbreviation differs from the title, ignoring case.
title_lower = df["Full Title"].str.lower()
abbrev_lower = df["Abbrev"].str.lower()
df = df[title_lower != abbrev_lower]

# Overwrite the old file in place: every field quoted, no header, no index.
df.to_csv(
    file_out,
    sep=",",
    escapechar="\\",
    index=False,
    header=False,
    quoting=csv.QUOTE_ALL,
)
|