52 lines
1.4 KiB
Python
52 lines
1.4 KiB
Python
import csv
|
|
import json
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Output CSV path handed to save_file() by main(). It is relative, so it
# resolves against the current working directory of the process.
file_out = "journals/journal_abbreviations_ubc.csv"
|
|
|
|
|
|
def fetch_data(url, timeout=30):
    """Fetch *url* and return the ``"html"`` field of its wrapped JSON body.

    The response text is treated as a JSONP-style wrapper: whatever lies
    between the first ``(`` and the last ``)`` is parsed as JSON, and the
    value stored under the ``"html"`` key is returned.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block forever on a stalled connection.

    Returns:
        The value of the ``"html"`` key, or ``None`` when the request fails
        or the payload does not have the expected shape. In the failure case
        the error is printed to stdout.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Strip the wrapper: keep the text between the first "(" and the
        # last ")". A body without "(" raises IndexError here.
        json_data = response.text.split("(", 1)[1].rsplit(")", 1)[0]
        return json.loads(json_data)["html"]
    except (
        requests.RequestException,  # connection, timeout, HTTP status errors
        IndexError,  # no "(" in the body, so there is nothing to unwrap
        KeyError,  # JSON object has no "html" key
        TypeError,  # JSON top level is not an object
        ValueError,  # malformed JSON (json.JSONDecodeError subclasses this)
    ) as e:
        print("Error:", e)
        return None
|
|
|
|
|
|
def parse_html(html_content):
    """Collect two-cell table rows from *html_content* into a sorted dict.

    Each ``<tr>`` that contains exactly two ``<td>`` cells contributes one
    entry: the stripped text of the second cell is the key and the stripped
    text of the first cell is the value. Rows with any other number of cells
    are skipped, and when a key repeats the last row wins.

    Returns:
        A dict whose entries are inserted in ascending key order.
    """
    soup = BeautifulSoup(html_content, "html.parser")

    pairs = {}
    for row in soup.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) != 2:
            continue
        first_cell, second_cell = cells
        pairs[second_cell.get_text(strip=True)] = first_cell.get_text(strip=True)

    # Keys are unique, so ordering by key matches sorting the (key, value) pairs.
    return {key: pairs[key] for key in sorted(pairs)}
|
|
|
|
|
|
def save_file(data, filename):
    """Write the non-empty entries of *data* to *filename* as a quoted CSV.

    The file is opened for writing (replacing any previous content) as UTF-8.
    Each ``key, value`` pair of *data* becomes one comma-separated row with
    every field quoted. Pairs whose key or value is empty are left out.
    A confirmation message is printed once writing is done.

    Args:
        data: Mapping whose items are written as two-column rows.
        filename: Path of the CSV file to create or overwrite.
    """
    with open(filename, "w", newline="", encoding="utf-8") as out:
        # Drop pairs with an empty key or value so no blank rows are written.
        rows = (
            [name, abbrev]
            for name, abbrev in data.items()
            if name and abbrev
        )
        csv.writer(out, delimiter=",", quoting=csv.QUOTE_ALL).writerows(rows)

    print(f"Journal abbreviation data saved as '{filename}'")
|
|
|
|
|
|
def main():
    """Fetch the UBC dump, parse it, and save the result to ``file_out``.

    Stops quietly, writing nothing, when the download yields no content or
    when parsing yields no entries.
    """
    url = "https://journal-abbreviations.library.ubc.ca/dump.php"

    html_content = fetch_data(url)
    if not html_content:
        return

    parsed_data = parse_html(html_content)
    if not parsed_data:
        return

    save_file(parsed_data, file_out)
|
|
|
|
|
|
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|