SoftwareQuality/jabref/scripts/bib-file-generator.py

# Creates "large-library.bib" with 100k entries.

# The file is written in UTF8 and makes use of the unicode character U+0304 (https://www.compart.com/en/unicode/U+0304)
# to create an overline on large roman numbers using the technicue "Vinculum" (https://en.wikipedia.org/wiki/Roman_numerals#Vinculum).
# The numbers are used in the journal title.

# For pseudonymization BibTeX files, org.jabref.logic.pseudonymization.PseudonymizationTest#pseudonymizeLibraryFiley can be used.

number_of_entries = 100_000

# Adapted from: https://stackoverflow.com/a/50012689/873282
def int_to_roman(num):
    _values = [
        1000000, 900000, 500000, 400000, 100000, 90000, 50000, 40000, 10000, 9000, 5000, 4000, 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1]

    _strings = [
        'M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', "M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I"]

    result = ""
    decimal = num

    while decimal > 0:
        for i in range(len(_values)):
            if decimal >= _values[i]:
                if _values[i] > 1000:
                    result += u'\u0304'.join(list(_strings[i])) + u'\u0304'
                else:
                    result += _strings[i]
                decimal -= _values[i]
                break
    return result

with open("generated-large-library.bib", 'w', encoding='utf-8') as file:
    for i in range(1, number_of_entries + 1):
        year = 1900 + (i - 1) % (2025 - 1900)
        entry = f"""@article{{id{i:06d},
  title   = {{This is my title{i}}},
  author  = {{FirstnameA{i} LastnameA{i} and FirstnameB{i} LastnameB{i} and FirstnameC{i} LastnameC{i}}},
  journal = {{Journal Title {int_to_roman(i)}}},
  volume  = {{{i}}},
  year    = {{{year}}},
}}

"""
        file.write(entry)