Merge multiple exported collections from Zotero into a single one
Info
import os
import bibtexparser
def load_bib_file(file_path):
    """Parse the BibTeX file at ``file_path`` and return the parsed database.

    The file is opened as UTF-8 text and handed to ``bibtexparser.load``.
    The whole database object is returned (not just its entries); the merge
    code in this script reads and reassigns its ``entries`` attribute.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return bibtexparser.load(handle)
def merge_bib_databases(bib_db1, bib_db2):
    """Combine the entries of two bib databases, skipping repeated IDs.

    Every entry of ``bib_db1`` is kept, in order. Entries of ``bib_db2`` are
    appended afterwards, but only when their ``'ID'`` has not been seen yet
    (either in ``bib_db1`` or earlier in ``bib_db2``). Neither input's
    ``entries`` list is modified; a new list is returned.
    """
    combined = bib_db1.entries.copy()

    # IDs already present in the result; used for O(1) duplicate checks.
    seen_ids = set()
    for record in bib_db1.entries:
        seen_ids.add(record['ID'])

    for candidate in bib_db2.entries:
        key = candidate['ID']
        if key in seen_ids:
            continue  # first occurrence wins
        seen_ids.add(key)
        combined.append(candidate)

    return combined
def save_merged_bib(merged_entries, output_file):
    """Write ``merged_entries`` to ``output_file`` in BibTeX format.

    The entries are wrapped in a fresh ``BibDatabase`` and serialised with
    ``bibtexparser.dump``. The output file is opened for writing as UTF-8,
    so any existing content is replaced.
    """
    database = bibtexparser.bibdatabase.BibDatabase()
    database.entries = merged_entries
    with open(output_file, mode='w', encoding='utf-8') as handle:
        bibtexparser.dump(database, handle)
def merge_multiple_bib_files(repo_path, output_file):
    """Merge every ``library*.bib`` file found in ``repo_path`` into one file.

    Input files are the directory entries whose name starts with
    ``'library'`` and ends with ``'.bib'``, excluding the one whose name
    equals the basename of ``output_file``. They are processed in sorted
    name order; on a duplicate ID the entry from the earlier file is kept.
    If no input file matches, a message is printed and nothing is written.
    """
    input_names = []
    for name in os.listdir(repo_path):
        if not name.startswith('library'):
            continue
        if not name.endswith('.bib'):
            continue
        # Never feed a previous merge result back into the merge.
        if name == os.path.basename(output_file):
            continue
        input_names.append(name)
    input_names.sort()

    if len(input_names) == 0:
        print(f'No .bib files found in {repo_path}')
        return

    # The first file seeds the accumulator; the others are folded into it.
    first_name, *remaining_names = input_names
    merged_bib_db = load_bib_file(os.path.join(repo_path, first_name))
    for name in remaining_names:
        incoming_db = load_bib_file(os.path.join(repo_path, name))
        merged_bib_db.entries = merge_bib_databases(merged_bib_db, incoming_db)

    save_merged_bib(merged_bib_db.entries, output_file)
    print(f'Merged {len(input_names)} .bib files and saved to {output_file}')
if __name__ == "__main__":
    # Directory that holds the per-collection exports (library1.bib, ...).
    repo_path = '/Users/niccolozanotti/Documents/GitHub/AI4climate.science-vault/References'
    # The merged bibliography is written into that same directory.
    output_file = f'{repo_path}/library.bib'
    merge_multiple_bib_files(repo_path, output_file)
Bibliographies:
- library1.bib: AI
- library2.bib: Climate modeling
- library3.bib: Data Assimilation
- library4.bib: BayesianNet Climate Project
Print full list of references from the merged library
import bibtexparser
def load_bib_file(file_path):
    """Parse the BibTeX file at ``file_path`` and return its entries.

    The file is opened as UTF-8 text and parsed with ``bibtexparser.load``;
    only the ``entries`` attribute of the parsed database is returned.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        parsed = bibtexparser.load(handle)
    return parsed.entries
def format_bib_entry(entry):
    """Render one bib entry as a single markdown bullet.

    Layout: ``- **author** (year). *title*.`` optionally followed by the
    journal in italics (or, when there is no journal, the publisher) and a
    DOI link. Missing author/title fall back to ``'Unknown Author'`` /
    ``'No Title'``; the year falls back to the ``date`` field and then to
    ``'n.d.'``. Curly braces are removed from the title only.
    """
    author_text = entry.get('author', 'Unknown Author')
    # Drop BibTeX protective braces so they do not show up in the markdown.
    title_text = entry.get('title', 'No Title').translate(str.maketrans('', '', '{}'))
    year_text = entry.get('year', entry.get('date', 'n.d.'))

    journal_name = entry.get('journal', '')
    publisher_name = entry.get('publisher', '')
    doi_value = entry.get('doi', '')

    pieces = [f"- **{author_text}** ({year_text}). *{title_text}*."]

    # The journal takes precedence; the publisher is only a fallback venue.
    if journal_name:
        pieces.append(f" _{journal_name}_.")
    elif publisher_name:
        pieces.append(f" {publisher_name}.")

    if doi_value:
        pieces.append(f" [DOI: {doi_value}](https://doi.org/{doi_value})")

    return "".join(pieces)
def print_markdown_references(bib_entries):
    """Print every entry of ``bib_entries`` as a markdown bullet, one per line."""
    for record in bib_entries:
        rendered = format_bib_entry(record)
        print(rendered)
def markdown_references_from_bib(file_path):
    """Print the references stored in the .bib file at ``file_path`` as markdown.

    Loads the entries with ``load_bib_file`` and passes them to
    ``print_markdown_references``.
    """
    print_markdown_references(load_bib_file(file_path))
# Example usage: print the merged library as a markdown reference list.
if __name__ == "__main__":
    # Directory containing the merged bibliography.
    repo_path = '/Users/niccolozanotti/Documents/GitHub/AI4climate.science-vault/References'
    bib_file = f'{repo_path}/library.bib'
    markdown_references_from_bib(bib_file)