import os
import csv
from dotenv import load_dotenv
from openai import OpenAI
from textwrap import dedent
# Import Pydantic for data validation and typing
from pydantic import BaseModel, Field
from typing import List
Generate Anki Flashcards
Setup
Instructions:
- Create a new virtual environment:
python -m venv .venv
- Activate the environment:
source .venv/bin/activate
- Install the dependencies:
pip install -r requirements.txt
- Create a `.env` file and add your OpenAI API key:
OPENAI_API_KEY=<your-openai-api-key>
OpenAI client and pydantic models
load_dotenv()
True
If you want to see the API key, you can uncomment the following line:
# print(os.getenv("OPENAI_API_KEY"))
Initialize OpenAI client:
# Module-level OpenAI client; the no-argument constructor is used as-is, so
# credentials come from the environment (see the `.env` setup step).
client = OpenAI()
Define the AnkiFlashcard and AnkiDeck pydantic models:
- AnkiFlashcard: Represents a single flashcard with a question, answer, and tags
- AnkiDeck: Represents a collection of flashcards with a deck name
class AnkiFlashcard(BaseModel):
    """
    Model representing a single Anki flashcard with question, answer, and tags.
    """
    # All three fields are required (`...` marks a field with no default).
    # The descriptions are attached to the fields as schema metadata.
    question: str = Field(..., description="The front side of the flashcard containing the question")
    answer: str = Field(..., description="The back side of the flashcard containing the answer")
    tags: List[str] = Field(..., description="List of tags associated with the flashcard")
class AnkiDeck(BaseModel):
    """
    Model representing a complete Anki deck containing multiple flashcards.
    """
    # Both fields are required (`...` marks a field with no default).
    cards: List[AnkiFlashcard] = Field(..., description="List of flashcards in the deck")
    deck_name: str = Field(..., description="Name of the Anki deck")
Function to generate structured flashcards
The function takes a text input and generates a structured deck of Anki flashcards using the OpenAI API:
- Validates that the requested number of cards is at least 1
- Makes an API call to OpenAI with:
- System prompt that defines the flashcard creation task
- User prompt containing the input text
- Response format set to our AnkiDeck Pydantic model
- Returns the parsed response as a validated AnkiDeck object
def generate_deck(text: str, deck_name: str, num_cards: int = 5, model: str = "gpt-4o-mini") -> AnkiDeck:
    """
    Generate structured flashcards using GPT-4o or GPT-4o-mini with enforced Pydantic model output.

    Args:
        text (str): The input text to generate flashcards from
        deck_name (str): Name for the Anki deck
        num_cards (int): Number of flashcards to generate (default: 5)
        model (str): Name of the OpenAI chat model to call (default: "gpt-4o-mini")

    Returns:
        AnkiDeck: A structured deck of flashcards with validation

    Raises:
        ValueError: If num_cards is less than 1
        RuntimeError: If the response contains no parsed deck (e.g. the model refused)
    """
    # Validate input before spending an API call
    if num_cards < 1:
        raise ValueError("Number of cards must be at least 1")

    # Make API call with structured output format; passing the Pydantic model
    # as `response_format` asks the SDK to parse the reply into an AnkiDeck.
    completion = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system",
             "content": dedent(f"""
                You are an expert at creating Anki flashcards. Your task is to:
                1. Read the provided text
                2. Create {num_cards} Anki flashcards that cover the main concepts
                3. Add relevant tags to each flashcard
                4. Structure the output as an Anki deck with the name "{deck_name}".""")
             },
            {"role": "user",
             "content": f"Please create Anki flashcards for the following text: {text}"
             }
        ],
        response_format=AnkiDeck,
    )

    message = completion.choices[0].message
    # `parsed` is empty when the model declines to answer; fail loudly here
    # instead of handing callers a None that is annotated as an AnkiDeck.
    if message.parsed is None:
        refusal = getattr(message, "refusal", None)
        raise RuntimeError(f"Model returned no parsed deck (refusal: {refusal!r})")

    # Return the parsed response
    return message.parsed
Test the function
We'll first test the function with the baroque essay:
# Load the baroque essay as text. The encoding is explicit so the read does
# not depend on the platform's default locale.
with open("assets/essays/baroque.md", "r", encoding="utf-8") as file:
    baroque = file.read()
Print the first 5 lines of the baroque essay:
print('\n'.join(baroque.splitlines()[:5]))
# The Baroque Era: Birth of Musical Drama (1600-1750)
The Baroque era represents one of the most transformative periods in Western musical history, marked by dramatic innovations in both compositional technique and musical expression. This period witnessed the birth of opera, the development of instrumental programs, and the establishment of musical practices that would influence composers for centuries to come.
## The Birth of Opera and Dramatic Expression
Call the function with the baroque essay and print the resulting deck. We are using the `gpt-4o-mini` model to generate a deck containing 5 flashcards.
# Generate a 5-card deck named "Baroque Period" from the essay text.
baroque_deck = generate_deck(baroque, "Baroque Period", num_cards=5, model="gpt-4o-mini")
baroque_deck
AnkiDeck(cards=[AnkiFlashcard(question='What significant musical genre was born during the Baroque era?', answer='Opera was born during the Baroque era, particularly through the efforts of the Camerata in Florence.', tags=['Baroque Era', 'Opera', 'Musical Innovation']), AnkiFlashcard(question='Which composition is considered the first masterpiece of opera?', answer="Monteverdi's 'L'Orfeo' (1607) is regarded as the first masterpiece of opera.", tags=['Baroque Era', "L'Orfeo", 'Opera']), AnkiFlashcard(question='What musical practice was established during the Baroque era to support harmonic structure?', answer='The practice of basso continuo was established, providing a bass line with chord symbols as the harmonic foundation.', tags=['Baroque Era', 'Basso Continuo', 'Harmony']), AnkiFlashcard(question='Who are two significant composers associated with the Baroque period?', answer='J.S. Bach and Jean-Baptiste Lully were significant composers of the Baroque period.', tags=['Baroque Era', 'J.S. Bach', 'Jean-Baptiste Lully']), AnkiFlashcard(question="What impact did the Baroque era have on Western music's future?", answer='The Baroque era established functional harmony and important musical forms that influenced Classical and Romantic music.', tags=['Baroque Era', 'Musical Legacy', 'Functional Harmony'])], deck_name='Baroque Period')
You can also access the individual cards in the deck:
baroque_deck.cards
[AnkiFlashcard(question='What significant musical genre was born during the Baroque era?', answer='Opera was born during the Baroque era, particularly through the efforts of the Camerata in Florence.', tags=['Baroque Era', 'Opera', 'Musical Innovation']),
AnkiFlashcard(question='Which composition is considered the first masterpiece of opera?', answer="Monteverdi's 'L'Orfeo' (1607) is regarded as the first masterpiece of opera.", tags=['Baroque Era', "L'Orfeo", 'Opera']),
AnkiFlashcard(question='What musical practice was established during the Baroque era to support harmonic structure?', answer='The practice of basso continuo was established, providing a bass line with chord symbols as the harmonic foundation.', tags=['Baroque Era', 'Basso Continuo', 'Harmony']),
AnkiFlashcard(question='Who are two significant composers associated with the Baroque period?', answer='J.S. Bach and Jean-Baptiste Lully were significant composers of the Baroque period.', tags=['Baroque Era', 'J.S. Bach', 'Jean-Baptiste Lully']),
AnkiFlashcard(question="What impact did the Baroque era have on Western music's future?", answer='The Baroque era established functional harmony and important musical forms that influenced Classical and Romantic music.', tags=['Baroque Era', 'Musical Legacy', 'Functional Harmony'])]
Print all the cards in the deck:
# Print every card followed by a separator line so cards are easy to tell apart.
for card in baroque_deck.cards:
    print(f"Question: {card.question}")
    print(f"Answer: {card.answer}")
    print(f"Tags: {', '.join(card.tags)}")
    print("-" * 20)
Question: What significant musical genre was born during the Baroque era?
Answer: Opera was born during the Baroque era, particularly through the efforts of the Camerata in Florence.
Tags: Baroque Era, Opera, Musical Innovation
--------------------
Question: Which composition is considered the first masterpiece of opera?
Answer: Monteverdi's 'L'Orfeo' (1607) is regarded as the first masterpiece of opera.
Tags: Baroque Era, L'Orfeo, Opera
--------------------
Question: What musical practice was established during the Baroque era to support harmonic structure?
Answer: The practice of basso continuo was established, providing a bass line with chord symbols as the harmonic foundation.
Tags: Baroque Era, Basso Continuo, Harmony
--------------------
Question: Who are two significant composers associated with the Baroque period?
Answer: J.S. Bach and Jean-Baptiste Lully were significant composers of the Baroque period.
Tags: Baroque Era, J.S. Bach, Jean-Baptiste Lully
--------------------
Question: What impact did the Baroque era have on Western music's future?
Answer: The Baroque era established functional harmony and important musical forms that influenced Classical and Romantic music.
Tags: Baroque Era, Musical Legacy, Functional Harmony
--------------------
Write deck to a CSV file
We'll create a function to write the deck to a CSV file. The function takes an AnkiDeck object and a path to save the CSV file. It ensures the directory exists and writes the deck to a CSV file with the following columns: Question, Answer, Tags.
def write_deck_to_csv(deck: AnkiDeck, output_path: str) -> None:
    """
    Save an Anki deck to a CSV file.

    The file gets a header row (Question, Answer, Tags) followed by one row
    per flashcard; a card's tags are joined into a single ", "-separated cell.

    Args:
        deck (AnkiDeck): The deck of flashcards to save
        output_path (str): Path where the CSV file should be saved
    """
    # Ensure the directory exists. A bare filename has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create it when present.
    directory = os.path.dirname(output_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        # Write header row
        writer.writerow(['Question', 'Answer', 'Tags'])
        # Write each flashcard as a row in the CSV
        for card in deck.cards:
            writer.writerow([card.question, card.answer, ', '.join(card.tags)])

    print(f"Successfully saved deck '{deck.deck_name}' to {output_path}")
Save the baroque deck to a CSV file:
# Write the baroque deck to the flashcards output directory.
write_deck_to_csv(baroque_deck, 'assets/flashcards/baroque-flashcards.csv')
Successfully saved deck 'Baroque Period' to assets/flashcards/baroque-flashcards.csv
Process all markdown files in essays
Now we'll process all the markdown files in the `essays` directory and generate flashcards for each one.
For simplicity, we will loop over all files. This could be made more efficient by using a function that takes a directory as an argument and returns a list of decks.
First, we'll import the glob module for finding pathnames matching a pattern.
import glob
Find all the markdown files in the `essays` directory:
# Collect every markdown essay; glob returns paths in no guaranteed order.
essay_files = glob.glob("assets/essays/*.md")
essay_files
['assets/essays/classical.md',
'assets/essays/modern.md',
'assets/essays/baroque.md',
'assets/essays/romantic.md']
Now we'll loop over all the files and generate flashcards for each one.
# Generate and save one 5-card deck per essay file.
for essay_file in essay_files:
    # Get the base filename without extension to use as deck name.
    # splitext removes only the trailing extension, whereas replace('.md', '')
    # would also strip a ".md" that appears elsewhere in the name.
    base_name = os.path.splitext(os.path.basename(essay_file))[0]
    deck_name = base_name.replace('-', ' ').title()

    print(f"Processing {deck_name}...")
    # Read the essay content (explicit encoding, independent of locale)
    with open(essay_file, "r", encoding="utf-8") as file:
        content = file.read()

    # Generate deck
    deck = generate_deck(content, deck_name, num_cards=5, model="gpt-4o-mini")

    # Save to CSV, named after the source essay
    output_path = f'assets/flashcards/{base_name}-flashcards.csv'

    write_deck_to_csv(deck, output_path)
    print(f"Saved flashcards to {output_path}")
Processing Classical...
Successfully saved deck 'Classical' to assets/flashcards/classical-flashcards.csv
Saved flashcards to assets/flashcards/classical-flashcards.csv
Processing Modern...
Successfully saved deck 'Modern' to assets/flashcards/modern-flashcards.csv
Saved flashcards to assets/flashcards/modern-flashcards.csv
Processing Baroque...
Successfully saved deck 'Baroque' to assets/flashcards/baroque-flashcards.csv
Saved flashcards to assets/flashcards/baroque-flashcards.csv
Processing Romantic...
Successfully saved deck 'Romantic' to assets/flashcards/romantic-flashcards.csv
Saved flashcards to assets/flashcards/romantic-flashcards.csv