Skip to content

Commit

Permalink
add support for anki
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Oct 3, 2024
1 parent 853a010 commit 8cb263b
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 3 deletions.
29 changes: 29 additions & 0 deletions docs/formats/anki.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
This page describes how to convert Anki cards to Markdown.

## General Information

- [Website](https://apps.ankiweb.net/)
- Typical extension: `.apkg`

## Instructions

1. Export as described [at the wiki](https://docs.ankiweb.net/exporting.html)
    1. Choose "Deck (.apkg)"
2. [Install jimmy](../index.md#installation)
3. Convert to Markdown. Example: `jimmy-cli-linux MEILLEUR_DECK_ANGLAIS_3000.apkg --format anki`
4. [Import to your app](../import_instructions.md)

## Import Structure

- Decks are converted to folders.
- Cards are converted to Markdown files. The content is the "answer" data.
- Referenced resources (audio, images and other files) are converted.

A converted page looks like:

![](../images/anki_markdown_example.png)

## Known Limitations

- Nested decks are not tested and most likely not working.
- HTML formatting is lost. It's too complex to cover all HTML templates properly.
Binary file added docs/images/anki_markdown_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ nav:
- index.md
- Formats:
- Default Import: formats/default.md
- Anki: formats/anki.md
- Bear: formats/bear.md
- Cacher: formats/cacher.md
- CherryTree: formats/cherrytree.md
Expand Down Expand Up @@ -86,7 +87,7 @@ nav:
- Additional features:
- Filters: additional_features/filters.md
- Show the Note Tree: additional_features/show_note_tree.md
- Miscellaneous: miscellaneous.md
- Miscellaneous: additional_features/miscellaneous.md
- Contributing:
- How to Contribute?: contributing/contributing.md
- More Note Apps: contributing/more_note_apps.md
2 changes: 1 addition & 1 deletion src/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def prepare_input(self, input_: Path) -> Path:
return common.get_single_child_folder(temp_folder)
case ".jex" | ".tgz" | ".tar.gz":
return common.extract_tar(input_)
case ".nsx" | ".zip" | ".zkn3":
case ".apkg" | ".nsx" | ".zip" | ".zkn3":
return common.extract_zip(input_)
case _: # ".textbundle", folder
return input_
Expand Down
163 changes: 163 additions & 0 deletions src/formats/anki.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""Convert Anki cards to the intermediate format."""

from pathlib import Path
import json
import re
import sqlite3

import converter
import intermediate_format as imf


# Match a complete "<img ...>" tag and capture the value of its "src" attribute.
# - "[^\"]*" keeps the filename from running across quotes into other
#   attributes or following tags.
# - Additional attributes before/after "src" and all closing variants
#   (">", " >", "/>", " />") are accepted.
IMAGE_RE = re.compile(r'(<img\s+(?:[^>]*?\s)?src="([^"]*)"[^>]*>)')
# Match a "[<prefix>:<filename>]" reference, for example "[sound:foo.mp3]".
# Brackets are excluded from both parts, so a match can never span several
# bracket groups of ordinary text like "[a] b: c [d]".
SOUND_RE = re.compile(r"(\[[^\[\]:]*:([^\[\]]*)\])")


def get_images(body: str) -> list[tuple[str, str]]:
    """Find all image tags in the given text.

    Returns a list of tuples "(complete tag, filename from the src attribute)".

    >>> get_images('<img src="awake-55ab4bc5f5.jpg">')
    [('<img src="awake-55ab4bc5f5.jpg">', 'awake-55ab4bc5f5.jpg')]
    >>> get_images('<img src="prepositions_14.jpg" />')
    [('<img src="prepositions_14.jpg" />', 'prepositions_14.jpg')]
    >>> get_images('<img src="a.jpg" class="x"> and <img src="b.jpg"/>')
    [('<img src="a.jpg" class="x">', 'a.jpg'), ('<img src="b.jpg"/>', 'b.jpg')]
    """
    return IMAGE_RE.findall(body)


def get_sounds(body: str) -> list[tuple[str, str]]:
    """Find all bracketed file references (like sounds) in the given text.

    Returns a list of tuples "(complete reference, filename)".

    >>> get_sounds("[sound:rec1430907056.mp3]")
    [('[sound:rec1430907056.mp3]', 'rec1430907056.mp3')]
    >>> get_sounds("[a] b: c [d]")
    []
    """
    return SOUND_RE.findall(body)


class Converter(converter.BaseConverter):
    """Convert the content of an Anki deck package to the intermediate format.

    Decks are mapped to notebooks. Each Anki note is mapped to one note whose
    body is a Markdown list of the note's fields.
    """

    accepted_extensions = [".apkg"]

    def convert(self, file_or_folder: Path):
        """Read the Anki database below "self.root_path" and fill the root notebook.

        Args:
            file_or_folder: Not used by this converter. All data is read from
                "self.root_path" (presumably the extracted ".apkg" archive;
                verify against the base class).

        Returns:
            None. An error is logged and the conversion is aborted if there is
            no note database or if it doesn't contain any collection.
        """
        # TODO
        # pylint: disable=too-many-locals

        # Prefer the newer database if both are available.
        for db_name in ("collection.anki21", "collection.anki2"):
            db_file = self.root_path / db_name
            if db_file.is_file():
                break
        else:
            self.logger.error("Couldn't find note database.")
            return

        media_dict = json.loads((self.root_path / "media").read_text(encoding="utf-8"))
        # switch keys and values, because we need the names in the notes later
        media_dict = {v: k for k, v in media_dict.items()}

        # Fetch everything at once and close the connection in any case.
        # An open connection would keep the database file locked.
        conn = sqlite3.connect(db_file)
        try:
            cur = conn.cursor()
            # collection
            # https://github.com/ankidroid/Anki-Android/wiki/Database-Structure#collection
            collection = cur.execute("select ver, models, decks from col").fetchone()
            # cards
            # https://github.com/ankidroid/Anki-Android/wiki/Database-Structure#cards
            card_rows = cur.execute("select nid, did from cards").fetchall()
            # notes
            # https://github.com/ankidroid/Anki-Android/wiki/Database-Structure#notes
            note_rows = cur.execute(
                "select id, guid, mid, mod, tags, flds from notes"
            ).fetchall()
        finally:
            conn.close()

        if collection is None:
            self.logger.error("Couldn't find a collection in the note database.")
            return
        version, models_json, decks_json = collection
        if version != 11:
            self.logger.warning(f"Only tested with version 11. Got version {version}")

        # models
        # https://github.com/ankidroid/Anki-Android/wiki/Database-Structure#models-jsonobjects
        models = json.loads(models_json)

        # decks
        # https://github.com/ankidroid/Anki-Android/wiki/Database-Structure#decks-jsonobjects
        decks = json.loads(decks_json)
        # TODO: nested decks
        notebooks_by_deck_id = {}
        for deck_id, deck in decks.items():
            notebook = imf.Notebook(deck["name"], original_id=str(deck_id))
            self.root_notebook.child_notebooks.append(notebook)
            notebooks_by_deck_id[str(deck_id)] = notebook

        # If a note has cards in multiple decks, the last card wins.
        note_deck_id_map = {
            str(note_id): str(deck_id) for note_id, deck_id in card_rows
        }

        for note_index, (
            note_id,
            guid,
            model_id,
            updated,
            tags,
            data,
        ) in enumerate(note_rows):
            model = models[str(model_id)]
            # The field values are stored in a single string,
            # separated by the "unit separator" character.
            template_replacements = dict(
                zip([f["name"] for f in model["flds"]], data.split("\x1f"))
            )

            # TODO: Templates are too complex for pandoc conversion.
            # Just take the replacements for now. A complete solution would
            # fill the model templates ("qfmt" and "afmt" of model["tmpls"])
            # with the replacements, prepend model["css"] and convert the
            # resulting HTML to Markdown.
            body_md = "\n".join(
                [f"- {key}: {value}" for key, value in template_replacements.items()]
            )
            # cleanup
            body_md = (
                body_md.replace("<br>\n", "\n")
                .replace("&nbsp;", " ")
                .replace("<div>", "")
                .replace("</div>", "")
            )

            # find images, sounds and other attachments
            resources = []
            for text, filename_note in get_images(body_md) + get_sounds(body_md):
                filename_media = media_dict.get(filename_note)
                if filename_media is None:
                    # The match is not a packaged file. It could be usual text
                    # in brackets or a reference to a missing file.
                    self.logger.warning(
                        f'Couldn\'t find resource "{filename_note}". Skipping.'
                    )
                    continue
                resources.append(
                    imf.Resource(
                        self.root_path / filename_media,
                        text,
                        filename_note,
                    )
                )

            note_imf = imf.Note(
                # TODO: Anki doesn't have note names. Find a robust note name.
                # The index is a bit better readable than the original_id.
                f"note_{note_index:010}",
                body_md,
                original_id=str(guid),
                # According to the database documentation linked above, the
                # note ID is the creation time (epoch milliseconds).
                created=note_id,
                updated=updated,
                resources=resources,
                tags=[imf.Tag(t) for t in tags.strip().split(" ") if t],
            )

            # Notes without (known) deck are attached to the root notebook.
            parent_notebook = notebooks_by_deck_id.get(
                note_deck_id_map.get(str(note_id))
            )
            if parent_notebook is None:
                parent_notebook = self.root_notebook
            parent_notebook.child_notes.append(note_imf)

        # Don't export empty notebooks
        self.root_notebook.child_notebooks = [
            nb for nb in self.root_notebook.child_notebooks if not nb.is_empty()
        ]
4 changes: 4 additions & 0 deletions src/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ def safe_path(path: Path | str) -> Path | str:
if safe_name in forbidden_names:
safe_name += "_"

# Limit filename to 200 characters
# https://serverfault.com/a/9548
safe_name = safe_name[:200]

return safe_name if isinstance(path, str) else path.with_name(safe_name)


Expand Down
2 changes: 1 addition & 1 deletion test/data
Submodule data updated 322 files
3 changes: 3 additions & 0 deletions test/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ def compare_dirs(dir1: Path, dir2: Path):

@parameterized.expand(
[
[["anki/test_1/MEILLEUR_DECK_ANGLAIS_3000.apkg"]],
[["anki/test_2/Ukrainian_Prepositions_pictsaudio_ENG-UA__UA-ENG.apkg"]],
[["anki/test_3/Hebrew_Alphabet_with_vowels.apkg"]],
[["bear/test_1/backup.bear2bk"]],
[["bear/test_2/backup-2.bear2bk"]],
[["cacher/test_1/cacher-export-202406182304.json"]],
Expand Down

0 comments on commit 8cb263b

Please sign in to comment.