Skip to content

Commit

Permalink
change structure of RawEntryType completely, improves RAM usage for indirect mode
Browse files Browse the repository at this point in the history
  • Loading branch information
ilius committed Dec 6, 2024
1 parent eda901a commit bcf4e11
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 27 deletions.
11 changes: 9 additions & 2 deletions pyglossary/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ def getRawEntrySortKey(
key: Callable[[bytes], Any],
) -> Callable[[RawEntryType], Any]:
def newKey(x: RawEntryType) -> Any:
# x is rawEntry, so x[0] is list[str]: list of words (entry.l_word)
return key(x[0]) # type: ignore
# x is rawEntry, so x[2:] is list[bytes]: list of words in bytes
return key([b.decode("utf-8") for b in x[2:]]) # type: ignore

return newKey

Expand Down Expand Up @@ -262,6 +262,13 @@ def l_word(self) -> list[str]:
return [self._word]
return self._word

@property
def lb_word(self) -> list[bytes]:
    """Return the word and all its alternate words, each encoded as UTF-8 bytes."""
    words = self._word
    if not isinstance(words, str):
        return [item.encode("utf-8") for item in words]
    # A lone string is a single word; wrap it in a one-element list.
    return [words.encode("utf-8")]

@property
def defi(self) -> str:
"""Returns string of definition."""
Expand Down
12 changes: 8 additions & 4 deletions pyglossary/glossary_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from collections.abc import (
Callable,
Iterator,
Sequence,
)

# -*- coding: utf-8 -*-
Expand All @@ -30,10 +31,10 @@

MultiStr: TypeAlias = "str | list[str]"

# different types in order:
# - without defiFormat
# - with defiFormat
RawEntryType: TypeAlias = tuple[list[str], bytes] | tuple[list[str], bytes, str]
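# rawEntry[0]: defiFormat encoded as ASCII bytes; b"" means the default format
# rawEntry[1]: b_defi (for data entries: the saved temp file path, UTF-8-encoded)
# rawEntry[2:]: b_word_list, the word and its alternates, each UTF-8-encoded
#     (for data entries: the single file name)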
RawEntryType: TypeAlias = Sequence[bytes]


class EntryType(typing.Protocol): # noqa: PLR0904
Expand All @@ -56,6 +57,9 @@ def s_word(self) -> str: ...
@property
def l_word(self) -> list[str]: ...

# The word and all alternate words as bytes; bytes counterpart of l_word.
@property
def lb_word(self) -> list[bytes]: ...

@property
def defi(self) -> str: ...

Expand Down
31 changes: 13 additions & 18 deletions pyglossary/glossary_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,11 +241,7 @@ def _dataEntryToRaw(self, entry: DataEntry) -> RawEntryType:
b_fpath = b""
if self.tmpDataDir:
b_fpath = entry.save(self.tmpDataDir).encode("utf-8")
return (
[entry.getFileName()],
b_fpath,
"b",
)
return (b"b", b_fpath, entry.getFileName().encode("utf-8"))

def _entryToRaw(self, entry: EntryType) -> RawEntryType:
"""
Expand All @@ -256,22 +252,21 @@ def _entryToRaw(self, entry: EntryType) -> RawEntryType:
return self._dataEntryToRaw(cast("DataEntry", entry))

defiFormat = entry.defiFormat
if defiFormat and defiFormat != self._defaultDefiFormat:
return (entry.l_word, entry.b_defi, defiFormat)
return (entry.l_word, entry.b_defi)
if defiFormat is None or defiFormat == self._defaultDefiFormat:
defiFormat = ""

return [defiFormat.encode("ascii"), entry.b_defi] + entry.lb_word

def _entryFromRaw(self, rawEntry: RawEntryType) -> EntryType:
word = rawEntry[0]
defiFormat = rawEntry[0].decode("ascii") or self._defaultDefiFormat
defi = rawEntry[1].decode("utf-8")
if len(rawEntry) > 2: # noqa: PLR2004
defiFormat = rawEntry[2]
if defiFormat == "b":
fname = word
if isinstance(fname, list):
fname = fname[0] # NESTED 4
return DataEntry(fname, tmpPath=defi) # pyright: ignore[reportReturnType]
else:
defiFormat = self._defaultDefiFormat
word = [b.decode("utf-8") for b in rawEntry[2:]]

if defiFormat == "b":
fname = word
if isinstance(fname, list):
fname = fname[0] # NESTED 4
return DataEntry(fname, tmpPath=defi) # pyright: ignore[reportReturnType]

return Entry(word, defi, defiFormat=defiFormat) # pyright: ignore[reportReturnType]

Expand Down
5 changes: 2 additions & 3 deletions pyglossary/sq_entry_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import logging
import os
from os.path import isfile
from pickle import dumps, loads
from typing import TYPE_CHECKING

from .glossary_utils import Error
Expand Down Expand Up @@ -116,10 +115,10 @@ def __len__(self) -> int:
return self._len

def _encode(self, entry: EntryType) -> bytes:
return dumps(self._entryToRaw(entry))
return b"\x00".join(self._entryToRaw(entry))

def _decode(self, data: bytes) -> EntryType:
return self._entryFromRaw(loads(data))
return self._entryFromRaw(data.split(b"\x00"))

def append(self, entry: EntryType) -> None:
self._cur.execute(
Expand Down

0 comments on commit bcf4e11

Please sign in to comment.