From 95d133f241cb3b54ada4fdece7fd3095c9aee3e9 Mon Sep 17 00:00:00 2001
From: Saeed Rasooli <saeed.gnu@gmail.com>
Date: Sat, 7 Dec 2024 23:04:13 +0330
Subject: [PATCH] stardict: some refactoring in writer.py

---
 pyglossary/plugins/stardict/writer.py | 67 ++++++++++-----------------
 1 file changed, 24 insertions(+), 43 deletions(-)

diff --git a/pyglossary/plugins/stardict/writer.py b/pyglossary/plugins/stardict/writer.py
index b5913fb24..208ad052b 100644
--- a/pyglossary/plugins/stardict/writer.py
+++ b/pyglossary/plugins/stardict/writer.py
@@ -133,6 +133,9 @@ def open(self, filename: str) -> None:
 	def write(self) -> Generator[None, EntryType, None]:
 		from pyglossary.os_utils import runDictzip
 
+		if not isdir(self._resDir):
+			os.mkdir(self._resDir)
+
 		if self._sametypesequence:
 			if self._merge_syns:
 				yield from self.writeCompactMergeSyns(self._sametypesequence)
@@ -207,8 +210,6 @@ def writeCompact(self, defiFormat: str) -> Generator[None, EntryType, None]:
 
 		t0 = now()
 		wordCount = 0
-		if not isdir(self._resDir):
-			os.mkdir(self._resDir)
 
 		entryIndex = -1
 		while True:
@@ -221,26 +222,22 @@ def writeCompact(self, defiFormat: str) -> Generator[None, EntryType, None]:
 			entryIndex += 1
 
 			words = entry.l_word  # list of strs
-			word = words[0]  # str
-			defi = self.fixDefi(entry.defi, defiFormat)
-			# defi is str
 
 			for alt in words[1:]:
 				altIndexList.append((alt.encode("utf-8"), entryIndex))
 
-			b_dictBlock = defi.encode("utf-8")
+			b_dictBlock = self.fixDefi(entry.defi, defiFormat).encode("utf-8")
 			dictFile.write(b_dictBlock)
-			blockLen = len(b_dictBlock)
 
 			b_idxBlock = (
-				word.encode("utf-8")
+				words[0].encode("utf-8")
 				+ b"\x00"
 				+ dictMarkToBytes(dictMark)
-				+ uint32ToBytes(blockLen)
+				+ uint32ToBytes(len(b_dictBlock))
 			)
 			idxFile.write(b_idxBlock)
 
-			dictMark += blockLen
+			dictMark += len(b_dictBlock)
 			wordCount += 1
 
 			if dictMark > dictMarkMax:
@@ -277,8 +274,6 @@ def writeGeneral(self) -> Generator[None, EntryType, None]:
 		t0 = now()
 		wordCount = 0
 		defiFormatCounter: typing.Counter[str] = Counter()
-		if not isdir(self._resDir):
-			os.mkdir(self._resDir)
 
 		dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc()
 
@@ -300,26 +295,23 @@ def writeGeneral(self) -> Generator[None, EntryType, None]:
 				defiFormat = "m"
 
 			words = entry.l_word  # list of strs
-			word = words[0]  # str
-			defi = self.fixDefi(entry.defi, defiFormat)
-			# defi is str
 
 			for alt in words[1:]:
 				altIndexList.append((alt.encode("utf-8"), entryIndex))
 
-			b_dictBlock = (defiFormat + defi).encode("utf-8") + b"\x00"
+			b_defi = self.fixDefi(entry.defi, defiFormat).encode("utf-8")
+			b_dictBlock = defiFormat.encode("ascii") + b_defi + b"\x00"
 			dictFile.write(b_dictBlock)
-			blockLen = len(b_dictBlock)
 
 			b_idxBlock = (
-				word.encode("utf-8")
+				words[0].encode("utf-8")
 				+ b"\x00"
 				+ dictMarkToBytes(dictMark)
-				+ uint32ToBytes(blockLen)
+				+ uint32ToBytes(len(b_dictBlock))
 			)
 			idxFile.write(b_idxBlock)
 
-			dictMark += blockLen
+			dictMark += len(b_dictBlock)
 			wordCount += 1
 
 			if dictMark > dictMarkMax:
@@ -382,7 +374,6 @@ def writeCompactMergeSyns(
 		defiFormat - format of article definition: h - html, m - plain text
 		"""
 		log.debug(f"writeCompactMergeSyns: {defiFormat=}")
-		dictMark = 0
 
 		idxBlockList = self.newIdxList()
 		altIndexList = self.newSynList()
@@ -390,12 +381,11 @@ def writeCompactMergeSyns(
 		dictFile = open(self._filename + ".dict", "wb")
 
 		t0 = now()
-		if not isdir(self._resDir):
-			os.mkdir(self._resDir)
 
 		dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc()
 
 		entryIndex = -1
+		dictMark = 0
 		while True:
 			entry = yield
 			if entry is None:
@@ -406,19 +396,15 @@ def writeCompactMergeSyns(
 			entryIndex += 1
 
 			words = entry.l_word  # list of strs
-			word = words[0]  # str
-			defi = self.fixDefi(entry.defi, defiFormat)
-			# defi is str
 
-			b_dictBlock = defi.encode("utf-8")
+			b_dictBlock = self.fixDefi(entry.defi, defiFormat).encode("utf-8")
 			dictFile.write(b_dictBlock)
-			blockLen = len(b_dictBlock)
 
-			blockData = dictMarkToBytes(dictMark) + uint32ToBytes(blockLen)
+			blockData = dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock))
 			for word in words:
 				idxBlockList.append((word.encode("utf-8"), blockData))
 
-			dictMark += blockLen
+			dictMark += len(b_dictBlock)
 
 			if dictMark > dictMarkMax:
 				raise Error(
@@ -445,7 +431,6 @@ def writeGeneralMergeSyns(self) -> Generator[None, EntryType, None]:
 		sametypesequence option is not used.
 		"""
 		log.debug("writeGeneralMergeSyns")
-		dictMark = 0
 		idxBlockList = self.newIdxList()
 		altIndexList = self.newSynList()
 
@@ -454,12 +439,11 @@ def writeGeneralMergeSyns(self) -> Generator[None, EntryType, None]:
 		t0 = now()
 		wordCount = 0
 		defiFormatCounter: typing.Counter[str] = Counter()
-		if not isdir(self._resDir):
-			os.mkdir(self._resDir)
 
 		dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc()
 
 		entryIndex = -1
+		dictMark = 0
 		while True:
 			entry = yield
 			if entry is None:
@@ -477,19 +461,16 @@ def writeGeneralMergeSyns(self) -> Generator[None, EntryType, None]:
 				defiFormat = "m"
 
 			words = entry.l_word  # list of strs
-			word = words[0]  # str
-			defi = self.fixDefi(entry.defi, defiFormat)
-			# defi is str
 
-			b_dictBlock = (defiFormat + defi).encode("utf-8") + b"\x00"
+			b_defi = self.fixDefi(entry.defi, defiFormat).encode("utf-8")
+			b_dictBlock = defiFormat.encode("ascii") + b_defi + b"\x00"
 			dictFile.write(b_dictBlock)
-			blockLen = len(b_dictBlock)
 
-			blockData = dictMarkToBytes(dictMark) + uint32ToBytes(blockLen)
+			blockData = dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock))
 			for word in words:
 				idxBlockList.append((word.encode("utf-8"), blockData))
 
-			dictMark += blockLen
+			dictMark += len(b_dictBlock)
 
 			if dictMark > dictMarkMax:
 				raise Error(
@@ -564,9 +545,9 @@ def writeIfoFile(
 			ifo.append(("synwordcount", str(synWordCount)))
 
 		desc = glos.getInfo("description")
-		_copyright = glos.getInfo("copyright")
-		if _copyright:
-			desc = f"{_copyright}\n{desc}"
+		copyright_ = glos.getInfo("copyright")
+		if copyright_:
+			desc = f"{copyright_}\n{desc}"
 		publisher = glos.getInfo("publisher")
 		if publisher:
 			desc = f"Publisher: {publisher}\n{desc}"