diff --git a/.gitignore b/.gitignore index a60fc4b..bdbee18 100644 --- a/.gitignore +++ b/.gitignore @@ -195,7 +195,8 @@ config.dev.ts # Ignore songs (we don't want them in tree) pages/songbook/songs -pages/songbook/songs.json +pages/songbook/song_index.json +songs.json venv package-lock.json diff --git a/scripts/songbook.ts b/scripts/songbook.ts index d6fe161..6aa09ec 100644 --- a/scripts/songbook.ts +++ b/scripts/songbook.ts @@ -1,5 +1,5 @@ // @ts-ignore -import songs from "../pages/songbook/songs.json"; +import songs from "../pages/songbook/song_index.json"; import {string_to_array, similiarity} from "../scripts/util/search"; import {debounce} from "../scripts/util/common"; diff --git a/util/build_songs.py b/util/build_songs.py index 4fa8d38..6447822 100644 --- a/util/build_songs.py +++ b/util/build_songs.py @@ -1,11 +1,10 @@ import os -import zipfile -import re import json import sys from urllib.request import urlretrieve from string import Template from pathlib import Path +import base64 ### LOCAL IMPORTS from tex_to_html import tex_to_html @@ -13,62 +12,19 @@ OUTPUT_IMAGE_PATH = Path(".").joinpath("media","songs") OUTPUT_PATH = Path(".").joinpath("pages", "songbook", "songs") TEMPALTE_PATH = CWD.joinpath("util","template") -ARCHIVE_PATH = Path(".").joinpath("sangbog-main.zip") -JSON_PATH = CWD.joinpath("pages", "songbook", "songs.json") +JSON_OUTPUT_PATH = CWD.joinpath("pages", "songbook", "song_index.json") +SONGBOOK_ARTIFACT_URL = 'https://github.com/f-klubben/sangbog/releases/latest/download/songs.json' ### EXTRACTING PART ### -def get_songbook(file_path): - url = 'https://github.com/f-klubben/sangbog/archive/master.zip' - urlretrieve(url, file_path) - print("") - return file_path - -def get_file_contents(archive, path): - contents = "" - with archive.open(path, mode="r") as data: - contents = data.read() - return contents - -def get_song_info(content): - reg = re.compile(r"\\begin\{sang\}\{([^\}]*)\}\{([^\}]*)\}") - match = reg.match(content) - if match != None: - return ( - match.group(1).capitalize(), - match.group(2).replace("\\ldots", "…").replace("Melodi - ", "").replace("Melodi:", "").lstrip().capitalize() - ) - -def get_verses(content): - matches = re.compile(r"(?s)\\begin\{vers\}\s?(.*?)\\end\{vers\}", re.MULTILINE|re.DOTALL) - res = [] - for match in matches.finditer(content): - start = content[0:match.start()].count("\n") - res.append((start, "v",match.group(1))) - return res - -def get_choruses(content): - matches = re.compile(r"\\begin\{omkvaed\}\[?\w?\]?\s*([^\\]*)", re.MULTILINE|re.DOTALL) - res = [] - for match in matches.finditer(content): - start = content[0:match.start()].count("\n") - res.append((start,"c", match.group(1))) - return res - -def get_images(content): - matches = re.compile(r"\\includegraphics\s*\[width=(\d*.\d)\\*\w*\]\{([^\}]*)\}", re.MULTILINE|re.DOTALL) - res = [] - for match in matches.finditer(content): - start = content[0:match.start()].count("\n") - res.append((start, "i", match.group(1),match.group(2).replace(".eps", ".png"))) - return res - -def get_song_order(content): - matches = re.compile(r"\\input\{([^\}]*)\/([^.}]*)(.tex|\})", re.MULTILINE|re.DOTALL) - res = [] - for match in matches.finditer(content): - start = content[0:match.start()].count("\n") - res.append(match.group(2)) - return res +### EXTRACTING PART ### +def get_songbook_artifact(): + url = SONGBOOK_ARTIFACT_URL + temp_path = "songs.json" + urlretrieve(url, temp_path) + songbook_json_string = "{}" + with open(temp_path, "rb") as file: + songbook_json_string = file.read().decode('UTF-8') + return json.loads(songbook_json_string) def get_template(name): contents = "" @@ -76,7 +32,7 @@ def get_template(name): contents = data.read() return Template(contents) -def get_song_body(body_list, archive): +def get_song_body(body_list): pargraph = 0 text_t = get_template("text") # type, line, text image_t = get_template("image") # b64image @@ -96,14 +52,13 @@ def get_song_body(body_list, archive): line = pargraph, text = tex_to_html(el[2]) ) - elif el[1] == "i": - image = get_file_contents( - archive, - f"sangbog-main/{el[3]}" - ) - image_path = OUTPUT_IMAGE_PATH.joinpath(el[3].split("/")[1]) + elif el[1] == "i": + image_path = OUTPUT_IMAGE_PATH.joinpath(el[3].split("/")[-1]) os.makedirs(os.path.dirname(image_path), exist_ok=True) abs_image_path = CWD.joinpath(image_path) + + image = base64.decodebytes(el[4].encode('utf-8')) + with open(abs_image_path, mode="wb")as f: f.write(image) body += image_t.substitute( @@ -112,20 +67,16 @@ def get_song_body(body_list, archive): body += "\n" return body -def generate_song(song_info, file_name, contents, counter, archive): +def generate_song(song_index, song_info, file_name): if song_info == None: return False - body_list = merge_lists( - get_verses(contents), - get_choruses(contents), - get_images(contents), - ) - song_body = get_song_body(body_list, archive) + body_list = song_info['body'] + song_body = get_song_body(body_list) song_t = get_template("song") song = song_t.substitute( - num = counter.get_count(file_name), - name = song_info[0], - melody = "Melody - "+ song_info[1].replace("\n", "") if song_info[1] != "" else song_info[1], + num = song_index, + name = song_info['title'], + melody = "Melody - " + song_info['melody'].replace("\n", "") if song_info['melody'] != "" else "", sbody = song_body ) path = OUTPUT_PATH.joinpath( file_name) @@ -139,51 +90,20 @@ def img2b64(path): encoded_string = base64.b64encode(image_file.read()) return encoded_string -def merge_lists(v, c, i): - l = [] - l.extend(v) - l.extend(c) - l.extend(i) - return sorted(l, key=lambda x: x[0]) - -class Counter: - def __init__(self, order): - self.order = order - self.count = len(order) - self.last = 0 - def get_count(self, file_name): - try: - self.last = (self.order.index(file_name) + 1) - except: - self.last = self.count - self.count += 1 - return self.last - if __name__ == "__main__": json_res = {} - if (not ARCHIVE_PATH.exists()): - get_songbook(ARCHIVE_PATH) - with zipfile.ZipFile(ARCHIVE_PATH, mode="r") as archive: - c = get_file_contents(archive, "sangbog-main/booklet/main.tex").decode('UTF-8') - counter = Counter(get_song_order(c)) - songs = list(filter( - lambda x: x.filename.startswith("sangbog-main/sange") and not x.is_dir(), - archive.infolist()) - ) - song_count = len(songs) - count = 0 - for info in songs: - count +=1 - percent = (count/song_count)*100 - sys.stdout.write("\rGenerating songbook %d%%" % (percent)) - sys.stdout.flush() - contents = get_file_contents(archive, info.filename).decode('UTF-8') - song_info = get_song_info(contents) - file_name = filename = info.filename.split("/")[-1].split(".")[0] - if generate_song(song_info, file_name, contents, counter, archive): - json_res[counter.last] = [song_info[0], f"./songs/{file_name}.html"] + songs = get_songbook_artifact() + song_count = len(songs) + count = 0 + for song in songs: + count += 1 + percent = (count/song_count)*100 + sys.stdout.write("\rGenerating songbook %d%%" % (percent)) + sys.stdout.flush() + file_name = song['path'].split("/")[-1].split(".")[0] + if generate_song(song['number'], song, file_name): + json_res[song['number']] = [song['title'], f"./songs/{file_name}.html"] + print("\n\rWriting to json") - with open(JSON_PATH, encoding="utf-8", mode="w") as f: - f.write(json.dumps(json_res, ensure_ascii=False)) - print("\rRemoving archive") - CWD.joinpath(ARCHIVE_PATH).unlink() + with open(JSON_OUTPUT_PATH, encoding="utf-8", mode="w") as f: + f.write(json.dumps(json_res, ensure_ascii=False)) \ No newline at end of file