#!/usr/bin/python3
import argparse
import datetime
import googleapiclient.discovery
import json
import oauth2client.file, oauth2client.client, oauth2client.tools
import os
import re
from enum import Enum
from httplib2 import Http
from pathlib import Path

# ANSI color codes
C_RED = '\033[91m'
C_GREEN = '\033[92m'
C_YELLOW = '\033[93m'
C_BLUE = '\033[94m'
C_PURPLE = '\033[95m'
C_CYAN = '\033[96m'
C_RESET = '\033[0m'

RE_LC_MESSAGES = re.compile(r"/([a-zA-Z-]+)/LC_MESSAGES")  # Extracts the locale from a file path
RE_HIGHLIGHT = re.compile(r"__(.*?)__")                    # __text__ is highlighted in log output
RE_ENTRY_FIELD = re.compile(r'^(msg.*)\s"(.*)"$')          # A "msg*" field line, e.g. msgid "Hello"
RE_EXTRA_STRING = re.compile(r'^\s*"(.*)"\s*$')            # A bare string line, e.g. "Language: en\n"
RE_ASSIGN = re.compile(r"{([a-zA-Z_-]+)}")                 # A {name} placeholder in static column text
RE_ESCAPED_ASSIGN = re.compile(r"\\{([a-zA-Z_-]+)\\}")     # A {name} placeholder after re.escape()

GOOGLE_AUTH_SCOPES = 'https://www.googleapis.com/auth/spreadsheets'
CREDENTIALS_FILE = 'credentials.json'
TOKEN_FILE = 'token.json'
CONFIG_FILE = 'config.json'

VERBOSE = False
MODE = None
class Mode(Enum):
    PUSH = 1
    PULL = 2

# Load configuration
with open(CONFIG_FILE) as config:
    CONFIG = json.load(config)
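
# A minimal config.json sketch. This is illustrative only: these are the keys the
# script reads below, but every value here is hypothetical.
# {
#   "path": "locale",
#   "spreadsheet_id": "your-spreadsheet-id",
#   "pull_chunk_size": 100,
#   "locales": {
#     "pt-BR": {
#       "sheet": "pt-BR",
#       "row_offset": 0,
#       "column_offset": 0,
#       "columns": [
#         {"header": "FILE NAME", "static": "{file_name}"},
#         {"header": "ORIGINAL", "fields": ["msgid"]},
#         {"header": "TRANSLATION", "fields": ["msgstr"]}
#       ]
#     }
#   }
# }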
def main():
    handle_args()
    info("Requesting authorization from Google API...")
    service = authorize_google_sheets()
    file_list = find_files(CONFIG["path"], ".po")
    if MODE == Mode.PUSH: return handle_push(service, file_list)
    if MODE == Mode.PULL: return handle_pull(service, file_list)
def handle_args():
    """Handles command-line parameters."""
    global MODE
    global VERBOSE
    parser = argparse.ArgumentParser(description='Sync Gettext messages to Google Sheets.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('action', choices=['push', 'pull'])
    args = parser.parse_args()
    MODE = Mode.PULL if args.action == 'pull' else Mode.PUSH
    VERBOSE = args.verbose
def authorize_google_sheets():
    """
    Starts the Google API client using credentials provided by a `credentials.json` file.
    In order to generate this file, visit:
    https://console.developers.google.com/flows/enableapi?apiid=sheets.googleapis.com
    Once authenticated, a `token.json` will be generated for subsequent accesses.
    """
    store = oauth2client.file.Storage(TOKEN_FILE)
    creds = store.get()
    if not creds or creds.invalid:
        flow = oauth2client.client.flow_from_clientsecrets(CREDENTIALS_FILE, GOOGLE_AUTH_SCOPES)
        creds = oauth2client.tools.run_flow(flow, store)
    return googleapiclient.discovery.build('sheets', 'v4', http=creds.authorize(Http()))
def find_files(path, extension):
    """Returns a list of paths to all files that are inside `path` and have the given extension."""
    glob = generate_glob_by_extension(extension)
    return list(Path(path).rglob(glob))
def generate_glob_by_extension(extension):
    """
    Generates a glob that matches the given extension, case-insensitively.

    Example
    -------
    For '.po' files, the generated glob is '*.[pP][oO]'
    """
    extension = extension.lstrip(".")
    case_insensitive_char_list = ["[{0}{1}]".format(char, char.upper()) for char in extension]
    glob = "".join(case_insensitive_char_list)
    return "*.{0}".format(glob)
def handle_push(service, file_list):
    """
    Sends local message data from all .po files to a Google Sheets spreadsheet.
    """
    info("Mode was set to PUSH.")
    locale_row_offsets = {}
    for posix_path in file_list:
        path_string = str(posix_path)
        locale = get_locale_by_path(path_string)
        info("File __{0}__ is from locale __{1}__".format(posix_path, locale))
        file_entries = process_po_file(path_string)
        try:
            settings = CONFIG["locales"][locale]
        except KeyError:
            warn("Missing configuration for this locale, skipping...")
            continue
        # Only print the header once per locale
        print_header = locale not in locale_row_offsets
        # Cumulative offset
        row_offset = locale_row_offsets.get(locale, settings["row_offset"])
        # Metadata
        metadata = {"file_name": posix_path.name,
                    "locale": locale,
                    "timestamp": str(datetime.datetime.now())}
        range_str, body = build_request_body(settings, file_entries,
                                             row_offset, print_header, metadata)
        verbose_info("Body: {0}".format(body))
        result = update_sheet(service, CONFIG["spreadsheet_id"], range_str, body)
        verbose_info("Result: {0}".format(str(result)))
        updated_rows = result.get('updatedRows', 0)
        info("Updated {0} rows successfully.".format(updated_rows))
        locale_row_offsets[locale] = row_offset + updated_rows
def get_locale_by_path(path):
    """
    Determines the locale of a .po file based on its path.
    This function assumes all files are within a path of the form '*/<locale>/LC_MESSAGES/*.po'.
    """
    try:
        return RE_LC_MESSAGES.search(path).group(1)
    except Exception:
        error(("Could not determine locale for file {0}. "
               "Make sure it's inside an LC_MESSAGES folder.").format(path))
def process_po_file(path):
    """
    Builds a list of entries from a .po file.
    Each entry will contain keys such as "msgid", "msgstr", "msgid_plural", "msgstr[0]", etc.
    """
    with open(path) as file:
        entry = {}
        entries = []
        info("Reading file __{0}__...".format(path))
        for line in file:
            line = line.strip()
            match = RE_ENTRY_FIELD.match(line)
            if match:
                field = match.group(1)
                value = match.group(2)
                # If this field is already tracked, we finished our last entry
                if field in entry:
                    # Prevent adding the empty msgid from the header
                    if any(entry.values()):
                        entries.append(entry)
                    entry = {}
                entry[field] = value
        # Append the last entry
        if entry:
            entries.append(entry)
        info("Read {0} entries!".format(len(entries)))
        return entries
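
# For reference, a .po entry such as (hypothetical content)
#   msgid "Hello"
#   msgstr "Olá"
# is parsed by process_po_file into the dict {"msgid": "Hello", "msgstr": "Olá"}.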
def build_request_body(settings, entries, row_offset, print_header, metadata):
    """
    Generates the body for a request to persist entries to Google Sheets.
    This includes determining the right range for the data, and placing fields into the
    right columns according to the user's configuration.
    """
    sheet = settings["sheet"]
    entry_count = len(entries)
    if not entry_count:
        warn("No entries found in this file, no changes were pushed.")
    row_start = 1 + row_offset
    # print_header is a bool, so adding it reserves one extra row for the header
    row_end = row_start + entry_count - 1 + print_header
    columns = settings["columns"]
    column_start = get_column_string(1 + settings["column_offset"])
    column_end = get_column_string(1 + settings["column_offset"] + len(columns) - 1)
    range_name = generate_range_name(sheet, row_start, row_end, column_start, column_end)
    headers = [[column["header"] for column in columns]] if print_header else []
    return (range_name,
            {'values': headers +
                       [build_request_entry(columns, entry, metadata) for entry in entries]})
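
# Worked example with hypothetical settings: row_offset=0, column_offset=1,
# three columns, print_header=True and two entries yield rows 1..3
# (1 header + 2 entries) and columns B..D, i.e. the range "<sheet>!B1:D3".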
def build_request_entry(columns, entry, metadata):
    """Processes all columns from a single entry."""
    return [populate_column(column, entry, metadata) for column in columns]

def populate_column(column, entry, metadata):
    """Decides the data that will fill a single column from a specific entry."""
    keys = entry.keys()
    static_text = column.get("static")
    if static_text:
        return parse_static_text(static_text, metadata)
    eligible_fields = column["fields"]
    for field in eligible_fields:
        if field in keys:
            return entry[field]
    return ""
def parse_static_text(static_text, metadata):
    """Replaces a static text's assigns with dynamic metadata."""
    return RE_ASSIGN.sub(lambda match: replace_assign(match, metadata), static_text)

def replace_assign(match_object, metadata):
    return metadata.get(match_object.group(1), "")
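
# e.g. parse_static_text("{file_name} ({locale})", {"file_name": "app.po", "locale": "en"})
# returns "app.po (en)"; assigns with no matching metadata are replaced by an empty string.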
def get_column_string(n):
    """Converts index n to a letter-based column index as is used in Google Sheets."""
    chars = []
    a = ord('A')
    while n > 0:
        n, offset = divmod(n - 1, 26)
        chars.append(chr(a + offset))
    return "".join(reversed(chars))
def generate_range_name(sheet, row_start, row_end, column_start, column_end):
    return "{0}!{1}{2}:{3}{4}".format(sheet, column_start, row_start, column_end, row_end)
def update_sheet(service, sheet_id, range_string, body):
    """Fires a spreadsheet update request through the Google API client."""
    return service.spreadsheets().values().update(spreadsheetId=sheet_id,
                                                  range=range_string,
                                                  body=body,
                                                  valueInputOption='RAW').execute()
def handle_pull(service, file_list):
    """
    Updates .po files with data fetched from a Google Sheets spreadsheet.
    """
    info("Mode was set to PULL.")
    locale_file_paths = {}
    for posix_path in file_list:
        path_string = str(posix_path)
        locale = get_locale_by_path(path_string)
        info("File __{0}__ is from locale __{1}__".format(posix_path, locale))
        # path_map associates file names with full paths
        path_map = locale_file_paths.get(locale, {})
        path_map[posix_path.name] = posix_path
        locale_file_paths[locale] = path_map
    locale_list = locale_file_paths.keys()
    for locale in locale_list:
        try:
            settings = CONFIG["locales"][locale]
        except KeyError:
            warn("Missing configuration for locale {0}, skipping...".format(locale))
            continue
        info("Handling pull for locale {0}.".format(locale))
        pull_by_locale(service, locale_file_paths[locale], settings)
def pull_by_locale(service, file_path_map, settings):
    """
    Pulls data from a sheet that corresponds to a single locale.
    """
    sheet = settings["sheet"]
    sheet_id = CONFIG["spreadsheet_id"]
    row_start = 1 + settings["row_offset"] + 1  # The extra 1 skips the header row
    columns = settings["columns"]
    column_start = get_column_string(1 + settings["column_offset"])
    column_end = get_column_string(1 + settings["column_offset"] + len(columns) - 1)
    column_mapping = get_column_mapping(columns)
    chunk_size = CONFIG["pull_chunk_size"]
    file_name_column = column_mapping["_file_name"]
    file_name_template = columns[file_name_column]["static"]
    file_handle_map = {}
    # As we don't know how many rows there are, data is pulled in chunks until there's
    # nothing else to be read.
    while True:
        row_end = row_start + chunk_size - 1
        range_name = generate_range_name(sheet, row_start, row_end, column_start, column_end)
        info("Fetching chunk {0}.".format(range_name))
        count, data = fetch_chunk(service, sheet_id, range_name)
        info("Data: {0}.".format(data))
        if count == 0: break
        process_chunk(file_path_map, file_handle_map, data, column_mapping, file_name_template)
        row_start = row_end + 1
    info("Finished processing. Closing file handles...")
    for file_name, handles in file_handle_map.items():
        handles["write"].close()
        handles["read"].close()
        info("Closed file {0}. Cleaning up...".format(file_name))
        full_path = str(file_path_map[file_name])
        full_path_tmp = "{0}.tmp".format(full_path)
        full_path_old = "{0}.old".format(full_path)
        try:
            os.remove(full_path_old)
            verbose_info("Removed {0}.".format(full_path_old))
        except FileNotFoundError:
            verbose_info("Old file does not exist, nothing to do.")
        os.rename(full_path, full_path_old)
        os.rename(full_path_tmp, full_path)
    info("__All done!__")
def get_column_mapping(columns):
    """
    Processes the 'columns' field in the settings map to associate each field with its
    positional index in the fetched row. Also, at least one column that points to the
    file name is needed.
    Returns a map in which keys are fields ('msgid', 'msgstr', etc.) and values are indices.
    The reserved key '_file_name' points to the column index that contains file name information.
    """
    ret = {}
    found_file_name = False
    for i, column in enumerate(columns):
        if "fields" in column:
            for field in column["fields"]:
                ret[field] = i
            continue
        if "static" in column:
            if r"{file_name}" in column["static"]:
                ret["_file_name"] = i
                found_file_name = True
    if not found_file_name:
        error(('At least one of the columns must indicate the file name!\n'
               'e.g.: {"static": "{file_name}", "header": "FILE NAME"}'))
    return ret
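
# e.g. for the hypothetical column list
#   [{"static": "{file_name}"}, {"fields": ["msgid"]}, {"fields": ["msgstr"]}]
# the returned mapping is {"_file_name": 0, "msgid": 1, "msgstr": 2}.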
def fetch_chunk(service, spreadsheet_id, range_name):
    """Fires a spreadsheet retrieval request through the Google API client."""
    result = service.spreadsheets().values().get(spreadsheetId=spreadsheet_id,
                                                 range=range_name).execute()
    values = result.get('values')
    row_count = len(values) if values is not None else 0
    info('{0} rows retrieved.'.format(row_count))
    return row_count, values
def process_chunk(file_path_map, file_handle_map, data, column_mapping, file_name_template):
    """
    Translates `data` row by row into updates to a .po file.

    Parameters:
    file_path_map      - Given a file's name, points to its full path
    file_handle_map    - Contains a read handle and a write handle for each file
    data               - The rows obtained from the spreadsheet
    column_mapping     - The column index that stores each field
    file_name_template - The value for `static` in `config.json` for the column that
                         contains the file name. Used to extract the file name.
    """
    file_name_column = column_mapping["_file_name"]
    msgid_column = column_mapping["msgid"]
    for row in data:
        if any(row):
            assigns = extract_string_assigns(file_name_template, row[file_name_column])
            verbose_info("Extracted assigns: {0}.".format(assigns))
            base_filename = assigns["file_name"]
            target_file = file_path_map[base_filename]
            info("Target file is {0}.".format(target_file))
            if base_filename not in file_handle_map:
                verbose_info("Opening read and write file handles...")
                file_handle_map[base_filename] = open_handle_pair(str(target_file))
            handles = file_handle_map[base_filename]
            buffered_lines = find_msgid_in_file(handles["read"], row[msgid_column])
            write_buffered_lines_to_file(handles["write"], buffered_lines)
            write_updated_entry_to_file(handles, row, column_mapping)
    return False
def extract_string_assigns(template, string):
    """
    Given a template and a string, extract a map containing named assigns.
    e.g.: template: "{x} world!"
          string:   "Hello world!"
          return:   {"x": "Hello"}
    """
    escaped = re.escape(template)
    reverse_template = RE_ESCAPED_ASSIGN.sub('(?P<\\1>.*)', escaped)
    result_iterator = re.finditer(reverse_template, string)
    # Builds a dict out of the named captures in the regex
    return next(result_iterator).groupdict()
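
# e.g. the hypothetical template "{file_name}@{locale}" is escaped and reversed into a
# pattern equivalent to "(?P<file_name>.*)@(?P<locale>.*)", so the string "app.po@en"
# yields {"file_name": "app.po", "locale": "en"}.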
def open_handle_pair(target_file):
    """
    Opens an existing .po file for reading and a new .po.tmp file for writing, copying the
    original file's header contents to it.
    """
    new_file = "{0}.tmp".format(target_file)
    handle_map = {"read": open(target_file, "r"), "write": open(new_file, "w")}
    # Header handling: seek to the header entry (empty msgid) and copy it verbatim
    buffered_lines = find_msgid_in_file(handle_map["read"], "")
    write_buffered_lines_to_file(handle_map["write"], buffered_lines)
    write_updated_entry_to_file(handle_map)
    return handle_map
def find_msgid_in_file(file_handle, msgid):
    """
    Moves the file read pointer in `file_handle` to the entry's first line.
    Returns a list of all lines after the previous entry in the file.
    """
    wrapped = False
    buffered_lines = []
    pre_entry_position = 0
    reading_an_entry = False
    info("Looking for msgid __'{0}'__.".format(msgid))
    while True:
        beginning_of_line = file_handle.tell()
        line = file_handle.readline()
        # Wrap around the file once on EOF.
        if not line:
            if not wrapped:
                info("EOF reached, restarting from beginning of file...")
                wrapped = True
                buffered_lines = []
                pre_entry_position = 0
                reading_an_entry = False
                file_handle.seek(0)
                # Don't let the empty EOF line fall through to the matching below
                continue
            else:
                error("Msgid {0} was not found in the file.".format(msgid))
        match = RE_ENTRY_FIELD.match(line)
        if match:
            if not reading_an_entry:
                reading_an_entry = True
                pre_entry_position = beginning_of_line
            field = match.group(1)
            value = match.group(2)
            if field == 'msgid' and value == msgid:
                file_handle.seek(pre_entry_position)
                info("Successfully found! Entry begins at position __{0}__.".format(file_handle.tell()))
                return buffered_lines
        else:
            # Line does not contain a 'msg*' field (e.g. comments or metadata).
            # These are buffered so that they can be returned later.
            if reading_an_entry:
                reading_an_entry = False
                buffered_lines = []
            # Do not buffer preceding lines containing pure strings,
            # e.g. header fields such as "Language: en\n"
            if not RE_EXTRA_STRING.match(line):
                buffered_lines.append(line)
def write_buffered_lines_to_file(file_handle, buffered_lines):
    """
    Writes all lines that precede an entry at the write file's current pointer.
    These usually include comments and blank lines.
    """
    verbose_info("Buffered lines to be written: {0}".format(buffered_lines))
    for line in buffered_lines:
        file_handle.write(line)
def write_updated_entry_to_file(handles, row=None, column_mapping=None):
    """
    Reads sequential "msg*" fields from the file given by `handles`, and outputs them
    with updated values from the `row` data, mapped by `column_mapping`.
    This function expects that the pointer in both handles is at the start of an entry.
    """
    read_handle = handles["read"]
    write_handle = handles["write"]
    while True:
        beginning_of_line = read_handle.tell()
        line = read_handle.readline()
        match = RE_ENTRY_FIELD.match(line)
        can_update = (column_mapping is not None)
        if not match:
            # Preserve lines containing pure strings,
            # e.g. header fields such as "Language: en\n"
            extra_match = RE_EXTRA_STRING.match(line)
            can_update = False
            if not extra_match:
                verbose_info("Line '{0}' did not match any field. Stopping update...".format(line))
                read_handle.seek(beginning_of_line)
                break
        if can_update:
            field = match.group(1)
            verbose_info("Updating field {0}...".format(field))
            new_value = row[column_mapping[field]]
            write_handle.write('{0} "{1}"\n'.format(field, new_value))
        else:
            verbose_info("Copying line {0}...".format(line))
            write_handle.write(line)
def verbose_info(string):
    if VERBOSE: info(string)

def info(string):
    parsed = RE_HIGHLIGHT.sub('{0}\\1{1}'.format(C_BLUE, C_RESET), string)
    print(parsed)

def warn(string):
    print(C_YELLOW + string + C_RESET)

def error(string):
    raise Exception(C_RED + string + C_RESET)

if __name__ == '__main__':
    main()