-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmysql2sqlite.py
458 lines (348 loc) · 16 KB
/
mysql2sqlite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
#!/usr/bin/env python3
# https://github.com/WhyAskWhy/mysql2sqlite
"""
Query MySQL database, mirror relevant tables to a local SQLite database.
"""
# TODO:
#
# * Clearly define what conditions are "warnings" and explicitly handle them
# ** e.g., how shall the script handle a request to query a MySQL database
# table that does not exist? This seems like an error ...
# ** what about _not_ handling a MySQL table in a particular database?
# *** perhaps that should be reported as a warning unless a flag is set to
# disable warnings for unhandled tables?
#######################################################
# Modules - Standard Library
#######################################################
# parse command line arguments, 'sys.argv'
import argparse
import configparser
import logging
import logging.handlers
import os
import os.path
import sqlite3
import sys
# Used to help enforce order/precedence for config files.
from collections import OrderedDict
app_name = 'mysql2sqlite'
# TODO: Configure formatter to log function/class info
syslog_formatter = logging.Formatter('%(name)s - L%(lineno)d - %(levelname)s - %(funcName)s - %(message)s')
file_formatter = logging.Formatter('%(asctime)s - %(name)s - L%(lineno)d - %(funcName)s - %(levelname)s - %(message)s')
stdout_formatter = logging.Formatter('%(asctime)s - %(name)s - L%(lineno)d - %(levelname)s - %(funcName)s - %(message)s')
# Grab root logger and set initial logging level
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
# The SysLogHandler class, supports sending logging messages to a remote
# or local Unix syslog.
# TODO: Expose this value elsewhere; move to logging_config.json?
syslog_socket = '/dev/log'
try:
syslog_handler = logging.handlers.SysLogHandler(address=syslog_socket)
except AttributeError:
# We're likely running on Windows, so use the NullHandler here
syslog_handler = logging.NullHandler
else:
# Good thus far, finish configuring SysLogHandler
syslog_handler.ident = app_name + ": "
syslog_handler.setFormatter(syslog_formatter)
syslog_handler.setLevel(logging.INFO)
console_handler = logging.StreamHandler(stream=sys.stdout)
console_handler.setFormatter(stdout_formatter)
# Apply lax logging level since we will use a filter to examine message levels
# and compare against allowed levels set within the main config file. This
# filter is added later once the settings config object has been constructed.
console_handler.setLevel(logging.NOTSET)
file_handler = logging.FileHandler(app_name + '.log', mode='a')
file_handler.setFormatter(file_formatter)
file_handler.setLevel(logging.DEBUG)
# Create logger object that inherits from root and will be inherited by
# all modules used by this project
# Note: The console_handler is added later after the settings config object
# has been constructed.
app_logger = logging.getLogger(app_name)
app_logger.addHandler(syslog_handler)
app_logger.addHandler(file_handler)
app_logger.setLevel(logging.DEBUG)
log = app_logger.getChild(__name__)
log.debug("Logging initialized for %s", __name__)
########################################################
# Collect command-line arguments (e.g., passed by Cron)
########################################################
parser = argparse.ArgumentParser(
# Borrow docstring for this module
description=__doc__.strip()
)
parser.add_argument(
'--config_file_dir',
action='store',
required=False,
help='The directory path containing general and query config files.')
try:
log.info('Parsing commandline options')
args = parser.parse_args()
except argparse.ArgumentError as error:
log.exception("Unable to parse command-line arguments: %s", error)
sys.exit(1)
if args.config_file_dir is not None:
cmdline_config_file_dir = args.config_file_dir
else:
cmdline_config_file_dir = ""
########################################
# Modules - Third party
########################################
# Upstream module, actively maintained and official recommendation
# of the MariaDB project (per their documentation).
#
# Available via OS packages (including apt repo) or pip.
#
# Examples:
#
# * sudo apt-get install mysql-connector-python
# * pip install mysql-connector-python --user
log.debug("Attempting to import mysql.connector module")
import mysql.connector as mysql
########################################
# Modules - Custom
########################################
import mysql2sqlite_lib as m2slib
log.debug("Finished importing standard modules and our custom library modules.")
#######################################################
# CONSTANTS - Modify INI config files instead
#######################################################
# Where this script is called from. We will try to load local copies of all
# dependencies from this location first before falling back to default
# locations in order to support having all of the files bundled together for
# testing and portable use.
script_path = os.path.dirname(os.path.realpath(__file__))
# The name of this script used (as needed) by error/debug messages
script_name = os.path.basename(sys.argv[0])
general_config_file = 'mysql2sqlite_general.ini'
query_config_file = 'mysql2sqlite_queries.ini'
# Listed in in order of precedence: first match in list wins
config_file_paths = OrderedDict({
'envvar_config_file_dir': os.environ.get('MYSQL2SQLITE_CONFIG_DIR', ''),
'cmdline_config_file_dir': cmdline_config_file_dir,
'local_config_file_dir': script_path,
'user_config_file_dir': os.path.expanduser('~/.config/mysql2sqlite'),
'default_config_file_dir': '/etc/mysql2sqlite',
})
general_config_file_candidates = []
query_config_file_candidates = []
for key in reversed(config_file_paths):
general_config_file_candidates.append(
os.path.join(config_file_paths[key], general_config_file))
query_config_file_candidates.append(
os.path.join(config_file_paths[key], query_config_file))
# Prefer the local copy over the "global" one by loading it last (where the
# second config file overrides or "shadows" settings from the first)
# Generate configuration setting options
log.debug(
"Passing in these general config file locations for evaluation: %s",
general_config_file_candidates)
log.debug(
"Passing in these query config file locations for evaluation: %s",
query_config_file_candidates)
# Generate configuration setting options
log.info('Parsing config files')
# Apply handler early so that console logging is enabled prior to parsing
# configuration files. The provided filter configuration allows logging
# warning and error messages only while the settings object has yet to be
# defined.
app_logger.addHandler(console_handler)
console_handler.addFilter(m2slib.ConsoleFilterFunc(settings=None))
try:
general_settings = m2slib.GeneralSettings(general_config_file_candidates)
except configparser.NoSectionError as error:
log.exception("Error parsing configuration file: %s", error)
sys.exit(1)
except IOError as error:
log.exception("Error reading configuration file: %s", error)
sys.exit(1)
try:
query_settings = m2slib.QuerySettings(query_config_file_candidates)
except configparser.NoSectionError as error:
log.exception("Error parsing configuration file: %s", error)
sys.exit(1)
except IOError as error:
log.exception("Error reading configuration file: %s", error)
sys.exit(1)
# Now that the settings object has been properly created, lets use it to
# finish configuring console logging for the main application logger.
console_handler.removeFilter(m2slib.ConsoleFilterFunc)
console_handler.addFilter(m2slib.ConsoleFilterFunc(settings=general_settings))
####################################################################
# Troubleshooting config file flag boolean conversion
####################################################################
# Troubleshooting config file flag boolean conversion
for key, value in list(general_settings.flags.items()):
log.debug("key: '%s' value: '%s' type of value: '%s'",
key,
value,
type(value))
####################################################################
# Open connections to databases
####################################################################
log.info("Opening connection to MySQL database")
mysql_connection = m2slib.open_db_connection(general_settings,
general_settings.mysqldb_config['database'])
# If SQLite database doesn't already exist we will need to import the schema
# when we create the database. Set a flag to indicate that need.
SQLITE_DB_IS_NEW = not os.path.exists(
general_settings.sqlitedb_config['db_file'])
if SQLITE_DB_IS_NEW:
# Make sure that one of these is true:
# * We have write access to an existing SQLite database file
# * We have write access to the folder where we will create the SQLite database
#
# TODO: This function is too large, does too much
try:
m2slib.verify_sqlite_storage(general_settings, query_settings)
except IOError as error:
log.exception(error)
sys.exit(1)
# Open a connection to the database.
try:
sqlite_connection = sqlite3.connect(
general_settings.sqlitedb_config['db_file'])
except sqlite3.Error as error:
log.exception("Failed to connect to the %s database: %s",
general_settings.sqlitedb_config['db_file'],
error)
sys.exit(1)
else:
log.info("Connected to SQLite database ...")
# Make sure that there are tables in the database
if not m2slib.sqlite_db_has_tables(general_settings.sqlitedb_config['db_file']):
log.debug("SQLite db file has no tables")
SQLITE_DB_MISSING_TABLES = True
else:
log.debug("SQLite db file has one more more tables")
SQLITE_DB_MISSING_TABLES = False
# If the database is newly created or missing tables import the schema
if SQLITE_DB_IS_NEW or SQLITE_DB_MISSING_TABLES:
log.info("%s database file is new or is missing tables; importing schema." ,
general_settings.sqlitedb_config['db_file'])
m2slib.import_sqlite_db_schema(sqlite_connection,
general_settings, query_settings)
else:
log.info("%s database file exists and has tables; skipping schema import",
general_settings.sqlitedb_config['db_file'])
# Verify that autocommit is turned off
if sqlite_connection.isolation_level is None:
if general_settings.flags['fail_on_warnings']:
log.warning("autocommit mode is enabled. "
"This results in poor performance when many updates are required.")
else:
log.info("autocommit mode is disabled. "
"This should help performance for large batches of updates")
####################################################################
# Create cursor objects so that we can interact with the databases
####################################################################
# Cursor for the SQLite copy of the database
sqlite_cursor = sqlite_connection.cursor()
# Cursor for the MySQL copy of the database
mysql_cursor = mysql_connection.cursor()
####################################################################
# Copy data from primary MySQL database to local SQLite database
####################################################################
log.info("Continuing with db prep work ...")
# To retrieve data after executing a SELECT statement, you can either treat the
# cursor as an iterator, call the cursor's fetchone() method to retrieve a
# single matching row, or call fetchall() to get a list of the matching rows.
#
# Works for all tables except for the 'virtual_users' table as I've opted to
# not include passwords in the local SQLite db files (at least for now)
################################################################################
# Official Python SQLite docs
#
# When a database is accessed by multiple connections, and one of the
# processes modifies the database, the SQLite database is locked until that
# transaction is committed. The timeout parameter specifies how long the
# connection should wait for the lock to go away until raising an exception.
# The default for the timeout parameter is 5.0 (five seconds).
################################################################################
if m2slib.sqlite_db_has_tables(general_settings.sqlitedb_config['db_file']):
DROP_TABLES = True
log.debug("Tables were found. Setting flag to drop tables.")
else:
# Set flag so we won't try to clear table contents from non-existent tables
DROP_TABLES = False
log.debug("Tables were not found. Setting flag to skip dropping tables.")
log.debug("MySQL tables to replicate: %s",
", ".join(query_settings.queries.keys()))
for table in query_settings.queries:
# Dynamically create the select query used to pull data from MySQL table
mysql_cursor.execute(query_settings.queries[table]['read'])
# FIXME: A future version will first check to make sure that there are
# new entries which require regenerating/updating the tables
#
if DROP_TABLES:
log.info("Recreating %s SQLite table", table)
try:
log.debug("Dropping %s ...", table)
sqlite_cursor.execute('DROP TABLE IF EXISTS {}'.format(table))
except sqlite3.Error as error:
log.exception(error)
sys.exit(1)
try:
log.debug("Creating %s ...", table)
sqlite_cursor.execute(query_settings.queries[table]['new'])
except sqlite3.Error as error:
log.exception(error)
sys.exit(1)
# Make sure that the index key in the mysql2sqlite_queries.ini file
# for each table has content.
if (('index' in query_settings.queries[table])
and (query_settings.queries[table]['index'] is not None)):
try:
log.debug("Creating index for %s ...", table)
sqlite_cursor.execute(query_settings.queries[table]['index'])
except sqlite3.Error as error:
log.exception(error)
sys.exit(1)
else:
log.debug(
"No index query defined,"
" skipping index recreation for %s",
query_settings.queries[table])
log.info("Pulling data from %s MySQL table ...", table)
mysql_data = mysql_cursor.fetchall()
log.info("Updating %s SQLite table ...", table)
for row in mysql_data:
log.debug("Writing entries to %s SQLite table", table)
# Add MySQL table entries to SQLite db tables
try:
sqlite_cursor.execute(query_settings.queries[table]['write'], (row))
except sqlite3.Error as error:
log.exception("Failed to write entries to %s SQLite table: %s",
table, row)
log.exception(error)
sys.exit(1)
# If we made it this far then all data has been inserted into the SQLite db table
log.info("Updates to %s table are complete. "
"The last inserted id was: %s\n", table, sqlite_cursor.lastrowid)
####################################################################
# Cleanup
####################################################################
# Python SQLite docs, Python Standard Library by Example
#
# This method commits the current transaction. If you don't call this method,
# anything you did since the last call to commit() is not visible from other
# database connections. This requirement gives an application an opportunity
# to make several related changes together, so they are stored atomically
# instead of incrementally, and avoids a situation where partial updates
# are seen by different clients connecting to the database simultaneously.
log.info("Committing transactions ...")
sqlite_connection.commit()
# FIXME: The values don't appear to be accurate
# NOTE: tested with Python 2.7.12, MySQL Connector against MariaDB 10.0.29
#
# log.debug("MySQL db rows processed: %d", mysql_cursor.rowcount)
# log.debug("SQLite db rows processed: %d", sqlite_cursor.rowcount)
# Close database connections
log.info("Closing MySQL database connection ...")
mysql_connection.close()
log.info("Closing SQLite database connection ...")
sqlite_connection.close()
log.info("Transactions committed, database connections closed")