From cd288eb700cc846f7313b32293dfe6416e6a69c8 Mon Sep 17 00:00:00 2001 From: brians Date: Wed, 22 Apr 2020 00:12:15 -0600 Subject: [PATCH 1/6] Adding .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c09bbb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +./sms/ +./gvoice-all.xml From 275d3930eaf5ce92d86a28c2aedb83805e12b20f Mon Sep 17 00:00:00 2001 From: brians Date: Wed, 22 Apr 2020 00:12:47 -0600 Subject: [PATCH 2/6] Updating .gitignore --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c09bbb6..95476f3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ -./sms/ -./gvoice-all.xml +sms/ +gvoice-all.xml From 55abb788cb4f086a3d2500437a84a798b663ad94 Mon Sep 17 00:00:00 2001 From: brians Date: Wed, 22 Apr 2020 00:24:43 -0600 Subject: [PATCH 3/6] Fixing date support for new Takeout format --- sms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sms.py b/sms.py index 1809586..7f3ffd6 100644 --- a/sms.py +++ b/sms.py @@ -1,6 +1,8 @@ from bs4 import BeautifulSoup +import re import os import phonenumbers +import dateutil.parser import time, datetime from calendar import timegm import warnings @@ -106,9 +108,7 @@ def get_phone(messages): def get_time_unix(message): time_raw = message.find(class_='dt') ymdhms = time_raw['title'] - time_obj = datetime.datetime.strptime(ymdhms.replace('Z','UTC'), '%Y-%m-%dT%H:%M:%S.%f%Z') - #print('GV Date: ', ymdhms) - #mstime = time.mktime(time_obj.timetuple()) * 1000 + time_obj.microsecond / 1000 + time_obj = dateutil.parser.isoparse(ymdhms); mstime = timegm(time_obj.timetuple()) * 1000 + time_obj.microsecond / 1000 return int(mstime) From 7c99e957f72012dc5d6702ac6ebee71149444ddf Mon Sep 17 00:00:00 2001 From: brians Date: Wed, 22 Apr 2020 00:25:52 -0600 Subject: [PATCH 4/6] Adding support for emojis --- sms.py | 30 
+++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/sms.py b/sms.py index 7f3ffd6..3af01a2 100644 --- a/sms.py +++ b/sms.py @@ -6,32 +6,19 @@ import time, datetime from calendar import timegm import warnings - -import tkinter as tk -from tkinter import filedialog - -root = tk.Tk() -root.withdraw() - -file_path = filedialog.askopenfilename() - -""" -TO DO: -filter emoji -fix double-double quotes -""" +from io import open # adds emoji support sms_backup_filename = "./gvoice-all.xml" +print('New file will be saved to ' + sms_backup_filename) def main(): - + print('Checking directory for *.html files') num_sms = 0 root_dir = '.' for subdir, dirs, files in os.walk(root_dir): for file in files: sms_filename = os.path.join(subdir, file) - #print(sms_filename) try: sms_file = open(sms_filename, 'r') @@ -39,9 +26,10 @@ def main(): continue if(os.path.splitext(sms_filename)[1] != '.html'): - print(sms_filename,"- skipped") + # print(sms_filename,"- skipped") continue - print(sms_filename) + + print('Processing ' + sms_filename) soup = BeautifulSoup(sms_file, 'html.parser') @@ -118,9 +106,9 @@ def write_header(filename, numsms): backup_file.close() backup_file = open(filename, 'w') - backup_file.write("\n") - backup_file.write("\n") - backup_file.write('\n') + backup_file.write(u"\n") + backup_file.write(u"\n") + backup_file.write(u'\n') backup_file.write(backup_text) backup_file.close() From f5138c7d7bcad30b76854ee1cdfdad0c646bf713 Mon Sep 17 00:00:00 2001 From: brians Date: Wed, 22 Apr 2020 00:26:10 -0600 Subject: [PATCH 5/6] Adding support for MMS messages --- sms.py | 128 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 101 insertions(+), 27 deletions(-) diff --git a/sms.py b/sms.py index 3af01a2..98e54d4 100644 --- a/sms.py +++ b/sms.py @@ -31,38 +31,86 @@ def main(): print('Processing ' + sms_filename) + is_group_conversation = re.match(r'(^Group Conversation)', file) + soup = BeautifulSoup(sms_file, 
'html.parser') messages_raw = soup.find_all(class_='message') num_sms += len(messages_raw) - sms_values = {'phone' : get_phone(messages_raw)} - - for i in range(len(messages_raw)): - ## print('Unix time:',get_time_unix(messages_raw[i])) - ## print('Sender:',get_phone(messages_raw[i])) - ## print('Type:',get_message_type(messages_raw[i])) - ## print('Message text:',get_message_text(messages_raw[i])) - ## print('-----') - sms_values['type'] = get_message_type(messages_raw[i]) - sms_values['message'] = get_message_text(messages_raw[i]) - sms_values['time'] = get_time_unix(messages_raw[i]) - sms_text = (' \n' % sms_values) - sms_backup_file = open(sms_backup_filename, 'a') - sms_backup_file.write(sms_text) - sms_backup_file.close() + if is_group_conversation: + participants_raw = soup.find_all(class_='participants') + write_mms_messages(participants_raw, messages_raw) + else: + write_sms_messages(file, messages_raw) + sms_backup_file = open(sms_backup_filename, 'a') - sms_backup_file.write('') + sms_backup_file.write(u'') sms_backup_file.close() write_header(sms_backup_filename, num_sms) +def write_sms_messages(file, messages_raw): + fallback_number = 0 + title_has_number = re.search(r"(^\+*[0-9]+)", file) + if title_has_number: + fallback_number = title_has_number.group() + + sms_values = {'phone' : get_first_phone_number(messages_raw, fallback_number)} + + sms_backup_file = open(sms_backup_filename, 'a') + for i in range(len(messages_raw)): + sms_values['type'] = get_message_type(messages_raw[i]) + sms_values['message'] = get_message_text(messages_raw[i]) + sms_values['time'] = get_time_unix(messages_raw[i]) + sms_text = (' \n' % sms_values) + sms_backup_file.write(sms_text) + + sms_backup_file.close() + +def write_mms_messages(participants_raw, messages_raw): + sms_backup_file = open(sms_backup_filename, 'a') + + participants = get_participant_phone_numbers(participants_raw) + mms_values = {'participants' : '~'.join(participants)} + + for i in 
range(len(messages_raw)): + sender = get_mms_sender(messages_raw[i]) + sent_by_me = sender not in participants + + mms_values['type'] = get_message_type(messages_raw[i]) + mms_values['message'] = get_message_text(messages_raw[i]) + mms_values['time'] = get_time_unix(messages_raw[i]) + mms_values['participants_xml'] = u'' + mms_values['msg_box'] = 2 if sent_by_me else 1 + mms_values['m_type'] = 128 if sent_by_me else 132 + + for participant in participants: + participant_is_sender = participant == sender or (sent_by_me and participant == 'Me') + participant_values = {'number': participant, 'code': 137 if participant_is_sender else 151} + mms_values['participants_xml'] += (' \n' % participant_values) + + mms_text = (' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '%(participants_xml)s' + ' \n' + ' \n' % mms_values) + + sms_backup_file.write(mms_text) + + sms_backup_file.close() + def get_message_type(message): # author_raw = messages_raw[i].cite author_raw = message.cite if ( not author_raw.span ): @@ -75,7 +123,11 @@ def get_message_type(message): # author_raw = messages_raw[i].cite def get_message_text(message): return BeautifulSoup(message.find('q').text,'html.parser').prettify(formatter='html').strip().replace('"',"'") -def get_phone(messages): +def get_mms_sender(message): + return format_number(phonenumbers.parse(message.cite.a['href'][4:], None)) + +def get_first_phone_number(messages, fallback_number): + # handle group messages for author_raw in messages: if (not author_raw.span): continue @@ -87,11 +139,33 @@ def get_phone(messages): except phonenumbers.phonenumberutil.NumberParseException: return sender_data.a['href'][4:] - if(phone_number.country_code == 1): - return phonenumbers.format_number(phone_number, phonenumbers.PhoneNumberFormat.INTERNATIONAL)[1:].replace(' ', '-') - else: - return phonenumbers.format_number(phone_number, phonenumbers.PhoneNumberFormat.E164) - return 0 + return format_number(phone_number) + + # fallback case, use number from filename + if 
(fallback_number == 0 or len(fallback_number) < 7): + return fallback_number + else: + return format_number(phonenumbers.parse(fallback_number, None)) + +def get_participant_phone_numbers(participants_raw): + participants = ['Me'] # May require adding a contact for "Me" to your phone, with your current number + + for participant_set in participants_raw: + for participant in participant_set: + if (not hasattr(participant, 'a')): + continue + + try: + phone_number = phonenumbers.parse(participant.a['href'][4:], None) + except phonenumbers.phonenumberutil.NumberParseException: + participants.push(participant.a['href'][4:]) + + participants.append(format_number(phone_number)) + + return participants + +def format_number(phone_number): + return phonenumbers.format_number(phone_number, phonenumbers.PhoneNumberFormat.E164) def get_time_unix(message): time_raw = message.find(class_='dt') @@ -111,5 +185,5 @@ def write_header(filename, numsms): backup_file.write(u'\n') backup_file.write(backup_text) backup_file.close() - + main() From e53989015769e3c68121e46f148305d2a8b88f1d Mon Sep 17 00:00:00 2001 From: brians Date: Wed, 22 Apr 2020 00:26:24 -0600 Subject: [PATCH 6/6] Updating README --- README.md | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 98c0673..d37c868 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,24 @@ # gvoice-sms-takeout-xml -Convert Google Voice SMS data from Takeout to .xml suitable for use with SMS Backup and Restore +Convert Google Voice SMS data from Takeout to .xml suitable for use with SMS Backup and Restore. +Input data is a folder of SMS .html files from Google Takeout. + +Working as of 2020-04-22. + +## How to use: +1. Go to https://contacts.google.com +2. Export all Google Contacts +3. Delete all Google Contacts (this is required so that numbers show up for each thread, otherwise Takeout will sometimes only have names. 
If you want to skip this step, you can, but some messages won't be linked to the right thread if you do. Note that deleting your contacts may remove contact photos on iOS unless you pause syncing on your iOS device first) +4. Request a Google Voice export from Google Takeout and download it +5. Restore contacts to your account +6. Download this script to your computer +7. Extract the Google Voice Takeout archive and move the extracted folder into the same folder as this script +8. Open a terminal in that folder +9. Install pip (sudo easy_install pip) +10. sudo pip install virtualenv +11. virtualenv sms && source sms/bin/activate +12. pip install phonenumbers BeautifulSoup4 python-dateutil +13. python sms.py +14. Copy the file "gvoice-all.xml" to your phone, then restore from it using SMS Backup and Restore + -This is a personal project from a few years back when Google switched Voice to Hangouts and I wanted to grab my old messages and get them into a usable format. It worked at the time; I don't know if it works as-is, but I'm planning on some testing in the near term to get it functional. -Input data is a folder of SMS .html files from Google Takeout.