# Recovered from a git patch ("[PATCH] Create FIXED VERSION", commit
# 5937de1f7d29f98643c1b37c9b29ddab2bdff1b6, Thu, 25 Aug 2022) that added this
# script as the new file "FIXED VERSION".
"""Collect messages from exported ``.html`` files into one backup file.

The script walks the current directory tree, parses every ``.html`` file,
and for each element with class ``message`` appends one record to
``sms_backup_filename``.  When the walk is finished a header is prepended
to that file.

Third-party imports (``bs4``, ``phonenumbers``) and ``tkinter`` are imported
inside the functions that use them so that the pure helpers in this module
can be imported without a display or those packages being present.
"""

import datetime
import os
import time
import warnings
from calendar import timegm

# TODO: filter emoji
# TODO: fix double-double quotes

sms_backup_filename = "./gvoice-all.xml"

# NOTE(review): the literals below are reproduced exactly as they appear in
# the source.  They contain no markup and no %-placeholders, and
# write_header() never uses its ``numsms`` argument, so the original text was
# presumably lost before this file was captured -- confirm against the
# upstream script and restore the real templates here.
#
# Formatted with ``%`` against a dict with keys phone, type, message, time.
SMS_RECORD_TEMPLATE = ' \n'
# Written once after the last record.
SMS_FOOTER = ''
# Written, in order, in front of the records by write_header().
HEADER_LINES = ("\n", "\n", '\n')


def main():
    """Convert every ``.html`` file below ``.`` into ``sms_backup_filename``.

    Records are appended to the backup file (an existing file is not
    truncated), then the footer is written and write_header() prepends the
    header lines.
    """
    from bs4 import BeautifulSoup  # deferred third-party import

    num_sms = 0
    root_dir = '.'

    # One handle for the whole run instead of reopening the file per message.
    with open(sms_backup_filename, 'a', encoding='utf-8') as sms_backup_file:
        for subdir, _dirs, files in os.walk(root_dir):
            for name in files:
                sms_filename = os.path.join(subdir, name)

                # Check the extension before opening so unrelated files are
                # never opened (and never leaked).
                if os.path.splitext(sms_filename)[1] != '.html':
                    print(sms_filename, "- skipped")
                    continue

                try:
                    # Binary mode lets Beautiful Soup detect the document's
                    # encoding instead of relying on the locale default.
                    with open(sms_filename, 'rb') as sms_file:
                        soup = BeautifulSoup(sms_file, 'html.parser')
                except FileNotFoundError:
                    continue
                print(sms_filename)

                messages_raw = soup.find_all(class_='message')
                num_sms += len(messages_raw)

                # The phone number is resolved once per file and shared by
                # all of that file's records.
                phone = get_phone(messages_raw, sms_filename)

                for message in messages_raw:
                    sms_values = {
                        'phone': phone,
                        'type': get_message_type(message),
                        'message': get_message_text(message),
                        'time': get_time_unix(message),
                    }
                    sms_backup_file.write(SMS_RECORD_TEMPLATE % sms_values)

        sms_backup_file.write(SMS_FOOTER)

    write_header(sms_backup_filename, num_sms)


def get_message_type(message):
    """Return the numeric type of *message*.

    Args:
        message: parsed message element exposing a ``cite`` child.

    Returns:
        2 when the ``cite`` element contains no ``span``, otherwise 1.
        NOTE(review): presumably 2 = sent and 1 = received in the target
        backup format -- confirm.
    """
    author_raw = message.cite
    return 1 if author_raw.span else 2


def get_message_text(message):
    """Return the text of the message's ``q`` element, HTML-escaped.

    The text is re-rendered through Beautiful Soup with the ``html``
    formatter, stripped, and every double quote is replaced by a single
    quote.
    """
    from bs4 import BeautifulSoup  # deferred third-party import

    raw_text = message.find('q').text
    rendered = BeautifulSoup(raw_text, 'html.parser').prettify(formatter='html')
    return rendered.strip().replace('"', "'")


def _format_sender_number(raw_number):
    """Format *raw_number* with ``phonenumbers``; return it as-is if unparsable."""
    import phonenumbers  # deferred third-party import

    try:
        phone_number = phonenumbers.parse(raw_number, None)
    except phonenumbers.phonenumberutil.NumberParseException:
        return raw_number

    if phone_number.country_code == 1:
        international = phonenumbers.format_number(
            phone_number, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
        # Drop the leading '+' and use dashes instead of spaces.
        return international[1:].replace(' ', '-')
    return phonenumbers.format_number(
        phone_number, phonenumbers.PhoneNumberFormat.E164)


def _phone_from_filename(sms_filename):
    """Derive a dashed number from the text after ``+`` in the file name.

    Returns 0 when the file name contains no ``+``.
    """
    base_name = os.path.basename(sms_filename)
    try:
        digits = list(base_name.split("+")[1].split(" ")[0])
    except IndexError:
        return 0
    # Insert from the right so earlier insertions do not shift later ones.
    for position in (7, 4, 1):
        digits.insert(position, "-")
    return "".join(digits)


def get_phone(messages, sms_filename):
    """Return the phone number to record for a file's messages.

    The first message that contains a ``span`` supplies the number, taken
    from the ``href`` of the link inside its ``cite`` element with the first
    four characters removed.  If no message has a ``span``, the number is
    derived from *sms_filename* instead.

    Args:
        messages: iterable of parsed message elements.
        sms_filename: path of the file the messages came from.

    Returns:
        The formatted number as a string, or 0 if none could be determined.
    """
    for author_raw in messages:
        if not author_raw.span:
            continue
        sender_data = author_raw.cite
        # NOTE(review): [4:] presumably strips a "tel:" scheme -- confirm.
        return _format_sender_number(sender_data.a['href'][4:])

    return _phone_from_filename(sms_filename)


def get_time_unix(message):
    """Return the message timestamp as integer milliseconds since the epoch.

    The timestamp is read from the ``title`` attribute of the element with
    class ``dt`` and must look like ``YYYY-MM-DDTHH:MM:SS.fff`` followed by a
    UTC offset or ``Z``.

    Raises:
        ValueError: if the ``title`` value does not match that format.
    """
    time_raw = message.find(class_='dt')
    ymdhms = time_raw['title']
    if ymdhms.endswith('Z'):
        # '%z' does not accept the name "UTC"; a numeric offset always parses.
        ymdhms = ymdhms[:-1] + '+0000'
    time_obj = datetime.datetime.strptime(ymdhms, '%Y-%m-%dT%H:%M:%S.%f%z')
    # utctimetuple() applies the parsed offset; timetuple() would keep the
    # local wall-clock fields and timegm() would misread them as UTC.
    return timegm(time_obj.utctimetuple()) * 1000 + time_obj.microsecond // 1000


def write_header(filename, numsms):
    """Prepend the header lines to *filename*, keeping its current content.

    Args:
        filename: path of the backup file to rewrite.
        numsms: number of records written.  NOTE(review): not used by the
            header text as captured in the source; kept for callers.
    """
    with open(filename, 'r', encoding='utf-8') as backup_file:
        backup_text = backup_file.read()

    with open(filename, 'w', encoding='utf-8') as backup_file:
        for line in HEADER_LINES:
            backup_file.write(line)
        backup_file.write(backup_text)


def _prompt_for_file():
    """Show a file-open dialog with the Tk root window hidden; return the path."""
    import tkinter as tk
    from tkinter import filedialog

    root = tk.Tk()
    root.withdraw()
    return filedialog.askopenfilename()


if __name__ == "__main__":
    # NOTE(review): the selected path is not used by anything in this script;
    # the dialog is kept only to preserve the original start-up behaviour.
    file_path = _prompt_for_file()
    main()