-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFileOperations.py
87 lines (67 loc) · 2.42 KB
/
FileOperations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import struct
import os
import hashlib
import zlib
import base64
from platform import python_version_tuple
from warnings import warn
# Bind `detect` to the first character-set detector that can be imported,
# tried in this order: charset_normalizer (Python 3+ only), cchardet, chardet.
# If none of them is importable, `detect` becomes a stub that returns None.
try:
    if int(python_version_tuple()[0]) < 3:
        # On Python 2, skip charset_normalizer and go straight to the fallbacks.
        raise ImportError
    from charset_normalizer import detect
except ImportError:
    try:
        from cchardet import detect
    except ImportError:
        try:
            from chardet import detect
            # Emitted once at import time, only when chardet is the detector in use.
            warn('python chardet is installed but could be unreliable, upgrade to python 3 and install '
                 'charset-normalizer or cchardet.')
        except ImportError:
            def detect(bytes_str):
                """Stand-in used when no detection library is installed; always returns None."""
                return None
def decompress(data, enable_encoding_guessing=True, encoding='utf-8'):
    """Decode a base64-encoded, gzip-compressed payload into text.

    Args:
        data: Base64 text (str or bytes) wrapping a gzip stream.
        enable_encoding_guessing: When exactly ``True``, ask the module-level
            ``detect`` function for the character set of the decompressed
            bytes. Any other value skips detection.
        encoding: Codec used when detection is disabled, unavailable, or
            inconclusive. Undecodable bytes are dropped in that case.

    Returns:
        The decoded text, or ``None`` when the detected codec was tried and
        the bytes turned out not to be valid in it (the error is printed).

    Raises:
        binascii.Error: If ``data`` is not valid base64.
        zlib.error: If the decoded bytes are not a valid gzip stream.
    """
    # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
    raw_subtitle = zlib.decompress(base64.b64decode(data), 16 + zlib.MAX_WBITS)

    detection = detect(raw_subtitle) if enable_encoding_guessing is True else None
    # Detectors signal "could not tell" either with None or with a result
    # whose 'encoding' entry is None; both mean "use the caller's codec".
    detected_encoding = detection.get('encoding') if detection else None
    if not detected_encoding:
        return raw_subtitle.decode(encoding, errors='ignore')

    try:
        return raw_subtitle.decode(detected_encoding)
    except LookupError:
        # The detector named a codec this interpreter does not provide.
        return raw_subtitle.decode(encoding, errors='ignore')
    except UnicodeDecodeError as e:
        # Kept for backward compatibility: report the problem and return None.
        print(e)
        return None
def get_gzip_base64_encoded(file_path):
    """Return the contents of ``file_path`` compressed and base64-encoded.

    The file is compressed with ``zlib.compress`` (a zlib-format stream, not
    a gzip container, despite the function name) and then base64-encoded
    with a newline after every 76 output characters.

    Args:
        file_path: Path of the file to read.

    Returns:
        The base64 representation as a byte string.

    Raises:
        IOError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed even on error.
    with open(file_path, mode='rb') as source:
        compressed = zlib.compress(source.read())

    # base64.encodestring was removed in Python 3.9; encodebytes is the
    # equivalent replacement and is missing only on Python 2.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    return encode(compressed)
def get_md5(file_path):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is read in fixed-size chunks so that large files do not have
    to fit in memory.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The 32-character lowercase hex digest.

    Raises:
        IOError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
class File(object):
    """A file on disk together with its size, able to compute a 64-bit hash."""

    def __init__(self, path):
        """Record ``path`` and its current size.

        Args:
            path: Path of an existing file.

        Raises:
            OSError: If the size of ``path`` cannot be determined.
        """
        self.path = path
        # Stored as a string; get_hash converts it back to an int.
        self.size = str(os.path.getsize(path))

    def get_hash(self):
        """Return a 64-bit checksum built from the file size and both ends.

        The checksum is the file size plus the sum of every 64-bit integer
        in the first 64 KiB and in the last 64 KiB of the file, truncated to
        64 bits.

        Returns:
            A 16-character, zero-padded lowercase hex string on success,
            ``"IOError"`` if the file cannot be opened, or ``"SizeError"``
            if the recorded size is below 128 KiB.

        Raises:
            struct.error: If fewer than 64 KiB can be read at either end,
                for example because the file shrank after construction.
        """
        chunk_size = 65536
        size = int(self.size)
        # '<' pins 8-byte little-endian integers so the result does not
        # depend on the byte order of the host.
        chunk_format = struct.Struct('<%dq' % (chunk_size // 8))

        try:
            media = open(self.path, "rb")
        except IOError:
            return "IOError"

        # The context manager closes the handle on every exit path,
        # including the early "SizeError" return.
        with media:
            if size < chunk_size * 2:
                return "SizeError"

            checksum = size
            for offset in (0, max(0, size - chunk_size)):
                media.seek(offset, 0)
                checksum += sum(chunk_format.unpack(media.read(chunk_size)))
                # Keep only the low 64 bits; this also folds negative
                # partial sums from the signed values into range.
                checksum &= 0xFFFFFFFFFFFFFFFF

        return "%016x" % checksum