-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathocrDemo.py
73 lines (55 loc) · 2.05 KB
/
ocrDemo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding:utf-8 -*-
"""
用dhash判断是否相同照片
基于渐变比较的hash
hash可以省略(本文省略)
By Guanpx
"""
from PIL import Image
from os import listdir
def picPostfix():
    """Return the set of file-name suffixes that are treated as pictures.

    Suffixes are stored without the leading dot. All entries are lowercase
    except ``'JPG'``, which is listed explicitly next to ``'jpg'``.
    A new set object is built on every call.
    """
    return {
        'bmp', 'jpg', 'png', 'tiff', 'gif', 'pcx', 'tga', 'exif', 'fpx',
        'svg', 'psd', 'cdr', 'pcd', 'dxf', 'ufo', 'eps', 'JPG', 'raw',
        'jpeg',
    }
def getDiff(width, high, image):
    """Build the left-to-right brightness difference sequence of an image.

    The image is resized to ``width`` x ``high`` and converted to mode
    ``'L'`` (grayscale, values 0-255) so each pixel is a single number.

    Args:
        width: target width in pixels.
        high: target height in pixels.
        image: object providing ``resize``, ``convert`` and ``getdata``
            (a PIL image in this script).

    Returns:
        A flat list of ``high * (width - 1)`` booleans, row by row. Each
        entry is True when a pixel is strictly brighter than its right-hand
        neighbour in the same row.
    """
    gray = image.resize((width, high)).convert('L')
    values = list(gray.getdata())  # flat, row-major pixel values
    bits = []
    for row in range(high):
        offset = row * width  # index of the first pixel of this row
        # The last pixel of a row has no right neighbour, hence width - 1.
        bits.extend(
            values[pos] > values[pos + 1]
            for pos in range(offset, offset + width - 1)
        )
    return bits
def getHamming(diff=(), diff2=()):
    """Return the Hamming distance between two difference sequences.

    Args:
        diff: first sequence of comparable items (booleans in this script).
        diff2: second sequence; must have the same length as ``diff``.

    Returns:
        The number of positions at which the two sequences hold different
        values. Two empty sequences (the defaults) give 0.

    Raises:
        ValueError: if the sequences differ in length. The Hamming distance
            is only defined for equal lengths; raising here replaces an
            IndexError (``diff2`` shorter) or a silent prefix-only
            comparison (``diff2`` longer).
    """
    # Immutable tuple defaults avoid the shared-mutable-default pitfall.
    if len(diff) != len(diff2):
        raise ValueError(
            "sequences must have equal length (got %d and %d)"
            % (len(diff), len(diff2))
        )
    return sum(1 for left, right in zip(diff, diff2) if left != right)
if __name__ == '__main__':
    # Script entry point: compute a difference sequence for every picture in
    # the album directory, then report each pair of pictures whose sequences
    # differ in only a few positions.
    from itertools import combinations
    from os.path import join

    width = 32
    high = 32
    maxDistance = 5  # report pairs whose Hamming distance is at most this
    # Raw string: "\p" is not a valid escape sequence and only worked by
    # accident in a normal string literal.
    dirName = r"F:\pictest2"  # album directory
    allDiff = []  # (file name, difference sequence) per readable picture
    postFix = picPostfix()
    dirList = listdir(dirName)
    for cnt, fileName in enumerate(dirList, 1):
        print(cnt)  # progress counter
        # Only the text after the last dot counts as the suffix; a name with
        # no dot at all has no suffix. Also accept the lowercased suffix so
        # that e.g. "a.PNG" is not skipped.
        _, dot, ext = fileName.rpartition('.')
        if not dot or not (ext in postFix or ext.lower() in postFix):
            continue
        try:
            # join() builds the path with the platform separator, and the
            # name from listdir() is passed through unchanged (no
            # Python-2-only unicode()/UTF-8 decoding step that can fail).
            im = Image.open(join(dirName, fileName))
            diff = getDiff(width, high, im)
        except IOError as err:
            # Image.open raises IOError/OSError for files it cannot identify
            # or read; skip that file instead of aborting the whole scan.
            print("skip %s: %s" % (fileName, err))
            continue
        allDiff.append((fileName, diff))
    # combinations() yields every unordered pair once, in the same order as
    # a nested "for i / for j in range(i + 1, ...)" loop.
    for (nameA, diffA), (nameB, diffB) in combinations(allDiff, 2):
        if getHamming(diffA, diffB) <= maxDistance:
            print("%s and %s maybe same photo..." % (nameA, nameB))