Skip to content

Commit e6640e5

Browse files
author
remi.paucher
committed
- Centrage des caractères beaucoup plus rapide (boucle sur les fichiers du dossier faite en C++), mais spécifique à Windows
- Génération d'un modèle à partir de données simulées centrées avec plus de rotations => 77.5% de succès - Changement définitif de la manière de tester un modèle sur les captchas Egoshare => beaucoup plus rapide
1 parent 9abba2c commit e6640e5

File tree

9 files changed

+2075
-1676
lines changed

9 files changed

+2075
-1676
lines changed

C++ sources/Centrage caractères/main.cpp

Lines changed: 119 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,17 @@
77
#include <algorithm>
88
#include <sstream>
99

10+
#include <iostream>
11+
#include "windows.h"
12+
#include "stdio.h"
13+
#include "dos.h"
14+
#include <sstream>
15+
16+
#include <cstdlib>
17+
#include <conio.h>
18+
19+
20+
1021
#if defined (WIN32)
1122
#pragma comment(lib,"cv")
1223
#pragma comment(lib,"cvaux")
@@ -18,24 +29,23 @@
1829
using namespace std;
1930

2031

21-
#define WIDTH 38
22-
#define HEIGHT 31
23-
24-
32+
33+
/**
 * Converts a wide string to a narrow (char) string using the global locale.
 *
 * Each wide character is narrowed through the locale's ctype<wchar_t> facet.
 * Characters that have no single-byte representation in that locale are
 * replaced by '?'.
 *
 * The parameter is taken by const reference so that const strings and
 * temporaries are accepted; existing callers passing a non-const lvalue
 * keep working unchanged.
 *
 * @param str  wide string to convert (may be empty)
 * @return     narrow string with exactly str.size() characters
 */
std::string narrow( const std::wstring& str )
{
    std::string result( str.size(), '\0' );
    if( str.empty() )
        return result ;

    // The wchar_t facet is required here: the char facet would first
    // truncate every wide character to char and never apply the default.
    const std::ctype<wchar_t>& ctfacet =
        std::use_facet< std::ctype<wchar_t> >( std::locale() ) ;

    // Range overload: narrows the whole buffer in a single call.
    ctfacet.narrow( str.data(), str.data() + str.size(), '?', &result[0] ) ;
    return result ;
}
2542

2643

2744

2845

29-
int main(int argc, char *argv[])
46+
void process_file(string filenameIN, int WIDTH, int HEIGHT)
3047
{
31-
//Noms de fichiers
32-
string filenameIN;
33-
if (argc < 2)
34-
filenameIN = "test.bmp";
35-
else
36-
filenameIN = argv[1];
37-
38-
48+
//cout << "processing file: " << filenameIN << endl;
3949
//Chargement de l'image
4050
IplImage *srcImg=0, *res=0;
4151
srcImg = cvLoadImage(filenameIN.c_str(),0);
@@ -69,19 +79,111 @@ int main(int argc, char *argv[])
6979
{
7080
for (int j=0; j<=ymax-ymin; ++j)
7181
{
72-
cvSet2D(res, offsety+j, offsetx+i, cvGet2D(srcImg, ymin+j, xmin+i));
82+
if ((offsety+j>0) && (offsety+j<res->height) && (offsetx+i>0) && (offsetx+i<res->width))
83+
cvSet2D(res, offsety+j, offsetx+i, cvGet2D(srcImg, ymin+j, xmin+i));
7384
}
7485
}
7586

87+
cvSaveImage(filenameIN.c_str(), res);
88+
89+
}
90+
91+
92+
93+
94+
95+
96+
97+
int main(int argc, char *argv[])
98+
{
99+
//Noms de fichiers
100+
string folder;
101+
102+
//cout << "ARGC: " << argc << endl;
103+
104+
int WIDTH;
105+
int HEIGHT;
106+
107+
if (argc < 2)
108+
{
109+
folder = "*";
110+
WIDTH = 38;
111+
HEIGHT = 31;
112+
}
113+
else
114+
{
115+
folder = argv[1];
116+
//cout << "filename IN " << folder << endl;
117+
118+
WIDTH = atoi(argv[2]);
119+
//cout << "WIDTH " << WIDTH << endl;
120+
121+
HEIGHT = atoi(argv[3]);
122+
//cout << "HEIGHT " << HEIGHT << endl;
123+
}
124+
125+
126+
string expression_bmp = folder + "\\*.bmp";
127+
std::vector<std::wstring> SongsLoaded;
128+
129+
//InitFunction
130+
WIN32_FIND_DATA findFileData;
131+
132+
133+
134+
char *expr = (char *) expression_bmp.c_str();
135+
wchar_t *lpfile = new wchar_t[200];
136+
mbstowcs(lpfile, expr, strlen(expr));
137+
138+
//cout << "sizeof: " << sizeof(expr) << endl;
139+
//cout << "EXPRESSION_BMP CHAR*: " << expr << endl;
140+
//wcout << "EXPRESSION_BMP: " << lpfile << endl;
141+
142+
HANDLE hFind = FindFirstFile(lpfile, &findFileData);
143+
144+
if(hFind == INVALID_HANDLE_VALUE)
145+
{
146+
//cout << "Could Not Find Any file in folder!" << endl;
147+
//system("pause");
148+
return 1;
149+
}
150+
151+
SongsLoaded.push_back(findFileData.cFileName);
152+
153+
while(FindNextFile(hFind, &findFileData))
154+
{
155+
SongsLoaded.push_back(findFileData.cFileName);
156+
//wcout << findFileData.cFileName << endl;
157+
}
158+
159+
FindClose(hFind);
160+
161+
for (int i=0; i<SongsLoaded.size(); ++i)
162+
{
163+
//wcout << "Traitement: " << SongsLoaded[i] << endl;
164+
165+
string filename = narrow(SongsLoaded[i]);
166+
filename = folder + "\\" + filename;
167+
168+
process_file(filename, WIDTH, HEIGHT);
169+
170+
//getchar();
171+
172+
}
173+
//cout << endl;
174+
175+
176+
76177

77-
//cout << "xmin: " << xmin << " xmax: " << xmax << endl;
78-
//cout << "ymin: " << ymin << " ymax: " << ymax << endl;
79178

80-
cvSaveImage(filenameIN.c_str(), res);
81179

82180
//system("pause");
83181

84182
return 0;
85183

86184

87185
}
186+
187+
188+
189+

Egoshare/Models/Stats.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,4 +152,9 @@ SIMULATION-BASED avec centrage des caract
152152
simulation_based_C=1000_KERNEL=2.svm 72.8291316527%
153153
simulation_based_C=1000_KERNEL=1.svm 73.6694677871%
154154
simulation_based_C=1000_KERNEL=1.svm 73.6694677871%
155-
simulation_based_C=1000_KERNEL=1.svm 73.6694677871%
155+
156+
Avec plus de rotations:
157+
-----------------------
158+
simulation_based_NEW_C=1000_KERNEL=2.svm 76.0504201681%
159+
simulation_based_NEW_C=1000_KERNEL=1.svm 77.4509803922%
160+

Egoshare/Models/simulation_based_C=1000_KERNEL=1.svm

Lines changed: 1929 additions & 1540 deletions
Large diffs are not rendered by default.

Egoshare_2_GenerateDB.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
if SIMULATION_BASED:
1313
DEFAULT_SIZE = (30, 30)
1414
GENERATE_TRAINING_SET = True
15-
GENERATE_VALIDATION_SET = True
15+
GENERATE_VALIDATION_SET = False
1616

1717
if GENERATE_TRAINING_SET:
1818
print """
@@ -35,13 +35,12 @@
3535
SCALE_MIN = 17
3636
SCALE_MAX = 22
3737
STEP = 1
38-
ALIGN_RANGEY = [0.7]
38+
ALIGN_RANGEY = [0.5]
3939
ALIGN_RANGEX = [0.5]
40-
SEUIL_RANGE = [160, 180, 200]
41-
ROTATIONS = [2, 4, 6, 9, 13, 17, 20, 22, 25, 27]
42-
FONTS = [("Fonts/comic.ttf", (140, 160)),
40+
ROTATIONS = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
41+
FONTS = [("Fonts/comic.ttf", (145, 163)),
4342
("Fonts/vera.ttf", (160, 180)),
44-
("Fonts/califb.ttf", (160, 180))]
43+
("Fonts/califb.ttf", (171, 191))]
4544
Generate_Set(DESTINATION_FOLDER,CLEAN_DESTINATION_FOLDER,DISTORTION_W_MIN,DISTORTION_W_MAX,DISTORTION_H_MIN,
4645
DISTORTION_H_MAX,SCALE_MIN,SCALE_MAX,STEP, elem_to_gen, FONTS, ALIGN_RANGEX, ALIGN_RANGEY, DEFAULT_SIZE, ROTATIONS)
4746

Egoshare_3_TrainTestSVM.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
import time
66

77

8-
GENERATE_CAPTCHA_BASED_MODELS = True
9-
GENERATE_SIMULATION_BASED_MODELS = False
8+
GENERATE_CAPTCHA_BASED_MODELS = False
9+
GENERATE_SIMULATION_BASED_MODELS = True
1010
VERBOSE = 0
1111
MODEL_FOLDER = 'Egoshare/Models'
1212

@@ -24,7 +24,7 @@
2424
#Génération du modèle
2525
execfile("Train & Test SVM.py")
2626
#Test du modèle
27-
execfile("Egoshare_5_Perf.py")
27+
execfile("Egoshare_5_TestPerf.py")
2828

2929

3030
if GENERATE_SIMULATION_BASED_MODELS:
@@ -36,8 +36,10 @@
3636

3737
for C in CRANGE:
3838
for KERNEL in KERNEL_TYPE:
39-
MODEL_FILE = "simulation_based_C="+str(C)+"_KERNEL="+str(KERNEL)+".svm"
39+
MODEL_FILE = "simulation_based_NEW_C="+str(C)+"_KERNEL="+str(KERNEL)+".svm"
4040
#Génération du modèle
4141
execfile("Train & Test SVM.py")
42-
42+
#Test du modèle
43+
execfile("Egoshare_5_TestPerf.py")
44+
4345
raw_input()

Egoshare_5_Perf.py

Lines changed: 0 additions & 35 deletions
This file was deleted.

Egoshare_5_Perf_bis.py

Lines changed: 0 additions & 55 deletions
This file was deleted.

centrage.exe

11 KB
Binary file not shown.

characters_center.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,14 @@
1010

1111
for folder, subfolders, files in os.walk(TRAINING_FOLDER):
1212
loaded = False
13-
for file in [file for file in files if 'bmp' in file]:
14-
if not loaded:
15-
print "folder", folder, "loaded"
16-
loaded = True
17-
18-
print file
19-
20-
if os.name == "nt":
21-
filename = os.path.join(os.getcwd(), folder, file)
22-
command = '""'+os.path.join(os.getcwd(), 'centrage.exe" "'+filename+'" '+str(WIDTH)+' '+str(HEIGHT)+'"')
23-
else:
24-
#Some Linux stuff :)
25-
pass
26-
27-
os.system(command)
28-
29-
3013

14+
print folder
3115

16+
if os.name == "nt":
17+
command = 'centrage.exe "%s" %d %d'%(folder, WIDTH, HEIGHT)
18+
else:
19+
#Some Linux stuff :)
20+
pass
21+
22+
os.system(command)
23+

0 commit comments

Comments
 (0)