-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnamegen.py
142 lines (109 loc) · 4.86 KB
/
namegen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""
Copyright 2010 Joao Henriques <jotaf (no spam) at hotmail dot com>.
This file is part of name-gen.
name-gen is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
name-gen is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with name-gen. If not, see
<http://www.gnu.org/licenses/>.
"""
import itertools
import random
import locale
class NameGen:
"""
name-gen: Free python name generator module that analyzes sample text and produces
similar words.
Usage:
1. Initialize with path to language file (generated using 'namegen_training.py').
2. Call gen_word() method, returns generated string.
Optional:
. Change min_syl and max_syl to control number of syllables.
. Pass the sample file as 2nd parameter at initialization to set it as the list
of forbidden words. No words from the sample will be replicated.
. Pass True as the 1st parameter to name_gen() to add the generated word to the
list of forbidden words. The word will not occur again.
"""
def __init__(self, language_file, forbidden_file = None):
self.min_syl = 2
self.max_syl = 4
#load language file
with open(language_file, 'r') as f:
lines = [line.strip() for line in f.readlines()]
self.syllables = lines[0].split(',') #first line, list of syllables
starts_ids = [int(n) for n in lines[1].split(',')] #next 2 lines, start syllable indexes and counts
starts_counts = [int(n) for n in lines[2].split(',')]
self.starts = zip(starts_ids, starts_counts) #zip into a list of tuples
ends_ids = [int(n) for n in lines[3].split(',')] #next 2, same for syllable ends
ends_counts = [int(n) for n in lines[4].split(',')]
self.ends = zip(ends_ids, ends_counts)
#starting with the 6th and 7th lines, each pair of lines holds ids and counts
#of the "next syllables" for a previous syllable.
self.combinations = []
for (ids_str, counts_str) in zip(lines[5:None:2], lines[6:None:2]):
if len(ids_str) == 0 or len(counts_str) == 0: #empty lines
self.combinations.append([])
else:
line_ids = [int(n) for n in ids_str.split(',')]
line_counts = [int(n) for n in counts_str.split(',')]
self.combinations.append(zip(line_ids, line_counts))
#load forbidden words file if needed
if forbidden_file is None:
self.forbidden = ''
else:
self.forbidden = _load_sample(forbidden_file)
def gen_word(self, no_repeat = False):
#random number of syllables, the last one is always appended
num_syl = random.randint(self.min_syl, self.max_syl - 1)
#turn ends list of tuples into a dictionary
ends_dict = dict(self.ends)
#we may have to repeat the process if the first "min_syl" syllables were a bad choice
#and have no possible continuations; or if the word is in the forbidden list.
word = []; word_str = ''
while len(word) < self.min_syl or self.forbidden.find(word_str) != -1:
#start word with the first syllable
syl = _select_syllable(self.starts, 0)
word = [self.syllables[syl]]
for i in range(1, num_syl):
#don't end yet if we don't have the minimum number of syllables
if i < self.min_syl: end = 0
else: end = ends_dict.get(syl, 0) #probability of ending for this syllable
#select next syllable
syl = _select_syllable(self.combinations[syl], end)
if syl is None: break #early end for this word, end syllable was chosen
word.append(self.syllables[syl])
else: #forcefully add an ending syllable if the loop ended without one
syl = _select_syllable(self.ends, 0)
word.append(self.syllables[syl])
word_str = ''.join(word)
#to ensure the word doesn't repeat, add it to the forbidden words
if no_repeat: self.forbidden = self.forbidden + '\n' + word_str
return word_str.capitalize()
def _select_syllable(counts, end_count):
if len(counts) == 0: return None #no elements to choose from
#"counts" holds cumulative counts, so take the last element in the list
#(and 2nd in that tuple) to get the sum of all counts
chosen = random.randint(0, counts[-1][1] + end_count)
for (syl, count) in counts:
if count >= chosen:
return syl
return None
def _load_sample(filename):
#get sample text
with open(filename, 'r') as f:
sample = ''.join(f.readlines()).lower()
#convert accented characters to non-accented characters
sample = locale.strxfrm(sample)
#remove all characters except letters from A to Z
a = ord('a')
z = ord('z')
sample = ''.join([
c if (ord(c) >= a and ord(c) <= z) else ' '
for c in sample])
return sample