-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmodel_utils.lua
215 lines (181 loc) · 6.64 KB
/
model_utils.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
require 'torch'
-- Module table: the functions defined below are attached to it, and it is
-- returned at the end of the file.
local model_utils = {}
--- Flatten the parameters and gradients of several modules into two vectors.
-- Each argument must expose a `:parameters()` method returning
-- (list of parameter tensors, list of gradient tensors). Every collected
-- tensor is re-pointed (via `:set`) at a slice of one newly allocated flat
-- tensor, so the module tensors and the returned vectors view the same storage.
-- @param ... modules to combine; a module whose `:parameters()` returns nil
--            as its first result is skipped
-- @return flat tensor covering all collected parameters
-- @return flat tensor covering all collected gradient parameters
function model_utils.combine_all_parameters(...)
--[[ like module:getParameters, but operates on many modules ]]--
-- get parameters
local networks = {...}
local parameters = {}
local gradParameters = {}
-- Append every module's tensors to the two combined lists.
for i = 1, #networks do
local net_params, net_grads = networks[i]:parameters()
if net_params then
for _, p in pairs(net_params) do
parameters[#parameters + 1] = p
end
for _, g in pairs(net_grads) do
gradParameters[#gradParameters + 1] = g
end
end
end
-- Look up `storage` in `set` (keyed by torch.pointer of the storage).
-- Returns the offset recorded for it, or nil when it has not been seen.
-- A recorded offset of 0 is still truthy in Lua, so callers can test with `not`.
local function storageInSet(set, storage)
local storageAndOffset = set[torch.pointer(storage)]
if storageAndOffset == nil then
return nil
end
local _, offset = unpack(storageAndOffset)
return offset
end
-- this function flattens arbitrary lists of parameters,
-- even complex shared ones
-- Returns an empty torch.Tensor() when the list is nil or empty; otherwise
-- returns one flat tensor and re-points every tensor in `parameters` into it.
local function flatten(parameters)
if not parameters or #parameters == 0 then
return torch.Tensor()
end
-- Constructor taken from the first tensor's `new` field; presumably this
-- yields tensors of the same type as the parameters -- confirm.
local Tensor = parameters[1].new
-- Pass 1: record each distinct storage once, together with the number of
-- elements that precede it in the concatenated layout.
local storages = {}
local nParameters = 0
for k = 1,#parameters do
local storage = parameters[k]:storage()
if not storageInSet(storages, storage) then
storages[torch.pointer(storage)] = {storage, nParameters}
nParameters = nParameters + storage:size()
end
end
-- Pass 2: build a mask. The flat buffer starts as all ones; each parameter is
-- re-pointed into it (storage base offset + the tensor's own offset) and
-- zeroed. Afterwards 0 marks an element used by some parameter and 1 marks a
-- "hole" that no parameter references.
local flatParameters = Tensor(nParameters):fill(1)
local flatStorage = flatParameters:storage()
for k = 1,#parameters do
local storageOffset = storageInSet(storages, parameters[k]:storage())
parameters[k]:set(flatStorage,
storageOffset + parameters[k]:storageOffset(),
parameters[k]:size(),
parameters[k]:stride())
parameters[k]:zero()
end
-- Snapshot the mask and compute, for every position, how many holes occur at
-- or before it.
local maskParameters= flatParameters:float():clone()
local cumSumOfHoles = flatParameters:float():cumsum(1)
-- The last running-sum entry is the total hole count. This indexes with
-- `#cumSumOfHoles`; presumably Torch's size operator on tensors -- confirm.
local nUsedParameters = nParameters - cumSumOfHoles[#cumSumOfHoles]
local flatUsedParameters = Tensor(nUsedParameters)
local flatUsedStorage = flatUsedParameters:storage()
-- Pass 3: re-point each parameter into the compact buffer, shifting its
-- offset left by the number of holes that precede it.
for k = 1,#parameters do
local offset = cumSumOfHoles[parameters[k]:storageOffset()]
parameters[k]:set(flatUsedStorage,
parameters[k]:storageOffset() - offset,
parameters[k]:size(),
parameters[k]:stride())
end
-- Copy the contents of each original storage back into the hole-including
-- flat buffer (k is the storage, v the number of elements preceding it).
for _, storageAndOffset in pairs(storages) do
local k, v = unpack(storageAndOffset)
flatParameters[{{v+1,v+k:size()}}]:copy(Tensor():set(k))
end
-- Move the values into the compact buffer that the parameters now view.
if cumSumOfHoles:sum() == 0 then
-- No holes: both buffers have the same length, so copy in one call.
flatUsedParameters:copy(flatParameters)
else
-- Holes present: copy element by element, skipping masked positions.
-- At a used position k, counter (used elements so far) plus
-- cumSumOfHoles[k] (holes so far) equals k.
local counter = 0
for k = 1,flatParameters:nElement() do
if maskParameters[k] == 0 then
counter = counter + 1
flatUsedParameters[counter] = flatParameters[counter+cumSumOfHoles[k]]
end
end
assert (counter == nUsedParameters)
end
return flatUsedParameters
end
-- flatten parameters and gradients
local flatParameters = flatten(parameters)
local flatGradParameters = flatten(gradParameters)
-- return new flat vector that contains all discrete parameters
return flatParameters, flatGradParameters
end
--- Build T clones of `net` that share its parameter tensors.
-- The network is serialized once into an in-memory file and deserialized T
-- times. After each deserialization the clone's parameter, gradient and
-- (when the network offers `parametersNoGrad`) no-grad tensors are re-pointed
-- with `:set` at the corresponding tensors of `net`.
-- @param net object to clone; `parameters` / `parametersNoGrad` are optional
-- @param T   number of clones to create
-- @return array of T clones, indexed 1..T
function model_utils.clone_many_times(net, T)
    -- Tensors of the source network that every clone will alias.
    local src_weights, src_grads
    if net.parameters then
        src_weights, src_grads = net:parameters()
        if src_weights == nil then
            src_weights = {}
        end
    end

    local src_frozen
    if net.parametersNoGrad then
        src_frozen = net:parametersNoGrad()
    end

    -- Re-point the clone's tensors at the source network's tensors.
    local function tie_to_source(copy)
        local copy_weights, copy_grads = copy:parameters()
        for i = 1, #src_weights do
            copy_weights[i]:set(src_weights[i])
            copy_grads[i]:set(src_grads[i])
        end
        if src_frozen then
            local copy_frozen = copy:parametersNoGrad()
            for i = 1, #src_frozen do
                copy_frozen[i]:set(src_frozen[i])
            end
        end
    end

    -- Serialize once; every clone is read back from this buffer.
    local buffer = torch.MemoryFile("w"):binary()
    buffer:writeObject(net)

    local copies = {}
    for step = 1, T do
        -- A fresh reader per clone: reusing one would hand back pointers to
        -- objects that were already read instead of new objects.
        local reader = torch.MemoryFile(buffer:storage(), "r"):binary()
        local copy = reader:readObject()
        reader:close()

        if net.parameters then
            tie_to_source(copy)
        end

        copies[step] = copy
        collectgarbage()
    end

    buffer:close()
    return copies
end
--- Build T clones of a list of networks, sharing tensors with the originals.
-- `net` is an array of network objects. The whole array is serialized once
-- into an in-memory file and deserialized T times; for every entry the
-- clone's parameter, gradient and (when offered) no-grad tensors are
-- re-pointed with `:set` at the tensors of the matching original entry.
-- @param net array of objects; `parameters` / `parametersNoGrad` are optional
--            on each entry
-- @param T   number of clones to create
-- @return array of T clones (each itself an array parallel to `net`)
function model_utils.clone_many_times_multiple_nngraph(net, T)
    local clones = {}
    local n_nets = #net

    -- These tables are indexed by position in `net` (not appended to), so an
    -- entry without `parameters` cannot shift the entries that follow it.
    local params_table = {}
    local gradParams_table = {}
    local paramsNoGrad_table = {}
    for i = 1, n_nets do
        local sub_net = net[i]
        if sub_net.parameters then
            local params, gradParams = sub_net:parameters()
            if params == nil then
                params = {}
            end
            params_table[i] = params
            gradParams_table[i] = gradParams
        end
        if sub_net.parametersNoGrad then
            -- Query the individual entry; the list table itself has no such
            -- method. Kept local so no global is leaked.
            paramsNoGrad_table[i] = sub_net:parametersNoGrad()
        end
    end

    local mem = torch.MemoryFile("w"):binary()
    mem:writeObject(net)

    for t = 1, T do
        -- We need to use a new reader for each clone.
        -- We don't want to use the pointers to already read objects.
        local reader = torch.MemoryFile(mem:storage(), "r"):binary()
        local clone = reader:readObject()
        reader:close()

        for idx = 1, n_nets do
            if net[idx].parameters then
                local params = params_table[idx]
                local gradParams = gradParams_table[idx]
                local cloneParams, cloneGradParams = clone[idx]:parameters()
                for i = 1, #params do
                    cloneParams[i]:set(params[i])
                    cloneGradParams[i]:set(gradParams[i])
                end

                local paramsNoGrad = paramsNoGrad_table[idx]
                if paramsNoGrad then
                    local cloneParamsNoGrad = clone[idx]:parametersNoGrad()
                    for i = 1, #paramsNoGrad do
                        cloneParamsNoGrad[i]:set(paramsNoGrad[i])
                    end
                end
            end
        end

        clones[t] = clone
        collectgarbage()
    end

    mem:close()
    return clones
end
return model_utils