-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmflock.c
316 lines (282 loc) · 9.8 KB
/
mflock.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
/*
This file is part of ESFS, a FUSE-based filesystem that supports snapshots.
ESFS is Copyright (C) 2013 Elod Csirmaz
<http://www.epcsirmaz.com/> <https://github.com/csirmaz>.
ESFS is based on Big Brother File System (fuse-tutorial)
Copyright (C) 2012 Joseph J. Pfeiffer, Jr., Ph.D. <[email protected]>,
and was forked from it on 21 August 2013.
Big Brother File System can be distributed under the terms of
the GNU GPLv3. See the file COPYING.
See also <http://www.cs.nmsu.edu/~pfeiffer/fuse-tutorial/>.
Big Brother File System was derived from function prototypes found in
/usr/include/fuse/fuse.h
Copyright (C) 2001-2007 Miklos Szeredi <[email protected]>
fuse.h is licensed under the LGPLv2.
ESFS is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
ESFS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* NOTE: A Perl script is used to replace $ with esfs_ and $$ with ESFS_
* in this file. To write $, use \$.
*/
/* We implement a mutex-based locking mechanism here that allows
* $$LOCK_NUM files to be written in parallel by different threads,
* while only one thread may write any given file at a time.
*
* We need this restriction to ensure that when a block is saved in the dat file,
* only one thread is writing that file. This applies even if the dat file
* is a journal storing possibly more changes to the same block, and
* if it's opened with O_APPEND.
*
* We set up a limited number of locks here to save the overhead of a hash
* storage (mapping files, e.g. inodes, to mutexes). Mutexes here are organised in a table
* and can be labelled. We need to protect changes to the table
* so that a label and a mutex would function as a unit, but instead of
* locking the whole table whenever it changes, we use MOD mutexes
* which only lock changes related to certain labels. There are $$LOCK_NUM
* MOD mutexes, and MOD mutex i is used when label % $$LOCK_NUM == i.
*
* Moreover, to save the overhead of re-labelling mutexes, there is a system
* where mutexes can be handed over to other threads if they are waiting
* for the mutex on the same label. This is done using the 'want' member.
*
* Rules of thumb:
* - Reverse procedures should be symmetrical
* - Re-check expected condition after getting mutex
*
*/
/* GET LOCK:
*
* START:
* if(label is in table -> i){
* want i++
* get mutex i
* want i--
* recheck: if(label i != label){
* goto START
* }
* return i
* }
*
* // label is not in table
* get MOD mutex
* recheck if(label is in table -> i){
* release MOD mutex
* want i++
* get mutex i
* want i--
* recheck: if(label i != label){
* goto START
* }
* return i
* }
*
* FIND:
* try all non-labelled mutexes in a loop until able to get one -> i
* recheck: if(label no longer empty){
* // we must be in the middle of a handover
* release mutex i
* goto FIND: - OR - release MOD mutex; goto START:
* }
* label := new label
* release MOD mutex
* return i
*
* RELEASE LOCK:
* if(want==0){
* get MOD mutex
* recheck if (want==0){
* label := 0
* release mutex
* release MOD mutex
* return
* }
* release MOD mutex
* }
* release mutex
* return
*
*
* Consequences:
* - a label can only be added to / removed from the table if its MOD mutex is held,
* AND the mutex being labelled is held
*
* A labels the mutex and hands it over to B:
*
* MOD -----AAAAAAAAAAAA--|--------------|--BBBBBBBBBB-----
* LABEL ------------+++++++|++++++++++++++|+++++------------
* MUTEX ---------AAAAAAAAAA|AAAAAAAA--BBBB|BBBBBBBB---------
* WANT -------------------|---111111111--|-----------------
*
* Handover fails because want is set too late by B. B starts over.
*
* MOD -----AAAAAAAAAAAA--|-B--AAAAAAA-----
* LABEL ------------+++++++|++++++----------
* MUTEX ---------AAAAAAAAAA|AAAAAAAA--------
* WANT -------------------|----------111---
*
* A and B try to add the same label, but A gets the MOD mutex first,
* so it becomes a handover:
*
* MOD --AAAAAAAAAA--BB-----------
* LABEL1 -------++++++++++++++++++++
* MUTEX1 -----AAAAAAAAAAAAAAA-BBBBBB
* WANT1 ------------------11111----
*
*
*/
/** Allocates the lock table and initialises its mutexes.
 *
 * Allocates $$LOCK_NUM entries in fsdata->mflocks. Each entry gets its
 * 'mutex' and 'mod_mutex' initialised with type $$MUTEXT_TYPE, and its
 * 'label' and 'want' members set to 0 (unlabelled, nobody waiting).
 *
 * On failure nothing is left allocated: mutexes initialised so far are
 * destroyed, the table is freed and fsdata->mflocks is set to NULL.
 *
 * Returns
 * * 0 on success
 * * -errno on error
 */
static int $mflock_init(struct $fsdata_t *fsdata)
{
   int i;
   int ret;
   pthread_mutexattr_t mutexattr;

   if((ret = pthread_mutexattr_init(&mutexattr)) != 0) { return -ret; }

   if((ret = pthread_mutexattr_settype(&mutexattr, $$MUTEXT_TYPE)) != 0) {
      pthread_mutexattr_destroy(&mutexattr);
      return -ret;
   }

   fsdata->mflocks = malloc(sizeof(struct $mflock_t) * $$LOCK_NUM);
   if(fsdata->mflocks == NULL) {
      // Do not leak the attribute object on the out-of-memory path
      pthread_mutexattr_destroy(&mutexattr);
      return -ENOMEM;
   }

   // ret is 0 here; it becomes non-zero only if a mutex cannot be initialised
   for(i = 0; i < $$LOCK_NUM; i++) {
      ret = pthread_mutex_init(&(fsdata->mflocks[i].mutex), &mutexattr);
      if(ret != 0) { break; }

      ret = pthread_mutex_init(&(fsdata->mflocks[i].mod_mutex), &mutexattr);
      if(ret != 0) {
         // Entry i is only half-initialised: undo its first mutex here,
         // the loop below takes care of the complete entries 0..i-1
         pthread_mutex_destroy(&(fsdata->mflocks[i].mutex));
         break;
      }

      fsdata->mflocks[i].label = 0;
      fsdata->mflocks[i].want = 0;
   }

   pthread_mutexattr_destroy(&mutexattr);

   if(ret != 0) {
      // Roll back the fully initialised entries and release the table
      while(i-- > 0) {
         pthread_mutex_destroy(&(fsdata->mflocks[i].mutex));
         pthread_mutex_destroy(&(fsdata->mflocks[i].mod_mutex));
      }
      free(fsdata->mflocks);
      fsdata->mflocks = NULL;
      return -ret;
   }

   return 0;
}
/** Destroys the mutexes and frees the lock table.
 *
 * Does nothing if fsdata->mflocks is NULL (for example because the table
 * was never allocated). After the call fsdata->mflocks is NULL.
 *
 * Returns
 * * 0 always
 */
static int $mflock_destroy(struct $fsdata_t *fsdata)
{
   int i;

   if(fsdata->mflocks == NULL) { return 0; }

   for(i = 0; i < $$LOCK_NUM; i++) {
      pthread_mutex_destroy(&(fsdata->mflocks[i].mutex));
      pthread_mutex_destroy(&(fsdata->mflocks[i].mod_mutex));
   }

   free(fsdata->mflocks);
   fsdata->mflocks = NULL; // guard against use-after-free and double free

   return 0;
}
/* Forward declaration: a failed takeover is undone with the normal release procedure. */
static int $mflock_unlock(struct $fsdata_t *fsdata, int lockid);

/** Gets a lock for a particular label
 *
 * label==0 marks a table entry as not in use, so callers must not pass 0.
 *
 * If the label is already in the table, the function registers its interest
 * through the 'want' member and waits for that entry's mutex (handover).
 * Otherwise it takes the MOD mutex selected by label & ($$LOCK_NUM - 1),
 * re-checks the table, and then claims and labels a free entry.
 * NOTE(review): the mask equals label % $$LOCK_NUM only when $$LOCK_NUM is a
 * power of two - confirm against its definition.
 * NOTE(review): 'want' is modified without holding any mutex - confirm that
 * this is acceptable for its declared type.
 *
 * On every error return, the mutexes acquired by this call have been
 * released again (best effort).
 *
 * Returns:
 * * lock number on success (>=0)
 * * -errno on error
 */
static int $mflock_lock(struct $fsdata_t *fsdata, $$LOCKLABEL_T label)
{
   int i;
   int ret;
   int ml = -1;
   struct $mflock_t *mylock;
   pthread_mutex_t *modmutex = NULL;
   // 10 000 000 ns = 10 ms; not referenced directly below,
   // presumably consumed by the $$SLEEP macro - confirm
   struct timespec delay = { 0, 10000000 };

   while(1) { // START:

      mylock = NULL;
      for(i = 0; i < $$LOCK_NUM; i++) {
         if(fsdata->mflocks[i].label == label) {
            ml = i;
            mylock = &(fsdata->mflocks[i]);
            break;
         }
      }

      if(mylock != NULL) { // if label is in the table

         if(unlikely(modmutex != NULL)) {
            // The label appeared while we were getting the modmutex;
            // release the modmutex and carry on as usual
            ret = pthread_mutex_unlock(modmutex);
            modmutex = NULL;
            if(unlikely(ret != 0)) { return -ret; }
         }

         (mylock->want)++; // request handover
         ret = pthread_mutex_lock(&(mylock->mutex));
         (mylock->want)--;
         // Without the mutex we must not report success
         if(unlikely(ret != 0)) { return -ret; }

         if(likely(mylock->label == label)) { // recheck
            // We have (successfully taken over) the mutex and it's labelled with the label
            $dlogdbg("_lock: got lock '%d' for label '%lu' (takeover)\n", ml, label);
            return ml;
         }

         // Recheck failed: we hold the mutex, but the entry is now unlabelled
         // or carries a different label. Give it back through the normal
         // release procedure (which clears the label if nobody is waiting)
         // before retrying; otherwise the mutex would stay locked forever.
         if(unlikely((ret = $mflock_unlock(fsdata, ml)) != 0)) { return ret; }

         $$SLEEP
         continue;
      }

      // label is not in the table:
      // get the modmutex and re-check that the label is still not in the table
      if(modmutex == NULL) { // If we don't yet have the modmutex
         modmutex = &(fsdata->mflocks[label & ($$LOCK_NUM - 1)].mod_mutex);
         if(unlikely((ret = pthread_mutex_lock(modmutex)) != 0)) { return -ret; }
         continue; // Start over for a re-check.
      }

      // We have the modmutex and label is not in the table
      // try all non-labelled locks in the table
      while(1) { // FIND:

         ml = -1;
         for(i = 0; i < $$LOCK_NUM; i++) {
            if(likely(fsdata->mflocks[i].label == 0)) {
               ret = pthread_mutex_trylock(&(fsdata->mflocks[i].mutex));
               if(likely(ret == 0)) { // got the lock
                  ml = i;
                  break;
               }
               if(unlikely(ret != EBUSY)) { // real error (EBUSY: try the next entry)
                  // Do not leave the modmutex locked behind us
                  pthread_mutex_unlock(modmutex);
                  return -ret;
               }
            }
         }

         if(ml > -1) { // managed to get a mutex

            // recheck if the mutex is still unlabelled
            if(likely(fsdata->mflocks[ml].label == 0)) {
               // Success
               fsdata->mflocks[ml].label = label;
               if(unlikely((ret = pthread_mutex_unlock(modmutex)) != 0)) {
                  // Undo the claim so that the failed call holds no entry
                  fsdata->mflocks[ml].label = 0;
                  pthread_mutex_unlock(&(fsdata->mflocks[ml].mutex));
                  return -ret;
               }
               $dlogdbg("_lock: got lock '%d' for label '%lu'\n", ml, label);
               return ml;
            }

            // if recheck fails, release the mutex and continue from FIND
            // (this must be the middle of a handover)
            if(unlikely((ret = pthread_mutex_unlock(&(fsdata->mflocks[ml].mutex))) != 0)) {
               pthread_mutex_unlock(modmutex);
               return -ret;
            }
         }

         $$SLEEP

      } // end FIND

   } // end START

   return -EIO; // unreachable
}
/** Release a lock
 *
 * lockid is the lock number returned by the lock function.
 *
 * If no thread has registered interest in the entry ('want' is 0), the
 * entry's label is cleared under the MOD mutex selected by the current label
 * before the mutex is released. Otherwise the mutex is released with the
 * label left in place, so that a waiting thread can take the entry over.
 *
 * Returns:
 * * 0 on success
 * * -EINVAL if lockid is not a valid lock number
 * * -errno on error
 */
static int $mflock_unlock(struct $fsdata_t *fsdata, int lockid)
{
   int ret;
   int modret;
   struct $mflock_t *mylock;
   pthread_mutex_t *modmutex;

   $dlogdbg("_lock: releasing lock '%d'\n", lockid);

   // Reject anything that is not a table index, e.g. a negative error code
   // returned by a failed lock call
   if(unlikely(lockid < 0 || lockid >= $$LOCK_NUM)) { return -EINVAL; }

   mylock = &(fsdata->mflocks[lockid]);

   if(mylock->want == 0) { // no one wants a handover

      modmutex = &(fsdata->mflocks[mylock->label & ($$LOCK_NUM - 1)].mod_mutex);
      if(unlikely((ret = pthread_mutex_lock(modmutex)) != 0)) { return -ret; }

      if(mylock->want == 0) { // recheck
         mylock->label = 0;
         // Release both mutexes unconditionally so that a failure to unlock
         // the first does not leave the modmutex locked
         ret = pthread_mutex_unlock(&(mylock->mutex));
         modret = pthread_mutex_unlock(modmutex);
         if(unlikely(ret != 0)) { return -ret; }
         if(unlikely(modret != 0)) { return -modret; }
         return 0;
      }

      // oops - they want a handover
      if(unlikely((ret = pthread_mutex_unlock(modmutex)) != 0)) { return -ret; }
   }

   // handover - release lock without removing label
   if(unlikely((ret = pthread_mutex_unlock(&(mylock->mutex))) != 0)) { return -ret; }

   return 0;
}