import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline
from scipy.spatial.distance import euclidean
import scipy.linalg


def isInPolygon(testX, testY, vertX, vertY, addedStart=None, addedEnd=None):
    """Determine whether the point (testX, testY) is within the polygon
    defined by ordered vertices (vertX[i], vertY[i]). An additional starting
    or ending point can be supplied if needed.
    Note: points exactly on an edge may be classified as inside or outside
    depending on the edge.
    """
    # insert the added starting or ending points if provided
    if addedStart is not None:
        vertX = np.insert(vertX, 0, addedStart[0])
        vertY = np.insert(vertY, 0, addedStart[1])
    if addedEnd is not None:
        vertX = np.append(vertX, addedEnd[0])
        vertY = np.append(vertY, addedEnd[1])
    # Initialize a condition variable "c" as outside of the polygon, then
    # flip its value each time a horizontal semi-infinite ray from the
    # testing point (increasing x, fixed y) crosses an edge. Odd and even
    # numbers of crossings mean "inside" and "outside", respectively.
    # The first condition checks whether the test point falls within the
    # y range of the edge under evaluation. Importantly, it also ensures
    # that vertY[j]-vertY[i] is not 0. If testY is within the y range of
    # the edge, the second condition checks whether the on-edge point at
    # y-level testY is to the right of testX, i.e. whether testX < xOnEdge.
    c = False
    nVert = len(vertX)
    for i in range(nVert):
        j = i + 1
        if j == nVert:
            j = 0  # last point connects back to first point
        if (vertY[i] > testY) != (vertY[j] > testY):
            slopeEdge = (vertX[j] - vertX[i]) / (vertY[j] - vertY[i])
            xOnEdge = slopeEdge * (testY - vertY[i]) + vertX[i]
            if testX < xOnEdge:
                c = not c
    return c
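

# Illustrative sketch (not part of the original module): a quick sanity
# check of the ray-casting test on a unit square. The center should test
# inside; a point beyond the right edge should test outside.
def _example_isInPolygon():
    sqX = np.array([0.0, 1.0, 1.0, 0.0])
    sqY = np.array([0.0, 0.0, 1.0, 1.0])
    assert isInPolygon(0.5, 0.5, sqX, sqY)      # center: inside
    assert not isInPolygon(1.5, 0.5, sqX, sqY)  # right of the square: outside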


def getDistance(spotCoor, meshDF, THRESHOLD_DIS=None):
    """
    Calculate the minimum distance between a spot and a mesh grid.
    Inputs
    ------
    spotCoor:
        A list or 1-D numpy array of the spot coordinate [x, y, z].
        The x, y, z coordinates are calibrated (usually in microns).
    meshDF:
        A pandas data frame specifying the mesh grid.
    THRESHOLD_DIS:
        Optional search radius; when given, only grid points within
        +/- THRESHOLD_DIS of the spot in x and y are considered.
    Returns
    -------
    A tuple of 3 items:
        1. The minimum distance between the spot and the mesh grid
        2. The path position of the grid point with the minimal distance
        3. The z position of the grid point with the minimal distance
    """
    # when a threshold is given, only search within the threshold in x and y
    if THRESHOLD_DIS is not None:
        meshDF = meshDF.loc[meshDF.x >= spotCoor[0] - THRESHOLD_DIS]
        meshDF = meshDF.loc[meshDF.x <= spotCoor[0] + THRESHOLD_DIS]
        meshDF = meshDF.loc[meshDF.y >= spotCoor[1] - THRESHOLD_DIS]
        meshDF = meshDF.loc[meshDF.y <= spotCoor[1] + THRESHOLD_DIS]
    if len(meshDF.x.values) == 0:
        return np.inf, np.inf, np.inf
    meshGrids = np.c_[meshDF.x.values, meshDF.y.values, meshDF.z.values]
    dis = [euclidean(spotCoor, grid) for grid in meshGrids]
    index_min = np.argmin(dis)
    distance = dis[index_min]
    pathPosMin = meshDF.pathPos.iloc[index_min]
    zPosMin = meshDF.zPos.iloc[index_min]
    return distance, pathPosMin, zPosMin
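

# Illustrative sketch with assumed toy data: a flat 3x3 mesh at z=0,
# queried from a spot hovering 1 micron above the central grid point, so
# the minimum distance is 1 and (pathPos, zPos) identify the center (1, 1).
def _example_getDistance():
    xs, ys = np.meshgrid(np.arange(3.0), np.arange(3.0))
    meshDF = pd.DataFrame({'x': xs.ravel(), 'y': ys.ravel(),
                           'z': np.zeros(9),
                           'pathPos': np.tile(np.arange(3), 3),
                           'zPos': np.repeat(np.arange(3), 3)})
    dis, pathPos, zPos = getDistance([1.0, 1.0, 1.0], meshDF)
    assert dis == 1.0 and pathPos == 1 and zPos == 1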


def getDistance2meshes(spotCoor, meshDF1, meshDF2):
    """
    Calculate the minimum distance between a spot and two mesh grids.
    Inputs
    ------
    spotCoor:
        A list or 1-D numpy array of the spot coordinate [x, y, z].
        The x, y, z coordinates are calibrated (usually in microns).
    meshDF1 and meshDF2:
        Pandas data frames specifying the two mesh grids
    Returns
    -------
    The minimum distance between the spot and the two mesh grids.
    """
    # getDistance returns (distance, pathPos, zPos); compare distances only
    dist1, _, _ = getDistance(spotCoor, meshDF1)
    dist2, _, _ = getDistance(spotCoor, meshDF2)
    return min(dist1, dist2)


def f_getDistance2meshes(x):
    '''
    A wrapper function to accept a single list of parameters for parallel
    processing.
    Input
    -----
    x[0]: a spot coordinate (list or numpy array of x, y, z positions)
    x[1]: first mesh dataframe
    x[2]: second mesh dataframe
    Returns
    -------
    A number representing the shortest distance from the spot to the 2 meshes
    '''
    return getDistance2meshes(x[0], x[1], x[2])
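

# Illustrative sketch with assumed toy data: two flat meshes at z=0 and
# z=4; a spot at z=1 is 1 micron from the first and 3 from the second, so
# the two-mesh distance is 1. The single-tuple signature is meant for
# fan-out with multiprocessing.Pool.map over many spots.
def _example_f_getDistance2meshes():
    def flat_mesh(z):
        xs, ys = np.meshgrid(np.arange(3.0), np.arange(3.0))
        return pd.DataFrame({'x': xs.ravel(), 'y': ys.ravel(),
                             'z': np.full(9, z),
                             'pathPos': np.tile(np.arange(3), 3),
                             'zPos': np.repeat(np.arange(3), 3)})
    args = ([1.0, 1.0, 1.0], flat_mesh(0.0), flat_mesh(4.0))
    assert f_getDistance2meshes(args) == 1.0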


def refineDistance(spotCoor, p0, p1, p2):
    """Takes the spot coordinate and 3 corners of a triangle; first fits
    the plane of the triangle, then finds the distance of the spot to the
    plane defined by that formula. Returns the distance and the foot point.
    """
    if np.all(p0 == np.inf) or np.all(p1 == np.inf) or np.all(p2 == np.inf):
        refinedDis = np.inf
        footCoor = np.array([np.inf, np.inf, np.inf])
        return refinedDis, footCoor
    spotCoor, p0, p1, p2 = np.array(spotCoor), np.array(p0), np.array(p1), np.array(p2)
    v0, v1, v2 = spotCoor - p0, p1 - p0, p2 - p0
    # If spotCoor falls on p0, the distance is 0 and spotCoor is the foot point
    if np.all(v0 == 0):
        refinedDis = 0
        footCoor = spotCoor.flatten()
        return refinedDis, footCoor
    # If v0 is perpendicular to the plane (zero dot products with both v1
    # and v2), the length of v0 is the distance and p0 is the foot point
    if np.dot(v0, v1) == 0 and np.dot(v0, v2) == 0:
        refinedDis = euclidean(v0, np.zeros_like(v0))
        footCoor = p0.flatten()
        return refinedDis, footCoor
    # If the trivial cases do not apply, first compute the normal vector
    # of the plane, parameterized as [x[0], x[1], 1.0]
    A0 = np.c_[v1, v2].T
    A = A0[:, 0:2]  # note that the index here is left-inclusive, right-exclusive
    b = A0[:, 2] * -1
    x = scipy.linalg.solve(A, b)  # solve Ax=b
    vNorm = np.array([x[0], x[1], 1.0])
    # The foot point lies in the plane (vNorm . foot = vNorm . p0) and the
    # spot-to-foot segment is parallel to vNorm (vi . foot = vi . spotCoor)
    A = np.c_[vNorm, v1, v2].T
    b = np.array([np.dot(vNorm, p0), np.dot(v1, spotCoor), np.dot(v2, spotCoor)]).T
    x = scipy.linalg.solve(A, b)  # solve Ax=b
    footCoor = x.flatten()
    # express the foot point in the (v1, v2) basis: x[0]*v1 + x[1]*v2 = vFoot
    vFoot = footCoor - p0
    A = A0[:, 0:2].T  # columns are the x,y components of v1 and v2
    b = vFoot[0:2]
    x = scipy.linalg.solve(A, b)
    # determine whether the foot point is inside the triangle
    if x[0] >= 0 and x[1] >= 0 and (x[0] + x[1]) <= 1:
        refinedDis = euclidean(spotCoor, footCoor)
        return refinedDis, footCoor
    else:
        # the foot point is outside the triangle: fall back to the nearest vertex
        vertices = np.array([p0, p1, p2])
        dis = [euclidean(spotCoor, p) for p in vertices]
        tempIndex = np.argmin(dis)
        footCoor = vertices[tempIndex].flatten()
        refinedDis = dis[tempIndex]
        return refinedDis, footCoor
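

# Illustrative sketch: a spot 2 microns above the interior of a right
# triangle lying in the z=0 plane projects straight down, so the refined
# distance is 2 and the foot point is directly below the spot.
def _example_refineDistance():
    dis, foot = refineDistance([0.25, 0.25, 2.0],
                               [0, 0, 0], [1, 0, 0], [0, 1, 0])
    assert np.isclose(dis, 2.0)
    assert np.allclose(foot, [0.25, 0.25, 0.0])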


def getGridCoor(meshDF, zPos, pathPos):
    """Return the grid coordinates at zPos and pathPos, assuming meshDF is
    a mesh data frame at a single time point.
    """
    temp = meshDF
    temp = temp.loc[temp.zPos == zPos]
    temp = temp.loc[temp.pathPos == pathPos]
    if len(temp.x.values) > 1:
        exit('Mesh contains multiple (zPos, pathPos)!')
    if len(temp.x.values) == 0:
        print('WARNING: requested (zPos, pathPos) is outside of the mesh!')
        gridCoor = np.array([np.inf, np.inf, np.inf])
    else:
        gridCoor = np.c_[temp.x.values, temp.y.values, temp.z.values].flatten()
    return gridCoor


def getDisFoot(spotCoor, meshDF, pathPos0=None, zPos0=None):
    """Takes the spot coordinate and the mesh data, calculates the minimum
    distance to the existing grid points, then refines the distance by
    considering the 4 adjacent triangle planes sharing the initial nearest
    grid point. Selects the minimum of the 4 refined distances and returns
    it together with the foot point coordinate.
    Note: spotCoor is ideally a 1-D numpy array.
    """
    if pathPos0 is None or zPos0 is None:
        _, pathPos0, zPos0 = getDistance(spotCoor, meshDF)
    p0 = getGridCoor(meshDF, zPos0, pathPos0)
    p1 = getGridCoor(meshDF, zPos0 + 1, pathPos0)
    p2 = getGridCoor(meshDF, zPos0, pathPos0 + 1)
    p3 = getGridCoor(meshDF, zPos0 - 1, pathPos0)
    p4 = getGridCoor(meshDF, zPos0, pathPos0 - 1)
    dis1, foot1 = refineDistance(spotCoor, p0, p1, p2)
    dis2, foot2 = refineDistance(spotCoor, p0, p2, p3)
    dis3, foot3 = refineDistance(spotCoor, p0, p3, p4)
    dis4, foot4 = refineDistance(spotCoor, p0, p4, p1)
    disList = [dis1, dis2, dis3, dis4]
    footList = [foot1, foot2, foot3, foot4]
    dis = min(disList)
    foot = footList[np.argmin(disList)]
    return dis, foot
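

# Illustrative sketch with assumed toy data: on a flat 3x3 mesh at z=0, a
# spot 1 micron above the central grid point is perpendicular to all four
# adjacent triangles, so the refined distance is 1 and the foot point is
# the central grid point itself.
def _example_getDisFoot():
    xs, ys = np.meshgrid(np.arange(3.0), np.arange(3.0))
    meshDF = pd.DataFrame({'x': xs.ravel(), 'y': ys.ravel(),
                           'z': np.zeros(9),
                           'pathPos': np.tile(np.arange(3), 3),
                           'zPos': np.repeat(np.arange(3), 3)})
    dis, foot = getDisFoot(np.array([1.0, 1.0, 1.0]), meshDF)
    assert np.isclose(dis, 1.0) and np.allclose(foot, [1.0, 1.0, 0.0])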


def decomposeVector(v1, v2):
    """Project vector v2 onto the direction of vector v1.
    Return vPara (same direction as v1) and vPerpend (perpendicular to the
    direction of v1).
    """
    v1, v2 = np.array(v1).flatten(), np.array(v2).flatten()
    if len(v1) == 1 or len(v2) == 1:
        exit('Both vectors have to be at least 2-D!')
    if len(v1) != len(v2):
        exit('The two vectors need to have the same length!')
    v1_norm = v1 / np.linalg.norm(v1)  # unit vector along v1, any dimension
    vPara = v1_norm * np.dot(v1_norm, v2)
    vPerpend = v2 - vPara
    return vPara, vPerpend
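

# Illustrative sketch: decomposing [1, 1, 0] along the x axis gives a
# parallel part [1, 0, 0] and a perpendicular part [0, 1, 0].
def _example_decomposeVector():
    vPara, vPerpend = decomposeVector([1, 0, 0], [1, 1, 0])
    assert np.allclose(vPara, [1, 0, 0])
    assert np.allclose(vPerpend, [0, 1, 0])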


def splinePath(x, y, N, z=None):
    """Takes a set of (x, y) coordinates and fits a spline along z if
    provided, otherwise along the cumulative path length. Then interpolates
    at N equally spaced points. Returns 2 or 3 numpy arrays of size N
    representing the interpolated (x, y) or (x, y, z) coordinates.
    """
    if z is None:
        dr = (np.diff(x)**2 + np.diff(y)**2)**.5  # segment lengths
        r = np.zeros_like(x)
        r[1:] = np.cumsum(dr)  # integrate path
        r_int = np.linspace(0, r.max(), N)  # regularly spaced path
        z, z_int = r, r_int
        splx = UnivariateSpline(z, x, s=0, ext='const')  # interpolate
        x_int = splx(z_int)
        sply = UnivariateSpline(z, y, s=0, ext='const')
        y_int = sply(z_int)
        return x_int, y_int
    else:
        # check whether z is increasing -- required for spline interpolation
        dz = np.diff(z)
        if not np.all(dz >= 0):
            exit('z for splinePath is not increasing!')
        z_int = np.linspace(z.min(), z.max(), N)
        splx = UnivariateSpline(z, x, s=0, ext='const')  # interpolate
        x_int = splx(z_int)
        sply = UnivariateSpline(z, y, s=0, ext='const')
        y_int = sply(z_int)
        return x_int, y_int, z_int


def discretizePath(x, y, N, z=None):
    """Similar to splinePath, but uses linear interpolation instead of a
    spline. Takes a set of (x, y) coordinates and interpolates N points
    along z if provided, otherwise along the cumulative path length.
    Returns 2 or 3 numpy arrays of size N representing the interpolated
    (x, y) or (x, y, z) coordinates.
    """
    if z is None:
        dr = (np.diff(x)**2 + np.diff(y)**2)**.5  # segment lengths
        r = np.zeros_like(x)
        r[1:] = np.cumsum(dr)  # integrate path
        r_int = np.linspace(0, r.max(), N)  # regularly spaced path
        x_int = np.interp(r_int, r, x)  # interpolate
        y_int = np.interp(r_int, r, y)
        return x_int, y_int
    else:
        z_int = np.linspace(z.min(), z.max(), N)
        x_int = np.interp(z_int, z, x)  # interpolate
        y_int = np.interp(z_int, z, y)
        return x_int, y_int, z_int
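

# Illustrative sketch with assumed toy data: resample a coarsely sampled
# quarter circle into 50 points equally spaced along its path length,
# using either the spline or the piecewise-linear variant.
def _example_resample_path():
    theta = np.linspace(0, np.pi / 2, 10)
    x, y = np.cos(theta), np.sin(theta)
    xs, ys = splinePath(x, y, 50)      # smooth resampling
    xl, yl = discretizePath(x, y, 50)  # piecewise-linear resampling
    assert len(xs) == len(xl) == 50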


def meshFromPolylines(inputFolder, N=100, N_CROSS=30, VOXEL_DEPTH=2.0):
    """Takes a folder of .txt files storing ordered coordinates of polylines
    at relatively sparse z slices and time intervals, and interpolates to
    generate a time series of mesh grids (N points along the polyline at
    N_CROSS different z planes).
    The input files are a series of txt files, each containing a list of
    ordered x,y coordinates. The file name of each txt file indicates the
    z slice and time frame.
    First, it reads in the series of x,y coordinates and interpolates
    equally spaced points along the spline-fitted line on the corresponding
    z plane and time point.
    Second, it takes the ordered set of x,y,z points along the polyline
    path, then splines and interpolates again to get dense enough mesh grid
    points at each time point.
    Third, it interpolates along the time line to make sure every time
    point has a mesh grid representing the bud surface.
    Finally, all data are saved into a pandas DataFrame, which is returned.
    """
    fileList = glob.glob(inputFolder + '*.txt')
    fileList.sort()
    xyData, tzData = [], []
    for f in fileList:
        xyData.append(pd.read_table(f, header=None))
        f = os.path.basename(f)
        f = f.split('.')[0]
        tzData.append([int(s) for s in f.split('-') if s.isdigit()])
    tzData = np.array(tzData)
    # interpolate along each path to position crosslines
    pathData = []
    for i in range(len(xyData)):
        temp = xyData[i].values
        x, y = temp[:, 0], temp[:, 1]
        try:
            x_int, y_int = splinePath(x, y, N)
        except Exception:  # an exception is raised when there are only 2 or 3 points
            x_int, y_int = discretizePath(x, y, N)
        # time series in frame number
        tempT = np.ones_like(x_int) * tzData[i, 0]
        # z slice in microns
        tempZ = np.ones_like(x_int) * tzData[i, 1] * VOXEL_DEPTH
        # record the point position along the path
        pathPos = np.arange(N)
        tempDF = pd.DataFrame([x_int, y_int, tempZ, tempT, pathPos])
        tempDF = tempDF.transpose()
        pathData.append(tempDF)
    # concat all data into one big pandas data frame
    pathDF = pd.concat(pathData, ignore_index=True)
    pathDF.columns = ['x', 'y', 'z', 't', 'pathPos']
    # # select a subset of the data
    # pathDF = pathDF.loc[pathDF.z >= 9*VOXEL_DEPTH]
    # pathDF = pathDF.loc[pathDF.z <= 38*VOXEL_DEPTH]
    # pathDF = pathDF.loc[pathDF.t >= 131]
    # interpolate along crosslines to get meshes for existing time points
    meshData = []
    for t in pathDF.t.unique():
        pathDF_t = pathDF.loc[pathDF.t == t]
        for i in pathDF_t.pathPos.unique():
            pathDF_ti = pathDF_t.loc[pathDF_t.pathPos == i]
            temp = pathDF_ti.sort_values('z', ascending=True)
            x, y, z = temp.x, temp.y, temp.z
            # no exception handling since the z length should always be more than 3
            x_int, y_int, z_int = splinePath(x, y, N_CROSS, z)
            # x_int, y_int, z_int = discretizePath(x, y, N_CROSS, z)  # linear alternative
            tempT = np.ones_like(x_int) * t
            tempPos = np.ones_like(x_int) * i
            tempPosZ = np.arange(len(z_int))
            tempDF = pd.DataFrame([x_int, y_int, z_int, tempT, tempPos, tempPosZ])
            tempDF = tempDF.transpose()
            meshData.append(tempDF)
    meshDF = pd.concat(meshData, ignore_index=True)
    meshDF.columns = ['x', 'y', 'z', 't', 'pathPos', 'zPos']
    # interpolate the mesh data for every time frame
    tMeshData = []
    for zPos in meshDF.zPos.unique():
        meshDF_zPos = meshDF.loc[meshDF.zPos == zPos]
        for i in meshDF_zPos.pathPos.unique():
            temp = meshDF_zPos.loc[meshDF_zPos.pathPos == i]
            temp = temp.sort_values('t', ascending=True)
            x, y, t = temp.x, temp.y, temp.t
            tN = int(t.max() - t.min() + 1)  # linspace needs an integer count
            try:
                x_int, y_int, t_int = splinePath(x, y, tN, t)
            except Exception:  # fall back to linear when there are too few time points
                x_int, y_int, t_int = discretizePath(x, y, tN, t)
            tempPosZ = np.ones_like(x_int) * zPos
            z = temp.z.unique()[0]
            tempZ = np.ones_like(x_int) * z
            tempPos = np.ones_like(x_int) * i
            tempDF = pd.DataFrame([x_int, y_int, tempZ, t_int, tempPos, tempPosZ])
            tempDF = tempDF.transpose()
            tMeshData.append(tempDF)
    tMeshDF = pd.concat(tMeshData, ignore_index=True)
    tMeshDF.columns = ['x', 'y', 'z', 't', 'pathPos', 'zPos']
    return tMeshDF
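

# Illustrative sketch with assumed toy input: files named '<t>-<z>.txt'
# (time frame, z slice), each holding tab-separated x,y coordinates of one
# polyline. Two time frames with four z slices each are enough to exercise
# all three interpolation passes.
def _example_meshFromPolylines():
    import tempfile
    folder = tempfile.mkdtemp()
    line = np.c_[np.linspace(0, 10, 6), np.linspace(0, 5, 6)]
    for t in (1, 2):
        for z in range(4):
            fname = os.path.join(folder, '%03d-%03d.txt' % (t, z))
            np.savetxt(fname, line + z, delimiter='\t')
    meshDF = meshFromPolylines(folder + os.sep, N=20, N_CROSS=10)
    assert set(meshDF.columns) == {'x', 'y', 'z', 't', 'pathPos', 'zPos'}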


def get_polyline_len(spotCoors):
    '''
    Calculate the polyline length along an ordered set of spot coordinates.
    Inputs
    ------
    spotCoors:
        A list of spot coordinates.
        Each item is a 3-item list or array [x, y, z]
    Returns
    -------
    Cumulative distance of all segments
    '''
    # when only 1 spot is given, the length is 0
    if len(spotCoors) == 1:
        return 0
    dist = 0
    for i in range(len(spotCoors) - 1):
        dist += euclidean(spotCoors[i], spotCoors[i + 1])
    return dist


def get_mean_speed(spotCoors, time_interval=5):
    '''
    Calculate the mean speed along an ordered sequence of spot coordinates.
    The time intervals between adjacent spots are assumed constant.
    Inputs
    ------
    spotCoors:
        A list of spot coordinates.
        Each item is a 3-item list or array [x, y, z]
    time_interval:
        Duration between adjacent spots, in minutes. Default is 5.
    Returns
    -------
    Mean speed along the polyline specified by spotCoors, per hour
    '''
    # when only 1 spot is given, the speed is 0
    if len(spotCoors) == 1:
        return 0
    polyline_len = get_polyline_len(spotCoors)
    # total time in hours
    total_time = time_interval * (len(spotCoors) - 1) / 60
    mean_speed = polyline_len / total_time
    return mean_speed
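

# Illustrative sketch: three spots spaced 1 micron apart give a polyline
# length of 2 microns, traversed over 2 intervals x 5 min = 10 min, for a
# mean speed of 2 / (10/60) = 12 microns per hour.
def _example_mean_speed():
    spots = [[0, 0, 0], [1, 0, 0], [2, 0, 0]]
    assert get_polyline_len(spots) == 2.0
    assert np.isclose(get_mean_speed(spots, time_interval=5), 12.0)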


def get_mean_speed_track(df, n_rolling=12, time_interval=5):
    '''
    Calculate the rolling mean track speed from a data frame of the entire
    spot series.
    Inputs
    ------
    df:
        A Pandas data frame of all spots in a track
    n_rolling:
        The number of track segments to average over, for a smoother speed
        estimate
    time_interval:
        Duration between adjacent spots, in minutes. Default is 5.
    Returns
    -------
    A list of the mean speed at each point (spot) along the track
    '''
    # # sort values by t in case it was not sorted
    # df.sort_values('t', ascending=True, inplace=True)
    # construct a list of all spot coordinates in time order
    # (assumes a default integer index, e.g. after reset_index)
    spotCoors = []
    n_spots = len(df)
    for i in range(n_spots):
        spotCoor = [df.x[i], df.y[i], df.z[i]]
        spotCoors.append(spotCoor)
    n_before = int(np.floor(n_rolling / 2))
    n_after = n_rolling - n_before
    # special case: the track is even shorter than the specified n_rolling
    if n_rolling + 1 >= n_spots:
        speed_temp = get_mean_speed(spotCoors, time_interval)
        speed_lst = [speed_temp] * n_spots
        return speed_lst
    # general case: the track is longer than n_rolling
    speed_lst = []
    # the beginning spots (window truncated at the start)
    for i in range(n_before + 1):
        spotCoors_local = spotCoors[:(i + n_after + 1)]
        speed_temp = get_mean_speed(spotCoors_local, time_interval)
        speed_lst.append(speed_temp)
    # the middle spots (full window)
    for i in range(n_before + 1, n_spots - n_after):
        spotCoors_local = spotCoors[(i - n_before):(i + n_after + 1)]
        speed_temp = get_mean_speed(spotCoors_local, time_interval)
        speed_lst.append(speed_temp)
    # the ending spots (window truncated at the end)
    for i in range(n_spots - n_after, n_spots):
        spotCoors_local = spotCoors[(i - n_before):]
        speed_temp = get_mean_speed(spotCoors_local, time_interval)
        speed_lst.append(speed_temp)
    return speed_lst
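

# Illustrative sketch with assumed toy data: a 5-spot straight track moving
# 1 micron per 5-min frame. The track is shorter than the default rolling
# window, so every spot gets the whole-track mean speed (12 microns/hour).
def _example_mean_speed_track():
    df = pd.DataFrame({'x': np.arange(5.0), 'y': np.zeros(5),
                       'z': np.zeros(5), 't': np.arange(5)})
    speeds = get_mean_speed_track(df, n_rolling=12, time_interval=5)
    assert len(speeds) == 5 and np.allclose(speeds, 12.0)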


def normalize_time(df,
                   ref='pre_anaphase_onset_n_frames',
                   time_interval=5):
    '''
    Normalize the time so that it starts from the anaphase onset.
    Inputs
    ------
    df:
        A Pandas data frame of all spot series in a track
    ref:
        The column storing the number of frames before anaphase onset,
        or some other meaningful number to serve as the reference point
    time_interval:
        Duration between adjacent spots, in minutes. Default is 5.
    Returns
    -------
    A Pandas data frame with an added column 't_normed'
    '''
    # return the df as-is if 't_normed' is already there
    if 't_normed' in df.columns:
        print('There is already a \'t_normed\' column!')
        return df
    track_list = df.track_id.unique()
    df_list = []
    for track in track_list:
        df_temp = df[df.track_id == track].copy()
        df_temp.sort_values('t', ascending=True, inplace=True)
        df_temp.reset_index(inplace=True, drop=True)
        # Note: the frame when the two daughter cell chromatins separate
        # is considered time 0.
        # number of frames prior to anaphase onset
        n_pre = df_temp[ref].values[0]
        frames = df_temp.t.values
        start_frame = df_temp.t.min()
        t_normed = [time_interval * (i - n_pre - start_frame) for i in frames]
        df_temp['t_normed'] = t_normed
        df_list.append(df_temp)
    df_all = pd.concat(df_list, ignore_index=True)
    return df_all
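

# Illustrative sketch with assumed columns: a single 4-frame track starting
# at frame 10, with 2 frames recorded before anaphase onset, so t_normed
# runs -10, -5, 0, 5 minutes with the onset frame at 0.
def _example_normalize_time():
    df = pd.DataFrame({'track_id': [1] * 4, 't': [10, 11, 12, 13],
                       'pre_anaphase_onset_n_frames': [2] * 4})
    df_normed = normalize_time(df)
    assert list(df_normed.t_normed) == [-10, -5, 0, 5]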


def get_cell_division_tracking_df(f_tracking=None, f_return_time=None, f_combined=None):
    '''
    Data wrangling to combine the tracking and return time data into a
    single data frame.
    Inputs
    ------
    f_tracking:
        Path to the csv file storing TrackMate-exported long-term manual
        tracking data
    f_return_time:
        Path to the csv file storing the manually annotated daughter cell
        return times from anaphase onset back to the surface, in minutes
    f_combined:
        Path to the csv file storing the combined tracking data.
        If it exists, read directly from it.
        Otherwise, construct from f_tracking and f_return_time and save to csv.
    Returns
    -------
    df_cell_division_tracking:
        Pandas DataFrame of the combined tracking data
    '''
    data_folder = '../data/cell-division-tracking-data/'
    if f_combined is None:
        f_combined = data_folder + '180218-mTmGHisG-2photon-cell-division-tracking.csv'
    if os.path.isfile(f_combined):
        print('A combined csv file exists! The data frame will be read directly from this csv file.')
        df_cell_division_tracking = pd.read_csv(f_combined)
        return df_cell_division_tracking
    if f_tracking is None:
        f_tracking = data_folder + '180218-mTmGHisG-ROI1-track-info-with-cell-division-id.csv'
    if f_return_time is None:
        f_return_time = data_folder + '180218-mTmGHisG-2photon-cell-division-returning-time.csv'
    # read in the TrackMate-exported long-term manual tracking data of cell divisions
    df = pd.read_csv(f_tracking)
    df.sort_values(by=['cell_division_id', 'track_id', 't'], inplace=True)
    df.reset_index(inplace=True, drop=True)
    # drop the daughter cells whose tracks were incomplete (left the FOV or time series)
    cell_division_id_to_drop = ['incomplete1A', 'incomplete1B']
    df = df[~df.cell_division_id.isin(cell_division_id_to_drop)]
    # remove the A, B postfix to consolidate cell_division_id
    df.cell_division_id = [i[:-1] for i in df.cell_division_id]
    # make cell_division_id integer type
    df.cell_division_id = df.cell_division_id.astype('int64')
    # In manual tracking using TrackMate, one of the daughter cell tracks begins
    # at the separation of the two daughter cells (anaphase onset).
    #
    # To plot tracks that reflect the shared origin of both daughter cells, append
    # the surface-to-anaphase-onset part of the track from the matching daughter cell.
    #
    # In addition, record the duration from the surface cell leaving the surface
    # to anaphase onset.
    pre_anaphase_onset_n_frames = []
    track1_post_anaphase_n_frames = []
    track2_post_anaphase_n_frames = []
    for cell_division in df.cell_division_id.unique():
        df_temp = df[df.cell_division_id == cell_division]
        assert df_temp.track_id.nunique() == 2
        track1_id = df_temp.track_id.unique()[0]
        track2_id = df_temp.track_id.unique()[1]
        track1 = df_temp[df_temp.track_id == track1_id]
        track2 = df_temp[df_temp.track_id == track2_id]
        if track1.t.min() < track2.t.min():
            # track1 starts earlier: copy its pre-anaphase part onto track2
            to_add = track1[track1.t < track2.t.min()].copy()
            assert to_add.shape[0] > 0
            to_add.loc[:, 'track_id'] = track2_id
            if len(track1) - len(to_add) < len(track2):
                track1_post_anaphase_n_frames.append([track1_id,
                                                      len(track1) - len(to_add),
                                                      str(cell_division) + '_faster'])
                track2_post_anaphase_n_frames.append([track2_id,
                                                      len(track2),
                                                      str(cell_division) + '_slower'])
            else:
                track1_post_anaphase_n_frames.append([track1_id,
                                                      len(track1) - len(to_add),
                                                      str(cell_division) + '_slower'])
                track2_post_anaphase_n_frames.append([track2_id,
                                                      len(track2),
                                                      str(cell_division) + '_faster'])
        else:
            # track2 starts earlier: copy its pre-anaphase part onto track1
            to_add = track2[track2.t < track1.t.min()].copy()
            assert to_add.shape[0] > 0
            to_add.loc[:, 'track_id'] = track1_id
            if len(track1) < len(track2) - len(to_add):
                track1_post_anaphase_n_frames.append([track1_id,
                                                      len(track1),
                                                      str(cell_division) + '_faster'])
                track2_post_anaphase_n_frames.append([track2_id,
                                                      len(track2) - len(to_add),
                                                      str(cell_division) + '_slower'])
            else:
                track1_post_anaphase_n_frames.append([track1_id,
                                                      len(track1),
                                                      str(cell_division) + '_slower'])
                track2_post_anaphase_n_frames.append([track2_id,
                                                      len(track2) - len(to_add),
                                                      str(cell_division) + '_faster'])
        df = pd.concat([df, to_add])
        df.reset_index(inplace=True, drop=True)
        pre_anaphase_onset_n_frames.append([cell_division, len(to_add)])
    df.sort_values(by=['cell_division_id', 'track_id', 't'], inplace=True)
    df.reset_index(inplace=True, drop=True)
    # annotate the pre_anaphase_onset_n_frames data
    df_pre_anaphase_onset_n_frames = pd.DataFrame(pre_anaphase_onset_n_frames,
                                                  columns=['cell_division_id',
                                                           'pre_anaphase_onset_n_frames'])
    # annotate the post_anaphase_n_frames data
    df_track1 = pd.DataFrame(track1_post_anaphase_n_frames,
                             columns=['track_id',
                                      'post_anaphase_n_frames',
                                      'track_id_slower_or_faster'])
    df_track2 = pd.DataFrame(track2_post_anaphase_n_frames,
                             columns=['track_id',
                                      'post_anaphase_n_frames',
                                      'track_id_slower_or_faster'])
    df_track_post_anaphase_n_frames = pd.concat([df_track1, df_track2])
    df_track_post_anaphase_n_frames.reset_index(inplace=True, drop=True)
    df_cell_division = df.merge(df_pre_anaphase_onset_n_frames, on='cell_division_id')
    df_cell_division = df_cell_division.merge(df_track_post_anaphase_n_frames, on='track_id')
    # read in the manually annotated return time data
    df = pd.read_csv(f_return_time, header=0, sep=',')
    # construct a new track_id to match the merged data frame
    df['track_id_slower_or_faster'] = [str(df.cell_division_id[i]) + '_' + df.faster_or_slower_daughter[i]
                                       for i in range(len(df))]
    df_anaphase_to_surface_return = df.loc[:, ['track_id_slower_or_faster', 'anaphase_to_surface_return']]
    df_cell_division = df_cell_division.merge(df_anaphase_to_surface_return, on='track_id_slower_or_faster')
    df_cell_division.to_csv(f_combined, index=False)
    return df_cell_division
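

# Assumed usage sketch: with the default file layout under
# '../data/cell-division-tracking-data/', a single call builds (or reloads)
# the combined table, which can then be time-normalized:
#     df_tracks = get_cell_division_tracking_df()
#     df_tracks = normalize_time(df_tracks)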


def get_track_to_surf_dist(track_spot_surf_dist_file):
    '''
    Compute the mean track-to-surface distance for each track.
    Inputs
    ------
    track_spot_surf_dist_file:
        Path to the csv file of spot-to-surface distance data
    Returns
    -------
    df_epi_track_mean_dist_surf:
        Pandas DataFrame of the mean (plus min and max) track-to-surface
        distance, computed over all spots of each track
    '''
    # read in the spot-to-surface distances (skip the 3 export header rows)
    df = pd.read_csv(track_spot_surf_dist_file, skiprows=3)
    df.rename(columns={'Shortest Distance to Surfaces': 'Distance'}, inplace=True)
    # get the mean distance for each track
    track_mean_dist_surf = []
    for track_id in df.TrackID.unique():
        df_temp = df[df.TrackID == track_id]
        track_mean_dist_surf.append([track_id, df_temp.Distance.mean(),
                                     df_temp.Distance.min(), df_temp.Distance.max()])
    df_epi_track_mean_dist_surf = pd.DataFrame(track_mean_dist_surf,
                                               columns=['ID', 'mean_dist_surf',
                                                        'min_dist_surf', 'max_dist_surf'])
    return df_epi_track_mean_dist_surf
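

# Illustrative sketch with an assumed Imaris-style export: three junk
# header rows, then one distance row per spot. read_csv also accepts a
# file-like object, which keeps the example self-contained.
def _example_get_track_to_surf_dist():
    import io
    csv = ('h1\nh2\nh3\n'
           'TrackID,Shortest Distance to Surfaces\n'
           '1,2.0\n1,4.0\n2,1.0\n')
    df = get_track_to_surf_dist(io.StringIO(csv))
    assert df.loc[df.ID == 1, 'mean_dist_surf'].iloc[0] == 3.0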


def combine_speed_and_surf_dist(track_mean_speed_file, track_spot_surf_dist_file):
    '''
    Combine the mean track speed and mean track-to-surface distance data frames.
    Inputs
    ------
    track_mean_speed_file:
        Path to the csv file of mean track speed data
    track_spot_surf_dist_file:
        Path to the csv file of spot-to-surface distance data
    Returns
    -------
    df_merged:
        Pandas DataFrame combining the mean track speed and the mean
        track-to-surface distance, merged on track ID
    '''
    # read in the mean track speed for all epithelial tracks (skip the 3 export header rows)
    df_epi_mean_speed = pd.read_csv(track_mean_speed_file, skiprows=3)
    df_epi_mean_speed.rename(columns={'Track Speed Mean': 'Mean_Track_Speed'}, inplace=True)
    # get the mean track-to-surface distance
    df_epi_track_dist_surf = get_track_to_surf_dist(track_spot_surf_dist_file)
    # combine the data
    df_merged = df_epi_mean_speed.merge(df_epi_track_dist_surf, on='ID')
    return df_merged
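

# Illustrative sketch with assumed export formats: a mean-speed table and a
# spot-to-surface distance table, both with three junk header rows, merged
# on the shared track ID.
def _example_combine_speed_and_surf_dist():
    import io
    speed_csv = ('h1\nh2\nh3\n'
                 'Track Speed Mean,ID\n10.0,1\n20.0,2\n')
    dist_csv = ('h1\nh2\nh3\n'
                'TrackID,Shortest Distance to Surfaces\n'
                '1,2.0\n1,4.0\n2,1.0\n')
    df = combine_speed_and_surf_dist(io.StringIO(speed_csv),
                                     io.StringIO(dist_csv))
    assert list(df.columns) == ['Mean_Track_Speed', 'ID', 'mean_dist_surf',
                                'min_dist_surf', 'max_dist_surf']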