TST: fix ton of warnings (#627)
bifbof authored Jun 30, 2024
1 parent 983d27e commit a1b5e46
Showing 16 changed files with 62 additions and 58 deletions.
5 changes: 3 additions & 2 deletions tests/analysis/test_tracking_quality.py
@@ -188,11 +188,12 @@ def test_tracking_quality_user_error(self, testdata_sp_tpls_geolife_long):
         """Test if an error is raised when passing unknown 'granularity' to _get_tracking_quality_user()."""
         sp_tpls = testdata_sp_tpls_geolife_long
         user_0 = sp_tpls.loc[sp_tpls["user_id"] == 0]
+        start_date = sp_tpls["started_at"].min().floor(freq="D")

         with pytest.raises(ValueError):
-            ti.analysis.tracking_quality._get_tracking_quality_user(user_0, granularity=12345)
+            ti.analysis.tracking_quality._get_tracking_quality_user(user_0, start_date, granularity=12345)
         with pytest.raises(ValueError):
-            ti.analysis.tracking_quality._get_tracking_quality_user(user_0, granularity="random")
+            ti.analysis.tracking_quality._get_tracking_quality_user(user_0, start_date, granularity="random")

     def test_staypoints_accessors(self, testdata_all_geolife_long):
         """Test tracking_quality calculation from staypoints accessor."""
2 changes: 1 addition & 1 deletion tests/data/geolife_long/sp_tpls.csv
@@ -42,4 +42,4 @@ id,started_at,finished_at,user_id,type,is_activity,trip_id,prev_trip_id,next_tri
 20,2008-10-24 04:12:55+00:00,2008-10-24 05:28:05+00:00,1,staypoint,True,,11,12,2008-10-24 05:28:05+00:00,False,False
 20,2008-10-24 05:28:05+00:00,2008-10-24 05:39:50+00:00,1,tripleg,False,12,,,2008-10-24 05:39:53+00:00,True,False
 21,2008-10-24 05:39:53+00:00,2008-10-24 06:08:42+00:00,1,staypoint,True,,12,13,2008-10-24 06:08:42+00:00,False,False
-21,2008-10-24 06:08:42+00:00,2008-10-24 06:35:50+00:00,1,tripleg,False,13,,,,,False
+21,2008-10-24 06:08:42+00:00,2008-10-24 06:35:50+00:00,1,tripleg,False,13,,,,False,False
2 changes: 1 addition & 1 deletion tests/data/trips/sp_tpls_gaps.csv
@@ -29,4 +29,4 @@ id,started_at,finished_at,user_id,type,is_activity,trip_id,prev_trip_id,next_tri
 70,2010-01-13 20:40:00,2010-01-14 00:44:00,1,staypoint,True,,8.0,,2010-01-15 20:39:00,False,True
 126,2010-01-15 20:39:00,2010-01-15 20:40:00,1,tripleg,False,9.0,,,2010-01-15 20:44:00,False,False
 127,2010-01-15 20:44:00,2010-01-15 20:50:00,1,tripleg,False,9.0,,,2010-01-17 20:39:00,False,True
-128,2010-01-17 20:39:00,2010-01-17 20:40:00,1,tripleg,False,10.0,,,,,False
+128,2010-01-17 20:39:00,2010-01-17 20:40:00,1,tripleg,False,10.0,,,,False,False
1 change: 0 additions & 1 deletion tests/geogr/test_distances.py
@@ -242,7 +242,6 @@ def test_known_euclidean_distance(self, two_pfs):
         pfs0, euc00, pfs1, euc01 = two_pfs
         res00 = calculate_distance_matrix(X=pfs0, dist_metric="euclidean")
         res01 = calculate_distance_matrix(X=pfs0, Y=pfs1, dist_metric="euclidean")
-        print(res00)
         assert np.all(euc00 == res00)
         assert np.all(euc01 == res01)

4 changes: 1 addition & 3 deletions tests/geogr/test_filter.py
@@ -18,8 +18,6 @@ def locs_from_geolife():
         method="dbscan", epsilon=10, num_samples=1, distance_metric="haversine", agg_level="dataset"
     )

-    # the projection needs to be defined: WGS84
-    locs.crs = "epsg:4326"
     return locs


@@ -79,7 +77,7 @@ def test_filter_triplegs(self):
     def test_filter_locations(self, locs_from_geolife):
         """Test if spatial_filter works for locations."""
         locs = locs_from_geolife
-        extent = gpd.read_file(os.path.join("tests", "data", "area", "tsinghua.geojson"), crs="epsg:4326")
+        extent = gpd.read_file(os.path.join("tests", "data", "area", "tsinghua.geojson"))

         # filter locations with the area
         within_loc = locs.spatial_filter(areas=extent, method="within", re_project=True)
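
A note on the dropped keyword: `gpd.read_file` has no `crs` parameter of its own; the CRS comes from the file's metadata, and stray keywords are handed to the underlying IO engine, which newer GeoPandas versions warn about or reject. A minimal sketch of the corrected call:

```python
import geopandas as gpd

# The CRS is read from the GeoJSON itself; passing crs= would be forwarded
# to the IO engine and can trigger a warning in recent GeoPandas versions.
extent = gpd.read_file("tests/data/area/tsinghua.geojson")
print(extent.crs)  # whatever CRS the file declares, typically EPSG:4326
```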
4 changes: 2 additions & 2 deletions tests/io/test_from_geopandas.py
@@ -66,9 +66,9 @@ def test_setting_geometry(self, example_positionfixes):
     def test_set_crs(self, example_positionfixes):
         """Test if crs will be set."""
         pfs = example_positionfixes.copy()
-        example_positionfixes.crs = "EPSG:2056"
+        example_positionfixes = example_positionfixes.set_crs("EPSG:2056", allow_override=True)
         # check if the crs is correctly set
-        pfs.crs = None
+        pfs = pfs.set_crs(None, allow_override=True)
         pfs = _trackintel_model(pfs, crs="EPSG:2056")
         assert_geodataframe_equal(example_positionfixes, pfs)
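
Most files in this commit apply the same fix: assigning `gdf.crs = ...` on a frame that already carries a CRS makes GeoPandas warn about unsafely overriding it, while `set_crs(..., allow_override=True)` states that intent explicitly. A minimal sketch of the two idioms (standalone example, assuming a recent GeoPandas):

```python
import geopandas as gpd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame(geometry=[Point(8.54, 47.37)], crs="EPSG:4326")

# Old idiom: overriding an existing CRS via attribute assignment warns in
# recent GeoPandas (and never reprojects the coordinates).
# gdf.crs = "EPSG:2056"

# New idiom: explicitly allow replacing the CRS metadata.
gdf = gdf.set_crs("EPSG:2056", allow_override=True)

# To actually transform coordinates, to_crs is the right tool instead.
projected = gdf.to_crs("EPSG:4326")
```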

10 changes: 5 additions & 5 deletions tests/io/test_postgis.py
@@ -281,7 +281,7 @@ def test_no_crs(self, example_positionfixes, conn_postgis):
         table = "positionfixes"
         sql = f"SELECT * FROM {table}"
         geom_col = pfs.geometry.name
-        pfs.crs = None
+        pfs = pfs.set_crs(None, allow_override=True)
         no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS."
         try:
             with pytest.warns(UserWarning, match=no_crs_warning):
@@ -375,7 +375,7 @@ def test_no_crs(self, example_triplegs, conn_postgis):
         table = "triplegs"
         sql = f"SELECT * FROM {table}"
         geom_col = tpls.geometry.name
-        tpls.crs = None
+        tpls = tpls.set_crs(None, allow_override=True)

         no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS."
         try:
@@ -446,7 +446,7 @@ def test_no_crs(self, example_staypoints, conn_postgis):
         table = "staypoints"
         sql = f"SELECT * FROM {table}"
         geom_col = example_staypoints.geometry.name
-        sp.crs = None
+        sp = sp.set_crs(None, allow_override=True)

         no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS."
         try:
@@ -500,7 +500,7 @@ def test_no_crs(self, example_locations, conn_postgis):
         table = "locations"
         sql = f"SELECT * FROM {table}"
         geom_col = locs.geometry.name
-        locs.crs = None
+        locs = locs.set_crs(None, allow_override=True)

         no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS."
         try:
@@ -731,7 +731,7 @@ class TestGetSrid:
     def test_srid(self, example_positionfixes):
         """Test if `_get_srid` returns the correct srid."""
         gdf = example_positionfixes.copy()
-        gdf.crs = None
+        gdf = gdf.set_crs(None, allow_override=True)
         assert _get_srid(gdf) == -1
         srid = 3857
         gdf.set_crs(f"epsg:{srid}", inplace=True)
9 changes: 4 additions & 5 deletions tests/preprocessing/test_positionfixes.py
@@ -74,10 +74,11 @@ def example_positionfixes_isolated():
         {"user_id": 2, "tracked_at": t2, "geometry": p2, "staypoint_id": pd.NA},
         {"user_id": 2, "tracked_at": t4, "geometry": p3, "staypoint_id": 5},
     ]
-    pfs = gpd.GeoDataFrame(data=list_dict, geometry="geometry", crs="EPSG:4326")
+    pfs = ti.Positionfixes(data=list_dict, geometry="geometry", crs="EPSG:4326")
+    pfs["staypoint_id"] = pfs["staypoint_id"].astype("Int64")
     pfs.index.name = "id"

-    return ti.Positionfixes(pfs)
+    return pfs


class TestGenerate_staypoints:
@@ -201,9 +202,7 @@ def test_include_last(self):
         """Test if the include_last argument will include the last pfs as stp."""
         pfs, _ = ti.io.dataset_reader.read_geolife(os.path.join("tests", "data", "geolife"))

-        pfs_wo, sp_wo = pfs.generate_staypoints(
-            method="sliding", dist_threshold=100, time_threshold=5.0, include_last=False
-        )
+        pfs_wo, sp_wo = pfs.generate_staypoints()
         pfs_include, sp_include = pfs.generate_staypoints(
             method="sliding", dist_threshold=100, time_threshold=5.0, include_last=True
         )
2 changes: 1 addition & 1 deletion tests/preprocessing/test_staypoints.py
@@ -178,7 +178,7 @@ def test_dbscan_hav_euc(self):
             method="dbscan", epsilon=100, num_samples=1, distance_metric="haversine", agg_level="dataset"
         )
         # WGS_1984
-        sp.crs = "epsg:4326"
+        sp = sp.set_crs("epsg:4326", allow_override=True)
         # WGS_1984_UTM_Zone_49N
         sp = sp.to_crs("epsg:32649")
20 changes: 10 additions & 10 deletions tests/preprocessing/test_triplegs.py
@@ -387,27 +387,27 @@ def test_crs(self, example_triplegs):
         """Test that the resulting GeoDataFrame has the correct crs or a warning or error is thrown if not set"""
         sp, tpls = example_triplegs
         # Case 1: sp crs None --> throw warning and set to tpls crs
-        sp.crs = None
+        sp = sp.set_crs(None, allow_override=True)
         with pytest.warns(UserWarning):
             _, _, trips = generate_trips(sp, tpls)
         assert trips.crs == tpls.crs
         # Case 2: Both crs None --> warn and set to None
-        tpls.crs = None
+        tpls = tpls.set_crs(None, allow_override=True)
         with pytest.warns(UserWarning):
             _, _, trips = generate_trips(sp, tpls)
         assert trips.crs is None
         # Case 3: tpls crs is None --> throw warning and set to sp crs
-        sp.crs = "EPSG:4326"
+        sp = sp.set_crs("EPSG:4326", allow_override=True)
         with pytest.warns(UserWarning):
             _, _, trips = generate_trips(sp, tpls)
         assert trips.crs == "EPSG:4326"
         # Case 4: Both crs set and correspond
-        tpls.crs = "EPSG:2056"
-        sp.crs = "EPSG:2056"
+        tpls = tpls.set_crs("EPSG:2056", allow_override=True)
+        sp = sp.set_crs("EPSG:2056", allow_override=True)
         _, _, trips = generate_trips(sp, tpls)
         assert trips.crs == "EPSG:2056"
         # Case 5: Both crs set but differ --> throw error
-        sp.crs = "EPSG:4326"
+        sp = sp.set_crs("EPSG:4326", allow_override=True)
         error_msg = "CRS of staypoints and triplegs differ. Geometry cannot be joined safely."
         with pytest.raises(AssertionError, match=error_msg):
             generate_trips(sp, tpls)
@@ -432,10 +432,9 @@ def _create_debug_sp_tpls_data(sp, tpls, gap_threshold):
     sp_tpls["is_activity"] = sp_tpls["is_activity"].__eq__(True)
     sp_tpls.sort_values(by=["user_id", "started_at"], inplace=True)
     sp_tpls["started_at_next"] = sp_tpls["started_at"].shift(-1)
-    sp_tpls["activity_next"] = sp_tpls["is_activity"].shift(-1)
+    sp_tpls["activity_next"] = sp_tpls["is_activity"].shift(-1, fill_value=False)

     sp_tpls["gap"] = (sp_tpls["started_at_next"] - sp_tpls["finished_at"]).dt.seconds / 60 > gap_threshold
-
     return sp_tpls


@@ -519,16 +518,17 @@ def _generate_trips_old(sp_input, tpls_input, gap_threshold=15, print_progress=F
     sp_tpls["started_at_next"] = sp_tpls["started_at"].shift(-1)
     sp_tpls["is_activity_next"] = sp_tpls["is_activity"].shift(-1)

+    cols = ["started_at", "finished_at", "user_id", "type", "is_activity", "id", "started_at_next", "is_activity_next"]
     if print_progress:
         tqdm.pandas(desc="User trip generation")
         trips = (
-            sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)
+            sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)[cols]
             .progress_apply(_generate_trips_user, gap_threshold=gap_threshold)
             .reset_index(drop=True)
         )
     else:
         trips = (
-            sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)
+            sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)[cols]
             .apply(_generate_trips_user, gap_threshold=gap_threshold)
             .reset_index(drop=True)
         )
4 changes: 2 additions & 2 deletions trackintel/analysis/metrics.py
@@ -38,9 +38,9 @@ def radius_gyration(sp, method="count", print_progress=False):

     if print_progress:
         tqdm.pandas(desc="User radius of gyration calculation")
-        s = sp.groupby("user_id").progress_apply(_radius_gyration_user, method=method)
+        s = sp.groupby("user_id").progress_apply(_radius_gyration_user, method=method, include_groups=False)
     else:
-        s = sp.groupby("user_id").apply(_radius_gyration_user, method=method)
+        s = sp.groupby("user_id").apply(_radius_gyration_user, method=method, include_groups=False)

     s = s.rename("radius_gyration")
     return s
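
Both branches get the same treatment: since pandas 2.2, `DataFrameGroupBy.apply` warns that it will stop passing the grouping columns into the applied function, and `include_groups=False` opts into the future behavior, silencing the warning. A standalone sketch (assuming pandas >= 2.2):

```python
import pandas as pd

sp = pd.DataFrame({"user_id": [0, 0, 1], "x": [1.0, 3.0, 5.0]})

# Without include_groups=False this emits a DeprecationWarning in pandas 2.2+,
# because "user_id" itself would be handed to the lambda as a column.
s = sp.groupby("user_id").apply(lambda g: g["x"].mean(), include_groups=False)
print(s)  # user_id 0 -> 2.0, 1 -> 5.0
```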
26 changes: 15 additions & 11 deletions trackintel/analysis/tracking_quality.py
@@ -76,7 +76,9 @@ def temporal_tracking_quality(source, granularity="all"):
         return None

     if granularity == "all":
-        quality = df.groupby("user_id", as_index=False).apply(_get_tracking_quality_user, granularity)
+        quality = df.groupby("user_id", as_index=False).apply(
+            _get_tracking_quality_user, granularity, include_groups=False
+        )
         return quality

     # split records that span several days
@@ -90,19 +92,11 @@ def temporal_tracking_quality(source, granularity="all"):
         column_name = "week_monday"

     elif granularity == "weekday":
-        # get the tracked week relative to the first day
-        start_date = df["started_at"].min().floor(freq="D")
-        df["week"] = ((df["started_at"] - start_date)).dt.days // 7
-
         grouper = df["started_at"].dt.weekday
         column_name = "weekday"

     elif granularity == "hour":
         df = _split_overlaps(df, granularity="hour")
-        # get the tracked day relative to the first day
-        start_date = df["started_at"].min().floor(freq="D")
-        df["day"] = (df["started_at"] - start_date).dt.days
-
         grouper = df["started_at"].dt.hour
         column_name = "hour"

@@ -111,8 +105,13 @@ def temporal_tracking_quality(source, granularity="all"):
         f"granularity unknown. We only support ['all', 'day', 'week', 'weekday', 'hour']. You passed {granularity}"
     )

+    start_date = df["started_at"].min().floor(freq="D")
     # calculate per-user per-grouper tracking quality
-    quality = df.groupby(["user_id", grouper]).apply(_get_tracking_quality_user, granularity).reset_index()
+    quality = (
+        df.groupby(["user_id", grouper])[["started_at", "finished_at"]]
+        .apply(_get_tracking_quality_user, start_date, granularity)
+        .reset_index()
+    )

     # rename and reorder
     quality.rename(columns={"started_at": column_name}, inplace=True)
@@ -121,7 +120,7 @@ def temporal_tracking_quality(source, granularity="all"):
     return quality


-def _get_tracking_quality_user(df, granularity="all"):
+def _get_tracking_quality_user(df, start_date, granularity="all"):
     """
     Tracking quality per-user per-granularity.
@@ -130,6 +129,9 @@ def _get_tracking_quality_user(df, granularity="all"):
     df : Trackintel class
         The source dataframe
+    start_date: pd.Timestamp
+        When measurement started, used to calculate in which weekday or week the measurement lies.
     granularity : {"all", "day", "weekday", "week", "hour"}, default "all"
         Determines the extent of the tracking. "all" the entire tracking period,
         "day" and "weekday" a whole day, "week" a whole week, and "hour" a whole hour.
@@ -149,13 +151,15 @@ def _get_tracking_quality_user(df, granularity="all"):
     elif granularity == "weekday":
         # total seconds in a day * number of tracked weeks
         # (entries from multiple weeks may be grouped together)
+        df["week"] = ((df["started_at"] - start_date)).dt.days // 7
         extent = 60 * 60 * 24 * (df["week"].max() - df["week"].min() + 1)
     elif granularity == "week":
         # total seconds in a week
         extent = 60 * 60 * 24 * 7
     elif granularity == "hour":
         # total seconds in an hour * number of tracked days
         # (entries from multiple days may be grouped together)
+        df["day"] = (df["started_at"] - start_date).dt.days
         extent = (60 * 60) * (df["day"].max() - df["day"].min() + 1)
     else:
         raise ValueError(
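
Worth noting why start_date moved: the groupby now selects only ["started_at", "finished_at"] before the apply, so helper columns like "week" or "day" computed on the full frame would never reach _get_tracking_quality_user. Instead, a single start_date is computed once and passed down, and the helper derives the offsets itself. A toy sketch of that column-selection pattern (hypothetical _quality helper, not the trackintel one):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "user_id": [0, 0, 1],
        "started_at": pd.to_datetime(["2023-01-02", "2023-01-09", "2023-01-03"]),
        "finished_at": pd.to_datetime(["2023-01-02", "2023-01-09", "2023-01-04"]),
    }
)

def _quality(group, start_date):
    # group only contains the selected columns, so derived values
    # (e.g. the week offset) must be computed here, not beforehand.
    week = (group["started_at"] - start_date).dt.days // 7
    return week.nunique()  # hypothetical stand-in for a quality measure

start_date = df["started_at"].min().floor("D")
out = df.groupby("user_id")[["started_at", "finished_at"]].apply(_quality, start_date)
print(out)  # user 0 spans two weeks -> 2, user 1 -> 1
```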
2 changes: 1 addition & 1 deletion trackintel/geogr/distances.py
@@ -358,7 +358,7 @@ def get_speed_triplegs(triplegs, positionfixes=None, method="tpls_speed"):
     if "tripleg_id" not in positionfixes:
         raise AttributeError('Positionfixes must include column "tripleg_id".')
     # group positionfixes by triplegs and compute average speed for each collection of positionfixes
-    grouped_pfs = positionfixes.groupby("tripleg_id").apply(_single_tripleg_mean_speed)
+    grouped_pfs = positionfixes.groupby("tripleg_id").apply(_single_tripleg_mean_speed, include_groups=False)
     # add the speed values to the triplegs column
     tpls = pd.merge(triplegs, grouped_pfs.rename("speed"), how="left", left_index=True, right_index=True)
     tpls.index = tpls.index.astype("int64")
6 changes: 3 additions & 3 deletions trackintel/geogr/filter.py
@@ -55,11 +55,11 @@ def spatial_filter(source, areas, method="within", re_project=False):

     # get final result
     if method == "within":
-        ret_gdf = possible_matches.loc[possible_matches.within(areas.unary_union)]
+        ret_gdf = possible_matches.loc[possible_matches.within(areas.union_all())]
     elif method == "intersects":
-        ret_gdf = possible_matches.loc[possible_matches.intersects(areas.unary_union)]
+        ret_gdf = possible_matches.loc[possible_matches.intersects(areas.union_all())]
     elif method == "crosses":
-        ret_gdf = possible_matches.loc[possible_matches.crosses(areas.unary_union)]
+        ret_gdf = possible_matches.loc[possible_matches.crosses(areas.union_all())]
     else:
         raise ValueError("method unknown. We only support ['within', 'intersects', 'crosses']. " f"You passed {method}")

8 changes: 4 additions & 4 deletions trackintel/preprocessing/positionfixes.py
@@ -254,7 +254,7 @@ def generate_triplegs(

     # initialize the index list of pfs where a tpl will begin
     insert_index_ls = []
-    pfs["staypoint_id"] = pd.NA
+    pfs["staypoint_id"] = pd.Series(dtype="Int64")

     for user_id_this in pfs["user_id"].unique():
         sp_user = staypoints[staypoints["user_id"] == user_id_this]
@@ -282,7 +282,7 @@ def generate_triplegs(

     # initialize tripleg_id with pd.NA and fill all pfs that belong to staypoints with -1
     # pd.NA will be replaced later with tripleg ids
-    pfs["tripleg_id"] = pd.NA
+    pfs["tripleg_id"] = pd.Series(dtype="Int64")
     pfs.loc[~pd.isna(pfs["staypoint_id"]), "tripleg_id"] = -1

     # get all conditions that trigger a new tripleg.
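
Initializing the id columns with the scalar pd.NA would make them object dtype and provoke downstream dtype warnings; assigning an empty nullable-integer Series instead aligns on the index and yields an all-<NA> column that is Int64 from the start. A minimal sketch:

```python
import pandas as pd

pfs = pd.DataFrame({"user_id": [0, 1]})

pfs["old_way"] = pd.NA                      # object dtype
pfs["new_way"] = pd.Series(dtype="Int64")   # Int64 dtype, values all <NA>

print(pfs["old_way"].dtype, pfs["new_way"].dtype)  # object Int64
```
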
@@ -437,7 +437,7 @@ def _generate_triplegs_overlap_staypoints(cond_temporal_gap, pfs, staypoints):

     # spatial overlap: overlap tripleg with the location of previous and next staypoint
     # geometry: tpl's share common start and end pfs with sp
-    cond_overlap_end = cond_overlap & ~cond_temporal_gap.shift(-1).fillna(False) & pd.isna(pfs["tripleg_id"])
+    cond_overlap_end = cond_overlap & ~cond_temporal_gap.shift(-1, fill_value=False) & pd.isna(pfs["tripleg_id"])
     pfs.loc[cond_overlap_end, "tripleg_id"] = between_tpls_ids.shift(-1)[cond_overlap_end]
     cond_empty = pd.isna(pfs["tripleg_id"])
     pfs.loc[cond_empty, "tripleg_id"] = between_tpls_ids[cond_empty]
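
The shift fix targets pandas' downcasting FutureWarning: shifting a boolean Series leaves a NaN in the vacated slot (upcasting to object), and the later fillna(False) then has to downcast back. Supplying fill_value keeps the dtype boolean throughout. Sketch:

```python
import pandas as pd

flags = pd.Series([True, False, True])

# Old: NaN appears first, fillna downcasts object -> bool (FutureWarning).
old = flags.shift(-1).fillna(False)

# New: the hole is filled immediately, dtype never leaves bool.
new = flags.shift(-1, fill_value=False)
print(new.tolist(), new.dtype)  # [False, True, False] bool
```
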
@@ -524,7 +524,7 @@ def __create_new_staypoints(start, end, pfs, elevation_flag, geo_col, last_flag=

     # if end is the last pfs, we want to include the info from it as well
     if last_flag:
         end = len(pfs)
-    points = pfs[geo_col].iloc[start:end].unary_union
+    points = pfs[geo_col].iloc[start:end].union_all()
     if check_gdf_planar(pfs):
         new_sp[geo_col] = points.centroid
     else:
15 changes: 9 additions & 6 deletions trackintel/preprocessing/triplegs.py
@@ -133,7 +133,13 @@ def _seperate_ids(row):
     user_change[["type", "is_activity"]] = ["user_change", True]  # nicer for debugging

     # merge trips with (filler) activities
-    trips.drop(columns=["type", "sp_tpls_id"], inplace=True)  # make space so no overlap with activity "sp_tpls_id"
+
+    # make space so no overlap with activity "sp_tpls_id"
+    trips.drop(columns=["type", "sp_tpls_id"], inplace=True)
+
+    # trips are no activity (with this we don't have to fillna later)
+    trips["is_activity"] = False

     # Inserting `gaps` and `user_change` into the dataframe creates buffers that catch shifted
     # "staypoint_id" and "trip_id" from corrupting staypoints/trips.
     trips_with_act = pd.concat((trips, sp_tpls_only_act, gaps, user_change), axis=0, ignore_index=True)
@@ -153,8 +159,6 @@ def _seperate_ids(row):
     trips_with_act["prev_trip_id"] = trips_with_act["trip_id"].shift(1)
     trips_with_act["next_trip_id"] = trips_with_act["trip_id"].shift(-1)

-    # transform column to binary
-    trips_with_act["is_activity"] = trips_with_act["is_activity"].fillna(False)
     # delete activities
     trips = trips_with_act[~trips_with_act["is_activity"]].copy()

@@ -268,9 +272,8 @@ def _concat_staypoints_triplegs(staypoints, triplegs, add_geometry):
     sp["type"] = "staypoint"

     # create table with relevant information from triplegs and staypoints.
-    sp_cols = ["started_at", "finished_at", "user_id", "type", "is_activity"]
-    tpls_cols = ["started_at", "finished_at", "user_id", "type"]
-    sp_tpls = pd.concat([sp[sp_cols], tpls[tpls_cols]])
+    cols = ["started_at", "finished_at", "user_id", "type", "is_activity"]
+    sp_tpls = pd.concat([sp[cols], tpls[cols]])
     sp_tpls["is_activity"] = sp_tpls["is_activity"].fillna(False)
     sp_tpls["sp_tpls_id"] = sp_tpls.index  # store id for later reassignment
     if add_geometry:
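
Presetting trips["is_activity"] = False (see the first hunk in this file) is what makes the removed fillna(False) unnecessary: pd.concat fills columns missing from one frame with NaN, and filling those NaNs afterwards is exactly the object-downcasting pattern pandas now warns about. Sketch:

```python
import pandas as pd

trips = pd.DataFrame({"user_id": [0]})
activities = pd.DataFrame({"user_id": [1], "is_activity": [True]})

# Preset the flag so concat never creates NaN in "is_activity".
trips["is_activity"] = False
merged = pd.concat([trips, activities], ignore_index=True)
print(merged["is_activity"].tolist())  # [False, True]
```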
