From 2d95df72d34df6c561e0d5b2235809f05b4b1ddc Mon Sep 17 00:00:00 2001 From: Moritz Kiehn Date: Thu, 26 Apr 2018 14:19:38 +0200 Subject: [PATCH 1/9] randomize: fix random_solution --- trackml/randomize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trackml/randomize.py b/trackml/randomize.py index 07c2d0d..43d3d62 100644 --- a/trackml/randomize.py +++ b/trackml/randomize.py @@ -24,7 +24,7 @@ def set_seed(seed): numpy.random.seed(seed) def random_solution(truth, ntracks): - """Generate a completely random solution with the given number of particles. + """Generate a completely random solution with the given number of tracks. Parameters ---------- @@ -33,7 +33,7 @@ def random_solution(truth, ntracks): ntracks : int Number of tracks the submission should contain. """ - ids = numpy.random.randint(1, nparticles + 1, size=len(mapping), dtype='i4') + ids = numpy.random.randint(1, ntracks + 1, size=len(truth), dtype='i4') return _make_submission(truth, ids, renumber=False) def drop_hits(truth, probability): From c143e0fac464dd1d1a9ae936917b7c544c4b3f8b Mon Sep 17 00:00:00 2001 From: Moritz Kiehn Date: Thu, 26 Apr 2018 14:25:26 +0200 Subject: [PATCH 2/9] randomize: clarify random_solution doc --- trackml/randomize.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/trackml/randomize.py b/trackml/randomize.py index 43d3d62..8623e64 100644 --- a/trackml/randomize.py +++ b/trackml/randomize.py @@ -23,18 +23,18 @@ def set_seed(seed): """Set the random seed used for randomness in this module.""" numpy.random.seed(seed) -def random_solution(truth, ntracks): +def random_solution(hits, ntracks): """Generate a completely random solution with the given number of tracks. Parameters ---------- - truth : pandas.DataFrame - Truth mapping must contain hit_id and particle_id columns. + hits : pandas.DataFrame + Hits information must contain hit_id column. ntracks : int Number of tracks the submission should contain. 
""" - ids = numpy.random.randint(1, ntracks + 1, size=len(truth), dtype='i4') - return _make_submission(truth, ids, renumber=False) + ids = numpy.random.randint(1, ntracks + 1, size=len(hits), dtype='i4') + return _make_submission(hits, ids, renumber=False) def drop_hits(truth, probability): """Drop hits from each track with a certain probability. From 24aaadcb3424113b9592099ffceb396eda3284c1 Mon Sep 17 00:00:00 2001 From: Moritz Kiehn Date: Thu, 26 Apr 2018 14:54:03 +0200 Subject: [PATCH 3/9] randomize: simplify internal code --- trackml/randomize.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/trackml/randomize.py b/trackml/randomize.py index 8623e64..ed176d1 100644 --- a/trackml/randomize.py +++ b/trackml/randomize.py @@ -6,12 +6,11 @@ import numpy import numpy.random -def _make_submission(mapping, track_ids, renumber=True): +def _make_submission(hit_ids, track_ids, renumber=True): """Create a submission DataFrame with hit_id and track_id columns. Optionally renumbers the track_id to random small integers. """ - hit_ids = mapping['hit_id'] if renumber: unique_ids, inverse = numpy.unique(track_ids, return_inverse=True) numbers = numpy.arange(1, len(unique_ids) + 1, dtype=unique_ids.dtype) @@ -34,7 +33,7 @@ def random_solution(hits, ntracks): Number of tracks the submission should contain. """ ids = numpy.random.randint(1, ntracks + 1, size=len(hits), dtype='i4') - return _make_submission(hits, ids, renumber=False) + return _make_submission(hits['hit_id'], ids, renumber=False) def drop_hits(truth, probability): """Drop hits from each track with a certain probability. 
@@ -55,7 +54,7 @@ def drop_hits(truth, probability): fakeids = numpy.arange(fakeid0, fakeid0 + dropped_count, dtype='i8') # replace masked particle ids with fakes ones numpy.place(out, dropped_mask, fakeids) - return _make_submission(truth, out) + return _make_submission(truth['hit_id'], out) def shuffle_hits(truth, probability): """Randomly assign hits to a wrong particle with a certain probability. @@ -73,4 +72,4 @@ def shuffle_hits(truth, probability): wrongparticles = numpy.random.choice(numpy.unique(out), size=shuffled_count) # replace masked particle ids with random valid ids numpy.place(out, shuffled_mask, wrongparticles) - return _make_submission(truth, out) + return _make_submission(truth['hit_id'], out) From f58f2ebbd3fee533595c0b083da107b97841e73e Mon Sep 17 00:00:00 2001 From: Moritz Kiehn Date: Thu, 26 Apr 2018 14:54:32 +0200 Subject: [PATCH 4/9] README: fix Kaggle link --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3bf9dad..0e9e654 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ TrackML utility library ======================= A python library to simplify working with the -[High Energy Physics Tracking Machine Learning challenge](kaggle_trackml) +[High Energy Physics Tracking Machine Learning challenge][kaggle_trackml] dataset. Installation @@ -186,7 +186,7 @@ The submission file must associate each hit in each event to one and only one reconstructed particle track. The reconstructed tracks must be uniquely identified only within each event. Participants are advised to compress the submission file (with zip, bzip2, gzip) before submission to the -[Kaggle site](kaggle_trackml). +[Kaggle site][kaggle_trackml]. * **event_id**: numerical identifier of the event; corresponds to the number found in the per-event file name prefix. 
From ff1b01199395bef1e1559d115d67f906643b4fa9 Mon Sep 17 00:00:00 2001 From: Moritz Kiehn Date: Thu, 26 Apr 2018 14:55:28 +0200 Subject: [PATCH 5/9] README: wording --- README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0e9e654..c3cc556 100644 --- a/README.md +++ b/README.md @@ -96,11 +96,10 @@ some hits can be left unassigned). The training dataset contains the recorded hits, their truth association to particles, and the initial parameters of those particles. The test dataset contains only the recorded hits. -The dataset is provided as a set of plain `.csv` files (`.csv.gz` or `.csv.bz2` -are also allowed). Each event has four associated files that contain hits, hit -cells, particles, and the ground truth association between them. The common -prefix (like `event000000000`) is fully constrained to be `event` followed by 9 -digits. +The dataset is provided as a set of plain `.csv` files. Each event has four +associated files that contain hits, hit cells, particles, and the ground truth +association between them. The common prefix, e.g. `event000000010`, is always +`event` followed by 9 digits. event000000000-hits.csv event000000000-cells.csv @@ -159,7 +158,7 @@ The particles files contains the following values for each particle/entry: coordinates. * **px, py, pz**: initial momentum (in GeV/c) along each global axis. * **q**: particle charge (as multiple of the absolute electron charge). -* **nhits**: number of hits generated by this particle +* **nhits**: number of hits generated by this particle. All entries contain the generated information or ground truth. @@ -193,7 +192,7 @@ submission file (with zip, bzip2, gzip) before submission to the * **hit_id**: numerical identifier of the hit inside the event as defined in the per-event hits file. * **track_id**: user-defined numerical identifier (non-negative integer) of - the track + the track. 
[cern]: https://home.cern From f6050c2c18c0e95ec00f872d12c3aad1e0e0e193 Mon Sep 17 00:00:00 2001 From: Moritz Kiehn Date: Fri, 27 Apr 2018 19:21:00 +0200 Subject: [PATCH 6/9] README: word fixes --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c3cc556..1a9dcc0 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ a name starting with `submission`, e.g. The hits file contains the following values for each hit/entry: * **hit_id**: numerical identifier of the hit inside the event. -* **x, y, z**: measured x, y, z position (in millimeters) of the hit in +* **x, y, z**: measured x, y, z position (in millimeter) of the hit in global coordinates. * **volume_id**: numerical identifier of the detector group. * **layer_id**: numerical identifier of the detector layer inside the From ae54cdb922d1089329396e9a8097826b4556158c Mon Sep 17 00:00:00 2001 From: Moritz Kiehn Date: Fri, 27 Apr 2018 20:16:12 +0200 Subject: [PATCH 7/9] README: add detector files description --- README.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/README.md b/README.md index 1a9dcc0..a7cb0cd 100644 --- a/README.md +++ b/README.md @@ -194,6 +194,49 @@ submission file (with zip, bzip2, gzip) before submission to the * **track_id**: user-defined numerical identifier (non-negative integer) of the track. +### Additional detector geometry information + +The detector modules that measure particles and generate the hits are organized +into detector groups or volumes identified by a volume id. Inside a volume they +are further grouped into layers identified by a layer id. Each layer can contain +an arbitrary number of detector modules, the smallest geometrically distinct +detector object, each identified by a module_id. Within each group detector +modules are of the same type and have e.g. the same granularity. 
All simulated +detector modules are so-called semiconductor sensors that are built from thin +silicon sensor chips. Each module can be represented by a two-dimensional, +planar, bounded sensitive surface. These sensitive surfaces are subdivided into +regular grids that define the detector cells, the smallest granularity within +the detector. + +Each module has a different position and orientation described in the detectors +file. A local, right-handed coordinate system is defined on each sensitive +surface such that the first two coordinates u and v are on the sensitive surface +and the third coordinate w is normal to the surface. The orientation and +position are defined by the following transformation + + pos_xyz = rotation_matrix * pos_uvw + offset + +that transforms a position described in local coordinates u,v,w into the +equivalent position x,y,z in global coordinates using a rotation matrix and +an offset. + +* **volume_id**: numerical identifier of the detector group. +* **layer_id**: numerical identifier of the detector layer inside the + group. +* **module_id**: numerical identifier of the detector module inside + the layer. +* **cx, cy, cz**: position of the local origin described in the global + coordinate system (in millimeter). +* **rot_xu, rot_xv, rot_xw, rot_yu, ...**: components of the rotation matrix + to rotate from local u,v,w to global x,y,z coordinates. +* **module_t**: thickness of the detector module (in millimeter). +* **module_minhu, module_maxhu**: the minimum/maximum half-length of the + module boundary along the local u direction (in millimeter). +* **module_hv**: the half-length of the module boundary along the local v + direction (in millimeter). +* **pitch_u, pitch_v**: the size of detector cells along the local u and v + directions (in millimeter). 
+ [cern]: https://home.cern [lhc]: https://home.cern/topics/large-hadron-collider From 0ba18a62f76039b8821fb90fb3a960f33434f5c9 Mon Sep 17 00:00:00 2001 From: Moritz Kiehn Date: Fri, 27 Apr 2018 20:16:40 +0200 Subject: [PATCH 8/9] set version=2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5421b2a..0aa3574 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='trackml', - version='1', + version='2', description='TrackML utility library', long_description=long_description, long_description_content_type='text/markdown', From 71fe6ea97a4c94611863567bb6b7e517ed486e0f Mon Sep 17 00:00:00 2001 From: Moritz Kiehn Date: Fri, 27 Apr 2018 20:51:31 +0200 Subject: [PATCH 9/9] README: mention particle_id=0 --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a7cb0cd..5e95db2 100644 --- a/README.md +++ b/README.md @@ -170,7 +170,8 @@ particle/track. * **hit_id**: numerical identifier of the hit as defined in the hits file. * **particle_id**: numerical identifier of the generating particle as defined - in the particles file. + in the particles file. A value of 0 means that the hit did not originate + from a reconstructible particle, but e.g. from detector noise. * **tx, ty, tz** true intersection point in global coordinates (in millimeters) between the particle trajectory and the sensitive surface. * **tpx, tpy, tpz** true particle momentum (in GeV/c) in the global