Merge pull request #68 from alk-lbinet/clarifications

Documentation and cleanup
alkemics · Apr 23, 2021 · 6d92b7a · 6d92b7a
2 parents 6b9a393 + 1edda9d
commit 6d92b7a
Show file tree

Hide file tree

Showing 61 changed files with 1,317 additions and 1,182 deletions.
diff --git a/README.md b/README.md
@@ -21,9 +21,9 @@ and is intended to make it more convenient to deal with deeply nested queries an
 ## Features
 
 - flexible aggregation and search queries declaration, with ability to insert clauses at specific points (and not only below last manipulated clause)
-- query validation based on provided mapping
+- query validation based on provided mappings
 - parsing of aggregation results in convenient formats: tree with interactive navigation, csv-like tabular breakdown, pandas dataframe, and others
-- cluster indices discovery module, and mapping interactive navigation
+- cluster indices discovery module, and mappings interactive navigation
 
 
 ## Documentation
@@ -56,12 +56,12 @@ Discover indices on cluster with matching pattern:
 <Indices> ['movies', 'movies_fake']
 ```
 
-Explore index mapping:
+Explore index mappings:
 
 ```python
 >>> movies = indices.movies
->>> movies.mapping
-<Mapping>
+>>> movies.mappings
+<Mappings>
 _
 ├── directors                                                [Nested]
 │   ├── director_id                                           Keyword
@@ -78,8 +78,8 @@ _
 ...
 ```
 ```python
->>> movies.mapping.roles
-<Mapping subpart: roles>
+>>> movies.mappings.roles
+<Mappings subpart: roles>
 roles                                                        [Nested]
 ├── actor_id                                                  Keyword
 ├── first_name                                                Text
@@ -95,7 +95,7 @@ roles                                                        [Nested]
 Execute aggregation on field:
 
 ```python
->>> movies.mapping.roles.gender.a.terms()
+>>> movies.mappings.roles.gender.a.terms()
    doc_count key
 M    2296792   M
 F    1135174   F

diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst
@@ -15,7 +15,7 @@ Elasticsearch tree structures
 
 Many Elasticsearch objects have a **tree** structure, ie they are built from a hierarchy of **nodes**:
 
-* a `mapping <https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html>`_ (tree) is a hierarchy of `fields <https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html>`_ (nodes)
+* a `mappings <https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html>`_ (tree) is a hierarchy of `fields <https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html>`_ (nodes)
 * a `query <https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html>`_ (tree) is a hierarchy of query clauses (nodes)
 * an `aggregation <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html>`_ (tree) is a hierarchy of aggregation clauses (nodes)
 * an aggregation response (tree) is a hierarchy of response buckets (nodes)

diff --git a/docs/source/reference/pandagg.interactive.mappings.rst b/docs/source/reference/pandagg.interactive.mappings.rst
@@ -0,0 +1,7 @@
+pandagg.interactive.mappings module
+===================================
+
+.. automodule:: pandagg.interactive.mappings
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/pandagg.interactive.rst b/docs/source/reference/pandagg.interactive.rst
@@ -7,7 +7,7 @@ Submodules
 .. toctree::
    :maxdepth: 8
 
-   pandagg.interactive.mapping
+   pandagg.interactive.mappings
    pandagg.interactive.response
 
 Module contents

diff --git a/docs/source/reference/pandagg.mappings.rst b/docs/source/reference/pandagg.mappings.rst
@@ -0,0 +1,7 @@
+pandagg.mappings module
+=======================
+
+.. automodule:: pandagg.mappings
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/pandagg.node.aggs.composite.rst b/docs/source/reference/pandagg.node.aggs.composite.rst
@@ -0,0 +1,7 @@
+pandagg.node.aggs.composite module
+==================================
+
+.. automodule:: pandagg.node.aggs.composite
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/pandagg.node.aggs.rst b/docs/source/reference/pandagg.node.aggs.rst
@@ -9,6 +9,7 @@ Submodules
 
    pandagg.node.aggs.abstract
    pandagg.node.aggs.bucket
+   pandagg.node.aggs.composite
    pandagg.node.aggs.metric
    pandagg.node.aggs.pipeline
 

diff --git a/docs/source/reference/pandagg.node.mappings.abstract.rst b/docs/source/reference/pandagg.node.mappings.abstract.rst
@@ -0,0 +1,7 @@
+pandagg.node.mappings.abstract module
+=====================================
+
+.. automodule:: pandagg.node.mappings.abstract
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/pandagg.node.mappings.field_datatypes.rst b/docs/source/reference/pandagg.node.mappings.field_datatypes.rst
@@ -0,0 +1,7 @@
+pandagg.node.mappings.field\_datatypes module
+=============================================
+
+.. automodule:: pandagg.node.mappings.field_datatypes
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/pandagg.node.mappings.meta_fields.rst b/docs/source/reference/pandagg.node.mappings.meta_fields.rst
@@ -0,0 +1,7 @@
+pandagg.node.mappings.meta\_fields module
+=========================================
+
+.. automodule:: pandagg.node.mappings.meta_fields
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/pandagg.node.mappings.rst b/docs/source/reference/pandagg.node.mappings.rst
@@ -0,0 +1,20 @@
+pandagg.node.mappings package
+=============================
+
+Submodules
+----------
+
+.. toctree::
+   :maxdepth: 8
+
+   pandagg.node.mappings.abstract
+   pandagg.node.mappings.field_datatypes
+   pandagg.node.mappings.meta_fields
+
+Module contents
+---------------
+
+.. automodule:: pandagg.node.mappings
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/pandagg.node.rst b/docs/source/reference/pandagg.node.rst
@@ -8,7 +8,7 @@ Subpackages
    :maxdepth: 8
 
    pandagg.node.aggs
-   pandagg.node.mapping
+   pandagg.node.mappings
    pandagg.node.query
    pandagg.node.response
 

diff --git a/docs/source/reference/pandagg.rst b/docs/source/reference/pandagg.rst
@@ -21,7 +21,7 @@ Submodules
    pandagg.connections
    pandagg.discovery
    pandagg.exceptions
-   pandagg.mapping
+   pandagg.mappings
    pandagg.query
    pandagg.response
    pandagg.search

diff --git a/docs/source/reference/pandagg.tree.aggs.rst b/docs/source/reference/pandagg.tree.aggs.rst
@@ -1,16 +1,5 @@
-pandagg.tree.aggs package
-=========================
-
-Submodules
-----------
-
-.. toctree::
-   :maxdepth: 8
-
-   pandagg.tree.aggs.aggs
-
-Module contents
----------------
+pandagg.tree.aggs module
+========================
 
 .. automodule:: pandagg.tree.aggs
    :members:

diff --git a/docs/source/reference/pandagg.tree.mappings.rst b/docs/source/reference/pandagg.tree.mappings.rst
@@ -0,0 +1,7 @@
+pandagg.tree.mappings module
+============================
+
+.. automodule:: pandagg.tree.mappings
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/pandagg.tree.query.rst b/docs/source/reference/pandagg.tree.query.rst
@@ -1,16 +1,5 @@
-pandagg.tree.query package
-==========================
-
-Submodules
-----------
-
-.. toctree::
-   :maxdepth: 8
-
-   pandagg.tree.query.abstract
-
-Module contents
----------------
+pandagg.tree.query module
+=========================
 
 .. automodule:: pandagg.tree.query
    :members:

diff --git a/docs/source/reference/pandagg.tree.rst b/docs/source/reference/pandagg.tree.rst
@@ -1,22 +1,15 @@
 pandagg.tree package
 ====================
 
-Subpackages
------------
-
-.. toctree::
-   :maxdepth: 8
-
-   pandagg.tree.aggs
-   pandagg.tree.query
-
 Submodules
 ----------
 
 .. toctree::
    :maxdepth: 8
 
-   pandagg.tree.mapping
+   pandagg.tree.aggs
+   pandagg.tree.mappings
+   pandagg.tree.query
    pandagg.tree.response
 
 Module contents

diff --git a/examples/imdb/README.md b/examples/imdb/README.md
@@ -27,7 +27,7 @@ Relational schema is the following:
 
 ![imdb tables](ressources/imdb_ijs.svg)
 
-## Index mapping
+## Index mappings
 
 #### Overview
 The base unit (document) will be a movie, having a name, rank (ratings), year of release, a list of actors
@@ -46,7 +46,7 @@ Movie:
 
 #### Which fields require nesting?
 Since genres contain a single keyword field, in no case we need it to be stored as a nested field.
-On the contrary, actor roles and directors require a nested mapping if we consider applying multiple
+On the contrary, actor roles and directors require a nested field if we consider applying multiple
 simultaneous query clauses on their sub-fields (for instance search movie in which actor is a woman AND whose role is
 nurse).
 More information on distinction between array and nested fields [here](
@@ -61,10 +61,10 @@ opt for a text field. Yet we might want to aggregate on exact keywords to count
 More information on distinction between text and keyword fields [here](
 https://www.elastic.co/fr/blog/strings-are-dead-long-live-strings)
 
-#### Mapping
+#### Mappings
 
 ```
-<Mapping>
+<Mappings>
 _
 ├── directors                                                [Nested]
 │   ├── director_id                                           Keyword
@@ -151,7 +151,7 @@ OUTPUT_FILE_NAME = 'serialized.json'
 # can take a while
 python examples/imdb/serialize.py
 
-# create index with mapping if necessary, bulk insert documents in ES
+# create index with mappings if necessary, bulk insert documents in ES
 python examples/imdb/load.py
 ```
 

diff --git a/examples/imdb/load.py b/examples/imdb/load.py
@@ -2,10 +2,10 @@
 from os.path import join
 from elasticsearch import Elasticsearch, helpers
 from examples.imdb.conf import ES_HOST, ES_USE_AUTH, ES_PASSWORD, ES_USER, DATA_DIR
-from pandagg.mapping import Mapping, Keyword, Text, Float, Nested, Integer
+from pandagg.mappings import Mappings, Keyword, Text, Float, Nested, Integer
 
 index_name = "movies"
-mapping = Mapping(
+mappings = Mappings(
     dynamic=False,
     properties={
         "movie_id": Keyword(),
@@ -68,8 +68,8 @@ def bulk_index(client, docs):
     print("CREATE INDEX\n")
     es_client.indices.create(index_name)
     print("-" * 50)
-    print("UPDATE MAPPING\n")
-    es_client.indices.put_mapping(index=index_name, body=mapping)
+    print("UPDATE MAPPINGS\n")
+    es_client.indices.put_mapping(index=index_name, body=mappings)
 
     print("-" * 50)
     print("WRITE DOCUMENTS\n")

diff --git a/pandagg/connections.py b/pandagg/connections.py
@@ -1,5 +1,4 @@
 # copied from elasticsearch-dsl/connections.py
-from six import string_types
 
 from elasticsearch import Elasticsearch
 
@@ -77,7 +76,7 @@ def get_connection(self, alias="default"):
         """
         # do not check isinstance(Elasticsearch) so that people can wrap their
         # clients
-        if not isinstance(alias, string_types):
+        if not isinstance(alias, str):
             return alias
 
         # connection already established

diff --git a/pandagg/discovery.py b/pandagg/discovery.py
@@ -1,11 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 
-from future.utils import iteritems, python_2_unicode_compatible
 from lighttree.interactive import Obj
 
-from pandagg.interactive.mapping import IMapping
+from pandagg.interactive.mappings import IMappings
 from pandagg.search import Search
 
 
@@ -15,11 +13,11 @@ def discover(using, index="*"):
     :param index: Comma-separated list or wildcard expression of index names used to limit the request.
     """
     indices = Indices()
-    for index_name, index_detail in iteritems(using.indices.get(index=index)):
+    for index_name, index_detail in using.indices.get(index=index).items():
         indices[index_name] = Index(
             client=using,
             name=index_name,
-            mapping=index_detail["mappings"],
+            mappings=index_detail["mappings"],
             settings=index_detail["settings"],
             aliases=index_detail["aliases"],
         )
@@ -29,21 +27,20 @@ def discover(using, index="*"):
 # until Proper Index class is written
 
 
-@python_2_unicode_compatible
 class Index(object):
-    def __init__(self, name, settings, mapping, aliases, client=None):
+    def __init__(self, name, settings, mappings, aliases, client=None):
         super(Index, self).__init__()
         self.client = client
         self.name = name
         self.settings = settings
-        self._mapping = mapping
-        self.mapping = IMapping(mapping, client=client, index=name)
+        self._mappings = mappings
+        self.mappings = IMappings(mappings, client=client, index=name)
         self.aliases = aliases
 
     def search(self, nested_autocorrect=True, repr_auto_execute=True):
         return Search(
             using=self.client,
-            mapping=self._mapping,
+            mappings=self._mappings,
             index=self.name,
             nested_autocorrect=nested_autocorrect,
             repr_auto_execute=repr_auto_execute,

diff --git a/pandagg/exceptions.py b/pandagg/exceptions.py
@@ -1,24 +1,21 @@
-from __future__ import unicode_literals
-
-
 class InvalidAggregation(Exception):
     """Wrong aggregation definition"""
 
 
 class MappingError(Exception):
-    """Basic Mapping Error"""
+    """Basic Mappings Error"""
 
     pass
 
 
 class AbsentMappingFieldError(MappingError):
-    """Field is not present in mapping."""
+    """Field is not present in mappings."""
 
     pass
 
 
 class InvalidOperationMappingFieldError(MappingError):
-    """Invalid aggregation type on this mapping field."""
+    """Invalid aggregation type on this mappings field."""
 
     pass
 

diff --git a/pandagg/interactive/__init__.py b/pandagg/interactive/__init__.py
@@ -1,2 +1,2 @@
-from .mapping import *
+from .mappings import *
 from .response import *