
Commit 2bb1d77

tabergma and sanchariGr authored
Replace pickle with safer alternatives (#13067)
* Update slack release notification step
* [ENG-1424] Use `pickle` alternatives (#1453)
* use json.dump and json.load in count_vectors_featurizer and lexical_syntactic_featurizer instead of pickle
* update load and persist in sklearn intent classifier
* update persist and load in dietclassifier
* update load and persist in sklearn intent classifier
* use json.dump and json.load in tracker featurizers
* update persist and load of TEDPolicy
* updated unexpected intent policy persist and load of model utilities.
* save and load fake features
* rename patterns.pkl to patterns.json
* update poetry.lock
* ruff formatting
* move skops import
* add comments
* clean up save_features and load_features
* WIP: update model data saving and loading
* add tests for save and load features
* update tests for test_tracker_featurizer
* update tests for test_tracker_featurizer
* WIP: serialization of feature arrays.
* update serialization and deserialization for feature array
* remove not needed tests/utils/tensorflow/test_model_data_storage.py
* start writing tests for feature array
* update feature array tests
* update tests
* fix linting
* add changelog
* add new dependencies to .github/dependabot.yml
* fix some tests
* fix loading and saving of unexpected intent ted policy
* fix linting issue
* fix converting of features in cvf and lsf
* fix lint issues
* convert vocab in cvf
* fix linting
* update crf entity extractor
* fix to_dict of crf_token
* addressed type issues
* ruff formatting
* fix typing and lint issues
* remove cloudpickle dependency
* update logistic_regression_classifier and remove joblib as dependency
* update formatting of pyproject.toml
* next try: update formatting of pyproject.toml
* update logging
* update poetry.lock
* refactor loading of lexical_syntactic_featurizer
* rename FeatureMetadata.type -> FeatureMetadata.data_type
* clean up tests test_features.py and test_crf_entity_extractor.py
* update test_feature_array.py
* check for type when loading tracker featurizer.
* update changelog
* fix line too long
* move import of skops
* Prepared release of version 3.10.9.dev1 (#1496)
* prepared release of version 3.10.9.dev1
* update minimum model version
* Check for 'step_id' and 'active_flow' keys in the metadata when adding 'ActionExecuted' event to flows paths stack.
* fix parsing of commands
* improve logging
* formatting
* add changelog
* fix parse commands for multi step
* [ATO-2985] - Windows model loading test (#1537)
* Add test for model loading on windows
* Improve the error message logged when handling the user message
* Add a changelog
* Fix Code Quality - line too long
* Rasa-sdk-update (#1546)
* all rasa-sdk micro updates
* update poetry lock
* update rasa-sdk in lock file
* Remove trailing white space
* Prepared release of version 3.10.11 (#1570)
* prepared release of version 3.10.11
* add comments again in pyproject.toml
* update poetry.lock
* revert changes in github workflows
* undo changes in pyproject.toml
* update changelog
* revert changes in github workflows
* update poetry.lock
* update poetry.lock
* update pyproject.toml
* update poetry.lock
* update setuptools = '>=65.5.1,<75.6.0'
* update setuptools = '~75.3.0'
* reformat code
* undo deleting of ping_slack_about_package_release.sh
* fix formatting and type issues
* downgrade setuptools to 70.3.0
* fixing logging issues (?)

---------

Co-authored-by: sancharigr <[email protected]>
1 parent 66296b2 commit 2bb1d77

27 files changed (+1942 −616 lines)

.github/workflows/continous-integration.yml (−1 line)

@@ -1307,7 +1307,6 @@ jobs:
       with:
         args: "💥 New *Rasa Open Source * version `${{ github.ref_name }}` has been released!"

-
  send_slack_notification_for_release_on_failure:
    name: Notify Slack & Publish Release Notes
    runs-on: ubuntu-24.04

changelog/1424.bugfix.md (+19 lines)

@@ -0,0 +1,19 @@
+Replace `pickle` and `joblib` with safer alternatives, e.g. `json`, `safetensors`, and `skops`, for
+serializing components.
+
+**Note**: This is a model breaking change. Please retrain your model.
+
+If you have a custom component that inherits from one of the components listed below and modified the `persist` or
+`load` method, make sure to update your code. Please contact us in case you encounter any problems.
+
+Affected components:
+
+- `CountVectorFeaturizer`
+- `LexicalSyntacticFeaturizer`
+- `LogisticRegressionClassifier`
+- `SklearnIntentClassifier`
+- `DIETClassifier`
+- `CRFEntityExtractor`
+- `TrackerFeaturizer`
+- `TEDPolicy`
+- `UnexpectedIntentTEDPolicy`
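
For custom components that override `persist`/`load`, the migration typically amounts to replacing the pickle round trip with a plain-data round trip. The sketch below is illustrative only: `MyFeaturizer`, its `vocabulary` attribute, and the file name are hypothetical and not part of the Rasa API.

```python
import json
from pathlib import Path


class MyFeaturizer:
    """Hypothetical custom component, used only to illustrate the migration."""

    def __init__(self, vocabulary=None):
        self.vocabulary = vocabulary or {}

    # Before: pickle.dump(...) in persist() and pickle.load(...) in load().
    def persist(self, model_dir: Path) -> None:
        # Persist only plain data (dicts, lists, strings, numbers) as JSON.
        with open(model_dir / "my_featurizer.json", "w", encoding="utf-8") as f:
            json.dump({"vocabulary": self.vocabulary}, f)

    @classmethod
    def load(cls, model_dir: Path) -> "MyFeaturizer":
        # Parsing JSON cannot execute arbitrary code, unlike unpickling.
        with open(model_dir / "my_featurizer.json", encoding="utf-8") as f:
            data = json.load(f)
        return cls(vocabulary=data["vocabulary"])
```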

poetry.lock (+342 −112)

Generated file; the diff is not rendered by default.

pyproject.toml (+5 −4)

@@ -120,7 +120,6 @@ sanic-cors = "~2.0.0"
 sanic-jwt = "^1.6.0"
 sanic-routing = "^0.7.2"
 websockets = ">=10.0,<11.0"
-cloudpickle = ">=1.2,<2.3"
 aiohttp = ">=3.9.0,<3.10"
 questionary = ">=1.5.1,<1.11.0"
 prompt-toolkit = "^3.0,<3.0.29"
@@ -133,10 +132,9 @@ psycopg2-binary = ">=2.8.2,<2.10.0"
 python-dateutil = "~2.8"
 protobuf = ">=4.23.3,< 4.23.4"
 tensorflow_hub = "^0.13.0"
-setuptools = ">=65.5.1"
+setuptools = "~70.3.0"
 ujson = ">=1.35,<6.0"
 regex = ">=2020.6,<2022.11"
-joblib = ">=0.15.1,<1.3.0"
 sentry-sdk = ">=0.17.0,<1.15.0"
 aio-pika = ">=6.7.1,<8.2.4"
 aiogram = "<2.26"
@@ -156,6 +154,9 @@ dnspython = "2.3.0"
 wheel = ">=0.38.1"
 certifi = ">=2023.7.22"
 cryptography = ">=41.0.7"
+skops = "0.9.0"
+safetensors = "~0.4.5"
+
 [[tool.poetry.dependencies.tensorflow-io-gcs-filesystem]]
 version = "==0.31"
 markers = "sys_platform == 'win32'"
@@ -285,7 +286,7 @@ version = "~3.2.0"
 optional = true

 [tool.poetry.dependencies.transformers]
-version = ">=4.13.0, <=4.26.0"
+version = "~4.36.2"
 optional = true

 [tool.poetry.dependencies.sentencepiece]
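
The two new dependencies cover what `pickle` and `joblib` used to do: `skops` persists scikit-learn estimators behind an auditable allow-list, and `safetensors` stores plain numeric arrays. A minimal usage sketch of the libraries themselves (generic library calls with placeholder file names, not Rasa's internal persistence code):

```python
import numpy as np
from safetensors.numpy import load_file, save_file
from skops.io import dump, get_untrusted_types, load
from sklearn.linear_model import LogisticRegression

# skops: persist a scikit-learn estimator without pickle.
clf = LogisticRegression().fit([[0.0], [1.0]], [0, 1])
dump(clf, "classifier.skops")
untrusted = get_untrusted_types(file="classifier.skops")  # audit before loading
clf_restored = load("classifier.skops", trusted=untrusted)

# safetensors: persist raw numeric arrays (e.g. feature arrays or weights).
save_file({"weights": np.ones((2, 3), dtype=np.float32)}, "features.safetensors")
tensors = load_file("features.safetensors")
```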

rasa/core/featurizers/single_state_featurizer.py (+22 −1)

@@ -1,7 +1,8 @@
 import logging
+from typing import List, Optional, Dict, Text, Set, Any
+
 import numpy as np
 import scipy.sparse
-from typing import List, Optional, Dict, Text, Set, Any

 from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
 from rasa.nlu.extractors.extractor import EntityTagSpec
@@ -362,6 +363,26 @@ def encode_all_labels(
             for action in domain.action_names_or_texts
         ]

+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "action_texts": self.action_texts,
+            "entity_tag_specs": self.entity_tag_specs,
+            "feature_states": self._default_feature_states,
+        }
+
+    @classmethod
+    def create_from_dict(
+        cls, data: Dict[str, Any]
+    ) -> Optional["SingleStateFeaturizer"]:
+        if not data:
+            return None
+
+        featurizer = SingleStateFeaturizer()
+        featurizer.action_texts = data["action_texts"]
+        featurizer._default_feature_states = data["feature_states"]
+        featurizer.entity_tag_specs = data["entity_tag_specs"]
+        return featurizer
+

 class IntentTokenizerSingleStateFeaturizer(SingleStateFeaturizer):
     """A SingleStateFeaturizer for use with policies that predict intent labels."""

rasa/core/featurizers/tracker_featurizers.py (+115 −18)

@@ -1,11 +1,9 @@
 from __future__ import annotations
-from pathlib import Path
-from collections import defaultdict
-from abc import abstractmethod
-import jsonpickle
-import logging

-from tqdm import tqdm
+import logging
+from abc import abstractmethod
+from collections import defaultdict
+from pathlib import Path
 from typing import (
     Tuple,
     List,
@@ -18,25 +16,30 @@
     Set,
     DefaultDict,
     cast,
+    Type,
+    Callable,
+    ClassVar,
 )
+
 import numpy as np
+from tqdm import tqdm

-from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
-from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
-from rasa.core.exceptions import InvalidTrackerFeaturizerUsageError
 import rasa.shared.core.trackers
 import rasa.shared.utils.io
-from rasa.shared.nlu.constants import TEXT, INTENT, ENTITIES, ACTION_NAME
-from rasa.shared.nlu.training_data.features import Features
-from rasa.shared.core.trackers import DialogueStateTracker
-from rasa.shared.core.domain import State, Domain
-from rasa.shared.core.events import Event, ActionExecuted, UserUttered
+from rasa.core.exceptions import InvalidTrackerFeaturizerUsageError
+from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
+from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
 from rasa.shared.core.constants import (
     USER,
     ACTION_UNLIKELY_INTENT_NAME,
     PREVIOUS_ACTION,
 )
+from rasa.shared.core.domain import State, Domain
+from rasa.shared.core.events import Event, ActionExecuted, UserUttered
+from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.exceptions import RasaException
+from rasa.shared.nlu.constants import TEXT, INTENT, ENTITIES, ACTION_NAME
+from rasa.shared.nlu.training_data.features import Features
 from rasa.utils.tensorflow.constants import LABEL_PAD_ID
 from rasa.utils.tensorflow.model_data import ragged_array_to_ndarray

@@ -64,6 +67,10 @@ def __str__(self) -> Text:
 class TrackerFeaturizer:
     """Base class for actual tracker featurizers."""

+    # Class registry to store all subclasses
+    _registry: ClassVar[Dict[str, Type["TrackerFeaturizer"]]] = {}
+    _featurizer_type: str = "TrackerFeaturizer"
+
     def __init__(
         self, state_featurizer: Optional[SingleStateFeaturizer] = None
     ) -> None:
@@ -74,6 +81,36 @@ def __init__(
         """
         self.state_featurizer = state_featurizer

+    @classmethod
+    def register(cls, featurizer_type: str) -> Callable:
+        """Decorator to register featurizer subclasses."""
+
+        def wrapper(subclass: Type["TrackerFeaturizer"]) -> Type["TrackerFeaturizer"]:
+            cls._registry[featurizer_type] = subclass
+            # Store the type identifier in the class for serialization
+            subclass._featurizer_type = featurizer_type
+            return subclass
+
+        return wrapper
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "TrackerFeaturizer":
+        """Create featurizer instance from dictionary."""
+        featurizer_type = data.pop("type")
+
+        if featurizer_type not in cls._registry:
+            raise ValueError(f"Unknown featurizer type: {featurizer_type}")
+
+        # Get the correct subclass and instantiate it
+        subclass = cls._registry[featurizer_type]
+        return subclass.create_from_dict(data)
+
+    @classmethod
+    @abstractmethod
+    def create_from_dict(cls, data: Dict[str, Any]) -> "TrackerFeaturizer":
+        """Each subclass must implement its own creation from dict method."""
+        pass
+
     @staticmethod
     def _create_states(
         tracker: DialogueStateTracker,
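
The registry replaces `jsonpickle`'s implicit class lookup with an explicit allow-list that maps a string `type` to a known subclass, so only registered featurizers can ever be instantiated from persisted data. The same pattern in isolation (the class names here are generic placeholders, not Rasa's):

```python
from typing import Any, Callable, ClassVar, Dict, Type


class Serializable:
    """Generic version of the registry pattern used by TrackerFeaturizer."""

    _registry: ClassVar[Dict[str, Type["Serializable"]]] = {}

    @classmethod
    def register(cls, type_name: str) -> Callable:
        def wrapper(subclass: Type["Serializable"]) -> Type["Serializable"]:
            cls._registry[type_name] = subclass  # explicit allow-list entry
            subclass._type_name = type_name
            return subclass

        return wrapper

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Serializable":
        # Only registered classes can be built here; pickle/jsonpickle, by
        # contrast, will import and reconstruct whatever the payload names.
        return cls._registry[data.pop("type")].create_from_dict(data)


@Serializable.register("Greeter")
class Greeter(Serializable):
    def __init__(self, name: str) -> None:
        self.name = name

    @classmethod
    def create_from_dict(cls, data: Dict[str, Any]) -> "Greeter":
        return cls(name=data["name"])


restored = Serializable.from_dict({"type": "Greeter", "name": "world"})
assert isinstance(restored, Greeter) and restored.name == "world"
```
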
@@ -465,9 +502,7 @@ def persist(self, path: Union[Text, Path]) -> None:
             self.state_featurizer.entity_tag_specs = []

         # noinspection PyTypeChecker
-        rasa.shared.utils.io.write_text_file(
-            str(jsonpickle.encode(self)), featurizer_file
-        )
+        rasa.shared.utils.io.dump_obj_as_json_to_file(featurizer_file, self.to_dict())

     @staticmethod
     def load(path: Union[Text, Path]) -> Optional[TrackerFeaturizer]:
@@ -481,7 +516,17 @@ def load(path: Union[Text, Path]) -> Optional[TrackerFeaturizer]:
         """
         featurizer_file = Path(path) / FEATURIZER_FILE
         if featurizer_file.is_file():
-            return jsonpickle.decode(rasa.shared.utils.io.read_file(featurizer_file))
+            data = rasa.shared.utils.io.read_json_file(featurizer_file)
+
+            if "type" not in data:
+                logger.error(
+                    f"Couldn't load featurizer for policy. "
+                    f"File '{featurizer_file}' does not contain all "
+                    f"necessary information. 'type' is missing."
+                )
+                return None
+
+            return TrackerFeaturizer.from_dict(data)

         logger.error(
             f"Couldn't load featurizer for policy. "
@@ -508,7 +553,16 @@ def _remove_action_unlikely_intent_from_events(events: List[Event]
             )
         ]

+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "type": self.__class__._featurizer_type,
+            "state_featurizer": (
+                self.state_featurizer.to_dict() if self.state_featurizer else None
+            ),
+        }
+

+@TrackerFeaturizer.register("FullDialogueTrackerFeaturizer")
 class FullDialogueTrackerFeaturizer(TrackerFeaturizer):
     """Creates full dialogue training data for time distributed architectures.

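
With this change the persisted featurizer is a plain JSON document instead of a jsonpickle blob, and `load` rejects any file without a `type` key. A hedged sketch of that flow (the directory name is made up; `FEATURIZER_FILE` is the constant the module itself uses for the file name):

```python
import json
from pathlib import Path

from rasa.core.featurizers.tracker_featurizers import (
    FEATURIZER_FILE,
    MaxHistoryTrackerFeaturizer,
    TrackerFeaturizer,
)

policy_dir = Path("models/policy_0_TEDPolicy")   # hypothetical location
policy_dir.mkdir(parents=True, exist_ok=True)

# Roughly what persist() now writes: plain data plus a "type" discriminator.
payload = {
    "type": "MaxHistoryTrackerFeaturizer",
    "state_featurizer": None,
    "max_history": 5,
    "remove_duplicates": True,
}
(policy_dir / FEATURIZER_FILE).write_text(json.dumps(payload))

featurizer = TrackerFeaturizer.load(policy_dir)  # dispatches via from_dict
assert isinstance(featurizer, MaxHistoryTrackerFeaturizer)
```
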
@@ -646,7 +700,20 @@ def prediction_states(

         return trackers_as_states

+    def to_dict(self) -> Dict[str, Any]:
+        return super().to_dict()

+    @classmethod
+    def create_from_dict(cls, data: Dict[str, Any]) -> "FullDialogueTrackerFeaturizer":
+        state_featurizer = SingleStateFeaturizer.create_from_dict(
+            data["state_featurizer"]
+        )
+        return cls(
+            state_featurizer,
+        )
+
+
+@TrackerFeaturizer.register("MaxHistoryTrackerFeaturizer")
 class MaxHistoryTrackerFeaturizer(TrackerFeaturizer):
     """Truncates the tracker history into `max_history` long sequences.

@@ -887,7 +954,25 @@ def prediction_states(

         return trackers_as_states

+    def to_dict(self) -> Dict[str, Any]:
+        data = super().to_dict()
+        data.update(
+            {
+                "remove_duplicates": self.remove_duplicates,
+                "max_history": self.max_history,
+            }
+        )
+        return data
+
+    @classmethod
+    def create_from_dict(cls, data: Dict[str, Any]) -> "MaxHistoryTrackerFeaturizer":
+        state_featurizer = SingleStateFeaturizer.create_from_dict(
+            data["state_featurizer"]
+        )
+        return cls(state_featurizer, data["max_history"], data["remove_duplicates"])

+
+@TrackerFeaturizer.register("IntentMaxHistoryTrackerFeaturizer")
 class IntentMaxHistoryTrackerFeaturizer(MaxHistoryTrackerFeaturizer):
     """Truncates the tracker history into `max_history` long sequences.

@@ -1166,6 +1251,18 @@ def prediction_states(

         return trackers_as_states

+    def to_dict(self) -> Dict[str, Any]:
+        return super().to_dict()
+
+    @classmethod
+    def create_from_dict(
+        cls, data: Dict[str, Any]
+    ) -> "IntentMaxHistoryTrackerFeaturizer":
+        state_featurizer = SingleStateFeaturizer.create_from_dict(
+            data["state_featurizer"]
+        )
+        return cls(state_featurizer, data["max_history"], data["remove_duplicates"])
+

 def _is_prev_action_unlikely_intent_in_state(state: State) -> bool:
     prev_action_name = state.get(PREVIOUS_ACTION, {}).get(ACTION_NAME)
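
For anyone maintaining a custom tracker featurizer, the changelog's "update your code" note boils down to registering the subclass and implementing the `to_dict`/`create_from_dict` pair. A hedged sketch (the `MyWindowedFeaturizer` class and its `window` parameter are hypothetical):

```python
from typing import Any, Dict, Optional

from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
from rasa.core.featurizers.tracker_featurizers import TrackerFeaturizer


@TrackerFeaturizer.register("MyWindowedFeaturizer")
class MyWindowedFeaturizer(TrackerFeaturizer):
    """Hypothetical custom featurizer, shown only to illustrate the new hooks."""

    def __init__(
        self,
        state_featurizer: Optional[SingleStateFeaturizer] = None,
        window: int = 3,
    ) -> None:
        super().__init__(state_featurizer)
        self.window = window

    def to_dict(self) -> Dict[str, Any]:
        data = super().to_dict()  # contributes "type" and "state_featurizer"
        data["window"] = self.window
        return data

    @classmethod
    def create_from_dict(cls, data: Dict[str, Any]) -> "MyWindowedFeaturizer":
        state_featurizer = SingleStateFeaturizer.create_from_dict(
            data["state_featurizer"]
        )
        return cls(state_featurizer, window=data["window"])


# Round trip through the registry-backed dispatcher.
original = MyWindowedFeaturizer(window=7)
restored = TrackerFeaturizer.from_dict(original.to_dict())
assert isinstance(restored, MyWindowedFeaturizer) and restored.window == 7
```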
