Trusted-AI · OrsonTyphanel93 · Nov 15, 2023 · Nov 16, 2023 · Dec 18, 2023 · Dec 18, 2023
diff --git a/art/attacks/poisoning/AudioDynamicTrigger b/art/attacks/poisoning/AudioDynamicTrigger
@@ -0,0 +1,146 @@
+
+# MIT License
+#
+# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2022
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""
+Adversarial perturbations designed to work for audio.
+Uses classes, rather than pure functions as in image_perturbations.py,
+because loading the audio trigger from disk (scipy.io.wavfile.read(), plus
+optional resampling) is slow and should be done only once.
+"""
+
+
+import os
+import logging
+import numpy as np
+from scipy.io import wavfile
+from scipy import signal
+from sklearn.preprocessing import QuantileTransformer
+from tensorflow.keras.layers import Input, Dense
+from tensorflow.keras.models import Model
+
+class DynamicTrigger:
+    """
+    Audio backdoor trigger that is written into a band of rows of a signal's STFT.
+
+    The trigger waveform is read from disk once, in the constructor, and kept on the instance so that repeated
+    calls to `insert` do not read the file again.
+    """
+
+    def __init__(self, sampling_rate=16000, backdoor_path='/content/triggers_clapping.wav', scale=0.2):
+        """
+        Initialize the DynamicTrigger object and load the trigger from disk.
+
+        Parameters:
+        - sampling_rate (int): Sampling rate the trigger is resampled to and that `insert` passes to the STFT.
+        - backdoor_path (str): Path to the trigger audio file (read with `scipy.io.wavfile.read`).
+        - scale (float): Multiplicative factor applied to the trigger samples.
+
+        Raises:
+        - FileNotFoundError: If `backdoor_path` is not an existing file.
+        """
+        self.sampling_rate = sampling_rate
+        self.backdoor_path = backdoor_path
+        self.scale = scale
+        self.trigger = self.load_trigger()
+
+    def load_trigger(self):
+        """
+        Load the trigger audio file, mix it down to one channel and resample it if needed.
+
+        Returns:
+        - np.ndarray: 1-D array with the trigger samples multiplied by `self.scale`.
+
+        Raises:
+        - FileNotFoundError: If `self.backdoor_path` is not an existing file.
+        - Exception: Any error raised while reading or resampling is logged and re-raised unchanged.
+        """
+        if not os.path.isfile(self.backdoor_path):
+            raise FileNotFoundError(f"Trigger file not found: {self.backdoor_path}")
+        try:
+            file_rate, trigger = wavfile.read(self.backdoor_path)
+            # Multi-channel files are returned as (n_samples, n_channels). Average the channels rather than
+            # flattening: flattening interleaves them, which lengthens and distorts the trigger.
+            if trigger.ndim > 1:
+                trigger = trigger.mean(axis=1)
+            if self.sampling_rate != file_rate:
+                target_len = int(len(trigger) * self.sampling_rate / file_rate)
+                trigger = signal.resample(trigger, target_len)
+            # NOTE(review): integer PCM samples are not normalized to [-1, 1] here; only `scale` is applied.
+            # Confirm that callers pass audio on the same amplitude scale.
+            return trigger * self.scale
+        except Exception as e:
+            logging.error(f"Error loading trigger: {e}")
+            raise
+
+    def anonymize_speaker(self, spectrogram, noise_std=0.1):
+        """
+        Anonymize the speaker in the spectrogram using an autoencoder.
+
+        A dense autoencoder with one hidden layer is trained on the fly to map a noisy copy of the spectrogram
+        back to the clean one. Its output is quantile-transformed and passed through the autoencoder once more.
+
+        Parameters:
+        - spectrogram (np.ndarray): Real-valued 2-D spectrogram; each row is used as one training sample.
+        - noise_std (float): Standard deviation of the Gaussian noise added to the training inputs.
+
+        Returns:
+        - np.ndarray: Anonymized spectrogram with the same shape as the input.
+
+        Raises:
+        - ValueError: If `spectrogram` is not a real-valued 2-D array.
+        - Exception: Any error raised during training or prediction is logged and re-raised unchanged.
+        """
+        try:
+            spectrogram = np.asarray(spectrogram)
+            # The dense layers and QuantileTransformer only handle real data; fail early with a clear message.
+            if spectrogram.ndim != 2 or np.iscomplexobj(spectrogram):
+                raise ValueError("spectrogram must be a real-valued 2-D array.")
+
+            # Autoencoder: n_features -> 128 (ReLU) -> n_features, where n_features is the row length
+            n_features = spectrogram.shape[1]
+            input_layer = Input(shape=(n_features,))
+            hidden_layer = Dense(128, activation='relu')(input_layer)
+            output_layer = Dense(n_features)(hidden_layer)
+            autoencoder = Model(input_layer, output_layer)
+            autoencoder.compile(optimizer='adam', loss='mean_squared_error')
+
+            # Train as a denoiser: noisy rows in, clean rows out
+            noisy_spectrogram = spectrogram + np.random.normal(0, noise_std, spectrogram.shape)
+            autoencoder.fit(noisy_spectrogram, spectrogram, epochs=10, batch_size=32, verbose=1)
+
+            # Use the autoencoder to extract features from the spectrogram
+            features = autoencoder.predict(spectrogram)
+
+            # Apply quantization-based transformation. n_quantiles may not exceed the number of rows, so it is
+            # clamped here instead of relying on scikit-learn to warn and clamp it.
+            transformer = QuantileTransformer(n_quantiles=min(100, features.shape[0]), random_state=0)
+            quantized_features = transformer.fit_transform(features)
+
+            # Reconstruct the spectrogram from the quantized features
+            return autoencoder.predict(quantized_features)
+        except Exception as e:
+            logging.error(f"Error during anonymization: {e}")
+            raise
+
+    def insert(self, x_audio, trigger_start_index=10, trigger_end_index=20, noise_std=0.05):
+        """
+        Insert the trigger into the audio signal and apply anonymization.
+
+        The signal is transformed with an STFT. The STFT rows from `trigger_start_index` up to, but not
+        including, `trigger_end_index` are overwritten with the trigger samples, repeated or truncated to the
+        number of STFT frames. The magnitude of the result is anonymized, recombined with the phase, and
+        transformed back to a waveform.
+
+        Parameters:
+        - x_audio (np.ndarray): Input audio signal (1-D).
+        - trigger_start_index (int): First STFT row overwritten by the trigger.
+        - trigger_end_index (int): STFT row at which the overwritten band ends (exclusive).
+        - noise_std (float): Standard deviation of noise to add during anonymization.
+
+        Returns:
+        - np.ndarray: Poisoned and anonymized audio signal, cropped to the length of `x_audio`.
+        - int: Sampling rate of the audio signal.
+
+        Raises:
+        - ValueError: If `x_audio` is not 1-D, the trigger is empty, or the indices do not select a non-empty
+          range of STFT rows.
+        - Exception: Any other error is logged and re-raised unchanged.
+        """
+        try:
+            x_audio = np.asarray(x_audio)
+            if x_audio.ndim != 1:
+                raise ValueError("x_audio must be a one-dimensional signal.")
+
+            _, _, xi = signal.stft(x_audio, fs=self.sampling_rate)
+            n_rows, n_frames = xi.shape
+
+            # Ensure trigger indices are within bounds
+            if trigger_start_index < 0 or trigger_end_index > n_rows or trigger_start_index >= trigger_end_index:
+                raise ValueError("Invalid trigger indices provided.")
+            if len(self.trigger) == 0:
+                raise ValueError("Trigger is empty.")
+
+            # Fit the trigger to the number of frames (repeat or truncate) so that it broadcasts across the
+            # selected rows. Assigning the raw trigger only works when its length happens to equal n_frames.
+            sigma = np.resize(self.trigger, n_frames)
+            xi[trigger_start_index:trigger_end_index, :] = sigma
+
+            # Anonymize the speaker on the magnitude only: the autoencoder and the quantile transform cannot
+            # process complex values. The phase is kept and reattached afterwards.
+            phase = np.angle(xi)
+            magnitude = self.anonymize_speaker(np.abs(xi), noise_std=noise_std)
+            xi = magnitude * np.exp(1j * phase)
+
+            # Reconstruct the audio signal from the spectrogram; drop the samples added by STFT padding
+            _, poisoned_x = signal.istft(xi, fs=self.sampling_rate)
+            poisoned_x = poisoned_x[:len(x_audio)]
+
+            return poisoned_x, self.sampling_rate
+        except Exception as e:
+            logging.error(f"Error during trigger insertion: {e}")
+            raise
+
+
+