Skip to content

Commit

Permalink
feat: add analytics (#335)
Browse files Browse the repository at this point in the history
* feat: add telemetry features.

* feat: add analytics to streamlit app

* docs: Add analytics information to the documentation.

* fix: fix issues related with imports

* fix: add request

* fix: remove print

* fix: cleaning code

---------

Co-authored-by: Fabiana Clemente <[email protected]>
  • Loading branch information
fabclmnt and Fabiana Clemente authored May 3, 2024
1 parent 6466686 commit b7c05c2
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 0 deletions.
44 changes: 44 additions & 0 deletions docs/support/analytics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

# Analytics & Telemetry

## Overview

`ydata-synthetic` is a powerful library designed to generate synthetic data.
As part of our ongoing efforts to improve user experience and functionality, `ydata-synthetic`
includes a telemetry feature. This feature collects anonymous usage data, helping us understand
how the library is used and identify areas for improvement.

The primary goal of collecting telemetry data is to:

- Enhance the functionality and performance of the ydata-synthetic library
- Prioritize new features based on user engagement
- Identify common issues and bugs to improve overall user experience

### Data Collected
The telemetry system collects non-personal, anonymous information such as:

- Python version
- `ydata-synthetic` version
- Operating system
- Whether a GPU is available
- Name of the synthesizer model being used
- Frequency of use of `ydata-synthetic` features
- Errors or exceptions thrown within the library

## Disabling usage analytics

We respect your choice to not participate in our telemetry collection.
If you prefer to disable telemetry, you can do so by setting an environment
variable on your system. Disabling telemetry will not affect the functionality
of the ydata-synthetic library, except for the ability to contribute to its usage analytics.

### Set an Environment Variable
In your notebook or script, set the `YDATA_SYNTHETIC_NO_ANALYTICS`
environment variable to `True` before creating a synthesizer.

````python
import os

os.environ['YDATA_SYNTHETIC_NO_ANALYTICS']='True'
````




1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ nav:
- Help & Troubleshooting: 'support/help-troubleshooting.md'
- Contribution Guidelines: 'support/contribute.md'
- Contribution Guidelines: 'support/contribute.md'
- Analytics: 'support/analytics.md'
- Reference:
- Changelog: 'reference/changelog.md'
- API:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
from typing import Union
import os
import json
import logging

import streamlit as st

from ydata.sdk.synthesizers import RegularSynthesizer
from ydata.sdk.common.client import get_client

from ydata_synthetic.utils.logger import SynthesizersLogger
from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
from ydata_synthetic.synthesizers.regular.model import Model

from ydata_synthetic.streamlit_app.pages.functions.load_data import upload_file
from ydata_synthetic.streamlit_app.pages.functions.train import DataType, __CONDITIONAL_MODELS
from ydata_synthetic.streamlit_app.pages.functions.train import init_synth, advanced_setttings, training_parameters

# Module-level logger for the Streamlit synthesizer page; records at INFO
# level and above are enabled on it.
logger = SynthesizersLogger(name='streamlitSynthesizer.logger')
logger.setLevel(logging.INFO)

def get_available_models(type: Union[str, DataType]):

dtype = DataType(type)
Expand Down Expand Up @@ -114,6 +120,8 @@ def run():

st.subheader("3. Train your synthesizer")
if st.button('Click here to start the training process', disabled=not valid_token):

logger.info_def_report(model='fabric')
model = RegularSynthesizer()
with st.spinner("Please wait while your synthesizer trains..."):
dtypes = {}
Expand Down
7 changes: 7 additions & 0 deletions src/ydata_synthetic/synthesizers/regular/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Main synthesizer class
"""
from enum import Enum, unique
import logging

from joblib import load

Expand All @@ -17,6 +18,10 @@
from ydata_synthetic.synthesizers.regular.ctgan.model import CTGAN
from ydata_synthetic.synthesizers.regular.gmm.model import GMM

from ydata_synthetic.utils.logger import SynthesizersLogger

# Module-level logger for the regular (tabular) synthesizers; records at INFO
# level and above are enabled on it.
logger = SynthesizersLogger(name='regularsynthesizer.logger')
logger.setLevel(logging.INFO)

@unique
class Model(Enum):
Expand Down Expand Up @@ -54,6 +59,8 @@ def __new__(cls, modelname: str, model_parameters =None, **kwargs):
model=Model(modelname).function(**kwargs)
else:
model=Model(modelname).function(model_parameters, **kwargs)

logger.info_def_report(model=modelname)
return model

@staticmethod
Expand Down
6 changes: 6 additions & 0 deletions src/ydata_synthetic/synthesizers/timeseries/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@
"""
from enum import Enum, unique
import os
import logging
from joblib import load

from tensorflow import config as tfconfig

from ydata_synthetic.synthesizers.timeseries.timegan.model import TimeGAN
from ydata_synthetic.synthesizers.timeseries.doppelganger.model import DoppelGANger

from ydata_synthetic.utils.logger import SynthesizersLogger

# Module-level logger for the time-series synthesizers; records at INFO level
# and above are enabled on it.
# NOTE(review): the logger name is spelled 'timseries' - likely a typo for
# 'timeseries'; confirm nothing filters on this name before renaming it.
logger = SynthesizersLogger(name='timseriesSynthesizer.logger')
logger.setLevel(logging.INFO)

@unique
class Model(Enum):
Expand All @@ -28,6 +33,7 @@ def function(self):
class TimeSeriesSynthesizer():
"Abstraction class "
def __new__(cls, modelname: str, model_parameters=None, **kwargs):
logger.info_def_report(model=modelname)
return Model(modelname).function(model_parameters, **kwargs)

@staticmethod
Expand Down
23 changes: 23 additions & 0 deletions src/ydata_synthetic/utils/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
ydata-synthetic logger
"""
from __future__ import absolute_import, division, print_function

import logging

from ydata_synthetic.utils.utils import analytics_features

class SynthesizersLogger(logging.Logger):
    """Logger that tags messages with ``[SYNTHESIZER]`` and reports model usage.

    Besides the prefixed ``info`` method, it exposes ``info_def_report``, which
    triggers the usage-analytics call before logging which model is being
    created.
    """

    def __init__(self, name, level=logging.INFO):
        """Create the logger.

        Args:
            name: Logger name.
            level: Initial logging level; defaults to ``logging.INFO``.
        """
        super().__init__(name, level)

    def info(self, msg: object, *args, **kwargs) -> None:
        """Log ``msg`` at INFO level, wrapped as ``[SYNTHESIZER] - <msg>.``.

        Args:
            msg: Message (or %-style format string) to log.
            *args: Lazy %-format arguments, forwarded to ``logging.Logger.info``.
            **kwargs: Standard logging keyword arguments (``exc_info``,
                ``extra``, ...), forwarded unchanged.
        """
        # Forward args/kwargs so the override keeps the standard
        # ``Logger.info(msg, *args, **kwargs)`` contract; without this, calls
        # such as ``logger.info("x=%s", x)`` raise ``TypeError``.
        super().info(f'[SYNTHESIZER] - {msg}.', *args, **kwargs)

    def info_def_report(self, model: str):
        """Report the model to usage analytics, then log its creation.

        Args:
            model: Name of the synthesizer model being created.
        """
        analytics_features(model=model)

        # Uses the parent ``info`` directly: the message already carries its
        # own ``[SYNTHESIZER]`` prefix.
        super().info(f'[SYNTHESIZER] Creating a synthetic data generator with the following model - {model}.')
32 changes: 32 additions & 0 deletions src/ydata_synthetic/utils/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Utility functions that are common to ydata-synthetic project
"""
import os
import subprocess
import platform
import requests

from ydata_synthetic.version import __version__
def analytics_features(model: str):
    """Send one anonymous, best-effort usage ping for the given model.

    The ping carries the package version, the Python ``major.minor`` version,
    the model name, the operating system and whether ``nvidia-smi`` could be
    run. Failures while probing the GPU or sending the request are ignored so
    telemetry can never break synthesizer creation.

    Nothing is sent when the ``YDATA_SYNTHETIC_NO_ANALYTICS`` environment
    variable is set to anything other than an explicit "off" value (empty,
    ``0``, ``false``, ``no``, ``off``; case-insensitive).

    Args:
        model: Name of the synthesizer model being created.
    """
    endpoint = "https://packages.ydata.ai/ydata-synthetic"

    # ``bool("False")`` is True, so the flag is parsed explicitly. Any value
    # that is not clearly "off" still opts out, keeping the check conservative.
    opt_out = os.getenv("YDATA_SYNTHETIC_NO_ANALYTICS", "").strip().lower()
    if opt_out not in ("", "0", "false", "no", "off"):
        return

    package_version = __version__

    try:
        # Bounded and silenced: a missing or hung ``nvidia-smi`` must not stall
        # the caller or write driver errors to the console.
        subprocess.check_output(
            "nvidia-smi", stderr=subprocess.DEVNULL, timeout=5
        )
        gpu_present = True
    except Exception:
        gpu_present = False

    python_version = ".".join(platform.python_version().split(".")[:2])

    try:
        # ``params`` lets requests URL-encode the values; the timeout keeps an
        # unreachable endpoint from blocking model creation indefinitely.
        requests.get(
            endpoint,
            params={
                "version": package_version,
                "python_version": python_version,
                "model": model,
                "os": platform.system(),
                "gpu": str(gpu_present),
            },
            timeout=3,
        )
    except Exception:
        # Deliberate best-effort: telemetry errors are never surfaced.
        pass

0 comments on commit b7c05c2

Please sign in to comment.