Skip to content

Commit

Permalink
Refactor the metric library to conform to new naming structure
Browse files Browse the repository at this point in the history
  • Loading branch information
Anthony Naddeo committed Mar 5, 2024
1 parent 5f9fd4c commit 570abc2
Show file tree
Hide file tree
Showing 23 changed files with 769 additions and 786 deletions.
1 change: 1 addition & 0 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@
- Implement real multi-metric conversion in the whylogs_compat file. Jamie said that whylogs can actually return multiple metrics at once.
- Add validation options for string-based metrics like topic. Right now there are only validation creators for numeric metrics.
- Add builds for multiple Python versions to the CI matrix. Things like `isinstance(foo, Union[..])` are still in the code.
- Create a version of `@cache` for Python 3.8 (`functools.cache` was only added in Python 3.9). Can apparently use `@lru_cache(maxsize=None)` instead.
1 change: 1 addition & 0 deletions langkit/core/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def run(self, data: Union[pd.DataFrame, Row, Dict[str, str]]) -> EvaluationResul
else:
condensed["id"] = df["id"]

# TODO set column names `metric` and `value`
full_df = condensed.copy() # guard against mutations
validation_results: List[ValidationResult] = []
all_validators_start = time.perf_counter()
Expand Down
6 changes: 4 additions & 2 deletions langkit/metrics/injections.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:
metrics = [float(score) for _, score in zip(max_indices, max_similarities)]
return SingleMetricResult(metrics=metrics)

return SingleMetric(name=f"{column_name}.injections", input_name=column_name, evaluate=udf, cache_assets=cache_assets, init=init)
return SingleMetric(
name=f"{column_name}.similarity.injection", input_name=column_name, evaluate=udf, cache_assets=cache_assets, init=init
)


prompt_injections_module = partial(injections_metric, "prompt")
prompt_injections_metric = partial(injections_metric, "prompt")
4 changes: 2 additions & 2 deletions langkit/metrics/input_output_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:
return SingleMetricResult(similarity.squeeze(dim=0).tolist()) # type: ignore[reportUnknownVariableType]

return SingleMetric(
name=f"{output_column_name}.relevance_to_{input_column_name}",
name=f"{output_column_name}.similarity.{input_column_name}",
input_name=input_column_name,
evaluate=udf,
init=init,
)


prompt_response_input_output_similarity_module = partial(input_output_similarity_metric, "prompt", "response")
prompt_response_input_output_similarity_metric = partial(input_output_similarity_metric, "prompt", "response")
567 changes: 270 additions & 297 deletions langkit/metrics/library.py

Large diffs are not rendered by default.

54 changes: 35 additions & 19 deletions langkit/metrics/regexes/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:
return SingleMetricResult(metric)

return SingleMetric(
name=f"{column_name}.has_patterns",
name=f"{column_name}.regex.has_patterns",
input_name=column_name,
evaluate=udf,
)
Expand Down Expand Up @@ -112,29 +112,45 @@ def udf(text: Union[pd.DataFrame, Dict[str, List[Any]]]) -> SingleMetricResult:
return SingleMetricResult(metrics)

return SingleMetric(
name=f"{column_name}.{__sanitize_name_for_metric(pattern_name)}",
name=f"{column_name}.regex.{__sanitize_name_for_metric(pattern_name)}",
input_name=column_name,
evaluate=udf,
)


prompt_ssn_regex_module = partial(__single_regex_module, "prompt", __default_patterns, "SSN")
prompt_credit_card_number_regex_module = partial(__single_regex_module, "prompt", __default_patterns, "credit card number")
prompt_phone_number_regex_module = partial(__single_regex_module, "prompt", __default_patterns, "phone number")
prompt_mailing_address_regex_module = partial(__single_regex_module, "prompt", __default_patterns, "mailing address")
prompt_email_address_regex_module = partial(__single_regex_module, "prompt", __default_patterns, "email address")

response_ssn_regex_module = partial(__single_regex_module, "response", __default_patterns, "SSN")
response_credit_card_number_regex_module = partial(__single_regex_module, "response", __default_patterns, "credit card number")
response_phone_number_regex_module = partial(__single_regex_module, "response", __default_patterns, "phone number")
response_mailing_address_regex_module = partial(__single_regex_module, "response", __default_patterns, "mailing address")
response_email_address_regex_module = partial(__single_regex_module, "response", __default_patterns, "email address")

prompt_response_ssn_regex_module = [prompt_ssn_regex_module, response_ssn_regex_module]
prompt_response_credit_card_number_regex_module = [prompt_credit_card_number_regex_module, response_credit_card_number_regex_module]
prompt_response_phone_number_regex_module = [prompt_phone_number_regex_module, response_phone_number_regex_module]
prompt_response_mailing_address_regex_module = [prompt_mailing_address_regex_module, response_mailing_address_regex_module]
prompt_response_email_address_regex_module = [prompt_email_address_regex_module, response_email_address_regex_module]
prompt_ssn_regex_metric = partial(__single_regex_module, "prompt", __default_patterns, "SSN")
prompt_credit_card_number_regex_metric = partial(__single_regex_module, "prompt", __default_patterns, "credit card number")
prompt_phone_number_regex_metric = partial(__single_regex_module, "prompt", __default_patterns, "phone number")
prompt_mailing_address_regex_metric = partial(__single_regex_module, "prompt", __default_patterns, "mailing address")
prompt_email_address_regex_metric = partial(__single_regex_module, "prompt", __default_patterns, "email address")

prompt_regex_metric = [
prompt_ssn_regex_metric,
prompt_credit_card_number_regex_metric,
prompt_phone_number_regex_metric,
prompt_mailing_address_regex_metric,
prompt_email_address_regex_metric,
]

response_ssn_regex_metric = partial(__single_regex_module, "response", __default_patterns, "SSN")
response_credit_card_number_regex_metric = partial(__single_regex_module, "response", __default_patterns, "credit card number")
response_phone_number_regex_metric = partial(__single_regex_module, "response", __default_patterns, "phone number")
response_mailing_address_regex_metric = partial(__single_regex_module, "response", __default_patterns, "mailing address")
response_email_address_regex_metric = partial(__single_regex_module, "response", __default_patterns, "email address")

response_regex_metric = [
response_ssn_regex_metric,
response_credit_card_number_regex_metric,
response_phone_number_regex_metric,
response_mailing_address_regex_metric,
response_email_address_regex_metric,
]

prompt_response_ssn_regex_module = [prompt_ssn_regex_metric, response_ssn_regex_metric]
prompt_response_credit_card_number_regex_module = [prompt_credit_card_number_regex_metric, response_credit_card_number_regex_metric]
prompt_response_phone_number_regex_module = [prompt_phone_number_regex_metric, response_phone_number_regex_metric]
prompt_response_mailing_address_regex_module = [prompt_mailing_address_regex_metric, response_mailing_address_regex_metric]
prompt_response_email_address_regex_module = [prompt_email_address_regex_metric, response_email_address_regex_metric]


def custom_regex_metric(column_name: str, file_or_patterns: Optional[Union[str, CompiledPatternGroups]] = None) -> MetricCreator:
Expand Down
2 changes: 1 addition & 1 deletion langkit/metrics/sentiment_polarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:
return SingleMetricResult(metrics)

return SingleMetric(
name=f"{column_name}.sentiment_polarity",
name=f"{column_name}.sentiment.sentiment_score",
input_name=column_name,
evaluate=udf,
init=init,
Expand Down
112 changes: 47 additions & 65 deletions langkit/metrics/text_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,110 +14,92 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:
return SingleMetricResult(metrics)

return SingleMetric(
name=f"{column_name}.{stat}", # TODO make this ...text_stat...
name=f"{column_name}.text_stat.{stat}",
input_name=column_name,
evaluate=udf,
)


__reading_ease_module = partial(textstat_module, "flesch_reading_ease")
prompt_reading_ease_module = partial(__reading_ease_module, column_name="prompt")
response_reading_ease_module = partial(__reading_ease_module, column_name="response")
prompt_response_reading_ease_module = [prompt_reading_ease_module, response_reading_ease_module]
prompt_reading_ease_metric = partial(__reading_ease_module, column_name="prompt")
response_reading_ease_metric = partial(__reading_ease_module, column_name="response")
prompt_response_reading_ease_module = [prompt_reading_ease_metric, response_reading_ease_metric]


__flesch_kincaid_grade_level_module = partial(textstat_module, "flesch_kincaid_grade")
prompt_flesch_kincaid_grade_level_module = partial(__flesch_kincaid_grade_level_module, column_name="prompt")
response_flesch_kincaid_grade_level_module = partial(__flesch_kincaid_grade_level_module, column_name="response")
prompt_response_flesch_kincaid_grade_level_module = [
prompt_flesch_kincaid_grade_level_module,
response_flesch_kincaid_grade_level_module,
__flesch_kincaid_grade_metric = partial(textstat_module, "flesch_kincaid_grade")
prompt_grade_metric = partial(__flesch_kincaid_grade_metric, column_name="prompt")
response_grade_metric = partial(__flesch_kincaid_grade_metric, column_name="response")
prompt_response_grade_metric = [
prompt_grade_metric,
response_grade_metric,
]


__char_count_module = partial(textstat_module, "char_count")
prompt_char_count_module = partial(__char_count_module, column_name="prompt")
response_char_count_module = partial(__char_count_module, column_name="response")
prompt_response_char_count_module = [prompt_char_count_module, response_char_count_module]
prompt_char_count_metric = partial(__char_count_module, column_name="prompt")
response_char_count_metric = partial(__char_count_module, column_name="response")
prompt_response_char_count_module = [prompt_char_count_metric, response_char_count_metric]


__syllable_count_module = partial(textstat_module, "syllable_count")
prompt_syllable_count_module = partial(__syllable_count_module, column_name="prompt")
response_syllable_count_module = partial(__syllable_count_module, column_name="response")
prompt_response_syllable_count_module = [prompt_syllable_count_module, response_syllable_count_module]
prompt_syllable_count_metric = partial(__syllable_count_module, column_name="prompt")
response_syllable_count_metric = partial(__syllable_count_module, column_name="response")
prompt_response_syllable_count_module = [prompt_syllable_count_metric, response_syllable_count_metric]


__lexicon_count_module = partial(textstat_module, "lexicon_count")
prompt_lexicon_count_module = partial(__lexicon_count_module, column_name="prompt")
response_lexicon_count_module = partial(__lexicon_count_module, column_name="response")
prompt_response_lexicon_count_module = [prompt_lexicon_count_module, response_lexicon_count_module]
prompt_lexicon_count_metric = partial(__lexicon_count_module, column_name="prompt")
response_lexicon_count_metric = partial(__lexicon_count_module, column_name="response")
prompt_response_lexicon_count_module = [prompt_lexicon_count_metric, response_lexicon_count_metric]


__sentence_count_module = partial(textstat_module, "sentence_count")
prompt_sentence_count_module = partial(__sentence_count_module, column_name="prompt")
response_sentence_count_module = partial(__sentence_count_module, column_name="response")
prompt_response_sentence_count_module = [prompt_sentence_count_module, response_sentence_count_module]
prompt_sentence_count_metric = partial(__sentence_count_module, column_name="prompt")
response_sentence_count_metric = partial(__sentence_count_module, column_name="response")
prompt_response_sentence_count_module = [prompt_sentence_count_metric, response_sentence_count_metric]


__letter_count_module = partial(textstat_module, "letter_count")
prompt_letter_count_module = partial(__letter_count_module, column_name="prompt")
response_letter_count_module = partial(__letter_count_module, column_name="response")
prompt_response_letter_count_module = [prompt_letter_count_module, response_letter_count_module]


__polysyllabcount_module = partial(textstat_module, "polysyllabcount")
prompt_polysyllabcount_module = partial(__polysyllabcount_module, column_name="prompt")
response_polysyllabcount_module = partial(__polysyllabcount_module, column_name="response")
prompt_response_polysyllabcount_module = [prompt_polysyllabcount_module, response_polysyllabcount_module]


__monosyllabcount_module = partial(textstat_module, "monosyllabcount")
prompt_monosyllabcount_module = partial(__monosyllabcount_module, column_name="prompt")
response_monosyllabcount_module = partial(__monosyllabcount_module, column_name="response")
prompt_response_monosyllabcount_module = [prompt_monosyllabcount_module, response_monosyllabcount_module]
prompt_letter_count_metric = partial(__letter_count_module, column_name="prompt")
response_letter_count_metric = partial(__letter_count_module, column_name="response")
prompt_response_letter_count_module = [prompt_letter_count_metric, response_letter_count_metric]


__difficult_words_module = partial(textstat_module, "difficult_words")
prompt_difficult_words_module = partial(__difficult_words_module, column_name="prompt")
response_difficult_words_module = partial(__difficult_words_module, column_name="response")
prompt_response_difficult_words_module = [prompt_difficult_words_module, response_difficult_words_module]
prompt_difficult_words_metric = partial(__difficult_words_module, column_name="prompt")
response_difficult_words_metric = partial(__difficult_words_module, column_name="response")
prompt_response_difficult_words_module = [prompt_difficult_words_metric, response_difficult_words_metric]


prompt_response_textstat_module: MetricCreator = [
*prompt_response_reading_ease_module,
*prompt_response_flesch_kincaid_grade_level_module,
*prompt_response_grade_metric,
*prompt_response_char_count_module,
*prompt_response_syllable_count_module,
*prompt_response_lexicon_count_module,
*prompt_response_sentence_count_module,
*prompt_response_letter_count_module,
*prompt_response_polysyllabcount_module,
*prompt_response_monosyllabcount_module,
*prompt_response_difficult_words_module,
]

prompt_textstat_module: MetricCreator = [
prompt_reading_ease_module,
prompt_flesch_kincaid_grade_level_module,
prompt_char_count_module,
prompt_syllable_count_module,
prompt_lexicon_count_module,
prompt_sentence_count_module,
prompt_letter_count_module,
prompt_polysyllabcount_module,
prompt_monosyllabcount_module,
prompt_difficult_words_module,
prompt_textstat_metric: MetricCreator = [
prompt_reading_ease_metric,
prompt_grade_metric,
prompt_char_count_metric,
prompt_syllable_count_metric,
prompt_lexicon_count_metric,
prompt_sentence_count_metric,
prompt_letter_count_metric,
prompt_difficult_words_metric,
]

response_textstat_module: MetricCreator = [
response_reading_ease_module,
response_flesch_kincaid_grade_level_module,
response_char_count_module,
response_syllable_count_module,
response_lexicon_count_module,
response_sentence_count_module,
response_letter_count_module,
response_polysyllabcount_module,
response_monosyllabcount_module,
response_difficult_words_module,
response_textstat_metric: MetricCreator = [
response_reading_ease_metric,
response_grade_metric,
response_char_count_metric,
response_syllable_count_metric,
response_lexicon_count_metric,
response_sentence_count_metric,
response_letter_count_metric,
response_difficult_words_metric,
]
Loading

0 comments on commit 570abc2

Please sign in to comment.