From a8dd4b5db758aae65b8a7f20918882b304f44cc0 Mon Sep 17 00:00:00 2001 From: antter Date: Wed, 17 Nov 2021 21:53:22 +0000 Subject: [PATCH] finalized unexpectedness score notebook --- .../metrics/unexpected_test_failures.ipynb | 972 ++++++++++++++++++ 1 file changed, 972 insertions(+) create mode 100644 notebooks/data-sources/TestGrid/metrics/unexpected_test_failures.ipynb diff --git a/notebooks/data-sources/TestGrid/metrics/unexpected_test_failures.ipynb b/notebooks/data-sources/TestGrid/metrics/unexpected_test_failures.ipynb new file mode 100644 index 00000000..52d8765b --- /dev/null +++ b/notebooks/data-sources/TestGrid/metrics/unexpected_test_failures.ipynb @@ -0,0 +1,972 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "986f8173-ec5d-4408-8b16-2870eba609ce", + "metadata": {}, + "source": [ + "# Unexpected Test Failures" + ] + }, + { + "cell_type": "markdown", + "id": "1b6b805a-cb06-48f8-834f-74297b75862a", + "metadata": {}, + "source": [ + "In this notebook we initially set out to predict infrastructure flakes from testgrid data. Usually an infrastructure flake can be characterized by several tests failing unexpectedly at the same time, which would mean that the problem wasn't the tests themselves, but rather the infrastructure they run on. The main challenge came from defining mathematically what it could mean for tests to fail unexpectedly. That became the main goal of this notebook; classifying why the tests failed unexpectedly, whether it be because of infrastructure or other reasons, will require further analysis. In this notebook, every column of testgrid data will be assigned an \"unexpectedness score\" that is really a probability expressed as a percentage from 0 to 100."
+ ] + }, + { + "cell_type": "markdown", + "id": "b77c4adb-9d60-455a-bbaf-7ddd004160d3", + "metadata": {}, + "source": [ + "## Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "8ea6c93f-2136-4a36-9e72-9bfdfe02c5c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "import gzip\n", + "import os\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.patches as mpatches\n", + "import seaborn as sns\n", + "import datetime\n", + "import numpy as np\n", + "from matplotlib import colors\n", + "import bisect\n", + "from ipynb.fs.defs.metric_template import CephCommunication\n", + "from ipynb.fs.defs.metric_template import save_to_disk\n", + "from dotenv import load_dotenv, find_dotenv\n", + "import sys\n", + "\n", + "sys.path.append('../../../failure-type-classification')\n", + "\n", + "sys.path.append('../../../failure-type-classification')\n", + "\n", + "from ipynb.fs.defs.failure_type_functions import ( #noqa\n", + " normalize, #noqa\n", + " decode_run_length, #noqa\n", + ")\n", + "\n", + "load_dotenv(find_dotenv())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "34ab464c-5512-4096-aa81-5f579e30524b", + "metadata": {}, + "outputs": [], + "source": [ + "## Specify variables\n", + "METRIC_NAME = \"unexpected_runs\"\n", + "# Specify the path for input grid data,\n", + "INPUT_DATA_PATH = \"../../../../data/raw/testgrid_810.json.gz\"\n", + "\n", + "# Specify the path for output metric data\n", + "OUTPUT_DATA_PATH = f\"../../../../data/processed/metrics/{METRIC_NAME}\"\n", + "\n", + "# Specify whether or not we are running this as a notebook or part of an automation pipeline.\n", + "AUTOMATION = os.getenv(\"IN_AUTOMATION\")\n", + "\n", + "## CEPH Bucket variables\n", + "## Create a .env file on your local with the correct 
configs,\n", + "s3_endpoint_url = os.getenv(\"S3_ENDPOINT\")\n", + "s3_access_key = os.getenv(\"S3_ACCESS_KEY\")\n", + "s3_secret_key = os.getenv(\"S3_SECRET_KEY\")\n", + "s3_bucket = os.getenv(\"S3_BUCKET\")\n", + "s3_input_data_path = \"raw_data\"\n", + "metric_path = f\"ai4ci/testgrid/metrics/{METRIC_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3ba80b38-21e9-4f7b-aee3-0095b4259e74", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "## Import data\n", + "timestamp = datetime.datetime.today()\n", + "\n", + "if AUTOMATION:\n", + " filename = f\"testgrid_{timestamp.day}{timestamp.month}.json\"\n", + " cc = CephCommunication(s3_endpoint_url, s3_access_key, s3_secret_key, s3_bucket)\n", + " s3_object = cc.s3_resource.Object(s3_bucket, f\"{s3_input_data_path}/{filename}\")\n", + " file_content = s3_object.get()[\"Body\"].read().decode(\"utf-8\")\n", + " testgrid_data = json.loads(file_content)\n", + "\n", + "else:\n", + " with gzip.open(INPUT_DATA_PATH, \"rb\") as read_file:\n", + " testgrid_data = json.load(read_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "00ae3a7e-9298-4ff5-bd8f-60b1ee7950f8", + "metadata": {}, + "outputs": [], + "source": [ + "all_dashboards = list(testgrid_data.keys())\n", + "\n", + "all_jobs = {key: list(testgrid_data[key].keys()) for key in all_dashboards}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "1ef6920c-cb47-4e00-bf35-1217632e8d6e", + "metadata": {}, + "outputs": [], + "source": [ + "# for data exploration\n", + "\n", + "def get_random_grid():\n", + " \"\"\"Call this function and get a totally random grid.\"\"\"\n", + " first = np.random.choice(all_dashboards)\n", + " second = np.random.choice(all_jobs[first])\n", + " grid = testgrid_data[first][second]['grid']\n", + " x = np.array(list(pd.DataFrame(grid).statuses.apply(decode_run_length)))\n", + " x = pd.DataFrame(x).apply(lambda x: [normalize(y) for y in x])\n", + " return first, 
second, x" + ] + }, + { + "cell_type": "markdown", + "id": "085d9dc3-458a-431a-b142-1f8c6a0a2bef", + "metadata": {}, + "source": [ + "## Visualizing" + ] + }, + { + "cell_type": "markdown", + "id": "625f023d-208b-4a2b-bba4-0eb66d0b43f5", + "metadata": {}, + "source": [ + "Below we visualize some improbable failures. We define a metric (probability of failure) that is just the sample probability of failure, $\\overline{X}$. If the probability of failure is < 0.05 and it fails, we mark it in yellow." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "178c7aff-a995-4c8a-ae01-179436875a92", + "metadata": {}, + "outputs": [], + "source": [ + "def naive_prob_failure(row):\n", + " \"\"\"Input a row and get the probability that test fails, given that it is run.\"\"\"\n", + " row = row.values\n", + " row = row[row != 0]\n", + " return 1 - (row.mean() + 1)/2" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "d415c229-8a40-4409-bcba-84a743fa7bf1", + "metadata": {}, + "outputs": [], + "source": [ + "def get_grid(x):\n", + " \"\"\"Plot the grid with not run tests in white, improb\n", + " able failed tests in yellow, other failed tests in red, and passed in green.\"\"\"\n", + " plt.figure(figsize=(10, 5))\n", + " y = x[:50].copy()\n", + " for i, row in y.iterrows():\n", + " pf = naive_prob_failure(row)\n", + " if pf <= 0.05:\n", + " row[row == -1] = 0.5\n", + " y.loc[i] = row\n", + " cmap = colors.ListedColormap(['red', 'white', 'yellow', 'green'])\n", + " boundaries = [-1.2, -0.2, 0.2, 0.7, 1.2]\n", + " norm = colors.BoundaryNorm(boundaries, cmap.N, clip=True)\n", + " sns.heatmap(y[:50], fmt=\"\", cmap=cmap, cbar=False, norm = norm)\n", + " plt.ylabel(\"Tests\")\n", + " plt.xlabel(\"Run\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "3258601c-ae52-4fe6-a15f-08b82cb2895e", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "dashboard, job, x = get_random_grid()\n", + "get_grid(x)" + ] + }, + { + "cell_type": "markdown", + "id": "1669cb1c-ae87-43c1-aa68-e0c2f2566fca", + "metadata": {}, + "source": [ + "## Unexpected Failure Classification" + ] + }, + { + "cell_type": "markdown", + "id": "fa1efee6-fb4c-4686-9837-ebd676e06ceb", + "metadata": {}, + "source": [ + "First we will walk through the process with visualizations to motivate our methods before defining an overall function to calculate an \"unexpectedness score\"." + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "3a71e3f1-3a1f-4799-8b3f-5489dad8dcd2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Proportion of NA values 0.0\n" + ] + } + ], + "source": [ + "def preprocess(x):\n", + " \"\"\"Preprocess the data to make our methods work.\"\"\"\n", + " # unsure why we have NA values; safest bet to fix is to assume test didn't run\n", + " x = x.fillna(0)\n", + " print('Proportion of NA values', x.isna().to_numpy().sum()/(x.shape[0]*x.shape[1]))\n", + " # if we have all the same values, there is no real info (for our methods)\n", + " x = x[~(x.apply(np.std, axis = 1) == 0)]\n", + " return x\n", + "\n", + "\n", + "x = preprocess(x)" + ] + }, + { + "cell_type": "markdown", + "id": "f5ec74bb-0efd-435a-8b45-1a00445d7750", + "metadata": {}, + "source": [ + "Now we define a column score. We earlier defined the row score as the sample probability of a test failing. Rows correspond to tests and columns refer to time. For the column score, we take all the tests that failed at that time, and take the sums of the entropies of the failed scores. Entropy is just the negative of the log of the row score. It just makes probabilities work in a more linear fashion. 
Usually if you accumulate multiple probabilities you would multiply them together, but that would make a lot of numbers really small and hard to understand, so we apply the log. The reasons for this being called \"entropy\" can be read [here](https://en.wikipedia.org/wiki/Information_theory). So if a run has 3 failed tests, where two tests had a $\\frac25$ probability of failing and one test had a $\\frac13$ probability of failing, our column score will be $-2\\log\\frac25 - \\log\\frac13$." + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "968ce931-73a7-4f0b-873d-33e0e41a4781", + "metadata": {}, + "outputs": [], + "source": [ + "def column_scores(x):\n", + " # defined as the sum of the log of probability to fail of the failed tests\n", + " row_scores = x.apply(naive_prob_failure, axis = 1).values\n", + " return x.apply(lambda x: column_score(x, row_scores))\n", + "\n", + "\n", + "def column_score(column, row_scores):\n", + " return np.sum(-np.log(row_scores[column == -1]))" + ] + }, + { + "cell_type": "markdown", + "id": "27bb49d0-01f7-464e-bef6-44c28e72efb9", + "metadata": {}, + "source": [ + "Now let's visualize what our column scores look like next to the grid itself." + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "46df01ac-793a-43bb-a4d2-e46136720cbb", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "get_grid(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "e6a9cb65-06a6-4e46-89f5-c2a39df5f2ba", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmQAAAE9CAYAAACleH4eAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUv0lEQVR4nO3dfbCmZX0f8O8vu6KgUwHZUgKmS0bUUScq3TpYO6kVJ8XECjN1kNTEraXDtGN8SzqJ6B+2f6QTp6lGY7TdAePSImqBBiY1NBRNtNOCWYTxBaIw+LaUl7UK8a0K8usfz73luO5yzp49z3Ods+fzmTlznvvlee7fuebac757X9d939XdAQBgnJ8aXQAAwGYnkAEADCaQAQAMJpABAAwmkAEADCaQAQAMtnV0AUfipJNO6u3bt48uAwBgWTfffPM3unvbwbZt6EC2ffv27NmzZ3QZAADLqqqvHmqbIUsAgMEEMgCAwQQyAIDBBDIAgMEEMgCAwQQyAIDBBDIAgMEEMgCAwQQyAIDBBDIAgMEEMgCAwTb0sywBgHWm6tHX3ePq2GCcIQMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGGxugayqPlBV91fV55esO7Gqrq+qO6bvJ0zrq6reU1V3VtVnq+rMedUFALDezPMM2QeTnHPAurckuaG7z0hyw7ScJC9Lcsb0dVGS98+xLgCAdWVugay7P5nkmwesPjfJ7un17iTnLVl/Wc/cmOT4qjplXrUBAKwni55DdnJ33zO9vjfJydPrU5N8fcl+e6d1AABHvWGT+ru7k/Thvq+qLqqqPVW1Z9++fXOoDABgsRYdyO7bPxQ5fb9/Wn93kqcu2e+0ad1P6O5d3b2ju3ds27ZtrsUCACzCogPZtUl2Tq93JrlmyfrXTFdbnpXkwSVDmwAAR7Wt8/rgqroiyYuTnFRVe5O8PcnvJPloVV2Y5KtJzp92/1iSX0xyZ5LvJXntvOoCAFhv5hbIuvuXD7Hp7IPs20leN69aAADWM3fqBwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGGxIIKuqN1fVF6rq81V1RVU9oapOr6qbqurOqvpIVR0zojYAgEVbeCCrqlOTvCHJju5+TpItSS5I8o4k7+rupyX5VpILF10bAMAIo4YstyY5tqq2JjkuyT1JXpLkymn77iTnjSkNAGCxFh7IuvvuJL+b5GuZBbEHk9yc5IHufnjabW+SUw/2/qq6qKr2VNWeffv2LaJkAIC5GjFkeUKSc5OcnuSnkzwxyTkrfX937+ruHd29Y9u2bXOqEgBgcUYMWb40yZe7e193P5Tk6iQvSnL8NISZJKcluXtAbQAACzcikH0tyVlVdVxVVZKzk9yW5BNJXjntszPJNQNqAwBYuBFz
yG7KbPL+Z5J8bqphV5LfSvLrVXVnkqckuXTRtQEAjLB1+V3WXne/PcnbD1h9V5IXDCgHAGAod+oHABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYbMhtLwAAkiRVP77cPaaOwZwhAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGMyd+gFgLS298/wmves8h88ZMgCAwQQyAIDBBDIAgMEEMgCAwQQyAIDBBDIAgMEEMgCAwdyHDAA4+q3z+8M5QwYAMJhABgAwmEAGADCYQAYAMJhABgAwmEAGADCYQAYAMJhABgAwmEAGADCYQAYAMJhABgAwmEAGADDYigNZVR1bVc+YZzEAAJvRigJZVf3DJLcmuW5afl5VXbvag1bV8VV1ZVX9ZVXdXlUvrKoTq+r6qrpj+n7Caj8fAGAjWekZsn+V5AVJHkiS7r41yelHcNx3J7muu5+Z5LlJbk/yliQ3dPcZSW6YlgEAjnorDWQPdfeDB6zr1Rywqp6c5OeTXJok3f3D7n4gyblJdk+77U5y3mo+HwBgo1lpIPtCVf3jJFuq6oyq+v0k/3OVxzw9yb4kf1hVt1TVJVX1xCQnd/c90z73Jjl5lZ8PALChrDSQvT7Js5P8IMmHkjyY5E2rPObWJGcmeX93Pz/Jd3PA8GR3dw5xBq6qLqqqPVW1Z9++fassAQBg/di63A5VtSXJf+3uv5/kbWtwzL1J9nb3TdPylZkFsvuq6pTuvqeqTkly/8He3N27kuxKkh07dqxq2BQAYD1Z9gxZd/8oySPT3K8j1t33Jvn6kltonJ3ktiTXJtk5rduZ5Jq1OB4AwHq37BmyyXeSfK6qrs9siDFJ0t1vWOVxX5/k8qo6JsldSV6bWTj8aFVdmOSrSc5f5WcDAGwoKw1kV09fa2K6bcaOg2w6e62OAQCwUawokHX37uls1tOnVV/s7ofmVxYAwOaxokBWVS/O7N5gX0lSSZ5aVTu7+5NzqwwAYJNY6ZDlv0vyC939xSSpqqcnuSLJ35pXYQAAm8VK70P2uP1hLEm6+0tJHjefkgAANpeVniHbU1WXJPlP0/Krk+yZT0kAAJvLSgPZv0jyuiT7b3PxqSTvm0tFAACbzEoD2dYk7+7udyb//+79j59bVQAAm8hK55DdkOTYJcvHJvnva18OAMDms9JA9oTu/s7+hen1cfMpCQBgc1lpIPtuVZ25f6GqdiT5/nxKAgDYXFY6h+xNSf5zVf3vafmUJK+aS0UAAJvMY54hq6q/XVV/o7v/Iskzk3wkyUNJrkvy5QXUBwBw1FtuyPI/JPnh9PqFSd6a5A+SfCvJrjnWBQCwaSw3ZLmlu785vX5Vkl3dfVWSq6rq1rlWBgCwSSx3hmxLVe0PbWcn+fiSbSudfwYAwGNYLlRdkeTPq+obmV1V+akkqaqnJXlwzrUBAGwKjxnIuvu3q+qGzK6q/NPu7mnTTyV5/byLAwDYDJYdduzuGw+y7kvzKQcAYPNZ6Y1hAQCYE4EMAGAwgQwAYDCBDABgMIEMAGAwgQwAYDCBDABgMIEMAGAwgQwAYDCBDABgMIEMAGAwgQwAYDCBDABgMIEMAGAwgQwAYDCBDABgMIEMAGAwgQwAYDCBDABgMIEMAGAwgQwAYLBhgayqtlTVLVX1x9Py6VV1U1XdWVUfqapjRtUGALBII8+QvTHJ7UuW35HkXd39tCTfSnLhkKoAABZsSCCrqtOS/FKSS6blSvKSJFdOu+xOct6I2gAAFm3UGbLfS/KbSR6Zlp+S5IHufnha3pvk1AF1AQAs3MIDWVW9PMn93X3zKt9/UVXtqao9+/btW+PqAAAWb8QZshcleUVVfSXJhzMbqnx3kuOrauu0z2lJ7j7Ym7t7V3fv6O4d27ZtW0S9AABztfBA1t0Xd/dp3b09yQVJPt7dr07yiSSvnHbbmeSaRdcGADDCeroP2W8l+fWqujOzOWWXDq4HAGAhti6/y/x0
958l+bPp9V1JXjCyHgCAEdbTGTIAgE1JIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGEwgAwAYTCADABhMIAMAGGzhgayqnlpVn6iq26rqC1X1xmn9iVV1fVXdMX0/YdG1AQCMMOIM2cNJfqO7n5XkrCSvq6pnJXlLkhu6+4wkN0zLAABHvYUHsu6+p7s/M73+dpLbk5ya5Nwku6fddic5b9G1AQCMMHQOWVVtT/L8JDclObm775k23Zvk5FF1AQAs0rBAVlVPSnJVkjd1918t3dbdnaQP8b6LqmpPVe3Zt2/fAioFAJivIYGsqh6XWRi7vLuvnlbfV1WnTNtPSXL/wd7b3bu6e0d379i2bdtiCgYAmKMRV1lWkkuT3N7d71yy6dokO6fXO5Ncs+jaAABG2DrgmC9K8qtJPldVt07r3prkd5J8tKouTPLVJOcPqA0AYOEWHsi6+38kqUNsPnuRtQAArAfu1A8AMJhABgAwmEAGADCYQAYAMJhABgAwmEAGADCYQAYAMJhABgAwmEAGADCYQAYAMJhABgAwmEAGADCYQAYAMJhABgAwmEAGADDY1tEFAMCyqh593T2uDpgTZ8gAAAYTyAAABhPIAAAGE8gAAAYTyAAABhPIAAAGc9sLANgM3DpkXXOGDABgMIEMAGAwQ5ZsHE63s5no77CpCGQAwMr4j8LcGLIEABhMIAMAGEwgAwAYzBwyxjAP4cctbY9Em6xn+i4wBwIZP04wAICFM2QJADCYQAYAMJhABgAwmEAGADCYSf3zMGpi/GafkL/Zf344mrialU1GIAMAFufA/zyTxJAlAMBw6yqQVdU5VfXFqrqzqt4yuh4AgEVYN4GsqrYk+YMkL0vyrCS/XFXPGlsVAMD8rZtAluQFSe7s7ru6+4dJPpzk3ME1AQDM3XoKZKcm+fqS5b3TOgCAo9qGu8qyqi5KctG0+J2q+uKCDn1Skm+s6p2jrihZi+POv/aTUnX47bqSujbylTxHVvvq+yqHcvA2XVQf28h9+bGtrK8e+PNvpPZYfK2ra9PVmNfv4UW02eEfY61+r/7NQ21YT4Hs7iRPXbJ82rTux3T3riS7FlXUflW1p7t3LPq4Rzvtuva06drTpvOhXdeeNp2PRbTrehqy/IskZ1TV6VV1TJILklw7uCYAgLlbN2fIuvvhqvq1JP8tyZYkH+juLwwuCwBg7tZNIEuS7v5Yko+NruMQFj5Muklo17WnTdeeNp0P7br2tOl8zL1dqz0jDABgqPU0hwwAYFMSyFbAI52OXFU9tao+UVW3VdUXquqN0/oTq+r6qrpj+n7C6Fo3mqraUlW3VNUfT8unV9VNU3/9yHSRDIehqo6vqiur6i+r6vaqeqG+emSq6s3Tv/3PV9UVVfUEffXwVdUHqur+qvr8knUH7Zs1856pfT9bVWeOq3z9OkSb/tvp3/9nq+q/VNXxS7ZdPLXpF6vqH6xVHQLZMjzSac08nOQ3uvtZSc5K8rqpHd+S5IbuPiPJDdMyh+eNSW5fsvyOJO/q7qcl+VaSC4dUtbG9O8l13f3MJM/NrH311VWqqlOTvCHJju5+TmYXbl0QfXU1PpjknAPWHapvvizJGdPXRUnev6AaN5oP5ifb9Pokz+nun0vypSQXJ8n0d+uCJM+e3vO+KSccMYFseR7ptAa6+57u/sz0+tuZ/YE7NbO23D3ttjvJeUMK3KCq6rQkv5Tkkmm5krwkyZXTLtr0MFXVk5P8fJJLk6S7f9jdD0RfPVJbkxxbVVuTHJfknuirh627P5nkmwesPlTfPDfJZT1zY5Ljq+qU
hRS6gRysTbv7T7v74WnxxszujZrM2vTD3f2D7v5ykjszywlHTCBbnkc6rbGq2p7k+UluSnJyd98zbbo3ycmj6tqgfi/JbyZ5ZFp+SpIHlvwi0V8P3+lJ9iX5w2ko+JKqemL01VXr7ruT/G6Sr2UWxB5McnP01bVyqL7p79fa+KdJ/mR6Pbc2FchYqKp6UpKrkrypu/9q6baeXfLrst8VqqqXJ7m/u28eXctRZmuSM5O8v7ufn+S7OWB4Ul89PNOcpnMzC7s/neSJ+ckhItaAvrm2quptmU25uXzexxLIlreiRzqxvKp6XGZh7PLuvnpafd/+U+jT9/tH1bcBvSjJK6rqK5kNpb8ks7lPx0/DQon+uhp7k+zt7pum5SszC2j66uq9NMmXu3tfdz+U5OrM+q++ujYO1Tf9/ToCVfVPkrw8yav70XuEza1NBbLleaTTGpjmNl2a5PbufueSTdcm2Tm93pnkmkXXtlF198XdfVp3b8+sX368u1+d5BNJXjntpk0PU3ffm+TrVfWMadXZSW6LvnokvpbkrKo6bvpdsL9N9dW1cai+eW2S10xXW56V5MElQ5s8hqo6J7PpIK/o7u8t2XRtkguq6vFVdXpmF0x8ek2O6cawy6uqX8xsrs7+Rzr99tiKNp6q+rtJPpXkc3l0vtNbM5tH9tEkP5Pkq0nO7+4DJ6yyjKp6cZJ/2d0vr6qfzeyM2YlJbknyK939g4HlbThV9bzMLpQ4JsldSV6b2X9g9dVVqqp/neRVmQ3/3JLkn2U290ZfPQxVdUWSFyc5Kcl9Sd6e5I9ykL45hd/3ZjY8/L0kr+3uPQPKXtcO0aYXJ3l8kv8z7XZjd//zaf+3ZTav7OHMpt/8yYGfuao6BDIAgLEMWQIADCaQAQAMJpABAAwmkAEADCaQAQAMtnX5XQA2vqr6UWa3XXlcZperX5bZg60fecw3AiyAQAZsFt/v7uclSVX99SQfSvLXMrvnEMBQhiyBTae7709yUZJfm+5ivr2qPlVVn5m+/k6SVNVlVXXe/vdV1eVVdW5VPbuqPl1Vt1bVZ6vqjEE/CnCUcGNYYFOoqu9095MOWPdAkmck+XaSR7r7/07h6oru3lFVfy/Jm7v7vKp6cpJbM3tUyrsyu3P35dMj1bZ09/cX+fMARxdDlgCzeWXvnR6Z9KMkT0+S7v7zqnpfVW1L8o+SXNXdD1fV/0rytqo6LcnV3X3HqMKBo4MhS2BTmp75+aMk9yd5c2bPsHtukh2ZPcNyv8uS/Epmz7P8QJJ094eSvCLJ95N8rKpesrjKgaORM2TApjOd8fr3Sd7b3T0NR+7t7keqameSLUt2/2CSTye5t7tvm97/s0nu6u73VNXPJPm5JB9f6A8BHFUEMmCzOLaqbs2jt734j0neOW17X5Krquo1Sa5L8t39b+ru+6rq9iR/tOSzzk/yq1X1UJJ7k/ybuVcPHNVM6gd4DFV1XGb3Lzuzux8cXQ9wdDKHDOAQquqlSW5P8vvCGDBPzpABAAzmDBkAwGACGQDAYAIZAMBgAhkAwGACGQDAYAIZAMBg/w/jhuFmFhiVvwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "col_scores = column_scores(x)\n", + "\n", + "plt.figure(figsize=(10, 5))\n", + "plt.bar(x = x.columns, height = col_scores, color = 'r')\n", + "plt.ylabel(\"Score\")\n", + "plt.xlabel(\"Days\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "84add652-4199-4483-884a-3ad3fa767df3", + "metadata": {}, + "source": [ + "Interestingly, the one flake we think we see at around day 13 has a score that is not as large as several of the other days. It is important to keep in mind that we are only seeing the first 50 tests that had a failure for visibility purposes, and there are hundreds more that extend theoretically very far below this graph." + ] + }, + { + "cell_type": "markdown", + "id": "ea26bbe7-c547-4d1a-93c2-a4f6a24ded5d", + "metadata": {}, + "source": [ + "Now that we have these scores, we need to decide what a high score would be. We do this by performing a Monte Carlo simulation to get an approximate idea of the distribution. Every test has a probability associated with it, and a score. To generate a random column we simply add $-\\log p$ to the score with a probability of $p$. We do this for every test and return the resulting total score."
+ ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "15aa17f3-79f5-42fc-8481-121df72cbada", + "metadata": {}, + "outputs": [], + "source": [ + "def random_trial(row_scores):\n", + " row_scores = row_scores[row_scores != 0]\n", + " scores_scores = -np.log(row_scores)\n", + " tot = 0\n", + " for a, b in zip(row_scores, scores_scores):\n", + " tot += b * np.random.binomial(1, p = a)\n", + " return tot" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "d88192e6-2de7-4995-aa94-5f9b3faa1f2f", + "metadata": {}, + "outputs": [], + "source": [ + "# this is a large number of simulations and can be adjusted later\n", + "n_sims = 10000\n", + "\n", + "row_scores = x.apply(naive_prob_failure, axis = 1).values\n", + "out = [random_trial(row_scores) for i in range(n_sims)]" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "109a3992-19db-437f-9fe4-67721993a655", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAD6CAYAAABQ6WtbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUl0lEQVR4nO3df4xd9Znf8fcn5keiTVrMMnVc49Yk6zYljdZBE0NI1LBEIQbRNamyFFQlVsTWqQpVIhK0kP5BfpQqK21gm90slbO4mCoL6yZQvBFd1guUNEoDjFMvYAhiSkDYtceTQEjSqKxMnv4xXy8Xe2bOOMy9c4d5v6Sre87zPefcZ66u/PH5cc9NVSFJ0mxet9ANSJKGn2EhSepkWEiSOhkWkqROhoUkqZNhIUnq1LewSPL6JA8m+aske5J8rtVvTvKDJLvbY12rJ8mXk4wneTjJGT3b2pTkyfbY1K+eJUnTO66P234ROLeqfpbkeODbSf5bG7uqqr5+xPLnA2vb40zgRuDMJCcD1wKjQAG7kuyoqudneuFTTjml1qxZM79/jSS9xu3ateuHVTUy3VjfwqKmvu33szZ7fHvM9g3AjcAtbb3vJjkpyUrgHGBnVT0HkGQnsAG4daYNrVmzhrGxsVf/R0jSEpLkmZnG+nrOIsmyJLuBg0z9g/9AG7quHWq6IcmJrbYKeLZn9b2tNlP9yNfanGQsydjk5OR8/ymStKT1NSyq6qWqWgecCqxP8o+Ba4C3Ae8CTgZ+Z55ea0tVjVbV6MjItHtRkqRf0kCuhqqqHwP3ARuqan9NeRH4T8D6ttg+YHXPaqe22kx1SdKA9PNqqJEkJ7XpNwAfAL7fzkOQJMBFwKNtlR3AR9tVUWcBL1TVfuBu4Lwky5MsB85rNUnSgPTzaqiVwLYky5gKpe1V9c0k9yYZAQLsBv5VW/4u4AJgHPg58DGAqnouyReAh9pynz98sluSNBh5Ld6ifHR0tLwaSpKOTZJdVTU63Zjf4JYkdTIsJEmdDAtJUqd+nuBetM5+37kcmJj5i31vXjHCd+6/d4AdSdLCMiymcWBikvVXbplx/MHrNw+wG0laeB6GkiR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdepbWCR5fZIHk/xVkj1JPtfqpyV5IMl4kj9NckKrn9jmx9v4mp5tXdPqTyT5YL96liRNr597Fi8C51bVrwPrgA1JzgJ+F7ihqn4NeB64rC1/GfB8q9/QliPJ6cAlwNuBDcAfJVnWx74lSUfoW1jUlJ+12ePbo4Bzga+3+jbgoja9sc3Txt+fJK1+W1W9WFU/AMaB9f3qW5J0tL6es0iyLMlu4CCwE/jfwI+r6lBbZC+wqk2vAp4FaOMvAL/aW59mnd7X2pxkLMnY5ORkH/4aSVq6+hoWVfVSVa0DTmVqb+BtfXytLVU1WlWjIyMj/XoZSVqSBnI1VFX9GLgPeDdwUpLj2tCpwL42vQ9YDdDG/zbwo976NOtIkgagn1dDjSQ5qU2/AfgA8DhTofHhttgm4M42vaPN08bvrapq9Uva1VKnAWuBB/vVtyTpaMd1L/JLWwlsa1cuvQ7YXlXfTPIYcFuSfwf8L+CmtvxNwH9OMg48x9QVUFTVniTbgceAQ8DlVfVSH/uWJB2hb2FRVQ8D75ym/hTTXM1UVf8P+K0ZtnUdcN189yhJmhu/wS1J6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjr1LSySrE5yX5LHkuxJ8olW/2ySfUl2t8cFPetck2Q8yRNJPthT39Bq40mu7lfPkqTpHdfHbR8CPlVV30vyJmBXkp1t7Iaq+r3
ehZOcDlwCvB34u8BfJvkHbfgrwAeAvcBDSXZU1WN97F2S1KNvYVFV+4H9bfqnSR4HVs2yykbgtqp6EfhBknFgfRsbr6qnAJLc1pY1LCRpQAZyziLJGuCdwAOtdEWSh5NsTbK81VYBz/astrfVZqof+Rqbk4wlGZucnJzvP0GSlrS+h0WSNwLfAD5ZVT8BbgTeCqxjas/jS/PxOlW1papGq2p0ZGRkPjYpSWr6ec6CJMczFRRfq6rbAapqomf8q8A32+w+YHXP6qe2GrPUJUkD0M+roQLcBDxeVdf31Ff2LPYh4NE2vQO4JMmJSU4D1gIPAg8Ba5OcluQEpk6C7+hX35Kko/Vzz+I9wEeAR5LsbrXPAJcmWQcU8DTwcYCq2pNkO1Mnrg8Bl1fVSwBJrgDuBpYBW6tqTx/7liQdoZ9XQ30byDRDd82yznXAddPU75ptPUlSf/kNbklSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1KlvYZFkdZL7kjyWZE+ST7T6yUl2JnmyPS9v9ST5cpLxJA8nOaNnW5va8k8m2dSvniVJ0+vnnsUh4FNVdTpwFnB5ktOBq4F7qmotcE+bBzgfWNsem4EbYSpcgGuBM4H1wLWHA0aSNBh9C4uq2l9V32vTPwUeB1YBG4FtbbFtwEVteiNwS035LnBSkpXAB4GdVfVcVT0P7AQ29KtvSdLRBnLOIska4J3AA8CKqtrfhg4AK9r0KuDZntX2ttpM9SNfY3OSsSRjk5OT8/sHSNIS1/ewSPJG4BvAJ6vqJ71jVVVAzcfrVNWWqhqtqtGRkZH52KQkqZlTWCR5z1xq0yxzPFNB8bWqur2VJ9rhJdrzwVbfB6zuWf3UVpupLkkakOPmuNwfAGfMofY3kgS4CXi8qq7vGdoBbAK+2J7v7KlfkeQ2pk5mv1BV+5PcDfz7npPa5wHXzLHvJens953LgYmZD8W9ecUI37n/3gF2JGmxmzUskrwbOBsYSXJlz9DfApZ1bPs9wEeAR5LsbrXPMBUS25NcBjwDXNzG7gIuAMaBnwMfA6iq55J8AXioLff5qnqu+09bug5MTLL+yi0zjj94/eYBdiPptaBrz+IE4I1tuTf11H8CfHi2Favq20BmGH7/NMsXcPkM29oKbO3oVZLUJ7OGRVXdD9yf5OaqemZAPUmShsxcz1mcmGQLsKZ3nao6tx9NSZKGy1zD4r8A/xH4Y+Cl/rUjSRpGcw2LQ1V1Y187kSQNrbl+Ke/PkvzrJCvbjQBPbvdskiQtAXPdszh8p9eremoFvGV+25EkDaM5hUVVndbvRiRJw2tOYZHko9PVq+qW+W1HkjSM5noY6l09069n6kt13wMMC0laAuZ6GOrf9M4nOQm4rR8NSZKGzy97i/L/C3geQ5KWiLmes/gzXv7diWXAPwK296spSdJwmes5i9/rmT4EPFNVe/vQjyRpCM3pMFS7oeD3mbrz7HLgr/vZlCRpuMz1l/IuBh4Efoup3594IMmstyiXJL12zPUw1L8F3lVVBwGSjAB/CXy9X41JkobHXK+Get3hoGh+dAzrSpIWubnuWfx5+y3sW9v8P2fqZ1AlSUtA129w/xqwoqquSvLPgPe2of8JfK3fzUmShkPXnsXvA9cAVNXtwO0ASd7Rxv5pH3uTJA2JrvMOK6rqkSOLrbamLx1JkoZOV1icNMvYG2ZbMcnWJAeTPNpT+2ySfUl2t8cFPWPXJBlP8kSSD/bUN7TaeJKrO/qVJPVBV1iMJfmXRxaT/Dawq2Pdm4EN09RvqKp17XFX297pwCXA29s6f5RkWZJlwFeA84HTgUvbspKkAeo6Z/FJ4I4k/4KXw2EUOAH40GwrVtW3kqyZYx8bgduq6kXgB0nGgfVtbLyqngJIcltb9rE5ble
SNA9m3bOoqomqOhv4HPB0e3yuqt5dVQd+yde8IsnD7TDV8lZbBTzbs8zeVpupfpQkm5OMJRmbnJz8JVuTJE1nrveGuq+q/qA97n0Vr3cj8FZgHbAf+NKr2NYrVNWWqhqtqtGRkZH52qwkibl/KW9eVNXE4ekkXwW+2Wb3Aat7Fj211ZilLkkakIHesiPJyp7ZDwGHr5TaAVyS5MQkpwFrmbpx4UPA2iSnJTmBqZPgOwbZsySpj3sWSW4FzgFOSbIXuBY4J8k6pn5I6Wng4wBVtSfJdqZOXB8CLq+ql9p2rgDuZupHl7ZW1Z5+9SxJml7fwqKqLp2mfNMsy18HXDdN/S68D5UkLSjvHCtJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjr1LSySbE1yMMmjPbWTk+xM8mR7Xt7qSfLlJONJHk5yRs86m9ryTybZ1K9+JUkzO66P274Z+EPglp7a1cA9VfXFJFe3+d8BzgfWtseZwI3AmUlOBq4FRoECdiXZUVXP97HvoXf2+87lwMTkjOMHJiYG2I2kpaBvYVFV30qy5ojyRuCcNr0N+O9MhcVG4JaqKuC7SU5KsrItu7OqngNIshPYANzar74XgwMTk6y/csuM43d8+sIBdiNpKRj0OYsVVbW/TR8AVrTpVcCzPcvtbbWZ6kdJsjnJWJKxycmZ/9ctSTp2C3aCu+1F1Dxub0tVjVbV6MjIyHxtVpLE4MNioh1eoj0fbPV9wOqe5U5ttZnqkqQBGnRY7AAOX9G0Cbizp/7RdlXUWcAL7XDV3cB5SZa3K6fOazVJ0gD17QR3kluZOkF9SpK9TF3V9EVge5LLgGeAi9vidwEXAOPAz4GPAVTVc0m+ADzUlvv84ZPdkqTB6efVUJfOMPT+aZYt4PIZtrMV2DqPrUmSjpHf4JYkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUqW8/fiTN5Oz3ncuBickZx9+8YoTv3H/vADuS1MWw0MAdmJhk/ZVbZhy/86oLecvb3jHjuGEiDZ5hoaHzi2LWMHnw+s0D7EYSeM5CkjQHhoUkqZNhIUnqtCBhkeTpJI8k2Z1krNVOTrIzyZPteXmrJ8mXk4wneTjJGQvRsyQtZQu5Z/EbVbWuqkbb/NXAPVW1FrinzQOcD6xtj83AjQPvVJKWuGE6DLUR2NamtwEX9dRvqSnfBU5KsnIB+pOkJWuhwqKAv0iyK8nh6yBXVNX+Nn0AWNGmVwHP9qy7t9VeIcnmJGNJxiYnZ/7ClyTp2C3U9yzeW1X7kvwdYGeS7/cOVlUlqWPZYFVtAbYAjI6OHtO6kqTZLcieRVXta88HgTuA9cDE4cNL7flgW3wfsLpn9VNbTZI0IAMPiyS/kuRNh6eB84BHgR3AprbYJuDONr0D+Gi7Kuos4IWew1WSpAFYiMNQK4A7khx+/T+pqj9P8hCwPcllwDPAxW35u4ALgHHg58DHBt+yJC1tAw+LqnoK+PVp6j8C3j9NvYDLB9CaJGkGw3TprCRpSBkWkqROhoUkqZNhIUnq5I8fDaGunx09MDExwG6O3WLvX9LRDIsh1PWzo3d8+sIBdnPsFnv/ko5mWOgoXXsG/ga2tPQYFjpK156Bv4EtLT2e4JYkdTIsJEmdDAtJUifDQpLUyRPces3xai5p/hkWes3p99VchpGWIsNCOkZeWqylyHMWkqRO7llo0dm////wlre9Y8bxV3vvKe9tJR3NsNCi84uir/eeerX3tuoKM89paDEyLJagfv/PfKnrCjPPaWgxMiyWoH7/z1zSa49hIQ0ZL83VMDIsdMwW+2Gshe5/Lq//m79754zjHsbSQlg
0YZFkA/AfgGXAH1fVFxe4pSVrsR/GWuj++/36XXsmP/rhJL96ysiM4+65aDqLIiySLAO+AnwA2As8lGRHVT22sJ1Jg/dq90zu+PSFs4bVnVdd6NVcOsqiCAtgPTBeVU8BJLkN2AgYFlpy+r1n0rX9rjDp2nNZ7ONLNSxTVQvdQ6ckHwY2VNVvt/mPAGdW1RU9y2wGDh/M/YfAE6/iJU8Bfvgq1n+t8/2Zne9PN9+j2S3U+/P3q2rapFwsexadqmoLMPN/h45BkrGqGp2Pbb0W+f7Mzvenm+/R7Ibx/Vks94baB6zumT+11SRJA7BYwuIhYG2S05KcAFwC7FjgniRpyVgUh6Gq6lCSK4C7mbp0dmtV7enjS87L4azXMN+f2fn+dPM9mt3QvT+L4gS3JGlhLZbDUJKkBWRYSJI6GRY9kmxI8kSS8SRXL3Q/wyjJ00keSbI7ydhC97PQkmxNcjDJoz21k5PsTPJke16+kD0upBnen88m2dc+Q7uTXLCQPS6kJKuT3JfksSR7knyi1YfuM2RYND23FDkfOB24NMnpC9vV0PqNqlo3bNeBL5CbgQ1H1K4G7qmqtcA9bX6pupmj3x+AG9pnaF1V3TXgnobJIeBTVXU6cBZweft3Z+g+Q4bFy/7mliJV9dfA4VuKSDOqqm8Bzx1R3ghsa9PbgIsG2dMwmeH9UVNV+6vqe236p8DjwCqG8DNkWLxsFfBsz/zeVtMrFfAXSXa1W6zoaCuqan+bPgCsWMhmhtQVSR5uh6kW/BDLMEiyBngn8ABD+BkyLHSs3ltVZzB1uO7yJP9koRsaZjV1bbrXp7/SjcBbgXXAfuBLC9rNEEjyRuAbwCer6ie9Y8PyGTIsXuYtReagqva154PAHUwdvtMrTSRZCdCeDy5wP0Olqiaq6qWq+gXwVZb4ZyjJ8UwFxdeq6vZWHrrPkGHxMm8p0iHJryR50+Fp4Dzg0dnXWpJ2AJva9CZg5h+XWIIO/yPYfIgl/BlKEuAm4PGqur5naOg+Q36Du0e7hO/3efmWItctbEfDJclbmNqbgKlbxfzJUn+PktwKnMPULaUngGuB/wpsB/4e8AxwcVUtyZO8M7w/5zB1CKqAp4GP9xyfX1KSvBf4H8AjwC9a+TNMnbcYqs+QYSFJ6uRhKElSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHX6/2ZKc6Sk0KVDAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.histplot(out)" + ] + }, + { + "cell_type": "markdown", + "id": "52fda8ba-d6cd-4447-9b3b-3b74fd8d0a08", + "metadata": {}, + "source": [ + "We can see our distribution is fairly smooth, which happens when we have enough tests that have any probability of failing. We will also output the true distribution of scores to compare." + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "de14103d-3f09-47a4-9eb5-d096064432cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD4CAYAAADhNOGaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQm0lEQVR4nO3dfbBdVX3G8e9DwosvCAipNSSYWIM2Y0vBFOnoVIrYCVRJfRmFams7aKZWKla0g6OlFp2paNWqpU4pqGgriFRtxkbRAhk7jiKJLyjBaECQIJaoiFbHQtpf/zg79Xi5yT1J7j6Xc9f3M3Pm7pd1dn57VuY8d++1z7qpKiRJ7dpvrguQJM0tg0CSGmcQSFLjDAJJapxBIEmNWzjXBeypI444opYtWzbXZUjSRNm0adN3q2rRdPsmLgiWLVvGxo0b57oMSZooSW7b1T5vDUlS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuOaCoJlixeTZK9fyxYvnutTkKRZN3FTTOyL2+68kzrxxL1+fzZsmLVaJOmBoqkrAknS/RkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY3rNQiSrE6yJcnWJOdOs/+oJNcm+WKSG5Kc2mc9kqT76y0IkiwALgROAVYCZyRZOaXZa4ErqupY4HTg7/uqR5I0vT6vCI4HtlbVLVV1L3A5sGZKmwIe1i0fAny7x3okSdNY2OOxjwRuH1rfBjxxSpvXAZ9M8qfAQ4CTe6xHkjSNuR4sPgN4b1UtAU4F3p/kfjUlWZtkY5KN27dvH3uRkjSf9RkEdwBLh9aXdNuGnQlcAVBVnwUOAo6YeqCquqiqVlXVqkWLFvVUriS1qc8guB5YkWR5kgMYDAavm9LmW8BTAZL8MoMg8Fd+SRqj3oKgqnYAZwFXATcxeDroxiTnJzmta3YO8OIkXwYuA/6wqqqvmiRJ99fnYDFVtR5YP2XbeUPLm4En9VmDJGn35nqwWJI0xwwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOIN
AkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcb0GQZLVSbYk2Zrk3F20eW6SzUluTPKBPuuRJN3fwr4OnGQBcCHwNGAbcH2SdVW1eajNCuDVwJOq6u4kv9BXPZKk6fV5RXA8sLWqbqmqe4HLgTVT2rwYuLCq7gaoqrt6rEeSNI0+g+BI4Pah9W3dtmFHA0cn+UySzyVZPd2BkqxNsjHJxu3bt/dUriS1aa4HixcCK4ATgTOAf0xy6NRGVXVRVa2qqlWLFi0ab4WSNM+NFARJnpFkT0PjDmDp0PqSbtuwbcC6qrqvqr4JfJ1BMEiSxmTUD/fnAd9I8qYkjxvxPdcDK5IsT3IAcDqwbkqbjzK4GiDJEQxuFd0y4vElSbNgpCCoqhcAxwI3A+9N8tnuvv3Bu3nPDuAs4CrgJuCKqroxyflJTuuaXQV8L8lm4FrgVVX1vX04H0nSHhr58dGq+mGSK4EHAS8Hngm8Ksk7quqdu3jPemD9lG3nDS0X8IruJUmaA6OOEaxJ8hFgA7A/cHxVnQIcA5zTX3mSpL6NekXwLOBtVfXp4Y1V9ZMkZ85+WZKkcRl1sPg7U0MgyQUAVXX1rFclSRqbUYPgadNsO2U2C5EkzY3d3hpK8hLgT4BfSnLD0K6Dgc/0WZgkaTxmGiP4APBx4K+B4dlDf1RV3++tKknS2MwUBFVVtyZ56dQdSR5uGEjS5BvliuDpwCaggAztK+DRPdUlSRqT3QZBVT29+7l8POVIksZtpsHi43a3v6q+MLvlSJLGbaZbQ2/Zzb4CTprFWiRJc2CmW0O/Na5CJElzY6ZbQydV1TVJnjXd/qr6cD9lSZLGZaZbQ08BrgGeMc2+AgwCSZpwM90a+svu5x+NpxxJ0riNOg314UnekeQLSTYleXuSw/suTpLUv1Ennbsc2A48G3hOt/zBvoqSJI3PqH+P4JFV9fqh9TckeV4fBUmSxmvUK4JPJjk9yX7d67kM/t6wJGnCzfT46I/42RxDLwf+qdu1H/BfwCv7LE6S1L+Znho6eFyFSJLmxqhjBCQ5DFgBHLRz29Q/XylJmjwjBUGSFwFnA0uALwEnAJ/FuYYkaeKNOlh8NvDrwG3d/EPHAj/oqyhJ0viMGgQ/raqfAiQ5sKq+Bjy2v7IkSeMy6hjBtiSHAh8FPpXkbuC2voqSJI3PSEFQVc/sFl+X5FrgEOATvVUlSRqbPXlq6DjgyQy+V/CZqrq3t6okSWMz6qRz5wGXAocDRwDvSfLaPguTJI3HqFcEzweOGRowfiODx0jf0FNdkqQxGfWpoW8z9EUy4EDgjtkvR5I0bjPNNfROBmMC9wA3JvlUt/404PP9lydJ6ttMt4Y2dj83AR8Z2r6hl2okSWM306Rzl+5cTnIAcHS3uqWq7uuzMEnSeIw619CJDJ4aupXBlNRLk7zQSeckafKN+tTQW4DfrqotAEmOBi4DntBXYZKk8Rj1qaH9d4YAQFV9Hdh/pjclWZ1kS5KtSc7dTbtnJ6kkq0asR5I0S0a9ItiU5GJ+9hfKns/PBpKnlWQBcCGDJ4y2AdcnWVdVm6e0O5jB7KbX7UnhkqTZMeoVwR8Dm4GXda/NwEtmeM/xwNaquqWbjuJyYM007V4PXAD8dMRaJEmzaMYrgu43+y9X1eOAt+7BsY8Ebh9a3wY8ccqxjwOWVtW/JXnVbmpYC6wFOOqoo/agBEnSTGa8Iqiq/wG2JJnVT+Ak+zEIlnNGqOGiqlpVVasWLVo0m2VIUvNGHSM4jME3iz8P/Hjnxqo6bTfvuQNYOrS+hJ+fluJg4PHAhiQAvwisS3JaVe12/EGSNHtGDYK/2ItjXw+sSLKcQQCcDvzezp1VdQ+DmUwBSLIBeKUhIEnjNdNcQwcxGCh+DPAV4JKq2jHKgatqR5KzgKuABcC7q+rGJOcDG6tq3b6VLkmaDTNdEVwK3Af8B3AKsJLBo54jqar
1wPop287bRdsTRz2uJGn2zBQEK6vqVwCSXIIzjkrSvDPTU0P/P7HcqLeEJEmTZaYrgmOS/LBbDvCgbj1AVdXDeq1OktS7maahXjCuQiRJc2PUKSYkSfOUQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuN6DYIkq5NsSbI1ybnT7H9Fks1JbkhydZJH9VmPJOn+eguCJAuAC4FTgJXAGUlWTmn2RWBVVf0qcCXwpr7qkSRNr88rguOBrVV1S1XdC1wOrBluUFXXVtVPutXPAUt6rEeSNI0+g+BI4Pah9W3dtl05E/j4dDuSrE2yMcnG7du3z2KJkqQHxGBxkhcAq4A3T7e/qi6qqlVVtWrRokXjLU6S5rmFPR77DmDp0PqSbtvPSXIy8BrgKVX13z3WI0maRp9XBNcDK5IsT3IAcDqwbrhBkmOBfwBOq6q7eqxFkrQLvQVBVe0AzgKuAm4CrqiqG5Ocn+S0rtmbgYcCH0rypSTrdnE4SVJP+rw1RFWtB9ZP2Xbe0PLJff77kqSZPSAGiyVJc8cgkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCPbA/kCSvXotW7x4rsuXpGktnOsCJsl9QJ144l69Nxs2zGYpkjRrvCKQpMYZBJLUOINAatyyxYsd+2pcr2MESVYDbwcWABdX1Run7D8QeB/wBOB7wPOq6tY+a5L08267807HvhrX2xVBkgXAhcApwErgjCQrpzQ7E7i7qh4DvA24oK965ppPHKlP+/JbvcZnX/qpz8+CPq8Ijge2VtUtAEkuB9YAm4farAFe1y1fCfxdklRV9VjXnPCJI/XJ3+onw770E/TXV+nrMzfJc4DVVfWibv33gSdW1VlDbb7atdnWrd/ctfnulGOtBdZ2q48FtuxlWUcA352x1eTy/Cab5zfZHujn96iqWjTdjon4HkFVXQRctK/HSbKxqlbNQkkPSJ7fZPP8Jtskn1+fTw3dASwdWl/SbZu2TZKFwCEMBo0lSWPSZxBcD6xIsjzJAcDpwLopbdYBL+yWnwNcMx/HByTpgay3W0NVtSPJWcBVDB4ffXdV3ZjkfGBjVa0DLgHen2Qr8H0GYdGnfb699ADn+U02z2+yTez59TZYLEmaDH6zWJIaZxBIUuOaCYIkq5NsSbI1yblzXc++SrI0ybVJNie5McnZ3faHJ/lUkm90Pw+b61r3VpIFSb6Y5GPd+vIk13V9+MHuIYSJleTQJFcm+VqSm5L8xnzpvyR/1v2//GqSy5IcNOn9l+TdSe7qvv+0c9u0/ZWBd3TnekOS4+au8pk1EQQjTncxaXYA51TVSuAE4KXdOZ0LXF1VK4Cru/VJdTZw09D6BcDbuilJ7mYwRckkezvwiap6HHAMg3Od+P5LciTwMmBVVT2ewcMipzP5/fdeYPWUbbvqr1OAFd1rLfCuMdW4V5oIAoamu6iqe4Gd011MrKq6s6q+0C3/iMGHyJEMzuvSrtmlwO/OSYH7KMkS4HeAi7v1ACcxmIoEJvjcAJIcAvwmgyfnqKp7q+oHzJP+Y/BE4oO67wc9GLiTCe+/qvo0g6cbh+2qv9YA76uBzwGHJnnkWArdC60EwZHA7UPr27pt80KSZcCxwHXAI6rqzm7Xd4BHzFVd++hvgT8H/rdbPxz4QVXt6NYnvQ+XA9uB93S3vy5O8hDmQf9V1R3A3wDfYhA
A9wCbmF/9t9Ou+muiPnNaCYJ5K8lDgX8BXl5VPxze1305b+KeD07ydOCuqto017X0aCFwHPCuqjoW+DFTbgNNcP8dxuA34uXAYuAh3P+Wyrwzqf0F7QTBKNNdTJwk+zMIgX+uqg93m/9z5yVo9/OuuapvHzwJOC3JrQxu453E4H76od2tBpj8PtwGbKuq67r1KxkEw3zov5OBb1bV9qq6D/gwgz6dT/230676a6I+c1oJglGmu5go3T3zS4CbquqtQ7uGp+14IfCv465tX1XVq6tqSVUtY9BX11TV84FrGUxFAhN6bjtV1XeA25M8ttv0VAZTtE98/zG4JXRCkgd3/093ntu86b8hu+qvdcAfdE8PnQDcM3QL6YGnqpp4AacCXwduBl4z1/XMwvk8mcFl6A3Al7rXqQzupV8NfAP4d+Dhc13rPp7nicDHuuVHA58HtgIfAg6c6/r28dx+DdjY9eFHgcPmS/8BfwV8Dfgq8H7gwEnvP+AyBmMe9zG4ojtzV/0FhMGTijcDX2HwBNWcn8OuXk4xIUmNa+XWkCRpFwwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1Lj/A+XAR85Z5RQFAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.histplot(column_scores(x), stat = 'probability', color = 'r', bins = 20)" + ] + }, + { + "cell_type": "markdown", + "id": "5ce9840c-c457-4510-a932-0f2843157ca2", + "metadata": {}, + "source": [ + "We see these look nothing alike. Randomly running simulations does not accurately imitate what the testgrid data looks like, but rather exposes where the unexpectedness is too high to have been a result of a couple random failures." + ] + }, + { + "cell_type": "markdown", + "id": "e9cf953f-e2fb-4954-8e0b-7d7985b326d0", + "metadata": {}, + "source": [ + "We classify a couple levels of significance: 10%, 5%, and 1%. We see if the true scores are in the top x% of the simulated distribution." + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "ceaab32f-20c7-4072-97af-a5b48c6c27fe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.05 significance: 8.050703381470303\n", + "0.01 significance: 9.933434628904084\n", + "0.001 significance: 13.616500113152597\n" + ] + } + ], + "source": [ + "out = sorted(out)\n", + "\n", + "n = len(out)\n", + "\n", + "idx1 = int(0.90*n)\n", + "idx2 = int(0.95*n)\n", + "idx3 = int(0.99*n)\n", + "\n", + "print(f'0.05 significance: {out[idx1]}')\n", + "print(f'0.01 significance: {out[idx2]}')\n", + "print(f'0.001 significance: {out[idx3]}')" + ] + }, + { + "cell_type": "markdown", + "id": "2d5bd073-891f-40e2-b0e6-79010dd94e6c", + "metadata": {}, + "source": [ + "We visualize these cutoffs below." + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "id": "6540ff59-806a-4c40-8bbf-3e507e401be1", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "col_scores = column_scores(x)\n", + "\n", + "plt.figure(figsize=(10, 5))\n", + "plt.bar(x = x.columns, height = col_scores, color = 'b')\n", + "plt.ylabel(\"Score\")\n", + "plt.xlabel(\"Days\")\n", + "x1 = 0\n", + "x2 = len(col_scores)\n", + "\n", + "plt.plot([x1, x2], [out[idx1],out[idx1]], color='g', linestyle='-', linewidth=2)\n", + "plt.plot([x1, x2], [out[idx2],out[idx2]], color='y', linestyle='-', linewidth=2)\n", + "plt.plot([x1, x2], [out[idx3],out[idx3]], color='r', linestyle='-', linewidth=2)\n", + "\n", + "green_patch = mpatches.Patch(color='green', label='5% significnace')\n", + "yellow_patch = mpatches.Patch(color='yellow', label='1% significance')\n", + "red_patch = mpatches.Patch(color='red', label='0.1% significance')\n", + "\n", + "plt.legend(handles=[green_patch, yellow_patch, red_patch])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "d139e386-cd02-4e34-bb83-eb2453bca3c6", + "metadata": {}, + "source": [ + "In this case the scores were so anomalous that the significance levels didn't end up making a big difference." + ] + }, + { + "cell_type": "markdown", + "id": "d1317863-f152-4564-8ce5-75a3bfdc8305", + "metadata": {}, + "source": [ + "We define the final score as being the percentile of where our actual data lies on the simulated distribution. Thus, the highest, most anomalous score is 1 (being greater than any simulated score), and the lowest is 0. Below we define the main function that will assign a probability to every column of testgrid data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "2c9bae40-64d0-48c5-b77b-22c519a2a247", + "metadata": {}, + "outputs": [], + "source": [ + "def score_grid(grid):\n", + " grid = preprocess(grid)\n", + " # get col_scores\n", + " col_scores = column_scores(grid)\n", + " n_sims = 1000\n", + " row_scores = grid.apply(naive_prob_failure, axis = 1).values\n", + " out = sorted([random_trial(row_scores) for i in range(n_sims)])\n", + " ret = []\n", + " for col_score in col_scores:\n", + " ret_val = bisect.bisect_left(out, col_score)\n", + " ret.append(ret_val/1000)\n", + " return ret" + ] + }, + { + "cell_type": "markdown", + "id": "d2041229-11a9-46da-a56f-44d4018fe513", + "metadata": {}, + "source": [ + "## Analysis of Scores" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bb9bb28-fe02-40a1-9c2a-0a5a62f9fe7e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "all_grids = []\n", + "data = []\n", + "for i in all_dashboards:\n", + " for j in all_jobs[i]:\n", + " all_grids.append((i, j))\n", + "for n, (i,j) in enumerate(all_grids):\n", + " if n % 100 == 0:\n", + " print(n)\n", + " x = testgrid_data[i][j]['grid']\n", + " x = np.array(list(pd.DataFrame(x).statuses.apply(decode_run_length)))\n", + " x = pd.DataFrame(x).apply(lambda x: [normalize(y) for y in x])\n", + " for idx, val in enumerate(score_grid(x)):\n", + " data.append([i, j, idx, val])" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "3a17742b-b8f4-4a79-9d2c-47201ce00e19", + "metadata": {}, + "outputs": [], + "source": [ + "scores_df = pd.DataFrame(data, columns = ['dashboard', 'job', 'column', 'score'])" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "92e5ace0-7842-4e31-8551-aa48a39c2601", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dashboardjobcolumnscore
38411\"redhat-openshift-ocp-release-4.7-informing\"release-openshift-ocp-installer-e2e-gcp-serial...611.000
41419\"redhat-openshift-ocp-release-4.7-informing\"release-openshift-origin-installer-old-rhcos-e...10.078
47293\"redhat-osde2e-stage-moa\"osde2e-stage-moa-e2e-default930.049
27374\"redhat-openshift-ocp-release-4.5-informing\"release-openshift-origin-installer-old-rhcos-e...1160.000
5784\"redhat-openshift-ocp-release-4.3-blocking\"release-openshift-ocp-installer-e2e-aws-serial...20.000
\n", + "
" + ], + "text/plain": [ + " dashboard \\\n", + "38411 \"redhat-openshift-ocp-release-4.7-informing\" \n", + "41419 \"redhat-openshift-ocp-release-4.7-informing\" \n", + "47293 \"redhat-osde2e-stage-moa\" \n", + "27374 \"redhat-openshift-ocp-release-4.5-informing\" \n", + "5784 \"redhat-openshift-ocp-release-4.3-blocking\" \n", + "\n", + " job column score \n", + "38411 release-openshift-ocp-installer-e2e-gcp-serial... 61 1.000 \n", + "41419 release-openshift-origin-installer-old-rhcos-e... 1 0.078 \n", + "47293 osde2e-stage-moa-e2e-default 93 0.049 \n", + "27374 release-openshift-origin-installer-old-rhcos-e... 116 0.000 \n", + "5784 release-openshift-ocp-installer-e2e-aws-serial... 2 0.000 " + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scores_df.sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "5c1a7f41-49e7-493f-a130-2aebd1a4eb87", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.histplot(scores_df['score'])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "e9909cce-de25-4720-819b-5f80d27569bb", + "metadata": {}, + "source": [ + "We see that the tests can mostly be put into two categories: near 0, and near 1." + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "cced4db3-15a2-4f37-87f0-4688fe5108f6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Less than 10%: 0.686293476889431\n", + "More than 90%: 0.16101694915254236\n" + ] + } + ], + "source": [ + "n_cols = len(scores_df)\n", + "\n", + "print(f\"Less than 10%: {sum(scores_df['score'] < .1) / n_cols}\")\n", + "print(f\"More than 90%: {sum(scores_df['score'] > .9) / n_cols}\")" + ] + }, + { + "cell_type": "markdown", + "id": "3c7cc31a-e130-4df6-9633-787e1d464452", + "metadata": {}, + "source": [ + "Those two categories make up ~85% of the columns." + ] + }, + { + "cell_type": "markdown", + "id": "daf1ea6e-39ab-4688-a6c6-eaa116e499ee", + "metadata": {}, + "source": [ + "Are runs related to their previous run? We had an intuition that infra flakes show up as waterfall patterns, and the infrastructure continues to be flaky for more than one run. We would like to see if there is correlation between subsequent runs that shows up." + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "b99f3ecc-e63f-4309-be9f-745e8f43481d", + "metadata": {}, + "outputs": [], + "source": [ + "all_data = []\n", + "for i, j in all_grids:\n", + " scores = scores_df[(scores_df['dashboard'] == i) & (scores_df['job'] == j)].score.values\n", + " all_data += list(zip(scores[:-1], scores[1:]))" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "8c9065fa-57ea-415f-98a6-4e9314d53194", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x = [d[0] for d in all_data], y = [d[1] for d in all_data], alpha = 0.2)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "36d72de6-01fc-4c15-a984-00b5a3de4a98", + "metadata": {}, + "source": [ + "While this is certainly an interesting-looking plot, there isn't too much to take away from this in my opinion except that things seem pretty random." + ] + }, + { + "cell_type": "markdown", + "id": "a66c8c11-f3ed-476f-b4b4-87db1b11c9bd", + "metadata": {}, + "source": [ + "We define an unexpected column as having a score >0.9. We wish to look at the histograms of scores coming directly after an unexpected run and an expected run (score <.10)." + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "edbc5ae4-bdaf-40d6-bd8b-feb7731d61bb", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "unexpected_scores = [d[0] for d in all_data if d[1] > 0.9 ]\n", + "expected_scores = [d[0] for d in all_data if d[1] < 0.1]\n", + "\n", + "sns.histplot(unexpected_scores, stat = 'probability', color = 'r', bins = 25, alpha = 0.5)\n", + "sns.histplot(expected_scores, stat = 'probability', color = 'g', bins = 25, alpha = 0.5)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "e35b32d3-83df-4dcb-a67c-f5c4b80cd368", + "metadata": {}, + "source": [ + "We really don't see much difference here either. It would be pretty safe to say a high column score doesn't affect the next column's score all that much." + ] + }, + { + "cell_type": "markdown", + "id": "9f12fb2e-7f35-42bb-8e59-71f979f7395f", + "metadata": {}, + "source": [ + "## Save to Ceph or Local" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "84753d11-b8be-4cf1-9845-dd4727e2d586", + "metadata": {}, + "outputs": [], + "source": [ + "filename = f\"{METRIC_NAME}-{timestamp.year}-{timestamp.month}-{timestamp.day}.parquet\"\n", + "\n", + "if AUTOMATION == \"True\":\n", + " cc = CephCommunication(s3_endpoint_url, s3_access_key, s3_secret_key, s3_bucket)\n", + " cc.upload_to_ceph(scores_df, metric_path, filename)\n", + "else:\n", + " save_to_disk(scores_df, OUTPUT_DATA_PATH, filename)" + ] + }, + { + "cell_type": "markdown", + "id": "64391a44-4936-41ab-ab47-53640f38456e", + "metadata": {}, + "source": [ + "## Conclusion" + ] + }, + { + "cell_type": "markdown", + "id": "6ffa7454-2ac6-44c2-bc1b-2dcbb9025ab2", + "metadata": {}, + "source": [ + "We came up with a method to classify the unexpectedness of a given number of tests failing during a run. We are going to use this along with other data from the run to try to extract useful information about what makes these runs have such unexpected failures." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}