From 232bc122f94e3f8eef14e7e2016d2592a31e19b6 Mon Sep 17 00:00:00 2001 From: sean-morris Date: Wed, 3 Jul 2024 17:12:34 -0700 Subject: [PATCH 1/2] [Otter Grader 5.5] Updates --- hw/hw01/hw01.ipynb | 87 ++++++++-- hw/hw02/hw02.ipynb | 162 +++++++++++++----- hw/hw03/hw03.ipynb | 145 ++++++++++++---- hw/hw04/hw04.ipynb | 91 +++++++--- hw/hw05/hw05.ipynb | 41 +++-- hw/hw06/hw06.ipynb | 171 +++++++++++++------ hw/hw07/hw07.ipynb | 159 +++++++++++++----- hw/hw08/hw08.ipynb | 67 ++++++-- hw/hw09/hw09.ipynb | 106 +++++++++--- hw/hw10/hw10.ipynb | 150 +++++++++++++---- hw/hw11/hw11.ipynb | 82 ++++++--- hw/hw12/hw12.ipynb | 69 ++++++-- lab/lab01/lab01.ipynb | 61 ++++--- lab/lab02/lab02.ipynb | 56 +++++-- lab/lab03/lab03.ipynb | 121 ++++++++++---- lab/lab04/lab04.ipynb | 253 +++++++++++++++++----------- lab/lab05/lab05.ipynb | 71 ++++++-- lab/lab06/lab06.ipynb | 153 ++++++++--------- lab/lab07/lab07.ipynb | 199 +++++++++++----------- lab/lab08/lab08.ipynb | 70 ++++++-- lab/lab09/lab09.ipynb | 285 ++++++++++++++++++-------------- lab/lab10/lab10.ipynb | 191 ++++++++++----------- {lec => lectures}/lec01.ipynb | 0 {lec => lectures}/lec03.ipynb | 0 {lec => lectures}/lec03_0.ipynb | 0 {lec => lectures}/lec04.ipynb | 0 {lec => lectures}/lec04_0.ipynb | 0 {lec => lectures}/lec05.ipynb | 0 {lec => lectures}/lec05_0.ipynb | 0 {lec => lectures}/lec06.ipynb | 0 {lec => lectures}/lec07.ipynb | 0 {lec => lectures}/lec08.ipynb | 0 {lec => lectures}/lec09.ipynb | 0 {lec => lectures}/lec09_0.ipynb | 0 {lec => lectures}/lec10.ipynb | 0 {lec => lectures}/lec11.ipynb | 0 {lec => lectures}/lec11_0.ipynb | 0 {lec => lectures}/lec12.ipynb | 0 {lec => lectures}/lec13.ipynb | 0 {lec => lectures}/lec14.ipynb | 0 {lec => lectures}/lec15.ipynb | 0 {lec => lectures}/lec16.ipynb | 0 {lec => lectures}/lec17.ipynb | 0 {lec => lectures}/lec18.ipynb | 0 {lec => lectures}/lec19.ipynb | 0 {lec => lectures}/lec20.ipynb | 0 {lec => lectures}/lec21.ipynb | 0 {lec => lectures}/lec23.ipynb | 0 {lec => 
lectures}/lec24.ipynb | 0 {lec => lectures}/lec25.ipynb | 0 {lec => lectures}/lec26.ipynb | 0 {lec => lectures}/lec27.ipynb | 0 {lec => lectures}/lec28.ipynb | 0 {lec => lectures}/lec29.ipynb | 0 {lec => lectures}/lec30.ipynb | 0 {lec => lectures}/lec31.ipynb | 0 {lec => lectures}/lec32.ipynb | 0 {lec => lectures}/lec33.ipynb | 0 {lec => lectures}/lec35.ipynb | 0 {lec => lectures}/lec36.ipynb | 0 {lec => lectures}/lec37.ipynb | 0 {lec => lectures}/lec38.ipynb | 0 {lec => lectures}/lec38_0.ipynb | 0 {lec => lectures}/lec39.ipynb | 0 project/project1/project1.ipynb | 194 ++++++++++++++-------- project/project2/project2.ipynb | 167 +++++++++++++++---- project/project3/project3.ipynb | 223 ++++++++++++++++++------- 67 files changed, 2295 insertions(+), 1079 deletions(-) rename {lec => lectures}/lec01.ipynb (100%) rename {lec => lectures}/lec03.ipynb (100%) rename {lec => lectures}/lec03_0.ipynb (100%) rename {lec => lectures}/lec04.ipynb (100%) rename {lec => lectures}/lec04_0.ipynb (100%) rename {lec => lectures}/lec05.ipynb (100%) rename {lec => lectures}/lec05_0.ipynb (100%) rename {lec => lectures}/lec06.ipynb (100%) rename {lec => lectures}/lec07.ipynb (100%) rename {lec => lectures}/lec08.ipynb (100%) rename {lec => lectures}/lec09.ipynb (100%) rename {lec => lectures}/lec09_0.ipynb (100%) rename {lec => lectures}/lec10.ipynb (100%) rename {lec => lectures}/lec11.ipynb (100%) rename {lec => lectures}/lec11_0.ipynb (100%) rename {lec => lectures}/lec12.ipynb (100%) rename {lec => lectures}/lec13.ipynb (100%) rename {lec => lectures}/lec14.ipynb (100%) rename {lec => lectures}/lec15.ipynb (100%) rename {lec => lectures}/lec16.ipynb (100%) rename {lec => lectures}/lec17.ipynb (100%) rename {lec => lectures}/lec18.ipynb (100%) rename {lec => lectures}/lec19.ipynb (100%) rename {lec => lectures}/lec20.ipynb (100%) rename {lec => lectures}/lec21.ipynb (100%) rename {lec => lectures}/lec23.ipynb (100%) rename {lec => lectures}/lec24.ipynb (100%) rename {lec => 
lectures}/lec25.ipynb (100%) rename {lec => lectures}/lec26.ipynb (100%) rename {lec => lectures}/lec27.ipynb (100%) rename {lec => lectures}/lec28.ipynb (100%) rename {lec => lectures}/lec29.ipynb (100%) rename {lec => lectures}/lec30.ipynb (100%) rename {lec => lectures}/lec31.ipynb (100%) rename {lec => lectures}/lec32.ipynb (100%) rename {lec => lectures}/lec33.ipynb (100%) rename {lec => lectures}/lec35.ipynb (100%) rename {lec => lectures}/lec36.ipynb (100%) rename {lec => lectures}/lec37.ipynb (100%) rename {lec => lectures}/lec38.ipynb (100%) rename {lec => lectures}/lec38_0.ipynb (100%) rename {lec => lectures}/lec39.ipynb (100%) diff --git a/hw/hw01/hw01.ipynb b/hw/hw01/hw01.ipynb index f15cea1..a85b3f8 100644 --- a/hw/hw01/hw01.ipynb +++ b/hw/hw01/hw01.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -62,7 +62,10 @@ { "cell_type": "markdown", "id": "cell-hw01-4", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -88,7 +91,10 @@ { "cell_type": "markdown", "id": "cell-hw01-6", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -135,7 +141,10 @@ { "cell_type": "markdown", "id": "cell-hw01-9", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Around how many periods are there in the chapter with the most characters? Assign either 1, 2, 3, 4, or 5 to the name `characters_q1` below. 
**(4 Points)**\n", "\n", @@ -185,7 +194,10 @@ { "cell_type": "markdown", "id": "cell-hw01-13", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Which of the following chapters has the most characters per period? Assign either 1, 2, or 3 to the name `characters_q2` below. **(4 Points)**\n", "\n", @@ -268,7 +280,10 @@ { "cell_type": "markdown", "id": "cell-hw01-21", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Choose the best explanation of what's wrong with the code, and then assign 1, 2, 3, or 4 to `names_q1` below to indicate your answer. **(4 Points)**\n", "\n", @@ -332,7 +347,10 @@ { "cell_type": "markdown", "id": "cell-hw01-26", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Choose the best explanation of what's wrong with the code and assign 1, 2, 3, or 4 to `names_q2` below to indicate your answer. **(4 Points)**\n", "\n", @@ -397,7 +415,10 @@ { "cell_type": "markdown", "id": "cell-hw01-31", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "What is `y` after running this cell, and why? Choose the best explanation and assign 1, 2, 3, or 4 to `names_q3` below to indicate your answer. **(4 Points)**\n", "\n", @@ -461,7 +482,10 @@ { "cell_type": "markdown", "id": "cell-hw01-36", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Suppose you want to find the **biggest** absolute difference between the number of degree recipients in the two years, among the three majors.\n", "\n", @@ -498,7 +522,10 @@ { "cell_type": "markdown", "id": "cell-hw01-39", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Which of the three majors had the **smallest** absolute difference? 
Assign `smallest_change_major` to 1, 2, or 3 where each number corresponds to the following major:\n", "\n", @@ -541,7 +568,10 @@ { "cell_type": "markdown", "id": "cell-hw01-42", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** For each major, define the \u201crelative change\u201d to be the following: $\\large{\\frac{\\text{absolute difference}}{\\text{value in 2008-2009}} * 100}$ \n", "\n", @@ -581,7 +611,10 @@ { "cell_type": "markdown", "id": "cell-hw01-45", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** Assign `biggest_rel_change_major` to 1, 2, or 3 where each number corresponds to to the following: \n", "\n", @@ -638,7 +671,10 @@ { "cell_type": "markdown", "id": "cell-hw01-50", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -656,7 +692,10 @@ { "cell_type": "markdown", "id": "cell-hw01-52", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -678,7 +717,10 @@ { "cell_type": "markdown", "id": "cell-hw01-54", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -698,7 +740,10 @@ { "cell_type": "markdown", "id": "cell-hw01-56", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -720,7 +765,10 @@ { "cell_type": "markdown", "id": "cell-hw01-58", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -730,7 +778,10 @@ { "cell_type": "markdown", "id": "cell-hw01-59", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "The Reverend Henry Whitehead was skeptical of John Snow\u2019s conclusion about the Broad Street pump. After the Broad Street cholera epidemic ended, Whitehead set about trying to prove Snow wrong. 
(The history of the event is detailed [here](http://www.ncbi.nlm.nih.gov/pmc/articles/PMC1034367/pdf/medhist00183-0026.pdf).)\n", "\n", diff --git a/hw/hw02/hw02.ipynb b/hw/hw02/hw02.ipynb index cc7dd52..2babcd9 100644 --- a/hw/hw02/hw02.ipynb +++ b/hw/hw02/hw02.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -93,7 +93,10 @@ { "cell_type": "markdown", "id": "cell-hw02-6", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Make an array called `weird_numbers` containing the following numbers (in the given order) **(4 Points)**:\n", "\n", @@ -139,7 +142,10 @@ { "cell_type": "markdown", "id": "cell-hw02-9", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Make an array called `book_title_words` containing the following three strings: \"Eats\", \"Shoots\", and \"and Leaves\". **(4 Points)**\n" ] @@ -173,7 +179,10 @@ { "cell_type": "markdown", "id": "cell-hw02-12", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Strings have a method called `join`. `join` takes one argument, an array of strings. It returns a single string. Specifically, the value of `a_string.join(an_array)` is a single string that's the [concatenation](https://en.wikipedia.org/wiki/Concatenation) (\"putting together\") of all the strings in `an_array`, **except** `a_string` is inserted in between each string.\n", "\n", @@ -238,7 +247,10 @@ { "cell_type": "markdown", "id": "cell-hw02-17", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** The cell below creates an array of some numbers. 
Set `third_element` to the third element of `some_numbers`. **(4 Points)**\n" ] @@ -274,7 +286,10 @@ { "cell_type": "markdown", "id": "cell-hw02-20", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** The next cell creates a table that displays some information about the elements of `some_numbers` and their order. Run the cell to see the partially-completed table, then fill in the missing information (the cells that say \"Ellipsis\") by assigning `blank_a`, `blank_b`, `blank_c`, and `blank_d` to the correct elements in the table. **(4 Points)**\n", "\n", @@ -318,7 +333,10 @@ { "cell_type": "markdown", "id": "cell-hw02-23", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** You'll sometimes want to find the **last** element of an array. Suppose an array has 142 elements. What is the index of its last element? **(4 Points)**\n" ] @@ -351,7 +369,10 @@ { "cell_type": "markdown", "id": "cell-hw02-26", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "More often, you don't know the number of elements in an array, its *length*. (For example, it might be a large dataset you found on the Internet.) The function `len` takes a single argument, an array, and returns an integer that represents the `len`gth of that array.\n", "\n", @@ -389,7 +410,10 @@ { "cell_type": "markdown", "id": "cell-hw02-29", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** Finally, assign `min_of_birth_years` to the minimum of the first, sixteenth, and last birth years listed in `president_birth_years`. **(4 Points)**\n" ] @@ -431,7 +455,10 @@ { "cell_type": "markdown", "id": "cell-hw02-33", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Multiply the numbers 42, -4224, 424224242, and 250 by 157. 
Assign each variable below such that `first_product` is assigned to the result of $42 * 157$, `second_product` is assigned to the result of $-4224 * 157$, and so on. **(4 Points)**\n", "\n", @@ -473,7 +500,10 @@ { "cell_type": "markdown", "id": "cell-hw02-36", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Now, do the same calculation, but using an array called `numbers` and only a single multiplication (`*`) operator. Store the 4 results in an array named `products`. **(4 Points)**\n" ] @@ -508,7 +538,10 @@ { "cell_type": "markdown", "id": "cell-hw02-39", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** Oops, we made a typo! Instead of 157, we wanted to multiply each number by 1577. Compute the correct products in the cell below using array arithmetic. Notice that your job is really easy if you previously defined an array containing the 4 numbers. **(4 Points)**\n" ] @@ -542,7 +575,10 @@ { "cell_type": "markdown", "id": "cell-hw02-42", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** We've loaded an array of temperatures in the next cell. Each number is the highest temperature observed on a day at a climate observation station, mostly from the US. Since they're from the US government agency [NOAA](https://www.noaa.gov/), all the temperatures are in Fahrenheit.\n", "\n", @@ -580,7 +616,10 @@ { "cell_type": "markdown", "id": "cell-hw02-45", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** The cell below loads all the *lowest* temperatures from each day (in Fahrenheit). Compute the daily temperature range for each day. That is, compute the difference between each daily maximum temperature and the corresponding daily minimum temperature. 
**Pay attention to the units and give your answer in Celsius!** Make sure **NOT** to round your answer for this question! **(4 Points)**\n", "\n", @@ -635,7 +674,10 @@ { "cell_type": "markdown", "id": "cell-hw02-50", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** The first line below assigns `waiting_times` to an array of 272 consecutive waiting times between eruptions, taken from a classic 1938 dataset. Assign the names `shortest`, `longest`, and `average` so that the `print` statement is correct. **(4 Points)**\n" ] @@ -674,7 +716,10 @@ { "cell_type": "markdown", "id": "cell-hw02-53", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Assign `biggest_decrease` to the biggest decrease in waiting time between two consecutive eruptions. For example, the third eruption occurred after 74 minutes and the fourth after 62 minutes, so the decrease in waiting time was 74 - 62 = 12 minutes. **(4 Points)**\n", "\n", @@ -715,7 +760,10 @@ { "cell_type": "markdown", "id": "cell-hw02-56", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** The `faithful_with_eruption_nums` table contains two columns: `eruption_number`, which represents the number of that eruption, and `waiting`, which represents the time spent waiting after that eruption. For example, take the first two rows of the table:\n", "\n", @@ -767,7 +815,10 @@ { "cell_type": "markdown", "id": "cell-hw02-59", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** Let\u2019s imagine your guess for the next waiting time was always just the length of the previous waiting time. If you always guessed the previous waiting time, how big would your error in guessing the waiting times be, on average? 
**(4 Points)**\n", "\n", @@ -812,7 +863,10 @@ { "cell_type": "markdown", "id": "cell-hw02-63", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Suppose you have 4 apples, 3 oranges, and 3 pineapples. (Perhaps you're using Python to solve a high school Algebra problem.) Create a table that contains this information. It should have two columns: `fruit name` and `count`. Assign the new table to the variable `fruits`. **(4 Points)**\n", "\n", @@ -851,7 +905,10 @@ { "cell_type": "markdown", "id": "cell-hw02-66", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** The file `https://ds-modules.github.io/materials-sp22-assets/hw/hw02/inventory.csv` contains information about the inventory at a fruit stand. Each row represents the contents of one box of fruit. Load it as a table named `inventory` using the `Table.read_table()` function. `Table.read_table(...)` takes one argument (data file name in string format) and returns a table. **(4 Points)**\n" ] @@ -885,7 +942,10 @@ { "cell_type": "markdown", "id": "cell-hw02-69", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** Does each box at the fruit stand contain a different fruit? Set `all_different` to `True` if each box contains a different fruit or to `False` if multiple boxes contain the same fruit. **(4 Points)**\n", "\n", @@ -921,7 +981,10 @@ { "cell_type": "markdown", "id": "cell-hw02-72", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** The file `https://ds-modules.github.io/materials-sp22-assets/hw/hw02/sales.csv` contains the number of fruit sold from each box last Saturday. It has an extra column called `price per fruit ($)` that's the price *per item of fruit* for fruit in that box. The rows are in the same order as the `inventory` table. 
Load these data into a table called `sales`. **(5 Points)**\n" ] @@ -955,7 +1018,10 @@ { "cell_type": "markdown", "id": "cell-hw02-75", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** How many fruits did the store sell in total on that day? **(5 Points)**\n" ] @@ -989,7 +1055,10 @@ { "cell_type": "markdown", "id": "cell-hw02-78", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 6.** What was the store's total revenue (the total price of all fruits sold) on that day? **(5 Points)**\n", "\n", @@ -1025,7 +1094,10 @@ { "cell_type": "markdown", "id": "cell-hw02-81", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 7.** Make a new table called `remaining_inventory`. It should have the same rows and columns as `inventory`, except that the amount of fruit sold from each box should be subtracted from that box's **original** count, so that the `count` column is **updated to be** the amount of fruit remaining after Saturday. 
**(5 Points)**\n" ] @@ -1142,7 +1214,7 @@ { "cases": [ { - "code": ">>> import numpy as np\n>>> # It looks like you didn't make an array.\n>>> type(weird_numbers) == np.ndarray\nTrue", + "code": ">>> import numpy as np\n>>> type(weird_numbers) == np.ndarray\nTrue", "hidden": false, "locked": false }, @@ -1172,17 +1244,17 @@ { "cases": [ { - "code": ">>> import numpy as np\n>>> # It looks like you didn't make an array.\n>>> type(book_title_words) == np.ndarray\nTrue", + "code": ">>> import numpy as np\n>>> type(book_title_words) == np.ndarray\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # It looks like you included commas in the text.\n>>> # The three pieces of text in the array should be:\n>>> # \"Eats\"\n>>> # \"Shoots\"\n>>> # \"and Leaves\"\n>>> not any([',' in text for text in book_title_words])\nTrue", + "code": ">>> not any([',' in text for text in book_title_words])\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # It looks like you didn't include both words in the\n>>> # last piece of text. 
It should be the actual string:\n>>> # \"and Leaves\"\n>>> 'and ' in book_title_words.item(2)\nTrue", + "code": ">>> 'and ' in book_title_words.item(2)\nTrue", "hidden": false, "locked": false }, @@ -1192,7 +1264,7 @@ "locked": false }, { - "code": ">>> book_title_words.item(0) == 'Eats' and book_title_words.item(1) == 'Shoots' and book_title_words.item(2) == 'and Leaves'\nTrue", + "code": ">>> book_title_words.item(0) == 'Eats' and book_title_words.item(1) == 'Shoots' and (book_title_words.item(2) == 'and Leaves')\nTrue", "hidden": false, "locked": false } @@ -1241,7 +1313,7 @@ { "cases": [ { - "code": ">>> # It looks like you wrote:\n>>> # some_numbers.item(3)\n>>> # But the third element has index 2, not 3.\n>>> third_element != -10\nTrue", + "code": ">>> third_element != -10\nTrue", "hidden": false, "locked": false }, @@ -1270,12 +1342,12 @@ { "cases": [ { - "code": ">>> elements_of_some_numbers.column(0).item(2) == \"third\"\nTrue", + "code": ">>> elements_of_some_numbers.column(0).item(2) == 'third'\nTrue", "hidden": false, "locked": false }, { - "code": ">>> elements_of_some_numbers.column(0).item(3) == \"fourth\"\nTrue", + "code": ">>> elements_of_some_numbers.column(0).item(3) == 'fourth'\nTrue", "hidden": false, "locked": false }, @@ -1369,7 +1441,7 @@ { "cases": [ { - "code": ">>> first_product > 0, second_product < 0, third_product > 0, fourth_product > 0\n(True, True, True, True)", + "code": ">>> (first_product > 0, second_product < 0, third_product > 0, fourth_product > 0)\n(True, True, True, True)", "hidden": false, "locked": false } @@ -1390,7 +1462,7 @@ { "cases": [ { - "code": ">>> # products should be an array!\n>>> len(products) == 4\nTrue", + "code": ">>> len(products) == 4\nTrue", "hidden": false, "locked": false } @@ -1435,7 +1507,7 @@ { "cases": [ { - "code": ">>> # It looks like you multiplied and subtracted in the wrong\n>>> # order.\n>>> sum(celsius_max_temperatures) != 356705.0\nTrue", + "code": ">>> sum(celsius_max_temperatures) != 
356705.0\nTrue", "hidden": false, "locked": false }, @@ -1473,7 +1545,7 @@ { "cases": [ { - "code": ">>> # celsius_temperature_ranges should be an array\n>>> type(celsius_temperature_ranges) is np.ndarray\nTrue", + "code": ">>> type(celsius_temperature_ranges) is np.ndarray\nTrue", "hidden": false, "locked": false }, @@ -1505,12 +1577,12 @@ { "cases": [ { - "code": ">>> # Hint: shortest is a number between 40 and 50.\n>>> 40 <= shortest <= 50\nTrue", + "code": ">>> 40 <= shortest <= 50\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Hint: the average is between the shortest and the longest\n>>> shortest <= average <= longest\nTrue", + "code": ">>> shortest <= average <= longest\nTrue", "hidden": false, "locked": false } @@ -1532,12 +1604,12 @@ { "cases": [ { - "code": ">>> # Hint: If you are getting 47 as your answer, you might be computing the biggest change \n>>> # rather than the biggest decrease!\n>>> biggest_decrease == 47\nFalse", + "code": ">>> biggest_decrease == 47\nFalse", "hidden": false, "locked": false }, { - "code": ">>> # Hint: biggest decrease is above 30, but not 47.\n>>> 30 <= biggest_decrease < 47\nTrue", + "code": ">>> 30 <= biggest_decrease < 47\nTrue", "hidden": false, "locked": false } @@ -1690,7 +1762,7 @@ { "cases": [ { - "code": ">>> # We're asking for the number of *pieces* of fruit, not the\n>>> # number of kinds of fruit or the number of boxes from which\n>>> # there were sales.\n>>> total_fruits_sold > 10\nTrue", + "code": ">>> total_fruits_sold > 10\nTrue", "hidden": false, "locked": false } @@ -1711,7 +1783,7 @@ { "cases": [ { - "code": ">>> # If you're stuck, here's a hint: You want to multiply the count\n>>> # sold in each box by the per-item price of fruits in that box.\n>>> # You can use elementwise multiplication for that.\n>>> # Then you want the sum of those products. 
Use sum().\n>>> 50 <= total_revenue <= 150\nTrue", + "code": ">>> 50 <= total_revenue <= 150\nTrue", "hidden": false, "locked": false } @@ -1734,12 +1806,12 @@ { "cases": [ { - "code": ">>> # It looks like your table doesn't have all 3 columns that are\n>>> # in the inventory table.\n>>> remaining_inventory.num_columns\n3", + "code": ">>> remaining_inventory.num_columns\n3", "hidden": false, "locked": false }, { - "code": ">>> #It looks like you forgot to subtract off the sales.\n>>> remaining_inventory.column(\"count\").item(0) != 45\nTrue", + "code": ">>> remaining_inventory.column('count').item(0) != 45\nTrue", "hidden": false, "locked": false }, diff --git a/hw/hw03/hw03.ipynb b/hw/hw03/hw03.ipynb index 0ea67af..ac7ff79 100644 --- a/hw/hw03/hw03.ipynb +++ b/hw/hw03/hw03.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -105,7 +105,10 @@ { "cell_type": "markdown", "id": "cell-hw03-7", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** The data are in a CSV file called `https://ds-modules.github.io/materials-sp22-assets/hw/hw03/unemployment.csv`. Load that file into a table called `unemployment`. **(4 Points)**\n" ] @@ -139,7 +142,10 @@ { "cell_type": "markdown", "id": "cell-hw03-10", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Sort the data in descending order by NEI, naming the sorted table `by_nei`. Create another table called `by_nei_pter` that's sorted in descending order by NEI-PTER instead. 
**(4 Points)**\n" ] @@ -195,7 +201,10 @@ { "cell_type": "markdown", "id": "cell-hw03-15", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** Use `take` to make a table containing the data for the 11 quarters when NEI was greatest. Call that table `greatest_nei`.\n", "\n", @@ -231,7 +240,10 @@ { "cell_type": "markdown", "id": "cell-hw03-18", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** It's believed that many people became PTER (recall: \"Part-Time for Economic Reasons\") in the \"Great Recession\" of 2008-2009. NEI-PTER is the percentage of people who are unemployed (included in the NEI) plus the percentage of people who are PTER.\n", "\n", @@ -269,7 +281,10 @@ { "cell_type": "markdown", "id": "cell-hw03-21", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** Add `pter` as a column to `unemployment` (name the column `PTER`) and sort the resulting table by that column in descending order. Call the resulting table `by_pter`.\n", "\n", @@ -305,7 +320,10 @@ { "cell_type": "markdown", "id": "cell-hw03-24", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 6.** Create a line plot of PTER over time. To do this, create a new table called `pter_over_time` by making a copy of the `unemployment` table and adding two new columns: `Year` and `PTER` using the `year` array and the `pter` array, respectively. Then, generate a line plot using one of the table methods you've learned in class.\n", "\n", @@ -345,7 +363,10 @@ { "cell_type": "markdown", "id": "cell-hw03-27", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 7.** Were PTER rates high during the Great Recession (that is to say, were PTER rates particularly high in the years 2008 through 2011)? 
Assign `highPTER` to `True` if you think PTER rates were high in this period, or `False` if you think they weren't. **(4 Points)**\n" ] @@ -414,7 +435,10 @@ { "cell_type": "markdown", "id": "cell-hw03-33", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Assign `us_birth_rate` to the total US annual birth rate during this time interval. The annual birth rate for a year-long period is the total number of births in that period as a proportion of the total population size at the start of the time period. **(4 Points)**\n", "\n", @@ -450,7 +474,10 @@ { "cell_type": "markdown", "id": "cell-hw03-36", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Assign `movers` to the number of states for which the **absolute value** of the **annual rate of migration** was higher than 1%. The annual rate of migration for a year-long period is the net number of migrations (in and out) as a proportion of the population size at the start of the period. The `MIGRATION` column contains estimated annual net migration counts by state. **(4 Points)**\n", "\n", @@ -487,7 +514,10 @@ { "cell_type": "markdown", "id": "cell-hw03-39", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** Assign `west_births` to the total number of births that occurred in region 4 (the Western US). **(4 Points)**\n", "\n", @@ -523,7 +553,10 @@ { "cell_type": "markdown", "id": "cell-hw03-42", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** In the next question, you will be creating a visualization to understand the relationship between birth and death rates. 
The annual death rate for a year-long period is the total number of deaths in that period as a proportion of the population size at the start of the time period.\n", "\n", @@ -564,7 +597,10 @@ { "cell_type": "markdown", "id": "cell-hw03-45", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -597,7 +633,10 @@ { "cell_type": "markdown", "id": "cell-hw03-47", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -668,7 +707,10 @@ { "cell_type": "markdown", "id": "cell-hw03-53", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -694,7 +736,10 @@ { "cell_type": "markdown", "id": "cell-hw03-55", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -723,7 +768,10 @@ { "cell_type": "markdown", "id": "cell-hw03-57", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -763,7 +811,10 @@ { "cell_type": "markdown", "id": "cell-hw03-60", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** Assign `boston_under_15` and `manila_under_15` to the percentage of rides that are less than 15 minutes in their respective metropolitan areas. Use the height variables provided below in order to compute the percentages. Your solution should only use height variables, numbers, and mathematical operations. You should **not** access the tables `boston` and `manila` in any way. **(4 Points)**\n" ] @@ -808,7 +859,10 @@ { "cell_type": "markdown", "id": "cell-hw03-63", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** Let's take a closer look at the distribution of ride times in Boston. Assign `boston_median_bin` to an integer (1, 2, 3, or 4) that corresponds to the bin that contains the median time. 
**(4 Points)**\n", "\n", @@ -849,7 +903,10 @@ { "cell_type": "markdown", "id": "cell-hw03-66", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -869,7 +926,10 @@ { "cell_type": "markdown", "id": "cell-hw03-68", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -895,6 +955,8 @@ "cell_type": "markdown", "id": "cell-hw03-70", "metadata": { + "deletable": false, + "editable": false, "tags": [] }, "source": [ @@ -955,7 +1017,10 @@ { "cell_type": "markdown", "id": "cell-hw03-75", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Suppose we run `t.hist('x')`. Which histogram does this code produce? Assign `histogram_column_x` to either 1, 2, or 3. **(5 Points)**\n", "\n", @@ -992,7 +1057,10 @@ { "cell_type": "markdown", "id": "cell-hw03-78", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1010,7 +1078,10 @@ { "cell_type": "markdown", "id": "cell-hw03-80", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1049,7 +1120,10 @@ { "cell_type": "markdown", "id": "cell-hw03-83", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1067,7 +1141,10 @@ { "cell_type": "markdown", "id": "cell-hw03-85", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1218,7 +1295,7 @@ { "cases": [ { - "code": ">>> # It looks like you subtracted in the wrong order.\n>>> round(pter.item(6), 4) != -1.1282\nTrue", + "code": ">>> round(pter.item(6), 4) != -1.1282\nTrue", "hidden": false, "locked": false }, @@ -1349,7 +1426,7 @@ { "cases": [ { - "code": ">>> 5e5 < west_births < 1e6\nTrue", + "code": ">>> 500000.0 < west_births < 1000000.0\nTrue", "hidden": false, "locked": false } @@ -1371,12 +1448,12 
@@ { "cases": [ { - "code": ">>> # visualization should be assigned to an integer.\n>>> type(visualization) == int\nTrue", + "code": ">>> type(visualization) == int\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Make sure visualization is assigned to 1, 2 or 3.\n>>> 1 <= visualization <= 3\nTrue", + "code": ">>> 1 <= visualization <= 3\nTrue", "hidden": false, "locked": false } @@ -1466,7 +1543,7 @@ { "cases": [ { - "code": ">>> # Make sure boston_median_bin is assigned to 1, 2, 3, or 4.\n>>> 1 <= boston_median_bin <= 4\nTrue", + "code": ">>> 1 <= boston_median_bin <= 4\nTrue", "hidden": false, "locked": false } @@ -1488,7 +1565,7 @@ { "cases": [ { - "code": ">>> # Make sure you assign histogram_column_x to either 1 or 2!\n>>> type(histogram_column_x) == int\nTrue", + "code": ">>> type(histogram_column_x) == int\nTrue", "hidden": false, "locked": false }, @@ -1515,12 +1592,12 @@ { "cases": [ { - "code": ">>> # Make sure you assign histogram_column_y to either 1 or 2!\n>>> type(histogram_column_y) == int\nTrue", + "code": ">>> type(histogram_column_y) == int\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Make sure histogram_column_y is assigned to 1, 2 or 3.\n>>> histogram_column_y == 1 or histogram_column_y == 2 or histogram_column_y == 3\nTrue", + "code": ">>> histogram_column_y == 1 or histogram_column_y == 2 or histogram_column_y == 3\nTrue", "hidden": false, "locked": false } diff --git a/hw/hw04/hw04.ipynb b/hw/hw04/hw04.ipynb index e646a9e..61d5091 100644 --- a/hw/hw04/hw04.ipynb +++ b/hw/hw04/hw04.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -133,7 +133,10 @@ { "cell_type": "markdown", "id": "cell-hw04-9", - "metadata": {}, + "metadata": { + "deletable": false, + 
"editable": false + }, "source": [ "**Question 1.** It would be easier if we could combine the information in both tables. Assign `burritos` to the result of joining the two tables together, so that we have a table with the ratings for every corresponding menu item from every restaurant. Each menu item has the same rating as the restaurant from which it is from. **(8 Points)**\n", "\n", @@ -172,7 +175,10 @@ { "cell_type": "markdown", "id": "cell-hw04-12", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -212,7 +218,10 @@ { "cell_type": "markdown", "id": "cell-hw04-15", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -232,7 +241,10 @@ { "cell_type": "markdown", "id": "cell-hw04-17", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -255,7 +267,10 @@ { "cell_type": "markdown", "id": "cell-hw04-19", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** There are so many types of California burritos in the `burritos` table! Sara wants to know which type is the highest rated across all restaurants. For the sake of these questions, we are treating each menu item's rating the same as its respective restaurant's, as we do not have the rating of every single item at these restaurants. You do not need to worry about this fact, but we thought to mention it!\n", "\n", @@ -295,7 +310,10 @@ { "cell_type": "markdown", "id": "cell-hw04-22", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** Given this new table `california_burritos`, Sara can figure out the name of the California burrito with the highest overall average rating! Assign `best_california_burrito` to a line of code that outputs the string that represents the name of the California burrito with the highest overall average rating. 
If multiple burritos satisfy this criteria, you can output any one of them. **(8 Points)**\n" ] @@ -329,7 +347,10 @@ { "cell_type": "markdown", "id": "cell-hw04-25", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -353,7 +374,10 @@ { "cell_type": "markdown", "id": "cell-hw04-27", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -484,7 +508,10 @@ { "cell_type": "markdown", "id": "cell-hw04-38", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Set `job_titles` to a table with two columns. The first column should be called `Organization Group` and have the name of every \"Organization Group\" once, and the second column should be called `Jobs` with each row in that second column containing an *array* of the names of all the job titles within that \"Organization Group\". Don't worry if there are multiple of the same job titles. 
**(9 Points)**\n", "\n", @@ -538,7 +565,10 @@ { "cell_type": "markdown", "id": "cell-hw04-42", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -558,7 +588,10 @@ { "cell_type": "markdown", "id": "cell-hw04-44", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -602,7 +635,10 @@ { "cell_type": "markdown", "id": "cell-hw04-47", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -620,7 +656,10 @@ { "cell_type": "markdown", "id": "cell-hw04-49", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -740,7 +779,7 @@ { "cases": [ { - "code": ">>> # Make sure you have all the columns from both tables\n>>> set([\"Name\", \"Menu_Item\", \"Yelp\", \"Google\", \"Overall\", \"Cost\"]) == set(burritos.labels)\nTrue", + "code": ">>> set(['Name', 'Menu_Item', 'Yelp', 'Google', 'Overall', 'Cost']) == set(burritos.labels)\nTrue", "hidden": false, "locked": false }, @@ -764,7 +803,7 @@ { "cases": [ { - "code": ">>> # Make sure you are using the correct table! 
\n>>> yelp_and_google.num_rows == 212\nTrue", + "code": ">>> yelp_and_google.num_rows == 212\nTrue", "hidden": false, "locked": false } @@ -786,12 +825,12 @@ { "cases": [ { - "code": ">>> # Number of columns should be 2\n>>> california_burritos.num_columns == 2\nTrue", + "code": ">>> california_burritos.num_columns == 2\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Number of rows should be 19\n>>> california_burritos.num_rows == 19\nTrue", + "code": ">>> california_burritos.num_rows == 19\nTrue", "hidden": false, "locked": false } @@ -812,7 +851,7 @@ { "cases": [ { - "code": ">>> # Your answer should be a string\n>>> type(best_california_burrito) == str\nTrue", + "code": ">>> type(best_california_burrito) == str\nTrue", "hidden": false, "locked": false } @@ -833,7 +872,7 @@ { "cases": [ { - "code": ">>> # Your answer should be between 0 and 100.\n>>> 0 <= burritos_less_than_6 <= 100\nTrue", + "code": ">>> 0 <= burritos_less_than_6 <= 100\nTrue", "hidden": false, "locked": false } @@ -867,12 +906,12 @@ "locked": false }, { - "code": ">>> # Make sure that you have the correct column labels!\n>>> np.asarray(job_titles.labels).item(1) != \"Job full_array\"\nTrue", + "code": ">>> np.asarray(job_titles.labels).item(1) != 'Job full_array'\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Make sure that you have the correct column labels!\n>>> np.asarray(job_titles.labels).item(1) == \"Jobs\"\nTrue", + "code": ">>> np.asarray(job_titles.labels).item(1) == 'Jobs'\nTrue", "hidden": false, "locked": false } @@ -895,12 +934,12 @@ { "cases": [ { - "code": ">>> # Double check that your salary_range function is correct\n>>> compensation_range(make_array(5, 1, 20, 1000)) == 999\nTrue", + "code": ">>> compensation_range(make_array(5, 1, 20, 1000)) == 999\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Make sure that the table has departments as the rows and organization groups as the columns.\n>>> set([\"Department\", \"Community 
Health\", \"Culture & Recreation\", \"General Administration & Finance\", \"Human Welfare & Neighborhood Development\", \"Public Protection\", \"Public Works, Transportation & Commerce\"]) == set(department_ranges.labels)\nTrue", + "code": ">>> set(['Department', 'Community Health', 'Culture & Recreation', 'General Administration & Finance', 'Human Welfare & Neighborhood Development', 'Public Protection', 'Public Works, Transportation & Commerce']) == set(department_ranges.labels)\nTrue", "hidden": false, "locked": false }, @@ -927,12 +966,12 @@ { "cases": [ { - "code": ">>> # Make sure that your final answer is a number\n>>> isinstance(num_over_125k, int)\nTrue", + "code": ">>> isinstance(num_over_125k, int)\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Make sure that your answer makes sense given the sf table\n>>> 0 <= num_over_125k <= 51\nTrue", + "code": ">>> 0 <= num_over_125k <= 51\nTrue", "hidden": false, "locked": false } diff --git a/hw/hw05/hw05.ipynb b/hw/hw05/hw05.ipynb index 911c9c9..1acd4e5 100644 --- a/hw/hw05/hw05.ipynb +++ b/hw/hw05/hw05.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -115,7 +115,10 @@ { "cell_type": "markdown", "id": "cell-hw05-8", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Let's start by finding the total points each team scored in a game.\n", "\n", @@ -157,7 +160,10 @@ { "cell_type": "markdown", "id": "cell-hw05-11", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Create a new table `final_scores` with three columns in this *specific* order: `Opponent`, `Cal Score`, `Opponent Score`. 
You will have to create the `Cal Score` and `Opponent Score` columns. Use the function `sum_scores` you just defined in the previous question for this problem. **(20 Points)**\n", "\n", @@ -234,7 +240,10 @@ { "cell_type": "markdown", "id": "cell-hw05-17", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** We want to see for a particular game whether or not Cal lost. Write a function called `did_cal_lose`. It should take one argument: a **row object** from the `final_scores` table. It should return either `True` if Cal's score was less than the Opponent's score, and `False` otherwise. **(20 Points)**\n", "\n", @@ -274,7 +283,10 @@ { "cell_type": "markdown", "id": "cell-hw05-20", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** James wants to see how Cal did against every opponent during the 2021 season. Using the `final_scores` table:\n", "\n", @@ -326,7 +338,10 @@ { "cell_type": "markdown", "id": "cell-hw05-23", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5:** Sometimes in football the two teams are equally matched and the game is quite close. Other times, it is a blowout, where the winning team wins by a large margin of victory. Let's define a **big win** to be a game in which the winning team won by more than 10 points.\n", "\n", @@ -419,7 +434,10 @@ { "cell_type": "markdown", "id": "cell-hw05-29", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Optional Question 1.** Unroll the code below." 
] @@ -428,7 +446,10 @@ "cell_type": "code", "execution_count": null, "id": "cell-hw05-30", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "outputs": [], "source": [ "for joke_iteration in np.arange(3):\n", @@ -544,7 +565,7 @@ "locked": false }, { - "code": ">>> sum_scores(-2,3,5,-10)\n-4", + "code": ">>> sum_scores(-2, 3, 5, -10)\n-4", "hidden": false, "locked": false } @@ -652,7 +673,7 @@ "locked": false }, { - "code": ">>> # Make sure big_wins is an array.\n>>> import numpy as np\n>>> type(big_wins) == np.ndarray\nTrue", + "code": ">>> import numpy as np\n>>> type(big_wins) == np.ndarray\nTrue", "hidden": false, "locked": false } diff --git a/hw/hw06/hw06.ipynb b/hw/hw06/hw06.ipynb index 1b31de0..fe234f5 100644 --- a/hw/hw06/hw06.ipynb +++ b/hw/hw06/hw06.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -147,7 +147,10 @@ { "cell_type": "markdown", "id": "cell-hw06-11", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Define a function `dollar_bet_on_red` that takes the name of a color and returns your gain in dollars if that color had won and you had placed a one-dollar bet on red. Remember that the gain can be negative. Make sure your function returns an integer. **(4 points)**\n", "\n", @@ -203,7 +206,10 @@ { "cell_type": "markdown", "id": "cell-hw06-16", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Add a column labeled `Winnings: Red` to the table `wheel`. For each pocket, the column should contain your gain in dollars if that pocket won and you had bet one dollar on red. Your code should use the function `dollar_bet_on_red`. 
**(4 points)**\n" ] @@ -239,7 +245,10 @@ { "cell_type": "markdown", "id": "cell-hw06-19", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "### Simulating 10 Bets on Red\n", "Roulette wheels are set up so that each time they are spun, the winning pocket is equally likely to be any of the 38 pockets regardless of the results of all other spins. Let's see what would happen if we decided to bet one dollar on red each round.\n", @@ -287,7 +296,10 @@ { "cell_type": "markdown", "id": "cell-hw06-23", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** Let's see what would happen if you made more bets. Define a function `net_gain_red` that takes the number of bets and returns the net gain in that number of one-dollar bets on red. **(4 points)**\n", "\n", @@ -341,7 +353,10 @@ { "cell_type": "markdown", "id": "cell-hw06-28", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** Complete the cell below to simulate the net gain in 200 one-dollar bets on red, repeating the process 10,000 times. After the cell is run, `all_gains_red` should be an array with 10,000 entries, each of which is the net gain in 200 one-dollar bets on red. **(4 points)**\n", "\n", @@ -403,7 +418,10 @@ { "cell_type": "markdown", "id": "cell-hw06-33", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 6:** Using the histogram above, decide whether the following statement is true or false:\n", "\n", @@ -440,7 +458,10 @@ { "cell_type": "markdown", "id": "cell-hw06-36", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "### Betting on a Split ###\n", "If betting on red doesn't seem like a good idea, maybe a gambler might want to try a different bet. A bet on a *split* is a bet on two consecutive numbers such as 5 and 6. This bets pays 17 to 1. 
That means if you place a one-dollar bet on the split 5 and 6, then:\n", @@ -505,7 +526,10 @@ { "cell_type": "markdown", "id": "cell-hw06-41", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 8.** Add a column `Winnings: Split` to the `wheel` table. For each pocket, the column should contain your gain in dollars if that pocket won and you had bet one dollar on the 5-6 split. **(4 points)**\n" ] @@ -541,7 +565,10 @@ { "cell_type": "markdown", "id": "cell-hw06-44", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 9.** Simulate the net gain in 200 one-dollar bets on the 5-6 split, repeating the process 10,000 times and saving your gains in the array `all_gains_split`. **(4 points)**\n", "\n", @@ -583,7 +610,10 @@ { "cell_type": "markdown", "id": "cell-hw06-47", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 10.** Look carefully at the visualization above, and assign `histogram_statements` to an array of the numbers of each statement below that can be correctly inferred from the overlaid histogram. **(4 points)**\n", "\n", @@ -662,7 +692,10 @@ { "cell_type": "markdown", "id": "cell-hw06-54", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** The winning pocket is black on all of the first three spins. **(4 points)**\n" ] @@ -695,7 +728,10 @@ { "cell_type": "markdown", "id": "cell-hw06-57", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** The color green never wins in the first 10 spins. **(4 points)**\n" ] @@ -728,7 +764,10 @@ { "cell_type": "markdown", "id": "cell-hw06-60", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** The color green wins at least once on the first 10 spins. 
**(4 points)**\n" ] @@ -761,7 +800,10 @@ { "cell_type": "markdown", "id": "cell-hw06-63", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** Two of the three colors never win in the first 10 spins. **(4 points)**\n", "\n", @@ -811,7 +853,10 @@ { "cell_type": "markdown", "id": "cell-hw06-67", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** A child picks four times at random from a box that has four toy animals: a bear, an elephant, a giraffe, and a kangaroo. **(4 points)**\n", "\n", @@ -847,7 +892,10 @@ { "cell_type": "markdown", "id": "cell-hw06-70", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 6.** In a lottery, two numbers are drawn at random with replacement from the integers 1 through 1000. **(4 points)**\n", "\n", @@ -883,7 +931,10 @@ { "cell_type": "markdown", "id": "cell-hw06-73", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 7.** A fair coin is tossed repeatedly. 
**(4 points)**\n", "\n", @@ -1017,7 +1068,10 @@ { "cell_type": "markdown", "id": "cell-hw06-83", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1039,7 +1093,10 @@ { "cell_type": "markdown", "id": "cell-hw06-85", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1064,6 +1121,8 @@ "cell_type": "markdown", "id": "cell-hw06-87", "metadata": { + "deletable": false, + "editable": false, "tags": [] }, "source": [ @@ -1094,7 +1153,10 @@ { "cell_type": "markdown", "id": "cell-hw06-89", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Assign `deck_model_probabilities` to a two-item array containing the chance of drawing a face card as the first element, and the chance of drawing a numbered card as the second element under Jade's model. Since we're working with probabilities, make sure your values are between 0 and 1. **(4 Points)**\n" ] @@ -1128,7 +1190,10 @@ { "cell_type": "markdown", "id": "cell-hw06-92", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** We believe Jade's model is incorrect. In particular, we believe there to be a *larger* chance of getting a face card. Which of the following statistics can we use during our simulation to test between the model and our alternative? Assign `statistic_choice` to the correct answer. **(5 Points)**\n", "\n", @@ -1166,7 +1231,10 @@ { "cell_type": "markdown", "id": "cell-hw06-95", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** Define the function `deck_simulation_and_statistic`, which, given a sample size and an array of model proportions (like the one you created in Question 1), returns the number of face cards in one simulation of drawing cards under the model specified in `model_proportions`. 
**(5 Points)**\n", "\n", @@ -1204,7 +1272,10 @@ { "cell_type": "markdown", "id": "cell-hw06-98", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** Use your function from above to simulate the drawing of 13 cards 5000 times under the proportions that you specified in Question 1. Keep track of all of your statistics in `deck_statistics`. **(5 Points)**\n" ] @@ -1259,7 +1330,10 @@ { "cell_type": "markdown", "id": "cell-hw06-103", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1277,7 +1351,10 @@ { "cell_type": "markdown", "id": "cell-hw06-105", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1378,12 +1455,12 @@ { "cases": [ { - "code": ">>> # Make sure histogram_statements is an array.\n>>> import numpy as np\n>>> type(histogram_statements) == np.ndarray\nTrue", + "code": ">>> import numpy as np\n>>> type(histogram_statements) == np.ndarray\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Make sure you have set histogram_statements to an array with at least 1 number\n>>> 1 <= histogram_statements.item(0) <= 3\nTrue", + "code": ">>> 1 <= histogram_statements.item(0) <= 3\nTrue", "hidden": false, "locked": false } @@ -1404,7 +1481,7 @@ { "cases": [ { - "code": ">>> # Make sure your column names are correct\n>>> wheel.labels[2] == \"Winnings: Red\"\nTrue", + "code": ">>> wheel.labels[2] == 'Winnings: Red'\nTrue", "hidden": false, "locked": false } @@ -1425,7 +1502,7 @@ { "cases": [ { - "code": ">>> # If this test isn't passing, try running the cells from the top.\n>>> set([\"Pocket\", \"Color\", \"Winnings: Red\"]) == set(ten_bets.labels)\nTrue", + "code": ">>> set(['Pocket', 'Color', 'Winnings: Red']) == set(ten_bets.labels)\nTrue", "hidden": false, "locked": false } @@ -1530,7 +1607,7 @@ { "cases": [ { - "code": ">>> # Make sure your column names are correct\n>>> 
wheel.labels[3] == \"Winnings: Split\"\nTrue", + "code": ">>> wheel.labels[3] == 'Winnings: Split'\nTrue", "hidden": false, "locked": false } @@ -1572,7 +1649,7 @@ { "cases": [ { - "code": ">>> # Your probability output should be a value between 0 and 1.\n>>> 0 < first_three_black < 1\nTrue", + "code": ">>> 0 < first_three_black < 1\nTrue", "hidden": false, "locked": false } @@ -1593,7 +1670,7 @@ { "cases": [ { - "code": ">>> # Your probability output should be a value between 0 and 1.\n>>> 0 < no_green < 1\nTrue", + "code": ">>> 0 < no_green < 1\nTrue", "hidden": false, "locked": false } @@ -1614,7 +1691,7 @@ { "cases": [ { - "code": ">>> # Your probability output should be a value between 0 and 1.\n>>> 0 < at_least_one_green < 1\nTrue", + "code": ">>> 0 < at_least_one_green < 1\nTrue", "hidden": false, "locked": false } @@ -1635,7 +1712,7 @@ { "cases": [ { - "code": ">>> # Your probability output should be a value between 0 and 1.\n>>> 0 < lone_winners < 1\nTrue", + "code": ">>> 0 < lone_winners < 1\nTrue", "hidden": false, "locked": false } @@ -1657,12 +1734,12 @@ { "cases": [ { - "code": ">>> # Make sure you are setting toys_option to an int\n>>> type(toys_option) == int\nTrue", + "code": ">>> type(toys_option) == int\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # toys_option should be assigned to either 1, 2 or 3.\n>>> 1 <= toys_option <= 3\nTrue", + "code": ">>> 1 <= toys_option <= 3\nTrue", "hidden": false, "locked": false } @@ -1684,12 +1761,12 @@ { "cases": [ { - "code": ">>> # Make sure you are setting lottery_option to an int\n>>> type(lottery_option) == int\nTrue", + "code": ">>> type(lottery_option) == int\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # toys_option should be assigned to either 1, 2 or 3.\n>>> 1 <= lottery_option <= 3\nTrue", + "code": ">>> 1 <= lottery_option <= 3\nTrue", "hidden": false, "locked": false } @@ -1711,7 +1788,7 @@ { "cases": [ { - "code": ">>> # Make sure you are setting coin_option to an 
int\n>>> type(coin_option) == int\nTrue", + "code": ">>> type(coin_option) == int\nTrue", "hidden": false, "locked": false }, @@ -1738,12 +1815,12 @@ { "cases": [ { - "code": ">>> # The array should have length 2\n>>> len(deck_model_probabilities) == 2\nTrue", + "code": ">>> len(deck_model_probabilities) == 2\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # The elements in the array should add up to 1.\n>>> sum(deck_model_probabilities) == 1\nTrue", + "code": ">>> sum(deck_model_probabilities) == 1\nTrue", "hidden": false, "locked": false } @@ -1765,12 +1842,12 @@ { "cases": [ { - "code": ">>> # Make sure you are setting statistic_choice to an int\n>>> type(statistic_choice) == int\nTrue", + "code": ">>> type(statistic_choice) == int\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # statistic_choice should be assigned to either 1, 2 or 3.\n>>> 1 <= statistic_choice <= 3\nTrue", + "code": ">>> 1 <= statistic_choice <= 3\nTrue", "hidden": false, "locked": false } @@ -1791,7 +1868,7 @@ { "cases": [ { - "code": ">>> # The statistic should be between 0 and 13 face cards for\n>>> # a sample size of 13\n>>> num_face = deck_simulation_and_statistic(13, deck_model_probabilities)\n>>> 0 <= num_face <= 13\nTrue", + "code": ">>> num_face = deck_simulation_and_statistic(13, deck_model_probabilities)\n>>> 0 <= num_face <= 13\nTrue", "hidden": false, "locked": false } @@ -1813,12 +1890,12 @@ { "cases": [ { - "code": ">>> # There should be exactly as many elements in deck_statistics\n>>> # as the number 'repetitions'\n>>> len(deck_statistics) == repetitions\nTrue", + "code": ">>> len(deck_statistics) == repetitions\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Each element of deck_statistics should be between 0\n>>> # and 13 inclusive\n>>> all([0 <= k <= 13 for k in deck_statistics])\nTrue", + "code": ">>> all([0 <= k <= 13 for k in deck_statistics])\nTrue", "hidden": false, "locked": false } diff --git a/hw/hw07/hw07.ipynb 
b/hw/hw07/hw07.ipynb index 79c27c9..e71d8c3 100644 --- a/hw/hw07/hw07.ipynb +++ b/hw/hw07/hw07.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -105,7 +105,10 @@ { "cell_type": "markdown", "id": "cell-hw07-6", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** Given the information above, what was the sample size for the data, and what was the percentage of people who got **Vaccine 1?** **(4 points)**\n", "\n", @@ -144,7 +147,10 @@ { "cell_type": "markdown", "id": "cell-hw07-9", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -164,7 +170,10 @@ { "cell_type": "markdown", "id": "cell-hw07-11", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -186,7 +195,10 @@ { "cell_type": "markdown", "id": "cell-hw07-13", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -227,7 +239,10 @@ { "cell_type": "markdown", "id": "cell-hw07-16", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.5.** Using your answer from Questions 1.1 and 1.4, find the observed value of the test statistic and assign it to the variable `observed_statistic`. **(4 points)**\n" ] @@ -261,7 +276,10 @@ { "cell_type": "markdown", "id": "cell-hw07-19", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.6.** In order to perform this hypothesis test, you must simulate the test statistic. From the four options below, pick the assumption that is needed for this simulation. 
Assign `assumption_needed` to an integer corresponding to the assumption. **(4 points)**\n", "\n", @@ -300,7 +318,10 @@ { "cell_type": "markdown", "id": "cell-hw07-22", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -336,7 +357,10 @@ { "cell_type": "markdown", "id": "cell-hw07-24", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n" @@ -358,7 +382,10 @@ { "cell_type": "markdown", "id": "cell-hw07-26", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.8.** Using `simulated_statistics`, `observed_statistic`, and `num_simulations`, find the empirical p-value based on the simulation. **(4 points)**\n" ] @@ -392,7 +419,10 @@ { "cell_type": "markdown", "id": "cell-hw07-29", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.9.** Assign `correct_doctor` to the number corresponding to the correct statement below. Use the 5% cutoff for the p-value. **(4 points)**\n", "\n", @@ -516,7 +546,10 @@ { "cell_type": "markdown", "id": "cell-hw07-39", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -539,7 +572,10 @@ { "cell_type": "markdown", "id": "cell-hw07-41", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -580,7 +616,10 @@ { "cell_type": "markdown", "id": "cell-hw07-44", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.3.** Create an array called `simulated_tvds` that contains 10,000 simulated values under the null hypothesis. Assume that the original sample consisted of 1,000 individuals. 
**(4 points)**\n", "\n", @@ -637,7 +676,10 @@ { "cell_type": "markdown", "id": "cell-hw07-49", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.4.** Use your simulated statistics to calculate the p-value of your test. Make sure that this number is consistent with what you observed in the histogram above. **(4 points)**\n" ] @@ -671,7 +713,10 @@ { "cell_type": "markdown", "id": "cell-hw07-52", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -689,7 +734,10 @@ { "cell_type": "markdown", "id": "cell-hw07-54", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -712,7 +760,10 @@ { "cell_type": "markdown", "id": "cell-hw07-56", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.** How many females were there in our sample? Please use the provided skeleton code. **(4 points)**\n", "\n", @@ -748,7 +799,10 @@ { "cell_type": "markdown", "id": "cell-hw07-59", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.2.** Complete the cell below so that `avg_male_vs_female` evaluates to `True` if the sampled males are older than the sampled females on average, and `False` otherwise. Use Python code to achieve this. **(4 points)**\n" ] @@ -784,7 +838,10 @@ { "cell_type": "markdown", "id": "cell-hw07-62", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.3.** The data scientists want to use the data to test whether males are older than females\u2014or, in other words, whether the ages of the two groups have the same distribution. One of the following statements is their null hypothesis and another is their alternative hypothesis. 
Assign `null_statement_number` and `alternative_statement_number` to the numbers of the correct statements in the code cell below. **(4 points)**\n", "\n", @@ -825,7 +882,10 @@ { "cell_type": "markdown", "id": "cell-hw07-65", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.4.** The data scientists have decided to use a permutation test. Assign `permutation_test_reason` to the number corresponding to the reason they made this choice. **(4 points)**\n", "\n", @@ -863,7 +923,10 @@ { "cell_type": "markdown", "id": "cell-hw07-68", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.5.** To test their hypotheses, the data scientists have followed our textbook's advice and chosen a test statistic where the following statement is true: Large values of the test statistic favor the alternative hypothesis.\n", "\n", @@ -902,7 +965,10 @@ { "cell_type": "markdown", "id": "cell-hw07-71", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.6.** Complete the cell below so that `observed_statistic_ab` evaluates to the observed value of the data scientists' test statistic. Use as many lines of code as you need, and remember that you can use any quantity, table, or array that you created earlier. **(4 points)**\n" ] @@ -936,7 +1002,10 @@ { "cell_type": "markdown", "id": "cell-hw07-74", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.7.** Assign `shuffled_labels` to an array of shuffled male/female labels. The rest of the code puts the array in a table along with the data in `sampled_ages`. 
**(4 points)**\n", "\n", @@ -973,7 +1042,10 @@ { "cell_type": "markdown", "id": "cell-hw07-77", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.8.** The comparison below uses the array `shuffled_labels` from Question 3.7 and the count `num_females` from Question 3.1.\n", "\n", @@ -1015,7 +1087,10 @@ { "cell_type": "markdown", "id": "cell-hw07-80", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.9.** Define a function `simulate_one_statistic` that takes no arguments and returns one simulated value of the test statistic. We've given you a skeleton, but feel free to approach this question in a way that makes sense to you. Use as many lines of code as you need. Refer to the code you have previously written in this problem, as you might be able to re-use some of it. **(4 points)**\n" ] @@ -1071,7 +1146,10 @@ { "cell_type": "markdown", "id": "cell-hw07-85", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.10.** Complete the cell to simulate 4,000 values of the statistic. We have included the code that draws the empirical distribution of the statistic and shows the value of `observed_statistic_ab` from Question 3.6. Feel free to use as many lines of code as you need. **(4 points)**\n", "\n", @@ -1115,7 +1193,10 @@ { "cell_type": "markdown", "id": "cell-hw07-88", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.11.** Use the simulation to find an empirical approximation to the p-value. Assign `p_val` to the appropriate p-value from this simulation. Then, assign `conclusion` to either `null_hyp` or `alt_hyp`. 
**(4 points)** \n", "\n", @@ -1222,7 +1303,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.0" + "version": "3.11.0" }, "otter": { "OK_FORMAT": true, @@ -1281,7 +1362,7 @@ "locked": false }, { - "code": ">>> any((valid_test_stat == x for x in np.arange(1,5)))\nTrue", + "code": ">>> any((valid_test_stat == x for x in np.arange(1, 5)))\nTrue", "hidden": false, "locked": false } @@ -1389,7 +1470,7 @@ "locked": false }, { - "code": ">>> any((correct_doctor == x for x in (1,2)))\nTrue", + "code": ">>> any((correct_doctor == x for x in (1, 2)))\nTrue", "hidden": false, "locked": false } @@ -1450,12 +1531,12 @@ "locked": false }, { - "code": ">>> # Make sure that your test statistic is not always the same value \n>>> len(np.unique(simulated_tvds)) != 1\nTrue", + "code": ">>> len(np.unique(simulated_tvds)) != 1\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Make sure you're simulating under the null hypothesis\n>>> np.mean(simulated_tvds) < 0.1\nTrue", + "code": ">>> np.mean(simulated_tvds) < 0.1\nTrue", "hidden": false, "locked": false } @@ -1607,12 +1688,12 @@ "locked": false }, { - "code": ">>> any(null_statement_number == x for x in np.arange(1,7))\nTrue", + "code": ">>> any((null_statement_number == x for x in np.arange(1, 7)))\nTrue", "hidden": false, "locked": false }, { - "code": ">>> any(alternative_statement_number == x for x in np.arange(1,7))\nTrue", + "code": ">>> any((alternative_statement_number == x for x in np.arange(1, 7)))\nTrue", "hidden": false, "locked": false }, @@ -1644,7 +1725,7 @@ "locked": false }, { - "code": ">>> any(permutation_test_reason == x for x in np.arange(1,4))\nTrue", + "code": ">>> any((permutation_test_reason == x for x in np.arange(1, 4)))\nTrue", "hidden": false, "locked": false } @@ -1671,7 +1752,7 @@ "locked": false }, { - "code": ">>> any(correct_test_stat == x for x in np.arange(1,3))\nTrue", + "code": ">>> any((correct_test_stat == x for x in np.arange(1, 
3)))\nTrue", "hidden": false, "locked": false } @@ -1726,7 +1807,7 @@ "locked": false }, { - "code": ">>> original_with_shuffled_labels.labels == (\"Gender\", \"Age\", \"Shuffled Label\")\nTrue", + "code": ">>> original_with_shuffled_labels.labels == ('Gender', 'Age', 'Shuffled Label')\nTrue", "hidden": false, "locked": false }, @@ -1758,7 +1839,7 @@ "locked": false }, { - "code": ">>> any(correct_q8 == x for x in np.arange(1,4))\nTrue", + "code": ">>> any((correct_q8 == x for x in np.arange(1, 4)))\nTrue", "hidden": false, "locked": false } diff --git a/hw/hw08/hw08.ipynb b/hw/hw08/hw08.ipynb index cbf9dfc..de70053 100644 --- a/hw/hw08/hw08.ipynb +++ b/hw/hw08/hw08.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -127,7 +127,10 @@ { "cell_type": "markdown", "id": "cell-hw08-8", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** Complete the function `one_resampled_percentage` below. It should return Imm Thai's ***percentage*** of votes after taking the original table (`tbl`) and performing one bootstrap sample of it. Reminder that a percentage is between 0 and 100. **(9 Points)**\n", "\n", @@ -167,7 +170,10 @@ { "cell_type": "markdown", "id": "cell-hw08-11", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.** Complete the `percentages_in_resamples` function such that it simulates and returns an array of 2022 elements, where each element represents a bootstrapped estimate of the percentage of voters who will vote for Imm Thai. You should use the `one_resampled_percentage` function you wrote above. 
**(9 Points)**\n" ] @@ -224,7 +230,10 @@ { "cell_type": "markdown", "id": "cell-hw08-16", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.3.** Using the array `resampled_percentages`, find the values at the two edges of the middle 95% of the bootstrapped percentage estimates. Compute the lower and upper ends of the interval, named `imm_lower_bound` and `imm_upper_bound` respectively. **(9 Points)**\n", "\n", @@ -262,7 +271,10 @@ { "cell_type": "markdown", "id": "cell-hw08-19", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.4.** The survey results seem to indicate that Imm Thai is beating all the other Thai restaurants among the voters. We would like to use confidence intervals to determine a range of likely values for Imm Thai's true lead over all the other restaurants combined. The calculation for Imm Thai's lead over Lucky House, Thai Temple, and Thai Basil combined is:\n", "\n", @@ -308,7 +320,10 @@ { "cell_type": "markdown", "id": "cell-hw08-22", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -337,7 +352,10 @@ { "cell_type": "markdown", "id": "cell-hw08-24", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -395,7 +413,10 @@ { "cell_type": "markdown", "id": "cell-hw08-29", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -420,7 +441,10 @@ { "cell_type": "markdown", "id": "cell-hw08-31", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -475,7 +499,10 @@ { "cell_type": "markdown", "id": "cell-hw08-35", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.3.** Say we use a 5% p-value cutoff. 
Do we reject the null, fail to reject the null, or are we unable to tell using the staff's confidence interval? **(9 Points)**\n", "\n", @@ -517,7 +544,10 @@ { "cell_type": "markdown", "id": "cell-hw08-38", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.4.** What if, instead, we use a p-value cutoff of 1%? Do we reject the null, fail to reject the null, or are we unable to tell using our staff confidence interval? **(9 Points)**\n", "\n", @@ -557,7 +587,10 @@ { "cell_type": "markdown", "id": "cell-hw08-41", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.5.** What if we use a p-value cutoff of 10%? Do we reject, fail to reject, or are we unable to tell using our confidence interval? **(9 Points)**\n", "\n", @@ -678,12 +711,12 @@ { "cases": [ { - "code": ">>> type(one_resampled_percentage(votes)) in set([float, np.float64]) \nTrue", + "code": ">>> type(one_resampled_percentage(votes)) in set([float, np.float64])\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Remember, the question asks for a percentage, not a proportion. \n>>> one_resampled_percentage(votes) <= 0\nFalse", + "code": ">>> one_resampled_percentage(votes) <= 0\nFalse", "hidden": false, "locked": false }, @@ -751,7 +784,7 @@ { "cases": [ { - "code": ">>> type(one_resampled_difference(votes)) in set([float, np.float64]) \nTrue", + "code": ">>> type(one_resampled_difference(votes)) in set([float, np.float64])\nTrue", "hidden": false, "locked": false } @@ -821,12 +854,12 @@ "locked": false }, { - "code": ">>> # Remember, we are using a 95% confidence interval of [1.2, 11.2].\n>>> # Our null hypothesis claims that Imm Thai's lead is 0.\n>>> # This falls outside of our 95% confidence interval.\n>>> # What can we conclude if we use a 5% p-value cutoff? 
\n>>> cutoff_five_percent == 3\nFalse", + "code": ">>> cutoff_five_percent == 3\nFalse", "hidden": false, "locked": false }, { - "code": ">>> # We are using a 95% confidence interval of [1.2, 11.2].\n>>> # Our null hypothesis claims that Imm Thai's lead is 0.\n>>> # This falls outside of our 95% confidence interval. \n>>> # 100% - 95% = 5%, which is our p-value cutoff.\n>>> # At the 5% level of significance, 0 doesn't seem like a plausible value\n>>> # for Imm Thai's lead. We would reject the null.\n>>> # Remember, there is a duality between confidence intervals and tests:\n>>> # if you are testing whether or not the true lead is a particular value x,\n>>> # and you use the 5% cutoff for the p-value, then you will reject the null\n>>> # if x is not in your 95% confidence interval for the lead.\n>>> cutoff_five_percent == 2\nFalse", + "code": ">>> cutoff_five_percent == 2\nFalse", "hidden": false, "locked": false } diff --git a/hw/hw09/hw09.ipynb b/hw/hw09/hw09.ipynb index 807b734..9c24533 100644 --- a/hw/hw09/hw09.ipynb +++ b/hw/hw09/hw09.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -99,7 +99,10 @@ { "cell_type": "markdown", "id": "cell-hw09-6", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -117,7 +120,10 @@ { "cell_type": "markdown", "id": "cell-hw09-8", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -137,7 +143,10 @@ { "cell_type": "markdown", "id": "cell-hw09-10", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -159,7 +168,10 @@ { "cell_type": "markdown", "id": "cell-hw09-12", - "metadata": {}, + "metadata": { + 
"deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -173,7 +185,10 @@ { "cell_type": "markdown", "id": "cell-hw09-13", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.** Assign `smallest` to the smallest number of students they should sample to ensure that a **95%** confidence interval for the parameter has a width of no more than 6% from left end to right end. **(6 points)**\n", "\n", @@ -211,7 +226,10 @@ { "cell_type": "markdown", "id": "cell-hw09-16", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -233,7 +251,10 @@ { "cell_type": "markdown", "id": "cell-hw09-18", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -339,7 +360,10 @@ { "cell_type": "markdown", "id": "cell-hw09-28", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -357,7 +381,10 @@ { "cell_type": "markdown", "id": "cell-hw09-30", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n" @@ -377,7 +404,10 @@ { "cell_type": "markdown", "id": "cell-hw09-32", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.5.** The cell above shows that the proportion that is at most 2.33 SDs above average in a normal distribution is 99%. Assign `option` to the right option to fill in the blank: **(6 points)**\n", "\n", @@ -486,7 +516,10 @@ { "cell_type": "markdown", "id": "cell-hw09-39", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.** Michelle wants to use 10,000 bootstrap resamples to compute a confidence interval for the proportion of all California voters who will vote Yes. 
\n", "\n", @@ -533,7 +566,10 @@ { "cell_type": "markdown", "id": "cell-hw09-42", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -551,7 +587,10 @@ { "cell_type": "markdown", "id": "cell-hw09-44", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -565,7 +604,10 @@ { "cell_type": "markdown", "id": "cell-hw09-45", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.3.** Using only the Central Limit Theorem and the numbers of Yes and No voters in our sample of 400, *algebraically* compute the predicted standard deviation of the `resample_yes_proportions` array. Assign this number to `approximate_sd`. **Do not access the data in `resample_yes_proportions` in any way.** **(6 points)**\n", "\n", @@ -608,7 +650,10 @@ { "cell_type": "markdown", "id": "cell-hw09-48", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.4.** Compute the standard deviation of the array `resample_yes_proportions`, which will act as an approximation to the true SD of the possible sample proportions. This will help verify whether your answer to question 3.3 is approximately correct. **(6 points)**\n" ] @@ -648,7 +693,10 @@ { "cell_type": "markdown", "id": "cell-hw09-51", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.5.** **Again, without accessing `resample_yes_proportions` in any way**, compute an approximate 95% confidence interval for the proportion of Yes voters in California. **(6 points)**\n", "\n", @@ -727,7 +775,10 @@ { "cell_type": "markdown", "id": "cell-hw09-56", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.6.** Does Michelle's sample size achieve the desired standard deviation of sample means? 
What SD would you achieve with a smaller sample size? A higher sample size?\n", "\n", @@ -772,7 +823,10 @@ { "cell_type": "markdown", "id": "cell-hw09-59", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.7.** Next, compute the SD of sample means that you would get from a smaller sample size. Ideally, you should pick a number that is significantly smaller, but any sample size smaller than Michelle's will do. **(5 points)**\n" ] @@ -813,7 +867,10 @@ { "cell_type": "markdown", "id": "cell-hw09-62", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.8.** Finally, compute the SD of sample means that you would get from a larger sample size. Here, a number that is significantly larger would make any difference more obvious, but any sample size larger than Michelle's will do. **(5 points)**\n", "\n" @@ -855,7 +912,10 @@ { "cell_type": "markdown", "id": "cell-hw09-65", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.9.** Based off of this, was Michelle's sample size approximately the minimum sufficient sample, given her assumption that the sample SD is the same as the population SD? Assign `min_sufficient` to `True` if 9,975 was indeed approximately the minimum sufficient sample, and `False` if it wasn't. 
**(4 points)**\n" ] @@ -1043,7 +1103,7 @@ "locked": false }, { - "code": ">>> abs(np.mean(resample_yes_proportions) - .525) < .025 and np.std(resample_yes_proportions) < .08\nTrue", + "code": ">>> abs(np.mean(resample_yes_proportions) - 0.525) < 0.025 and np.std(resample_yes_proportions) < 0.08\nTrue", "hidden": false, "locked": false } @@ -1190,7 +1250,7 @@ { "cases": [ { - "code": ">>> min_sufficient or not min_sufficient \nTrue", + "code": ">>> min_sufficient or not min_sufficient\nTrue", "hidden": false, "locked": false } diff --git a/hw/hw10/hw10.ipynb b/hw/hw10/hw10.ipynb index 571e80d..9e41b05 100644 --- a/hw/hw10/hw10.ipynb +++ b/hw/hw10/hw10.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -122,7 +122,10 @@ { "cell_type": "markdown", "id": "cell-hw10-8", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** Create a function `standard_units` that converts the values in the array `data` to standard units. **(5 points)**\n" ] @@ -156,7 +159,10 @@ { "cell_type": "markdown", "id": "cell-hw10-11", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.** Now, using the `standard_units` function, define the function `correlation` which computes the correlation between `x` and `y`. 
**(5 points)**\n" ] @@ -190,7 +196,10 @@ { "cell_type": "markdown", "id": "cell-hw10-14", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -222,7 +231,10 @@ { "cell_type": "markdown", "id": "cell-hw10-16", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -242,7 +254,10 @@ { "cell_type": "markdown", "id": "cell-hw10-18", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -289,7 +304,10 @@ { "cell_type": "markdown", "id": "cell-hw10-21", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.6.** Now suppose you want to go the other way and predict a triple jump distance given a vertical jump distance. What would the regression parameters of this linear model be? How do they compare to the regression parameters from the model where you were predicting vertical jump distance given a triple jump distance (in Question 1.5)? **(5 points)**\n", "\n", @@ -325,7 +343,10 @@ { "cell_type": "markdown", "id": "cell-hw10-24", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.7.** Let's use `parameters` (from Question 1.5) to predict what certain athletes' vertical jump heights would be given their triple jump distances. 
**(5 points)**\n", "\n", @@ -364,7 +385,10 @@ { "cell_type": "markdown", "id": "cell-hw10-27", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -384,7 +408,10 @@ { "cell_type": "markdown", "id": "cell-hw10-29", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -445,7 +472,10 @@ { "cell_type": "markdown", "id": "cell-hw10-33", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -472,7 +502,10 @@ { "cell_type": "markdown", "id": "cell-hw10-35", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -513,7 +546,10 @@ { "cell_type": "markdown", "id": "cell-hw10-38", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.3.** Write a function `eth_predictor` which takes an opening BTC price and predicts the opening price of ETH. Again, it will be helpful to use the function `parameter_estimates` that you defined earlier in this homework. **(5 points)**\n", "\n", @@ -555,7 +591,10 @@ { "cell_type": "markdown", "id": "cell-hw10-41", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -585,7 +624,10 @@ { "cell_type": "markdown", "id": "cell-hw10-43", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -605,7 +647,10 @@ { "cell_type": "markdown", "id": "cell-hw10-45", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -680,7 +725,10 @@ { "cell_type": "markdown", "id": "cell-hw10-52", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.** Compute the correlation coefficient between outcomes and spreads. 
**(5 points)**\n", "\n", @@ -717,7 +765,10 @@ { "cell_type": "markdown", "id": "cell-hw10-55", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.2.** Among games with a spread between 3.5 and 6.5 (including both 3.5 and 6.5), what was the average outcome? **(5 points)**\n" ] @@ -753,7 +804,10 @@ { "cell_type": "markdown", "id": "cell-hw10-58", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.3.** Use the function `parameter_estimates` that you defined earlier to compute the least-squares linear regression line that predicts outcomes from spreads, in original units. We have provided a two column table for you in the cell below with the first column representing `Spread` (x) and the second column representing `Outcome` (y), which you should use as an argument to the function. **(5 points)**\n" ] @@ -793,7 +847,10 @@ { "cell_type": "markdown", "id": "cell-hw10-61", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -811,7 +868,10 @@ { "cell_type": "markdown", "id": "cell-hw10-63", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -823,7 +883,10 @@ { "cell_type": "markdown", "id": "cell-hw10-64", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.5.** Define a function called `errors`. It should take three arguments:\n", "1. a table `tbl` like `spreads` (with the same column names and meanings, but not necessarily the same data)\n", @@ -864,7 +927,10 @@ { "cell_type": "markdown", "id": "cell-hw10-67", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.6.** Using `errors`, compute the errors for the line with slope `0.5` and intercept `25` on the `spreads` dataset. Name that array `outcome_errors`. 
Then, make a scatter plot of the errors. **(3 points)**\n", "\n", @@ -909,7 +975,10 @@ { "cell_type": "markdown", "id": "cell-hw10-71", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.7.** Define a function called `fit_line`. It should take a table like `spreads` (with the same column names and meanings) as its argument. It should return an array containing the slope (as the first element) and intercept (as the second element) of the least-squares regression line predicting outcome from spread for that table. **(3 points)**\n", "\n", @@ -957,7 +1026,10 @@ { "cell_type": "markdown", "id": "cell-hw10-74", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.8.** Use `fit_line` to fit a line to `spreads`, and assign the output to `best_line`. Assign the first and second elements in `best_line` to `best_line_slope` and `best_line_intercept`, respectively.\n", "\n", @@ -1008,7 +1080,10 @@ { "cell_type": "markdown", "id": "cell-hw10-77", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1026,7 +1101,10 @@ { "cell_type": "markdown", "id": "cell-hw10-79", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1109,17 +1187,17 @@ { "cases": [ { - "code": ">>> standard_units([1,2,3,4,5])\narray([-1.41421356, -0.70710678, 0. , 0.70710678, 1.41421356])", + "code": ">>> standard_units([1, 2, 3, 4, 5])\narray([-1.41421356, -0.70710678, 0. 
, 0.70710678, 1.41421356])", "hidden": false, "locked": false }, { - "code": ">>> np.isclose(np.mean(standard_units([1,2,3,4,5])), 0)\nTrue", + "code": ">>> np.isclose(np.mean(standard_units([1, 2, 3, 4, 5])), 0)\nTrue", "hidden": false, "locked": false }, { - "code": ">>> np.isclose(np.std(standard_units([1,2,3,4,5])), 0.9999999999999999)\nTrue", + "code": ">>> np.isclose(np.std(standard_units([1, 2, 3, 4, 5])), 0.9999999999999999)\nTrue", "hidden": false, "locked": false } @@ -1140,7 +1218,7 @@ { "cases": [ { - "code": ">>> np.isclose(correlation([1,2,3], [4,5,6]), 0.9999999999999999)\nTrue", + "code": ">>> np.isclose(correlation([1, 2, 3], [4, 5, 6]), 0.9999999999999999)\nTrue", "hidden": false, "locked": false } @@ -1168,7 +1246,7 @@ "locked": false }, { - "code": ">>> # Make sure your function is returning an array!\n>>> type(parameters) == np.ndarray\nTrue", + "code": ">>> type(parameters) == np.ndarray\nTrue", "hidden": false, "locked": false }, @@ -1195,7 +1273,7 @@ { "cases": [ { - "code": ">>> # Make sure regression_changes is a numpy array with 3 items!\n>>> len(regression_changes) == 3 and type(regression_changes) == np.ndarray\nTrue", + "code": ">>> len(regression_changes) == 3 and type(regression_changes) == np.ndarray\nTrue", "hidden": false, "locked": false }, @@ -1243,12 +1321,12 @@ { "cases": [ { - "code": ">>> # Correlation is a number between -1 and 1\n>>> -1 <= r <= 1\nTrue", + "code": ">>> -1 <= r <= 1\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # It appears that you implemented std_units, but did so incorrectly\n>>> standard_units(np.arange(5)) is None or np.allclose(standard_units(np.arange(5)), [-1.41421356, -0.70710678, 0, 0.70710678, 1.41421356])\nTrue", + "code": ">>> standard_units(np.arange(5)) is None or np.allclose(standard_units(np.arange(5)), [-1.41421356, -0.70710678, 0, 0.70710678, 1.41421356])\nTrue", "hidden": false, "locked": false } @@ -1386,7 +1464,7 @@ { "cases": [ { - "code": ">>> # If you're not passing 
this test, double check your errors function!\n>>> np.round(outcome_errors.item(0), 2) == -25.25\nTrue", + "code": ">>> np.round(outcome_errors.item(0), 2) == -25.25\nTrue", "hidden": false, "locked": false } diff --git a/hw/hw11/hw11.ipynb b/hw/hw11/hw11.ipynb index 82a64bf..cd910d6 100644 --- a/hw/hw11/hw11.ipynb +++ b/hw/hw11/hw11.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -183,7 +183,10 @@ { "cell_type": "markdown", "id": "cell-hw11-12", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -207,7 +210,10 @@ { "cell_type": "markdown", "id": "cell-hw11-14", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -256,7 +262,10 @@ { "cell_type": "markdown", "id": "cell-hw11-17", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.3.** Using the functions you just implemented, create a function called `fit_line`. It should take a table like `birds` and the column names associated to *x* and *y* as its arguments and return an *array* containing the slope and intercept of the regression line (in that order) that predicts the *y* column in the table using the *x* column. 
**(8 points)**\n" ] @@ -338,7 +347,10 @@ { "cell_type": "markdown", "id": "cell-hw11-23", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -383,7 +395,10 @@ { "cell_type": "markdown", "id": "cell-hw11-26", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -425,7 +440,10 @@ { "cell_type": "markdown", "id": "cell-hw11-29", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -443,7 +461,10 @@ { "cell_type": "markdown", "id": "cell-hw11-31", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -465,7 +486,10 @@ { "cell_type": "markdown", "id": "cell-hw11-33", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -485,7 +509,10 @@ { "cell_type": "markdown", "id": "cell-hw11-35", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.** Define the function `fitted_value`. It should take in four arguments:\n", "\n", @@ -542,7 +569,10 @@ { "cell_type": "markdown", "id": "cell-hw11-38", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.2.** Raymond, the resident Snowy Plover expert at Point Reyes, tells us that the egg he has been carefully observing has a weight of 9 grams. Using `fitted_value` above, assign the variable `experts_egg` to the predicted bird weight for Raymond's egg. 
\n" ] @@ -606,7 +636,10 @@ { "cell_type": "markdown", "id": "cell-hw11-43", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.3.** Define a function `compute_resampled_line` that takes in a table `tbl`and two column names, `x_col` and `y_col`, and returns an array containing the parameters of the best fit line (slope and intercept) for one bootstrapped resample of the table.\n" ] @@ -688,7 +721,10 @@ { "cell_type": "markdown", "id": "cell-hw11-48", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -733,7 +769,10 @@ { "cell_type": "markdown", "id": "cell-hw11-51", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -777,7 +816,10 @@ { "cell_type": "markdown", "id": "cell-hw11-54", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.6.** Set `plover_statements` to an array of integer(s) that correspond to statement(s) that are true. 
**(6 points)**\n", "\n", @@ -900,7 +942,7 @@ { "cases": [ { - "code": ">>> # Ensure your correlation function returns one number between -1 and 1\n>>> abs(correlation(Table().with_columns('a', np.random.normal(0, 1, 10),'b', np.random.normal(0, 1, 10)), \"a\", \"b\")) <= 1\nTrue", + "code": ">>> abs(correlation(Table().with_columns('a', np.random.normal(0, 1, 10), 'b', np.random.normal(0, 1, 10)), 'a', 'b')) <= 1\nTrue", "hidden": false, "locked": false } @@ -923,17 +965,17 @@ { "cases": [ { - "code": ">>> type(fit_line(Table().with_columns('x', make_array(0, 1), 'y', make_array(1, 3)), \"x\", \"y\")) == np.ndarray\nTrue", + "code": ">>> type(fit_line(Table().with_columns('x', make_array(0, 1), 'y', make_array(1, 3)), 'x', 'y')) == np.ndarray\nTrue", "hidden": false, "locked": false }, { - "code": ">>> len(fit_line(Table().with_columns('x', make_array(0, 1), 'y', make_array(1, 3)), \"x\", \"y\")) == 2\nTrue", + "code": ">>> len(fit_line(Table().with_columns('x', make_array(0, 1), 'y', make_array(1, 3)), 'x', 'y')) == 2\nTrue", "hidden": false, "locked": false }, { - "code": ">>> np.allclose(np.round(fit_line(Table().with_columns('x', make_array(0, 1), 'y', make_array(1, 3)), \"x\", \"y\"), 5), np.array([2, 1]))\nTrue", + "code": ">>> np.allclose(np.round(fit_line(Table().with_columns('x', make_array(0, 1), 'y', make_array(1, 3)), 'x', 'y'), 5), np.array([2, 1]))\nTrue", "hidden": false, "locked": false } @@ -1042,7 +1084,7 @@ { "cases": [ { - "code": ">>> len(compute_resampled_line(Table().with_columns('x', make_array(0, 1), 'y', make_array(1, 3)), \"x\", \"y\")) == 2\nTrue", + "code": ">>> len(compute_resampled_line(Table().with_columns('x', make_array(0, 1), 'y', make_array(1, 3)), 'x', 'y')) == 2\nTrue", "hidden": false, "locked": false } diff --git a/hw/hw12/hw12.ipynb b/hw/hw12/hw12.ipynb index 70314a3..9243cae 100644 --- a/hw/hw12/hw12.ipynb +++ b/hw/hw12/hw12.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip 
install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -183,7 +183,10 @@ { "cell_type": "markdown", "id": "cell-hw12-14", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** Let's begin implementing the k-Nearest Neighbors algorithm. Define the `distance` function, which takes in two arguments: an array of numerical features (`arr1`), and a different array of numerical features (`arr2`). The function should return the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) between the two arrays. Euclidean distance is often referred to as the straight-line distance formula that you may have learned previously. **(10 points)**\n", "\n" @@ -237,7 +240,10 @@ { "cell_type": "markdown", "id": "cell-hw12-18", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.** Next, let's split our dataset into a training set and a test set. Since `coordinates` has 100 rows, let's create a training set with the first 75 rows and a test set with the remaining 25 rows. Remember that assignment to each group should be random, so we should shuffle the table first. **(10 points)**\n", "\n", @@ -279,7 +285,10 @@ { "cell_type": "markdown", "id": "cell-hw12-21", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.3.** Assign `features` to an array of column names (strings) of the features from the `coordinates` table. **(10 points)**\n", "\n", @@ -318,7 +327,10 @@ { "cell_type": "markdown", "id": "cell-hw12-24", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.4.** Now define the `classify` function. 
This function should take in a `test_row` from a table like `test` and classify in using the k-Nearest Neighbors based on the correct `features` and the data in `train`. A refresher on k-Nearest Neighbors can be found [here](https://www.inferentialthinking.com/chapters/17/4/Implementing_the_Classifier.html). **(10 points)**\n", "\n", @@ -380,7 +392,10 @@ { "cell_type": "markdown", "id": "cell-hw12-27", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "
\\pagebreak
\n", "\n", @@ -429,7 +444,10 @@ { "cell_type": "markdown", "id": "cell-hw12-30", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.6.** There are 77 rows of Berkeley students and 23 rows of Stanford students in the `coordinates` table. If we used the entire `coordinates` table as the training set, what is the smallest value of k that would ensure that a k-Nearest Neighbor classifier would *always* predict Berkeley as the class? Assign the value to `k`. **(10 points)**\n" ] @@ -463,7 +481,10 @@ { "cell_type": "markdown", "id": "cell-hw12-33", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -483,7 +504,10 @@ { "cell_type": "markdown", "id": "cell-hw12-35", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -503,7 +527,10 @@ { "cell_type": "markdown", "id": "cell-hw12-37", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -526,7 +553,10 @@ { "cell_type": "markdown", "id": "cell-hw12-38", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.9.1.** Given that a row is in the test set, what is the probability that it corresponds to a Stanford student? Assign that probability to `prob_furd`. **(10 points)**\n" ] @@ -560,7 +590,10 @@ { "cell_type": "markdown", "id": "cell-hw12-41", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.9.2.** Given that a row is Stanford, what is the probability that the student is in the test set? Assign that probability to `prob_test`. 
**(10 points)**\n" ] @@ -805,12 +838,12 @@ { "cases": [ { - "code": ">>> # Double check that you have the correct number of rows for the `train` table.\n>>> train.num_rows == 75\nTrue", + "code": ">>> train.num_rows == 75\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Double check that you have the correct number of rows for the `test` table.\n>>> test.num_rows == 25\nTrue", + "code": ">>> test.num_rows == 25\nTrue", "hidden": false, "locked": false } @@ -873,7 +906,7 @@ { "cases": [ { - "code": ">>> sorted_coordinates = coordinates.sort(\"school\")\n>>> classify(sorted_coordinates.row(29), 3, train) == three_classify(sorted_coordinates.row(29))\nTrue", + "code": ">>> sorted_coordinates = coordinates.sort('school')\n>>> classify(sorted_coordinates.row(29), 3, train) == three_classify(sorted_coordinates.row(29))\nTrue", "hidden": false, "locked": false } @@ -894,7 +927,7 @@ { "cases": [ { - "code": ">>> # `k` should be an int\n>>> type(k) == int\nTrue", + "code": ">>> type(k) == int\nTrue", "hidden": false, "locked": false } @@ -921,7 +954,7 @@ "locked": false }, { - "code": ">>> # Should be a decimal, not a percentage\n>>> 0 <= prob_furd <= 1\nTrue", + "code": ">>> 0 <= prob_furd <= 1\nTrue", "hidden": false, "locked": false } @@ -948,7 +981,7 @@ "locked": false }, { - "code": ">>> # Should be a decimal, not a percentage\n>>> 0 <= prob_test <= 1\nTrue", + "code": ">>> 0 <= prob_test <= 1\nTrue", "hidden": false, "locked": false } diff --git a/lab/lab01/lab01.ipynb b/lab/lab01/lab01.ipynb index 7d77d3d..5558944 100644 --- a/lab/lab01/lab01.ipynb +++ b/lab/lab01/lab01.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -361,7 +361,10 @@ { "cell_type": "markdown", "id": "cell-lab01-29", - 
"metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "In standard math notation, the first expression is\n", "\n", @@ -549,7 +552,10 @@ { "cell_type": "markdown", "id": "cell-lab01-45", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.2.** Assign the name `seconds_in_a_decade` to the number of seconds between midnight January 1, 2010 and midnight January 1, 2020. Note that there are two leap years in this span of a decade. A non-leap year has 365 days and a leap year has 366 days.\n", "\n", @@ -670,7 +676,10 @@ { "cell_type": "markdown", "id": "cell-lab01-53", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.3.1.** Complete the code in the next cell to fill in the data from the experiment.\n", "\n", @@ -712,7 +721,10 @@ { "cell_type": "markdown", "id": "cell-lab01-56", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.3.2.** Now, complete the code in the next cell to compute the difference between the predicted and estimated distances (in meters) that the hammer fell in this experiment.\n", "\n", @@ -806,7 +818,10 @@ { "cell_type": "markdown", "id": "cell-lab01-62", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "## 4.1. Application: Computing Walking Distances\n", "Chunhua is on the corner of 7th Avenue and 42nd Street in Midtown Manhattan, and she wants to know far she'd have to walk to get to Gramercy School on the corner of 10th Avenue and 34th Street.\n", @@ -936,7 +951,10 @@ { "cell_type": "markdown", "id": "cell-lab01-70", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.1.** Say that Genghis's length is 16.7 inches. In the next cell, use `abs` to compute the absolute value of the difference between Genghis's length and the average cat length. 
Give that value the name `genghis_distance_from_average_in`.\n" ] @@ -1038,7 +1056,10 @@ { "cell_type": "markdown", "id": "cell-lab01-77", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Ok, your turn. \n", "\n", @@ -1171,17 +1192,17 @@ { "cases": [ { - "code": ">>> # It looks like you didn't give anything the name\n>>> # seconds_in_a_decade. Maybe there's a typo, or maybe you\n>>> # just need to run the cell below Question 3.2 where you defined\n>>> # seconds_in_a_decade. (Click that cell and then click the \"run\n>>> # cell\" button in the menu bar above.)\n>>> 'seconds_in_a_decade' in vars()\nTrue", + "code": ">>> 'seconds_in_a_decade' in vars()\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # It looks like you didn't change the cell to define\n>>> # seconds_in_a_decade appropriately. It should be a number,\n>>> # computed using Python's arithmetic. For example, this is\n>>> # almost right:\n>>> # seconds_in_a_decade = 10*365*24*60*60\n>>> seconds_in_a_decade != ...\nTrue", + "code": ">>> seconds_in_a_decade != ...\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # It looks like you didn't account for leap years.\n>>> # There were 2 leap years and 8 non-leap years in this period.\n>>> # Leap years have 366 days instead of 365.\n>>> seconds_in_a_decade != 315360000\nTrue", + "code": ">>> seconds_in_a_decade != 315360000\nTrue", "hidden": false, "locked": false }, @@ -1205,27 +1226,27 @@ { "cases": [ { - "code": ">>> # Fill in the row\n>>> # time = ...\n>>> # with something like:\n>>> # time = 4.567\n>>> # (except with the right number).\n>>> time != ...\nTrue", + "code": ">>> time != ...\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Read the text above the question to see what\n>>> # time should be. 
\n>>> round(time, 5)\n1.2", + "code": ">>> round(time, 5)\n1.2", "hidden": false, "locked": false }, { - "code": ">>> # Fill in the row\n>>> # estimated_distance_m = ...\n>>> # with something like:\n>>> # estimated_distance_m = 4.567\n>>> # (except with the right number). \n>>> estimated_distance_m != ...\nTrue", + "code": ">>> estimated_distance_m != ...\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Note that the units are meters, but the text used\n>>> # centimeters.\n>>> estimated_distance_m != 113\nTrue", + "code": ">>> estimated_distance_m != 113\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Read the text above the question to see what\n>>> # estimated_distance_m should be.\n>>> round(estimated_distance_m, 5)\n1.13", + "code": ">>> round(estimated_distance_m, 5)\n1.13", "hidden": false, "locked": false } @@ -1244,12 +1265,12 @@ { "cases": [ { - "code": ">>> # Fill in the line that currently says\n>>> # predicted_distance_m = ...\n>>> # in the cell above.\n>>> predicted_distance_m != ...\nTrue", + "code": ">>> predicted_distance_m != ...\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Compute predicted_distance_m using the formula in the text\n>>> # above. Hint: it should start with something like this:\n>>> # predicted_distance_m = (1/2) * gravity_constant ...\n>>> round(predicted_distance_m, 5)\n1.17022", + "code": ">>> round(predicted_distance_m, 5)\n1.17022", "hidden": false, "locked": false }, @@ -1273,12 +1294,12 @@ { "cases": [ { - "code": ">>> # Fill in the line\n>>> # num_avenues_away = ...\n>>> # in the cell above. \n>>> num_avenues_away != ...\nTrue", + "code": ">>> num_avenues_away != ...\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Remember to compute the absolute value of 7-10. 
Traveling \n>>> # \"-3 blocks\" doesn't really make sense!\n>>> num_avenues_away != -3\nTrue", + "code": ">>> num_avenues_away != -3\nTrue", "hidden": false, "locked": false }, diff --git a/lab/lab02/lab02.ipynb b/lab/lab02/lab02.ipynb index d9a535f..12bd071 100644 --- a/lab/lab02/lab02.ipynb +++ b/lab/lab02/lab02.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -106,7 +106,10 @@ { "cell_type": "markdown", "id": "cell-lab02-4", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** In the next cell, assign the name `new_year` to the larger number among the following two numbers:\n", "\n", @@ -193,7 +196,10 @@ { "cell_type": "markdown", "id": "cell-lab02-11", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.** The module `math` also provides the name `e` for the base of the natural logarithm, which is roughly 2.71. Compute $e^{\\pi}-\\pi$, giving it the name `near_twenty`.\n", "\n", @@ -230,7 +236,10 @@ { "cell_type": "markdown", "id": "cell-lab02-14", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "## 2.1. Accessing Functions\n", "\n", @@ -416,7 +425,10 @@ { "cell_type": "markdown", "id": "cell-lab02-29", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Notice that some of the values in this table are missing, as denoted by \"nan.\" This means either that the value is not available (e.g. if we don\u2019t know the market\u2019s street address) or not applicable (e.g. if the market doesn\u2019t have a street address). 
You'll also notice that the table has a large number of columns in it!\n", "\n", @@ -486,7 +498,10 @@ { "cell_type": "markdown", "id": "cell-lab02-34", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "### `select`\n", "\n", @@ -533,7 +548,10 @@ { "cell_type": "markdown", "id": "cell-lab02-37", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "### `drop`\n", "\n", @@ -610,7 +628,10 @@ { "cell_type": "markdown", "id": "cell-lab02-44", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "(The `descending=True` bit is called an *optional argument*. It has a default value of `False`, so when you explicitly tell the function `descending=True`, then the function will sort in descending order.)\n", "\n", @@ -707,7 +728,10 @@ { "cell_type": "markdown", "id": "cell-lab02-51", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.6.** Use `california_farmers_markets` to create a table called `berkeley_markets` containing farmers' markets in Berkeley, California." ] @@ -808,7 +832,10 @@ { "cell_type": "markdown", "id": "cell-lab02-58", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.1.** Create a table of movies released between 2010 and 2015 (inclusive) with ratings above 8. 
The table should only contain the columns `Title` and `Rating`, **in that order**.\n", "\n", @@ -847,7 +874,10 @@ { "cell_type": "markdown", "id": "cell-lab02-61", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.2.** Use `num_rows` (and arithmetic) to find the *proportion* of movies in the dataset that were released 1900-1999, and the *proportion* of movies in the dataset that were released in the year 2000 or later.\n", "\n", @@ -1129,7 +1159,7 @@ "locked": false }, { - "code": ">>> # HINT: Check the order of your table. \n>>> list(np.round(list(farmers_markets_locations_by_latitude.column('y').take(range(3))),6)) == [64.86275, 64.8459, 64.844414]\nTrue", + "code": ">>> list(np.round(list(farmers_markets_locations_by_latitude.column('y').take(range(3))), 6)) == [64.86275, 64.8459, 64.844414]\nTrue", "hidden": false, "locked": false } @@ -1182,7 +1212,7 @@ "locked": false }, { - "code": ">>> # Make sure your columns are in the right order!\n>>> # First column should be 'Title', second column should be 'Rating'\n>>> print(above_eight.sort(0).take([17]))\nTitle | Rating\nToy Story 3 | 8.3\n", + "code": ">>> print(above_eight.sort(0).take([17]))\nTitle | Rating\nToy Story 3 | 8.3\n", "hidden": false, "locked": false } diff --git a/lab/lab03/lab03.ipynb b/lab/lab03/lab03.ipynb index 61f1b20..b7ad969 100644 --- a/lab/lab03/lab03.ipynb +++ b/lab/lab03/lab03.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -107,7 +107,10 @@ { "cell_type": "markdown", "id": "cell-lab03-8", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** Yuri Gagarin was the first person to travel through outer space. 
When he emerged from his capsule upon landing on Earth, he [reportedly](https://en.wikiquote.org/wiki/Yuri_Gagarin) had the following conversation with a woman and girl who saw the landing:\n", "\n", @@ -225,7 +228,10 @@ { "cell_type": "markdown", "id": "cell-lab03-17", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.1.** Use `replace` to transform the string `'hitchhiker'` into `'matchmaker'`. Assign your result to `new_word`.\n" ] @@ -333,7 +339,10 @@ { "cell_type": "markdown", "id": "cell-lab03-27", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Suppose you're writing a program that looks for dates in a text, and you want your program to find the amount of time that elapsed between two years it has identified. It doesn't make sense to subtract two texts, but you can first convert the text containing the years into numbers.\n", "\n", @@ -378,7 +387,10 @@ { "cell_type": "markdown", "id": "cell-lab03-30", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "## 1.3. Passing strings to functions\n", "\n", @@ -478,7 +490,10 @@ { "cell_type": "markdown", "id": "cell-lab03-37", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.1.** Make an array containing the numbers 0, 1, -1, and $\\pi$, in that order. Name it `interesting_numbers`. \n", "\n", @@ -515,7 +530,10 @@ { "cell_type": "markdown", "id": "cell-lab03-40", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.2.** Make an array containing the five strings `\"Hello\"`, `\",\"`, `\" \"`, `\"world\"`, and `\"!\"`. (The third one is a single space inside quotes.) 
Name it `hello_world_components`.\n", "\n", @@ -591,7 +609,10 @@ { "cell_type": "markdown", "id": "cell-lab03-46", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.3.** Import `numpy` as `np` and then use `np.arange` to create an array with the multiples of 99 from 0 up to (**and including**) 9999. (So its elements are 0, 99, 198, 297, etc.)\n" ] @@ -743,7 +764,10 @@ { "cell_type": "markdown", "id": "cell-lab03-59", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.2.1.** Set `population_1973` to the world population in 1973, by getting the appropriate element from `population_amounts` using `item`.\n" ] @@ -813,7 +837,10 @@ { "cell_type": "markdown", "id": "cell-lab03-64", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "But this is tedious and doesn't really take advantage of the fact that we are using a computer.\n", "\n", @@ -980,7 +1007,10 @@ { "cell_type": "markdown", "id": "cell-lab03-77", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.** In the cell below, we've created 2 arrays. Using the steps above, assign `top_10_movies` to a table that has two columns called \"Name\" and \"Rating\", which hold `top_10_movie_names` and `top_10_movie_ratings` respectively.\n" ] @@ -1032,7 +1062,10 @@ { "cell_type": "markdown", "id": "cell-lab03-80", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Loading a table from a file\n", "\n", @@ -1155,7 +1188,10 @@ { "cell_type": "markdown", "id": "cell-lab03-90", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.1.** Check out the `population` table from section 2 of this lab. Compute the year when the world population first went above 6 billion. 
Assign the year to `year_population_crossed_6_billion`.\n" ] @@ -1190,7 +1226,10 @@ { "cell_type": "markdown", "id": "cell-lab03-93", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.2.** Find the average rating for movies released before the year 2000 and the average rating for movies released in the year 2000 or after for the movies in `imdb`.\n", "\n", @@ -1229,7 +1268,10 @@ { "cell_type": "markdown", "id": "cell-lab03-96", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.3.** Here's a challenge: Find the number of movies that came out in *even* years.\n", "\n", @@ -1290,7 +1332,10 @@ { "cell_type": "markdown", "id": "cell-lab03-101", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "##### Temperature readings\n", "NOAA (the US National Oceanic and Atmospheric Administration) operates weather stations that measure surface temperatures at different sites around the United States. The hourly readings are [publicly available](http://www.ncdc.noaa.gov/qclcd/QCLCD?prior=N).\n", @@ -1360,7 +1405,10 @@ { "cell_type": "markdown", "id": "cell-lab03-106", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1396,7 +1444,10 @@ { "cell_type": "markdown", "id": "cell-lab03-109", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.2.2.** The array `more_restaurant_bills` contains 100,000 bills! Compute the total charge for each one in `more_restaurant_bills`. How is your code different?\n" ] @@ -1431,7 +1482,10 @@ { "cell_type": "markdown", "id": "cell-lab03-112", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "The function `sum` takes a single array of numbers as its argument. 
It returns the sum of all the numbers in that array (so it returns a single number, not an array).\n", "\n", @@ -1467,7 +1521,10 @@ { "cell_type": "markdown", "id": "cell-lab03-115", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.2.4.** The powers of 2 ($2^0 = 1$, $2^1 = 2$, $2^2 = 4$, etc) arise frequently in computer science. (For example, you may have noticed that storage on smartphones or USBs come in powers of 2, like 16 GB, 32 GB, or 64 GB.) Use `np.arange` and the exponentiation operator `**` to compute the first 30 powers of 2, starting from `2^0`.\n", "\n", @@ -1716,7 +1773,7 @@ "locked": false }, { - "code": ">>> import numpy as np\n>>> all(hello_world_components == np.array([\"Hello\", \",\", \" \", \"world\", \"!\"]))\nTrue", + "code": ">>> import numpy as np\n>>> all(hello_world_components == np.array(['Hello', ',', ' ', 'world', '!']))\nTrue", "hidden": false, "locked": false } @@ -1745,7 +1802,7 @@ "locked": false }, { - "code": ">>> all(multiples_of_99 == np.arange(0, 9999+99, 99))\nTrue", + "code": ">>> all(multiples_of_99 == np.arange(0, 9999 + 99, 99))\nTrue", "hidden": false, "locked": false } @@ -1783,12 +1840,12 @@ { "cases": [ { - "code": ">>> # It looks like you're not making an array. 
You shouldn't need to\n>>> # use .item anywhere in your solution.\n>>> import numpy as np\n>>> type(population_rounded) == np.ndarray\nTrue", + "code": ">>> import numpy as np\n>>> type(population_rounded) == np.ndarray\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # You made an array, but it doesn't have the right numbers in it.\n>>> import numpy as np\n>>> sum(population_rounded) == 312868000000\nTrue", + "code": ">>> import numpy as np\n>>> sum(population_rounded) == 312868000000\nTrue", "hidden": false, "locked": false } @@ -1841,7 +1898,7 @@ "locked": false }, { - "code": ">>> imdb.select('Votes', 'Rating', 'Title', 'Year', 'Decade').sort(0).take(range(2,5))\nVotes | Rating | Title | Year | Decade\n31003 | 8.1 | Le salaire de la peur | 1953 | 1950\n32385 | 8 | La battaglia di Algeri | 1966 | 1960\n35983 | 8.1 | The Best Years of Our Lives | 1946 | 1940", + "code": ">>> imdb.select('Votes', 'Rating', 'Title', 'Year', 'Decade').sort(0).take(range(2, 5))\nVotes | Rating | Title | Year | Decade\n31003 | 8.1 | Le salaire de la peur | 1953 | 1950\n32385 | 8 | La battaglia di Algeri | 1966 | 1960\n35983 | 8.1 | The Best Years of Our Lives | 1946 | 1940", "hidden": false, "locked": false } @@ -1860,7 +1917,7 @@ { "cases": [ { - "code": ">>> # Oops, your name is assigned to the wrong data type!\n>>> import numpy as np\n>>> type(year_population_crossed_6_billion) == int or type(year_population_crossed_6_billion) == np.int64\nTrue", + "code": ">>> import numpy as np\n>>> type(year_population_crossed_6_billion) == int or type(year_population_crossed_6_billion) == np.int64\nTrue", "hidden": false, "locked": false }, @@ -1884,12 +1941,12 @@ { "cases": [ { - "code": ">>> abs(before_2000 - 8.2783625730994146) < 1e-5\nTrue", + "code": ">>> abs(before_2000 - 8.278362573099415) < 1e-05\nTrue", "hidden": false, "locked": false }, { - "code": ">>> abs(after_or_in_2000 - 8.2379746835443033) < 1e-5\nTrue", + "code": ">>> abs(after_or_in_2000 - 8.237974683544303) < 
1e-05\nTrue", "hidden": false, "locked": false } @@ -1961,12 +2018,12 @@ { "cases": [ { - "code": ">>> # It looks like you're not making an array. You shouldn't need to\n>>> # use .item anywhere in your solution.\n>>> import numpy as np\n>>> type(total_charges) == np.ndarray\nTrue", + "code": ">>> import numpy as np\n>>> type(total_charges) == np.ndarray\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # You made an array, but it doesn't have the right numbers in it.\n>>> import numpy as np\n>>> sum(abs(total_charges - np.array([24.144, 47.88, 37.212]))) < 1e-6\nTrue", + "code": ">>> import numpy as np\n>>> sum(abs(total_charges - np.array([24.144, 47.88, 37.212]))) < 1e-06\nTrue", "hidden": false, "locked": false } @@ -1985,12 +2042,12 @@ { "cases": [ { - "code": ">>> # It looks like you're not making an array. You shouldn't need to\n>>> # use .item anywhere in your solution.\n>>> import numpy as np\n>>> type(more_total_charges) == np.ndarray\nTrue", + "code": ">>> import numpy as np\n>>> type(more_total_charges) == np.ndarray\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # You made an array, but it doesn't have the right numbers in it.\n>>> import numpy as np\n>>> sum(abs(more_total_charges - 1.2 * more_restaurant_bills)) < 1e-6\nTrue", + "code": ">>> import numpy as np\n>>> sum(abs(more_total_charges - 1.2 * more_restaurant_bills)) < 1e-06\nTrue", "hidden": false, "locked": false } diff --git a/lab/lab04/lab04.ipynb b/lab/lab04/lab04.ipynb index 92f0ac8..ef6e19f 100644 --- a/lab/lab04/lab04.ipynb +++ b/lab/lab04/lab04.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -183,7 +183,10 @@ { "cell_type": "markdown", "id": "cell-lab04-12", - "metadata": {}, + "metadata": { + 
"deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** Define `to_percentage` in the cell below. Call your function to convert the proportion .2 to a percentage. Name that percentage `twenty_percent`.\n" ] @@ -249,7 +252,10 @@ { "cell_type": "markdown", "id": "cell-lab04-17", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Like you\u2019ve done with built-in functions in previous labs (max, abs, etc.), you can pass in named values as arguments to your function.\n", "\n", @@ -314,7 +320,10 @@ { "cell_type": "markdown", "id": "cell-lab04-22", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "##### Calls on calls on calls\n", "Just as you write a series of lines to build up a complex computation, it's useful to define a series of small functions that build on each other. Since you can write any code inside a function's body, you can call other functions you've written.\n", @@ -384,7 +393,10 @@ { "cell_type": "markdown", "id": "cell-lab04-27", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "After writing this, you realize you also wanted to print out the 2nd and 3rd-highest years. Instead of copying your code, you decide to put it in a function. 
Since the rank varies, you make that an argument to your function.\n", "\n", @@ -564,9 +576,20 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "cell-lab04-43", "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "cell-lab04-44", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.** Use the `type` function and set `total_pay_type` to the type of the first value in the \"Total Pay\" column.\n" ] @@ -574,7 +597,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-44", + "id": "cell-lab04-45", "metadata": { "tags": [] }, @@ -587,7 +610,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-45", + "id": "cell-lab04-46", "metadata": { "deletable": false, "editable": false @@ -599,8 +622,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-46", - "metadata": {}, + "id": "cell-lab04-47", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.2.** You should have found that the values in the `Total Pay` column are strings. It doesn't make sense to take the average of string values, so we need to convert them to numbers if we want to do this. Extract the first value in `Total Pay`. It's Mark Hurd's pay in 2015, in *millions* of dollars. Call it `mark_hurd_pay_string`.\n" ] @@ -608,7 +634,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-47", + "id": "cell-lab04-48", "metadata": { "tags": [] }, @@ -621,7 +647,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-48", + "id": "cell-lab04-49", "metadata": { "deletable": false, "editable": false @@ -633,8 +659,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-49", - "metadata": {}, + "id": "cell-lab04-50", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.3.** Convert `mark_hurd_pay_string` to a number of *dollars*. 
\n", "\n", @@ -647,7 +676,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-50", + "id": "cell-lab04-51", "metadata": { "tags": [] }, @@ -660,7 +689,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-51", + "id": "cell-lab04-52", "metadata": { "deletable": false, "editable": false @@ -672,8 +701,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-52", - "metadata": {}, + "id": "cell-lab04-53", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "To compute the average pay, we need to do this for every CEO. But that looks like it would involve copying this code 102 times.\n", "\n", @@ -689,7 +721,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-53", + "id": "cell-lab04-54", "metadata": { "tags": [] }, @@ -703,7 +735,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-54", + "id": "cell-lab04-55", "metadata": { "deletable": false, "editable": false @@ -715,7 +747,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-55", + "id": "cell-lab04-56", "metadata": {}, "source": [ "Running that cell doesn't convert any particular pay string. 
Instead, it creates a function called `convert_pay_string_to_number` that can convert *any* string with the right format to a number representing millions of dollars.\n", @@ -726,7 +758,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-56", + "id": "cell-lab04-57", "metadata": {}, "outputs": [], "source": [ @@ -736,7 +768,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-57", + "id": "cell-lab04-58", "metadata": {}, "outputs": [], "source": [ @@ -746,7 +778,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-58", + "id": "cell-lab04-59", "metadata": {}, "outputs": [], "source": [ @@ -756,7 +788,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-59", + "id": "cell-lab04-60", "metadata": {}, "source": [ "So, what have we gained by defining the `convert_pay_string_to_number` function? \n", @@ -765,7 +797,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-60", + "id": "cell-lab04-61", "metadata": {}, "source": [ "## 3. `apply`ing functions\n", @@ -778,7 +810,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-61", + "id": "cell-lab04-62", "metadata": {}, "outputs": [], "source": [ @@ -788,7 +820,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-62", + "id": "cell-lab04-63", "metadata": {}, "source": [ "The old name for `max` is still around:" @@ -797,7 +829,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-63", + "id": "cell-lab04-64", "metadata": {}, "outputs": [], "source": [ @@ -806,7 +838,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-64", + "id": "cell-lab04-65", "metadata": {}, "source": [ "Try just writing `max` or `our_name_for_max` (or the name of any other function) in a cell, and run that cell. Python will print out a (very brief) description of the function." 
@@ -815,7 +847,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-65", + "id": "cell-lab04-66", "metadata": {}, "outputs": [], "source": [ @@ -824,7 +856,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-66", + "id": "cell-lab04-67", "metadata": {}, "source": [ "Now try writing `max?` or `our_name_for_max?` (or the name of any other function) in a cell, and run that cell. A information box should show up at the bottom of your screen a longer description of the function\n", @@ -835,7 +867,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-67", + "id": "cell-lab04-68", "metadata": {}, "outputs": [], "source": [ @@ -844,7 +876,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-68", + "id": "cell-lab04-69", "metadata": {}, "source": [ "Let's look at what happens when we set `max`to a non-function value. You'll notice that a TypeError will occur when you try calling `max`. Things like integers and strings are not callable like this: `\"hello\"(2)`, which is calling a string and will error. Look out for any functions that might have been renamed when you encounter this type of error" @@ -853,7 +885,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-69", + "id": "cell-lab04-70", "metadata": { "tags": [ "raises-exception" @@ -868,7 +900,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-70", + "id": "cell-lab04-71", "metadata": {}, "outputs": [], "source": [ @@ -879,7 +911,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-71", + "id": "cell-lab04-72", "metadata": {}, "source": [ "Why is this useful? Since functions are just values, it's possible to pass them as arguments to other functions. Here's a simple but not-so-practical example: we can make an array of functions." 
@@ -888,7 +920,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-72", + "id": "cell-lab04-73", "metadata": {}, "outputs": [], "source": [ @@ -897,8 +929,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-73", - "metadata": {}, + "id": "cell-lab04-74", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.** Make an array containing any 3 other functions you've seen. Call it `some_functions`.\n" ] @@ -906,7 +941,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-74", + "id": "cell-lab04-75", "metadata": { "tags": [] }, @@ -919,7 +954,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-75", + "id": "cell-lab04-76", "metadata": { "deletable": false, "editable": false @@ -931,7 +966,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-76", + "id": "cell-lab04-77", "metadata": {}, "source": [ "Working with functions as values can lead to some funny-looking code. For example, see if you can figure out why the following code works. Check your explanation with a peer or a staff member." 
@@ -940,7 +975,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-77", + "id": "cell-lab04-78", "metadata": {}, "outputs": [], "source": [ @@ -949,7 +984,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-78", + "id": "cell-lab04-79", "metadata": {}, "source": [ "A more useful example of passing functions to other functions as arguments is the table method `apply`.\n", @@ -962,7 +997,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-79", + "id": "cell-lab04-80", "metadata": {}, "outputs": [], "source": [ @@ -971,8 +1006,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-80", - "metadata": {}, + "id": "cell-lab04-81", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Here's an illustration of what that did:\n", "\n", @@ -986,7 +1024,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-81", + "id": "cell-lab04-82", "metadata": { "tags": [] }, @@ -1002,7 +1040,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-82", + "id": "cell-lab04-83", "metadata": { "deletable": false, "editable": false @@ -1014,7 +1052,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-83", + "id": "cell-lab04-84", "metadata": {}, "source": [ "Now that we have all the pays as numbers, we can learn more about them through computation." 
@@ -1022,8 +1060,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-84", - "metadata": {}, + "id": "cell-lab04-85", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.3.** Compute the average total pay of the CEOs in the dataset.\n" ] @@ -1031,7 +1072,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-85", + "id": "cell-lab04-86", "metadata": { "tags": [] }, @@ -1044,7 +1085,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-86", + "id": "cell-lab04-87", "metadata": { "deletable": false, "editable": false @@ -1056,8 +1097,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-87", - "metadata": {}, + "id": "cell-lab04-88", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.4** Companies pay executives in a variety of ways: in cash, by granting stock or other equity in the company, or with ancillary benefits (like private jets). Compute the proportion of each CEO's pay that was cash. (Your answer should be an array of numbers, one for each CEO in the dataset.)\n", "\n", @@ -1067,7 +1111,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-88", + "id": "cell-lab04-89", "metadata": { "tags": [] }, @@ -1080,7 +1124,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-89", + "id": "cell-lab04-90", "metadata": { "deletable": false, "editable": false @@ -1092,7 +1136,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-90", + "id": "cell-lab04-91", "metadata": { "for_assignment_type": "solution" }, @@ -1104,8 +1148,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-91", - "metadata": {}, + "id": "cell-lab04-92", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Check out the `% Change` column in `compensation`. It shows the percentage increase in the CEO's pay from the previous year. For CEOs with no previous year on record, it instead says \"(No previous year)\". 
The values in this column are *strings*, not numbers, so like the `Total Pay` column, it's not usable without a bit of extra work.\n", "\n", @@ -1121,7 +1168,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-92", + "id": "cell-lab04-93", "metadata": { "tags": [] }, @@ -1152,7 +1199,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-93", + "id": "cell-lab04-94", "metadata": { "deletable": false, "editable": false @@ -1164,8 +1211,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-94", - "metadata": {}, + "id": "cell-lab04-95", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.6** Determine the average pay in 2014 of the CEOs that appear in the `with_previous_compensation` table. Assign this value to the variable `average_pay_2014`.\n" ] @@ -1173,7 +1223,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-95", + "id": "cell-lab04-96", "metadata": { "tags": [] }, @@ -1186,7 +1236,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-96", + "id": "cell-lab04-97", "metadata": { "deletable": false, "editable": false @@ -1198,7 +1248,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-97", + "id": "cell-lab04-98", "metadata": {}, "source": [ "## 4. Histograms\n", @@ -1214,7 +1264,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-98", + "id": "cell-lab04-99", "metadata": { "tags": [] }, @@ -1225,8 +1275,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-99", - "metadata": {}, + "id": "cell-lab04-100", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.2.** How many CEOs made more than $30 million in total pay? 
Find the value using code, then check that the value you found is consistent with what you see in the histogram.\n", "\n", @@ -1236,7 +1289,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-100", + "id": "cell-lab04-101", "metadata": { "tags": [] }, @@ -1249,7 +1302,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-101", + "id": "cell-lab04-102", "metadata": { "deletable": false, "editable": false @@ -1261,7 +1314,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-102", + "id": "cell-lab04-103", "metadata": { "tags": [] }, @@ -1285,7 +1338,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-103", + "id": "cell-lab04-104", "metadata": { "deletable": false, "editable": false @@ -1299,7 +1352,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab04-104", + "id": "cell-lab04-105", "metadata": { "deletable": false, "editable": false @@ -1312,7 +1365,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab04-105", + "id": "cell-lab04-106", "metadata": {}, "source": [ " " @@ -1335,7 +1388,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.9" + "version": "3.11.0" }, "otter": { "OK_FORMAT": true, @@ -1347,12 +1400,12 @@ { "cases": [ { - "code": ">>> # Make sure your function has the proper syntax!\n>>> to_percentage(.35) == 35.0\nTrue", + "code": ">>> to_percentage(0.35) == 35.0\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Make sure you are passing in the proper argument!\n>>> twenty_percent == 20.0\nTrue", + "code": ">>> twenty_percent == 20.0\nTrue", "hidden": false, "locked": false } @@ -1371,7 +1424,7 @@ { "cases": [ { - "code": ">>> num_non_vowels(\"Go bears!\") == 6\nTrue", + "code": ">>> num_non_vowels('Go bears!') == 6\nTrue", "hidden": false, "locked": false } @@ -1409,7 +1462,7 @@ { "cases": [ { - "code": ">>> # Make sure you are examining the values in the column, not the column itself\n>>> import numpy\n>>> total_pay_type != 
numpy.ndarray\nTrue", + "code": ">>> import numpy\n>>> total_pay_type != numpy.ndarray\nTrue", "hidden": false, "locked": false }, @@ -1424,7 +1477,7 @@ "locked": false }, { - "code": ">>> # Make sure to call the type function on a value in the column\n>>> total_pay_type != int\nTrue", + "code": ">>> total_pay_type != int\nTrue", "hidden": false, "locked": false } @@ -1462,17 +1515,17 @@ { "cases": [ { - "code": ">>> # Your answer should be a number\n>>> type(mark_hurd_pay) != str\nTrue", + "code": ">>> type(mark_hurd_pay) != str\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Don't forget to give your answer in dollars, not millions of \n>>> # Dollars! \n>>> mark_hurd_pay != 5325\nTrue", + "code": ">>> mark_hurd_pay != 5325\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Don't forget to give your answer in dollars, not millions of \n>>> # Dollars! \n>>> mark_hurd_pay == 53250000\nTrue", + "code": ">>> mark_hurd_pay == 53250000\nTrue", "hidden": false, "locked": false } @@ -1491,12 +1544,12 @@ { "cases": [ { - "code": ">>> convert_pay_string_to_number(\"$100 \") == 100000000.0\nTrue", + "code": ">>> convert_pay_string_to_number('$100 ') == 100000000.0\nTrue", "hidden": false, "locked": false }, { - "code": ">>> convert_pay_string_to_number(\"$23 \") == 23000000.0\nTrue", + "code": ">>> convert_pay_string_to_number('$23 ') == 23000000.0\nTrue", "hidden": false, "locked": false } @@ -1520,17 +1573,17 @@ "locked": false }, { - "code": ">>> # The first thing in your array may not be a function\n>>> callable(some_functions.item(0))\nTrue", + "code": ">>> callable(some_functions.item(0))\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # The second thing in your array may not be a function\n>>> callable(some_functions.item(1))\nTrue", + "code": ">>> callable(some_functions.item(1))\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # The third thing in your array may not be a function\n>>> 
callable(some_functions.item(2))\nTrue", + "code": ">>> callable(some_functions.item(2))\nTrue", "hidden": false, "locked": false } @@ -1549,12 +1602,12 @@ { "cases": [ { - "code": ">>> # You either didn't add the 'Total Pay ($)' column, \n>>> # or you mislabeled it\n>>> 'Total Pay ($)' in compensation.column_labels\nTrue", + "code": ">>> 'Total Pay ($)' in compensation.labels\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # You have the column in your table, but the values may be wrong\n>>> t = compensation.sort('Total Pay ($)', descending = True)\n>>> np.isclose(t.column('Total Pay ($)').item(0), 53250000.0)\nTrue", + "code": ">>> t = compensation.sort('Total Pay ($)', descending=True)\n>>> np.isclose(t.column('Total Pay ($)').item(0), 53250000.0)\nTrue", "hidden": false, "locked": false } @@ -1573,7 +1626,7 @@ { "cases": [ { - "code": ">>> import math\n>>> math.isclose(average_total_pay, 11558613.861386139, rel_tol = 0.1)\nTrue", + "code": ">>> import math\n>>> math.isclose(average_total_pay, 11558613.861386139, rel_tol=0.1)\nTrue", "hidden": false, "locked": false } @@ -1597,7 +1650,7 @@ "locked": false }, { - "code": ">>> import math\n>>> math.isclose(cash_proportion.item(0), 0.01784038, rel_tol = .001)\nTrue", + "code": ">>> import math\n>>> math.isclose(cash_proportion.item(0), 0.01784038, rel_tol=0.001)\nTrue", "hidden": false, "locked": false } @@ -1616,17 +1669,17 @@ { "cases": [ { - "code": ">>> # Make sure to remove the \"(No previous year)\" CEOs \n>>> \"(No previous year)\" not in with_previous_compensation.column(\"% Change\")\nTrue", + "code": ">>> '(No previous year)' not in with_previous_compensation.column('% Change')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> import math\n>>> # You have the column, but some of your values may be wrong\n>>> t = with_previous_compensation.sort(\"2014 Total Pay ($)\", descending = True)\n>>> value = t.column(\"2014 Total Pay ($)\").item(0)\n>>> math.isclose(value, 67700000.0, rel_tol 
= 0.01)\nTrue", + "code": ">>> import math\n>>> t = with_previous_compensation.sort('2014 Total Pay ($)', descending=True)\n>>> value = t.column('2014 Total Pay ($)').item(0)\n>>> math.isclose(value, 67700000.0, rel_tol=0.01)\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # You have the column, but your number of rows is off\n>>> with_previous_compensation.num_rows == 80\nTrue", + "code": ">>> with_previous_compensation.num_rows == 80\nTrue", "hidden": false, "locked": false } @@ -1645,7 +1698,7 @@ { "cases": [ { - "code": ">>> import math\n>>> math.isclose(average_pay_2014, 11794790.817048479, rel_tol = 0.01)\nTrue", + "code": ">>> import math\n>>> math.isclose(average_pay_2014, 11794790.817048479, rel_tol=0.01)\nTrue", "hidden": false, "locked": false } diff --git a/lab/lab05/lab05.ipynb b/lab/lab05/lab05.ipynb index fd5569f..2f764b6 100644 --- a/lab/lab05/lab05.ipynb +++ b/lab/lab05/lab05.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -215,7 +215,10 @@ { "cell_type": "markdown", "id": "cell-lab05-19", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1** Assume we took ten nachos at random, and stored the results in an array called `ten_nachos` as done below. Find the number of nachos with only cheese using code (do not hardcode the answer). \n", "\n", @@ -292,7 +295,10 @@ { "cell_type": "markdown", "id": "cell-lab05-23", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2** Complete the following conditional statement so that the string `'More please'` is assigned to the variable `say_please` if the number of nachos with cheese in `ten_nachos` is less than `5`. 
Use the if statement to do this (do not directly reassign the variable `say_please`). \n", "\n", @@ -332,7 +338,10 @@ { "cell_type": "markdown", "id": "cell-lab05-26", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.3** Write a function called `nacho_reaction` that returns a reaction (as a string) based on the type of nacho passed in as an argument. Use the table below to match the nacho type to the appropriate reaction.\n", "\n", @@ -381,7 +390,10 @@ { "cell_type": "markdown", "id": "cell-lab05-29", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.4** Create a table `ten_nachos_reactions` that consists of the nachos in `ten_nachos` as well as the reactions for each of those nachos. The columns should be called `Nachos` and `Reactions`.\n", "\n", @@ -419,7 +431,10 @@ { "cell_type": "markdown", "id": "cell-lab05-32", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.5** Using code, find the number of 'Wow!' reactions for the nachos in `ten_nachos_reactions`.\n" ] @@ -514,7 +529,10 @@ { "cell_type": "markdown", "id": "cell-lab05-41", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1** In the following cell, we've loaded the text of _Pride and Prejudice_ by Jane Austen, split it into individual words, and stored these words in an array `p_and_p_words`. 
Using a `for` loop, assign `longer_than_five` to the number of words in the novel that are more than 5 letters long.\n", "\n", @@ -583,7 +601,10 @@ { "cell_type": "markdown", "id": "cell-lab05-46", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.2** Using a simulation with 10,000 trials, assign `num_different` to the number of times, in 10,000 trials, that two words picked uniformly at random (with replacement) from Pride and Prejudice have different lengths. \n", "\n", @@ -706,7 +727,10 @@ { "cell_type": "markdown", "id": "cell-lab05-55", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1**. Create a function called `compute_statistics` that takes a table containing an \"Age\" column and a \"Salary\" column and:\n", "- Draws a histogram of ages\n", @@ -833,7 +857,10 @@ { "cell_type": "markdown", "id": "cell-lab05-65", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n" @@ -856,7 +883,10 @@ { "cell_type": "markdown", "id": "cell-lab05-67", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -876,7 +906,10 @@ { "cell_type": "markdown", "id": "cell-lab05-69", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.1** We can use `np.random.choice` to simulate multiple trials.\n", "\n", @@ -943,7 +976,10 @@ { "cell_type": "markdown", "id": "cell-lab05-75", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n" @@ -966,7 +1002,10 @@ { "cell_type": "markdown", "id": "cell-lab05-77", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1122,7 +1161,7 @@ { "cases": [ { - "code": ">>> # One or more of the reaction results could be incorrect\n>>> 
np.count_nonzero(ten_nachos_reactions.column('Reactions') == make_array('Meh.', 'Cheesy!', 'Wow!', 'Wow!', 'Cheesy!', 'Spicy!', 'Wow!', 'Meh.', 'Cheesy!', 'Wow!')) == 10\nTrue", + "code": ">>> np.count_nonzero(ten_nachos_reactions.column('Reactions') == make_array('Meh.', 'Cheesy!', 'Wow!', 'Wow!', 'Cheesy!', 'Spicy!', 'Wow!', 'Meh.', 'Cheesy!', 'Wow!')) == 10\nTrue", "hidden": false, "locked": false } @@ -1146,7 +1185,7 @@ "locked": false }, { - "code": ">>> # Incorrect value for number_wow_reactions\n>>> number_wow_reactions == 4\nTrue", + "code": ">>> number_wow_reactions == 4\nTrue", "hidden": false, "locked": false } diff --git a/lab/lab06/lab06.ipynb b/lab/lab06/lab06.ipynb index 4605e1e..ebdfb44 100644 --- a/lab/lab06/lab06.ipynb +++ b/lab/lab06/lab06.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -101,7 +101,10 @@ { "cell_type": "markdown", "id": "cell-lab06-5", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1**: Describe Emily\u2019s model for how likely the TT practitioners are to choose the correct hand. What alternative model is her model meant to discredit? Discuss with students around you to come to a conclusion. Check in with a TA or AI if you are stuck.\n" ] @@ -109,15 +112,10 @@ { "cell_type": "markdown", "id": "cell-lab06-6", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab06-7", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2:** Remember that the practitioner got the correct answer 44% (0.44) of the time. 
According to Emily's model, on average, what proportion of times do we expect the practitioner to guess the correct hand? Make sure your answer is a number between 0 and 1. \n" ] @@ -125,7 +123,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-8", + "id": "cell-lab06-7", "metadata": { "tags": [] }, @@ -138,7 +136,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-9", + "id": "cell-lab06-8", "metadata": { "deletable": false, "editable": false @@ -150,8 +148,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-10", - "metadata": {}, + "id": "cell-lab06-9", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "The goal now is to see if our deviation from this expected proportion of correct answers is due to something other than chance. \n", "\n", @@ -167,7 +168,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-11", + "id": "cell-lab06-10", "metadata": { "tags": [] }, @@ -180,7 +181,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-12", + "id": "cell-lab06-11", "metadata": { "deletable": false, "editable": false @@ -192,24 +193,22 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-13", - "metadata": {}, + "id": "cell-lab06-12", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.4:** Why is the statistic from Question 1.3 the appropriate choice for comparing outcomes in Emily's experiment? 
How does it relate to the models you defined in Question 1.1?\n" ] }, { "cell_type": "markdown", - "id": "cell-lab06-14", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab06-15", - "metadata": {}, + "id": "cell-lab06-13", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.5:** Define the function `statistic` which takes in an expected proportion and an actual proportion, and returns the value of the statistic chosen in Question 1.3. Assume that the argument takes in proportions, but return your answer as a percentage. \n", "\n", @@ -219,7 +218,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-16", + "id": "cell-lab06-14", "metadata": { "tags": [] }, @@ -232,7 +231,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-17", + "id": "cell-lab06-15", "metadata": { "deletable": false, "editable": false @@ -244,8 +243,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-18", - "metadata": {}, + "id": "cell-lab06-16", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.6:** Use your newly defined function to calculate the observed statistic from Emily's experiment. 
\n" ] @@ -253,7 +255,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-19", + "id": "cell-lab06-17", "metadata": { "tags": [] }, @@ -266,7 +268,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-20", + "id": "cell-lab06-18", "metadata": { "deletable": false, "editable": false @@ -278,7 +280,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-21", + "id": "cell-lab06-19", "metadata": {}, "source": [ "**Is this observed statistic consistent with what we expect to see under Emily\u2019s model?**\n", @@ -299,7 +301,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-22", + "id": "cell-lab06-20", "metadata": {}, "outputs": [], "source": [ @@ -310,7 +312,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-23", + "id": "cell-lab06-21", "metadata": {}, "source": [ "`sample_proportions` returns an array that is the same length as the proportion array that is passed through. It contains the proportion of each category that appears in the sample. \n", @@ -321,7 +323,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-24", + "id": "cell-lab06-22", "metadata": {}, "outputs": [], "source": [ @@ -334,8 +336,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-25", - "metadata": {}, + "id": "cell-lab06-23", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.7:** To begin simulating, we should start by creating a representation of Emily's model to use for our simulation. This will be an array with two items in it. The first item should be the proportion of times a TT practictioner picks the correct hand, assuming that Emily\u2019s model was correct. The second item should be the proportion of times, under the same assumption, that the TT practitioner picks the incorrect hand. Assign `model_proportions` to this array. 
\n", "\n", @@ -347,7 +352,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-26", + "id": "cell-lab06-24", "metadata": { "tags": [] }, @@ -367,7 +372,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-27", + "id": "cell-lab06-25", "metadata": { "deletable": false, "editable": false @@ -379,7 +384,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-28", + "id": "cell-lab06-26", "metadata": {}, "source": [ "**Question 1.8:** Let's now see what the distribution of statistics is actually like under Emily's model. \n", @@ -389,18 +394,10 @@ "*Hint:* This should follow the same pattern as the code you did in the previous problem. " ] }, - { - "cell_type": "markdown", - "id": "cell-lab06-29", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-30", + "id": "cell-lab06-27", "metadata": { "for_assignment_type": "solution", "tags": [] @@ -416,7 +413,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-31", + "id": "cell-lab06-28", "metadata": { "tags": [] }, @@ -433,7 +430,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-32", + "id": "cell-lab06-29", "metadata": { "deletable": false, "editable": false @@ -445,7 +442,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-33", + "id": "cell-lab06-30", "metadata": {}, "source": [ "Let's view the distribution of the simulated statistics under Emily's model, and visually compare where the observed statistic lies relative to the simulated statistics." 
@@ -454,7 +451,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-34", + "id": "cell-lab06-31", "metadata": {}, "outputs": [], "source": [ @@ -465,7 +462,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-35", + "id": "cell-lab06-32", "metadata": {}, "source": [ "We can make a visual argument as to whether we believe the observed statistic is consistent with Emily\u2019s model. Here, since larger values of the test statistic suggest the alternative model (where the chance of guessing the correct hand is something other than 50%), we can formalize our analysis by finding what proportion of simulated statistics were as large or larger than our observed test statistic (the area at or to the right of the observed test statistic). If this area is small enough, we\u2019ll declare that the observed data are inconsistent with our simulated model. Here is the [link](https://inferentialthinking.com/chapters/11/1/Assessing_a_Model.html) to the section in the textbook." @@ -473,8 +470,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-36", - "metadata": {}, + "id": "cell-lab06-33", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.9:** Calculate the proportion of simulated statistics greater than or equal to the observed statistic. \n", "\n", @@ -484,7 +484,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-37", + "id": "cell-lab06-34", "metadata": { "scrolled": true, "tags": [] @@ -498,7 +498,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-38", + "id": "cell-lab06-35", "metadata": { "deletable": false, "editable": false @@ -510,7 +510,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-39", + "id": "cell-lab06-36", "metadata": {}, "source": [ "By convention, we often compare the proportion we just calculated to 0.05. 
If the proportion of simulated statistics greater than or equal to the observed statistic is sufficiently small (less than or equal to 0.05), then this is evidence against Emily's model. Conceptually, you may think of this as the case where less than 5% of simulated values are as far or farther away from what we had expected. If this is not the case, we don\u2019t have any reason to doubt Emily\u2019s model. \n", @@ -522,8 +522,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-40", - "metadata": {}, + "id": "cell-lab06-37", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.10:** Now, take some time to think to yourself and discuss with your peers:\n", "\n", @@ -536,7 +539,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-41", + "id": "cell-lab06-38", "metadata": { "tags": [] }, @@ -549,7 +552,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-42", + "id": "cell-lab06-39", "metadata": {}, "outputs": [], "source": [ @@ -558,7 +561,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-43", + "id": "cell-lab06-40", "metadata": {}, "source": [ "## 2. 
Submission\n", @@ -578,7 +581,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-44", + "id": "cell-lab06-41", "metadata": { "deletable": false, "editable": false @@ -592,7 +595,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab06-45", + "id": "cell-lab06-42", "metadata": { "deletable": false, "editable": false @@ -605,7 +608,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab06-46", + "id": "cell-lab06-43", "metadata": {}, "source": [ " " @@ -688,12 +691,12 @@ { "cases": [ { - "code": ">>> int(round(statistic(.5,.5) + statistic(.4,.1),1))\n30", + "code": ">>> int(round(statistic(0.5, 0.5) + statistic(0.4, 0.1), 1))\n30", "hidden": false, "locked": false }, { - "code": ">>> int(statistic(.4,.1) - statistic(.1,.4))\n0", + "code": ">>> int(statistic(0.4, 0.1) - statistic(0.1, 0.4))\n0", "hidden": false, "locked": false } @@ -712,7 +715,7 @@ { "cases": [ { - "code": ">>> int(round(observed_statistic,2))\n6", + "code": ">>> int(round(observed_statistic, 2))\n6", "hidden": false, "locked": false } @@ -814,7 +817,7 @@ "locked": false }, { - "code": ">>> proportion_greater_or_equal*1000 == np.count_nonzero(simulated_statistics >= observed_statistic)\nTrue", + "code": ">>> proportion_greater_or_equal * 1000 == np.count_nonzero(simulated_statistics >= observed_statistic)\nTrue", "hidden": false, "locked": false } diff --git a/lab/lab07/lab07.ipynb b/lab/lab07/lab07.ipynb index 4a58c94..dd44256 100644 --- a/lab/lab07/lab07.ipynb +++ b/lab/lab07/lab07.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -72,7 +72,10 @@ { "cell_type": "markdown", "id": "cell-lab07-4", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** The following 
statements are the steps of an A/B hypothesis test presented in a *random order*:\n", "\n", @@ -119,7 +122,10 @@ { "cell_type": "markdown", "id": "cell-lab07-7", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.** If the null hypothesis of an A/B test is correct, should the order of labels affect the differences in means between each group? Why do we shuffle labels in an A/B test? If you are in a lab section, confirm your answer with a neighbor or staff member before moving on. \n" ] @@ -128,14 +134,6 @@ "cell_type": "markdown", "id": "cell-lab07-8", "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab07-9", - "metadata": {}, "source": [ "## 2. The Great British Bake Off\n", "\n", @@ -146,23 +144,18 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-10", - "metadata": {}, + "id": "cell-lab07-9", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.** We want to know whether winning more Star Baker awards *causes* a change in likelihood of winning the season. Why is it not sufficient to compare star baker rates for winners and losers?\n" ] }, { "cell_type": "markdown", - "id": "cell-lab07-11", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab07-12", + "id": "cell-lab07-10", "metadata": {}, "source": [ "### Running an Experiment\n", @@ -178,23 +171,18 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-13", - "metadata": {}, + "id": "cell-lab07-11", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.2.** Should we use an A/B test to test these hypotheses? 
If yes, what is our \"A\" group and what is our \"B\" group?\n" ] }, { "cell_type": "markdown", - "id": "cell-lab07-14", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab07-15", + "id": "cell-lab07-12", "metadata": {}, "source": [ "Check your answers with your neighbors or a staff member before you move on to the next section." @@ -202,7 +190,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-16", + "id": "cell-lab07-13", "metadata": {}, "source": [ "The `bakers` table below describes the number of star baker awards each contest won and whether or not they won their season (`1` if they won, `0` if they did not win). The data was manually aggregated from Wikipedia for seasons 2-11 of the show. We randomized the order of rows as to not spoil the outcome of the show." @@ -211,7 +199,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-17", + "id": "cell-lab07-14", "metadata": {}, "outputs": [], "source": [ @@ -221,8 +209,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-18", - "metadata": {}, + "id": "cell-lab07-15", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.3.** Create a new table called `means` that contains the mean number of star baker awards for bakers who did not win (`won==0`) and bakers that did win (`won==1`). The table should have the column names `won` and `star baker awards mean`." 
] @@ -230,7 +221,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-19", + "id": "cell-lab07-16", "metadata": { "tags": [] }, @@ -243,7 +234,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-20", + "id": "cell-lab07-17", "metadata": { "deletable": false, "editable": false @@ -255,8 +246,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-21", - "metadata": {}, + "id": "cell-lab07-18", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.4.** Visualize the distribution of Star Baker awards for winners and non-winners. You should use the bins we provided.\n", "\n", @@ -266,7 +260,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-22", + "id": "cell-lab07-19", "metadata": { "tags": [] }, @@ -278,8 +272,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-23", - "metadata": {}, + "id": "cell-lab07-20", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.5.** We want to figure out if there is a difference between the distribution of Star Baker awards between winners and non winners. \n", "\n", @@ -292,16 +289,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-24", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab07-25", - "metadata": {}, + "id": "cell-lab07-21", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.6.** Set `observed_difference` to the observed test statistic using the `means` table. 
\n" ] @@ -309,7 +301,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-26", + "id": "cell-lab07-22", "metadata": { "tags": [] }, @@ -322,7 +314,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-27", + "id": "cell-lab07-23", "metadata": { "deletable": false, "editable": false @@ -334,8 +326,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-28", - "metadata": {}, + "id": "cell-lab07-24", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.7.** Given a table like `bakers`, a label column `label_col`, and a values column `val_col`, write a function that calculates the appropriate test statistic.\n", "\n", @@ -345,7 +340,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-29", + "id": "cell-lab07-25", "metadata": { "scrolled": true, "tags": [] @@ -361,7 +356,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-30", + "id": "cell-lab07-26", "metadata": { "deletable": false, "editable": false @@ -373,7 +368,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-31", + "id": "cell-lab07-27", "metadata": {}, "source": [ "When we run a simulation for A/B testing, we resample by **shuffling the labels** of the original sample. If the null hypothesis is true and the star baker award distributions are the same, we expect that the difference in mean star baker awards will be not change when `\"won\"` labels are changed." @@ -381,8 +376,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-32", - "metadata": {}, + "id": "cell-lab07-28", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.8.** Write a function `simulate_and_test_statistic` to compute one trial of our A/B test. 
Your function should run a simulation and return a test statistic.\n" ] @@ -390,7 +388,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-33", + "id": "cell-lab07-29", "metadata": { "tags": [] }, @@ -405,7 +403,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-34", + "id": "cell-lab07-30", "metadata": { "deletable": false, "editable": false @@ -417,8 +415,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-35", - "metadata": {}, + "id": "cell-lab07-31", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.9.** Simulate 5000 trials of our A/B test and store the test statistics in an array called `differences`.\n" ] @@ -426,7 +427,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-36", + "id": "cell-lab07-32", "metadata": { "tags": [] }, @@ -443,7 +444,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-37", + "id": "cell-lab07-33", "metadata": { "deletable": false, "editable": false @@ -455,7 +456,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-38", + "id": "cell-lab07-34", "metadata": {}, "source": [ "Run the cell below to view a histogram of your simulated test statistics plotted with your observed test statistic." 
@@ -464,7 +465,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-39", + "id": "cell-lab07-35", "metadata": {}, "outputs": [], "source": [ @@ -475,8 +476,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-40", - "metadata": {}, + "id": "cell-lab07-36", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.10.** Find the p-value for your test and assign it to `empirical_p`.\n" ] @@ -484,7 +488,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-41", + "id": "cell-lab07-37", "metadata": { "tags": [] }, @@ -497,7 +501,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-42", + "id": "cell-lab07-38", "metadata": { "deletable": false, "editable": false @@ -509,24 +513,19 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-43", - "metadata": {}, + "id": "cell-lab07-39", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.11.** Using a 5% P-value cutoff, draw a conclusion about the null and alternative hypotheses. Describe your findings using simple, non-technical language. What does your analysis tell you about the association between star baker awards and winning? What can you claim about causation from your statistical analysis? Call your GSI or AI over and confirm your answer. \n" ] }, - { - "cell_type": "markdown", - "id": "cell-lab07-44", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-45", + "id": "cell-lab07-40", "metadata": { "deletable": false, "editable": false @@ -538,7 +537,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-46", + "id": "cell-lab07-41", "metadata": {}, "source": [ "## 3. 
Submission\n", @@ -560,7 +559,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-47", + "id": "cell-lab07-42", "metadata": { "deletable": false, "editable": false @@ -574,7 +573,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab07-48", + "id": "cell-lab07-43", "metadata": { "deletable": false, "editable": false @@ -587,7 +586,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab07-49", + "id": "cell-lab07-44", "metadata": {}, "source": [ " " @@ -627,17 +626,17 @@ "locked": false }, { - "code": ">>> np.allclose(np.array(ab_test_order)[:3] % 2 == 1, True) # Check the first three elements in your array.\nTrue", + "code": ">>> np.allclose(np.array(ab_test_order)[:3] % 2 == 1, True)\nTrue", "hidden": false, "locked": false }, { - "code": ">>> np.allclose(np.array(ab_test_order)[3:] % 2 == 0, True) # Check the last three elements in your array.\nTrue", + "code": ">>> np.allclose(np.array(ab_test_order)[3:] % 2 == 0, True)\nTrue", "hidden": false, "locked": false }, { - "code": ">>> \n>>> # This imports a hashing library for the autograder.\n>>> import hashlib\n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(np.array(ab_test_order).astype(int))\n'a7196ed0f271c873d9750cb92422d911'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... 
return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(np.array(ab_test_order).astype(int))\n'a7196ed0f271c873d9750cb92422d911'", "hidden": false, "locked": false } @@ -680,12 +679,12 @@ "locked": false }, { - "code": ">>> np.round(min(means.column(\"star baker awards mean\")), 2) == 0.65\nTrue", + "code": ">>> np.round(min(means.column('star baker awards mean')), 2) == 0.65\nTrue", "hidden": false, "locked": false }, { - "code": ">>> np.round(max(means.column(\"star baker awards mean\")), 2) == 1.5\nTrue", + "code": ">>> np.round(max(means.column('star baker awards mean')), 2) == 1.5\nTrue", "hidden": false, "locked": false } @@ -728,7 +727,7 @@ { "cases": [ { - "code": ">>> np.isclose(round(find_test_stat(bakers, \"won\", \"star baker awards\"), 3) - 0.848, 0)\nTrue", + "code": ">>> np.isclose(round(find_test_stat(bakers, 'won', 'star baker awards'), 3) - 0.848, 0)\nTrue", "hidden": false, "locked": false } @@ -747,12 +746,12 @@ { "cases": [ { - "code": ">>> test_stat = round(simulate_and_test_statistic(bakers, \"won\", \"star baker awards\"), 3)\n>>> -2 < test_stat < 2\nTrue", + "code": ">>> test_stat = round(simulate_and_test_statistic(bakers, 'won', 'star baker awards'), 3)\n>>> -2 < test_stat < 2\nTrue", "hidden": false, "locked": false }, { - "code": ">>> np.random.seed(1)\n>>> test_stat2 = simulate_and_test_statistic(bakers, \"won\", \"star baker awards\")\n>>> np.round(test_stat2, 3) == -0.023 or np.round(test_stat2, 3) == -0.132\nTrue", + "code": ">>> np.random.seed(1)\n>>> test_stat2 = simulate_and_test_statistic(bakers, 'won', 'star baker awards')\n>>> np.round(test_stat2, 3) == -0.023 or np.round(test_stat2, 3) == -0.132\nTrue", "hidden": false, "locked": false } @@ -776,12 +775,12 @@ "locked": false }, { - "code": ">>> abs(np.average(differences)) < 0.05 # On average, your test statistic should be close to 0\nTrue", + "code": ">>> abs(np.average(differences)) < 0.05\nTrue", "hidden": false, "locked": false }, { - "code": ">>> all(differences 
== differences.item(0)) == False # Make sure all of the test statistics are different\nTrue", + "code": ">>> all(differences == differences.item(0)) == False\nTrue", "hidden": false, "locked": false } diff --git a/lab/lab08/lab08.ipynb b/lab/lab08/lab08.ipynb index 9b27c01..6e00558 100644 --- a/lab/lab08/lab08.ipynb +++ b/lab/lab08/lab08.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -80,7 +80,10 @@ { "cell_type": "markdown", "id": "cell-lab08-5", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** The next cell loads the table `births` from lecture, which is a large random sample of US births and includes information about mother-child pairs. \n", "\n", @@ -116,7 +119,10 @@ { "cell_type": "markdown", "id": "cell-lab08-8", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.** Run the following cell to compare the mean (red) and median (blue) of the distribution of mothers ages.\n" ] @@ -152,7 +158,10 @@ { "cell_type": "markdown", "id": "cell-lab08-11", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.3.** Assign `age_mean` to the mean of the maternal ages and `age_sd` to the SD of the maternal ages. Then run the cell below to see the width of one SD (blue) from the sample mean (red) plotted on the histogram of maternal ages.\n" ] @@ -204,7 +213,10 @@ { "cell_type": "markdown", "id": "cell-lab08-15", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.4.** Fill in the following code to examine the distribution of maternal heights, which is roughly normally distributed. 
We\u2019ll plot the standard deviation on the histogram, as before - notice where one standard deviation (blue) away from the mean (red) falls on the plot.\n" ] @@ -316,7 +328,10 @@ { "cell_type": "markdown", "id": "cell-lab08-24", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.** Define a function `one_sample_mean`. Its arguments should be `table` (the name of a table), `label` (the label of the column containing the variable), and `sample_size`(the number of employees in the sample). It should sample with replacement from the table and\n", "return the mean of the `label` column of the sample.\n" @@ -353,7 +368,10 @@ { "cell_type": "markdown", "id": "cell-lab08-27", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.2.** Use `one_sample_mean` to define a function `simulate_sample_mean`. The arguments are the name of the `table`, the `label` of the column containing the variable, the `sample_size`, and the number of simulations (`repetitions`).\n", "\n", @@ -436,7 +454,10 @@ { "cell_type": "markdown", "id": "cell-lab08-33", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.3.** Simulate two sample means, one for a sample of 400 salaries and one for a sample of 625 salaries. In each case, perform 10,000 repetitions. Don't worry about the `plots.xlim` or `plots.ylim` lines \u2013 they just make sure that all of the plots have the same x-axis and y-axis, respectively. \n" ] @@ -465,7 +486,10 @@ { "cell_type": "markdown", "id": "cell-lab08-35", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.4.** Assign `q2_4` to an array of numbers corresponding to true statement(s) about the plots from 2.3. 
\n", "\n", @@ -507,7 +531,10 @@ { "cell_type": "markdown", "id": "cell-lab08-38", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.5.** Assign `q2_5` to an array of numbers corresponding to true statement(s) about the plots from 2.3.\n", "\n", @@ -609,7 +636,10 @@ { "cell_type": "markdown", "id": "cell-lab08-47", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.6.** Assign the variable `SD_of_sample_means` to the integer corresponding to your answer to the following question:\n", "\n", @@ -650,7 +680,10 @@ { "cell_type": "markdown", "id": "cell-lab08-50", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.7.** Let's think about how the relationships between population SD, sample SD, and SD of sample means change with varying sample size. Which of the following is true? Assign the variable `pop_vs_sample` to an array of integer(s) that correspond to true statement(s).\n", "\n", @@ -693,7 +726,10 @@ { "cell_type": "markdown", "id": "cell-lab08-53", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.8.** Is there a relationship between the sample size and the standard deviation of the sample means? Assign `q2_8` to the number corresponding to the statement that answers this question.\n", "\n", @@ -889,7 +925,7 @@ { "cases": [ { - "code": ">>> \n>>> import hashlib # This imports a hashing library for the autograder.\n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(set(np.array(q2_4).astype(int))) == get_hash(set(np.array(make_array(1, 4)).astype(int)))\nTrue", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... 
return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(set(np.array(q2_4).astype(int))) == get_hash(set(np.array(make_array(1, 4)).astype(int)))\nTrue", "hidden": false, "locked": false } @@ -908,7 +944,7 @@ { "cases": [ { - "code": ">>> \n>>> import hashlib # This imports a hashing library for the autograder.\n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(set(np.array(q2_5).astype(int)))== get_hash(set(np.array(make_array(1, 3)).astype(int)))\nTrue", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(set(np.array(q2_5).astype(int))) == get_hash(set(np.array(make_array(1, 3)).astype(int)))\nTrue", "hidden": false, "locked": false } @@ -946,7 +982,7 @@ { "cases": [ { - "code": ">>> import hashlib # This imports a hashing library for the autograder.\n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(set(np.array(pop_vs_sample).astype(int))) == get_hash(set(np.array(make_array(3, 4)).astype(int)))\nTrue", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... 
return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(set(np.array(pop_vs_sample).astype(int))) == get_hash(set(np.array(make_array(3, 4)).astype(int)))\nTrue", "hidden": false, "locked": false } @@ -965,7 +1001,7 @@ { "cases": [ { - "code": ">>> \n>>> int(q2_8) == 1\nTrue", + "code": ">>> int(q2_8) == 1\nTrue", "hidden": false, "locked": false } diff --git a/lab/lab09/lab09.ipynb b/lab/lab09/lab09.ipynb index 02b678b..be3a22f 100644 --- a/lab/lab09/lab09.ipynb +++ b/lab/lab09/lab09.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -113,7 +113,10 @@ { "cell_type": "markdown", "id": "cell-lab09-7", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.0.** The following statements are the unordered steps of linear regression. \n", "\n", @@ -161,7 +164,10 @@ { "cell_type": "markdown", "id": "cell-lab09-11", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.** Make a scatter plot of the data. It's conventional to put the column we want to predict on the vertical axis and the other column on the horizontal axis.\n" ] @@ -181,7 +187,10 @@ { "cell_type": "markdown", "id": "cell-lab09-13", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.** Are eruption duration and waiting time roughly linearly related based on the scatter plot above? 
Is this relationship positive?\n" ] @@ -190,14 +199,6 @@ "cell_type": "markdown", "id": "cell-lab09-14", "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab09-15", - "metadata": {}, "source": [ "We're going to continue with the assumption that they are linearly related, so it's reasonable to use linear regression to analyze this data.\n", "\n", @@ -206,8 +207,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-16", - "metadata": {}, + "id": "cell-lab09-15", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.3.** Compute the mean and standard deviation of the eruption durations and waiting times. **Then** create a table called `faithful_standard` containing the eruption durations and waiting times in standard units. The columns should be named `duration (standard units)` and `wait (standard units)`.\n" ] @@ -215,7 +219,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-17", + "id": "cell-lab09-16", "metadata": { "for_assignment_type": "solution", "tags": [] @@ -236,7 +240,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-18", + "id": "cell-lab09-17", "metadata": { "deletable": false, "editable": false @@ -248,8 +252,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-19", - "metadata": {}, + "id": "cell-lab09-18", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.4.** Plot the data again, but this time in standard units.\n" ] @@ -257,7 +264,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-20", + "id": "cell-lab09-19", "metadata": { "tags": [] }, @@ -268,7 +275,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-21", + "id": "cell-lab09-20", "metadata": {}, "source": [ "You'll notice that this plot looks the same as the last one! However, the data and axes are scaled differently. So it's important to read the ticks on the axes." 
@@ -276,8 +283,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-22", - "metadata": {}, + "id": "cell-lab09-21", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.5.** Among the following numbers, which would you guess is closest to the correlation between eruption duration and waiting time in this dataset?\n", "\n", @@ -291,7 +301,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-23", + "id": "cell-lab09-22", "metadata": { "tags": [] }, @@ -303,7 +313,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-24", + "id": "cell-lab09-23", "metadata": { "deletable": false, "editable": false @@ -315,8 +325,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-25", - "metadata": {}, + "id": "cell-lab09-24", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.6.** Compute the correlation coefficient: `r`. \n", "\n", @@ -327,7 +340,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-26", + "id": "cell-lab09-25", "metadata": { "tags": [] }, @@ -340,7 +353,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-27", + "id": "cell-lab09-26", "metadata": { "deletable": false, "editable": false @@ -352,7 +365,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-28", + "id": "cell-lab09-27", "metadata": {}, "source": [ "## 2. The regression line\n", @@ -368,7 +381,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-29", + "id": "cell-lab09-28", "metadata": {}, "outputs": [], "source": [ @@ -388,8 +401,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-30", - "metadata": {}, + "id": "cell-lab09-29", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "How would you take a point in standard units and convert it back to original units? We'd have to \"stretch\" its horizontal position by `duration_std` and its vertical position by `wait_std`. 
That means the same thing would happen to the slope of the line.\n", "\n", @@ -403,7 +419,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-31", + "id": "cell-lab09-30", "metadata": { "tags": [] }, @@ -416,7 +432,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-32", + "id": "cell-lab09-31", "metadata": { "deletable": false, "editable": false @@ -428,8 +444,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-33", - "metadata": {}, + "id": "cell-lab09-32", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "We know that the regression line passes through the point `(duration_mean, wait_mean)`. Recall that the equation of the regression line in the original units is:\n", "\n", @@ -442,7 +461,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-34", + "id": "cell-lab09-33", "metadata": { "tags": [] }, @@ -455,7 +474,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-35", + "id": "cell-lab09-34", "metadata": { "deletable": false, "editable": false @@ -467,8 +486,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-36", - "metadata": {}, + "id": "cell-lab09-35", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "## 3. Investigating the regression line\n", "The slope and intercept tell you exactly what the regression line looks like. 
To predict the waiting time for an eruption, multiply the eruption's duration by `slope` and then add `intercept`.\n", @@ -479,7 +501,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-37", + "id": "cell-lab09-36", "metadata": { "tags": [] }, @@ -502,7 +524,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-38", + "id": "cell-lab09-37", "metadata": { "deletable": false, "editable": false @@ -514,7 +536,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-39", + "id": "cell-lab09-38", "metadata": {}, "source": [ "The next cell plots the line that goes between those two points, which is (a segment of) the regression line." @@ -523,7 +545,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-40", + "id": "cell-lab09-39", "metadata": { "scrolled": true }, @@ -536,8 +558,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-41", - "metadata": {}, + "id": "cell-lab09-40", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.2.** Make predictions for the waiting time after each eruption in the `faithful` table. (Of course, we know exactly what the waiting times were! We are doing this so we can see how accurate our predictions are.) Put these numbers into a column in a new table called `faithful_predictions`. Its first row should look like this:\n", "\n", @@ -551,7 +576,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-42", + "id": "cell-lab09-41", "metadata": { "tags": [] }, @@ -564,7 +589,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-43", + "id": "cell-lab09-42", "metadata": { "deletable": false, "editable": false @@ -576,8 +601,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-44", - "metadata": {}, + "id": "cell-lab09-43", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.3.** How close were we? Compute the *residual* for each eruption in the dataset. 
The residual is the actual waiting time minus the predicted waiting time. Add the residuals to `faithful_predictions` as a new column called `residual` and name the resulting table `faithful_residuals`.\n", "\n", @@ -587,7 +615,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-45", + "id": "cell-lab09-44", "metadata": { "tags": [] }, @@ -600,7 +628,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-46", + "id": "cell-lab09-45", "metadata": { "deletable": false, "editable": false @@ -612,7 +640,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-47", + "id": "cell-lab09-46", "metadata": {}, "source": [ "Here is a plot of the residuals you computed. Each point corresponds to one eruption. It shows how much our prediction over- or under-estimated the waiting time." @@ -621,7 +649,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-48", + "id": "cell-lab09-47", "metadata": {}, "outputs": [], "source": [ @@ -630,7 +658,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-49", + "id": "cell-lab09-48", "metadata": {}, "source": [ "There isn't really a pattern in the residuals, which confirms that it was reasonable to try linear regression. It's true that there are two separate clouds; the eruption durations seemed to fall into two distinct clusters. But that's just a pattern in the eruption durations, not a pattern in the relationship between eruption durations and waiting times." @@ -638,7 +666,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-50", + "id": "cell-lab09-49", "metadata": {}, "source": [ "## 4. 
How accurate are different predictions?\n", @@ -650,7 +678,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-51", + "id": "cell-lab09-50", "metadata": {}, "outputs": [], "source": [ @@ -663,7 +691,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-52", + "id": "cell-lab09-51", "metadata": {}, "source": [ "However, unless you have a strong reason to believe that the linear regression model is true, you should be wary of applying your prediction model to data that are very different from the training data." @@ -671,8 +699,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-53", - "metadata": {}, + "id": "cell-lab09-52", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.1.** In `faithful`, no eruption lasted exactly 0, 2.5, or 60 minutes. Using this line, what is the predicted waiting time for an eruption that lasts 0 minutes? 2.5 minutes? An hour?\n" ] @@ -680,7 +711,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-54", + "id": "cell-lab09-53", "metadata": { "tags": [] }, @@ -698,7 +729,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-55", + "id": "cell-lab09-54", "metadata": { "deletable": false, "editable": false @@ -710,23 +741,18 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-56", - "metadata": {}, + "id": "cell-lab09-55", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.2.** For each prediction, state whether you think it's reliable and explain your reasoning. \n" ] }, { "cell_type": "markdown", - "id": "cell-lab09-57", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab09-58", + "id": "cell-lab09-56", "metadata": {}, "source": [ "## 5. 
Divide and Conquer" @@ -734,7 +760,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-59", + "id": "cell-lab09-57", "metadata": {}, "source": [ "It appears from the scatter diagram that there are two clusters of points: one for durations around 2 and another for durations between 3.5 and 5. A vertical line at 3 divides the two clusters." @@ -743,7 +769,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-60", + "id": "cell-lab09-58", "metadata": { "scrolled": true }, @@ -755,7 +781,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-61", + "id": "cell-lab09-59", "metadata": {}, "source": [ "The `standardize` function from lecture appears below, which takes in a table with numerical columns and returns the same table with each column converted into standard units." @@ -764,7 +790,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-62", + "id": "cell-lab09-60", "metadata": {}, "outputs": [], "source": [ @@ -784,8 +810,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-63", - "metadata": {}, + "id": "cell-lab09-61", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.1.** Separately compute the correlation coefficient *r* for all the points with a duration below 3 **and then** for all the points with a duration above 3. 
To do so, create a function that computes `r` from a table, and then pass it two different tables of points, called `below_3` and `above_3`.\n", "\n", @@ -795,7 +824,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-64", + "id": "cell-lab09-62", "metadata": { "tags": [] }, @@ -816,7 +845,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-65", + "id": "cell-lab09-63", "metadata": { "deletable": false, "editable": false @@ -828,8 +857,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-66", - "metadata": {}, + "id": "cell-lab09-64", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.2.** Complete the functions `slope_of` and `intercept_of` below. \n", "\n", @@ -839,7 +871,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-67", + "id": "cell-lab09-65", "metadata": { "tags": [] }, @@ -873,7 +905,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-68", + "id": "cell-lab09-66", "metadata": { "deletable": false, "editable": false @@ -885,7 +917,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-69", + "id": "cell-lab09-67", "metadata": {}, "source": [ "The plot below shows the two different regression lines, one for each cluster, along with the original regression line!" 
@@ -894,7 +926,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-70", + "id": "cell-lab09-68", "metadata": {}, "outputs": [], "source": [ @@ -906,8 +938,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-71", - "metadata": {}, + "id": "cell-lab09-69", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.3.** Write a function `predict_wait` that takes a `duration` and returns the predicted wait time using the appropriate regression line, depending on whether the duration is below 3 or greater than (or equal to) 3.\n" ] @@ -915,7 +950,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-72", + "id": "cell-lab09-70", "metadata": { "for_assignment_type": "student", "tags": [] @@ -929,7 +964,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-73", + "id": "cell-lab09-71", "metadata": { "deletable": false, "editable": false @@ -941,7 +976,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-74", + "id": "cell-lab09-72", "metadata": {}, "source": [ "The predicted wait times for each point appear below." 
@@ -950,7 +985,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-75", + "id": "cell-lab09-73", "metadata": {}, "outputs": [], "source": [ @@ -960,8 +995,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-76", - "metadata": {}, + "id": "cell-lab09-74", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -970,7 +1008,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-77", + "id": "cell-lab09-75", "metadata": {}, "source": [ "_Type your answer here, replacing this text._" @@ -978,8 +1016,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-78", - "metadata": {}, + "id": "cell-lab09-76", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -989,7 +1030,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-79", + "id": "cell-lab09-77", "metadata": { "scrolled": true }, @@ -1007,7 +1048,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-80", + "id": "cell-lab09-78", "metadata": {}, "source": [ "The residual plot for the wait times when they are predicted by two regression lines (red) doesn't really have a pattern, which confirms that it was also appropriate to use linear regression in our \"Divide and Conquer\" scenario. How do the two residual plots compare?" @@ -1015,7 +1056,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-81", + "id": "cell-lab09-79", "metadata": {}, "source": [ "## 6. 
Submission\n", @@ -1035,7 +1076,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-82", + "id": "cell-lab09-80", "metadata": { "deletable": false, "editable": false @@ -1049,7 +1090,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab09-83", + "id": "cell-lab09-81", "metadata": { "deletable": false, "editable": false @@ -1062,7 +1103,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab09-84", + "id": "cell-lab09-82", "metadata": {}, "source": [ " " @@ -1102,7 +1143,7 @@ "locked": false }, { - "code": ">>> \n>>> import hashlib # This imports a hashing library for the autograder.\n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(np.array(least_squares_order).astype(int)) # Your ordering is incorrect.\n'cd29f2d730e11535cef30ecc78640daa'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(np.array(least_squares_order).astype(int))\n'cd29f2d730e11535cef30ecc78640daa'", "hidden": false, "locked": false } @@ -1121,7 +1162,7 @@ { "cases": [ { - "code": ">>> abs(sum(faithful_standard.column(0))) <= 1e-8\nTrue", + "code": ">>> abs(sum(faithful_standard.column(0))) <= 1e-08\nTrue", "hidden": false, "locked": false }, @@ -1131,7 +1172,7 @@ "locked": false }, { - "code": ">>> round(wait_std, 2) == 13.57 \nTrue", + "code": ">>> round(wait_std, 2) == 13.57\nTrue", "hidden": false, "locked": false } @@ -1150,7 +1191,7 @@ { "cases": [ { - "code": ">>> \n>>> import hashlib # This imports a hashing library for the autograder.\n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... 
return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(int(correlation)) # Your choice is incorrect.\n'eccbc87e4b5ce2fe28308fd9f2a7baf3'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(int(correlation))\n'eccbc87e4b5ce2fe28308fd9f2a7baf3'", "hidden": false, "locked": false } @@ -1174,7 +1215,7 @@ "locked": false }, { - "code": ">>> round(r,3) == 0.901\nTrue", + "code": ">>> round(r, 3) == 0.901\nTrue", "hidden": false, "locked": false } @@ -1193,12 +1234,12 @@ { "cases": [ { - "code": ">>> (slope*13 - 100)/98 <= 0.5\nTrue", + "code": ">>> (slope * 13 - 100) / 98 <= 0.5\nTrue", "hidden": false, "locked": false }, { - "code": ">>> np.round(slope, 4) == 10.7296 # Make sure you are plugging in the SD_x and SD_y in the correct spots!\nTrue", + "code": ">>> np.round(slope, 4) == 10.7296\nTrue", "hidden": false, "locked": false } @@ -1265,12 +1306,12 @@ { "cases": [ { - "code": ">>> set(faithful_predictions.labels) == set(['duration', 'wait', 'predicted wait']) # Make sure your column labels are correct.\nTrue", + "code": ">>> set(faithful_predictions.labels) == set(['duration', 'wait', 'predicted wait'])\nTrue", "hidden": false, "locked": false }, { - "code": ">>> abs(1 - np.mean(faithful_predictions.column(2))/100) <= 0.35\nTrue", + "code": ">>> abs(1 - np.mean(faithful_predictions.column(2)) / 100) <= 0.35\nTrue", "hidden": false, "locked": false } @@ -1289,12 +1330,12 @@ { "cases": [ { - "code": ">>> set(faithful_residuals.labels) == set(['duration', 'wait', 'predicted wait', 'residual']) # Make sure your column labels are correct.\nTrue", + "code": ">>> set(faithful_residuals.labels) == set(['duration', 'wait', 'predicted wait', 'residual'])\nTrue", "hidden": false, "locked": false }, { - "code": ">>> abs(sum(faithful_residuals.column('residual'))) <= 1e-8\nTrue", + "code": ">>> 
abs(sum(faithful_residuals.column('residual'))) <= 1e-08\nTrue", "hidden": false, "locked": false } @@ -1313,22 +1354,22 @@ { "cases": [ { - "code": ">>> 12 - zero_minute_predicted_waiting_time*1.4/4 <= 0.35\nTrue", + "code": ">>> 12 - zero_minute_predicted_waiting_time * 1.4 / 4 <= 0.35\nTrue", "hidden": false, "locked": false }, { - "code": ">>> zero_minute_predicted_waiting_time - intercept <= 1e5\nTrue", + "code": ">>> zero_minute_predicted_waiting_time - intercept <= 100000.0\nTrue", "hidden": false, "locked": false }, { - "code": ">>> 2 - two_point_five_minute_predicted_waiting_time/35 <= 0.4\nTrue", + "code": ">>> 2 - two_point_five_minute_predicted_waiting_time / 35 <= 0.4\nTrue", "hidden": false, "locked": false }, { - "code": ">>> (26 - hour_predicted_waiting_time/30)/10 <= 0.43\nTrue", + "code": ">>> (26 - hour_predicted_waiting_time / 30) / 10 <= 0.43\nTrue", "hidden": false, "locked": false } @@ -1371,7 +1412,7 @@ { "cases": [ { - "code": ">>> np.allclose([wait_below_3(1), wait_below_3(3), wait_above_3(3), wait_above_3(6)], \n... [47.902151605742517, 60.603197182023813, 72.965413990538366, 89.281859197449506])\nTrue", + "code": ">>> np.allclose([wait_below_3(1), wait_below_3(3), wait_above_3(3), wait_above_3(6)], [47.90215160574252, 60.60319718202381, 72.96541399053837, 89.2818591974495])\nTrue", "hidden": false, "locked": false } @@ -1390,12 +1431,12 @@ { "cases": [ { - "code": ">>> np.allclose([predict_wait(x) for x in [1.5, 2.5]], \n... [51.077412999812836, 57.427935787953487])\nTrue", + "code": ">>> np.allclose([predict_wait(x) for x in [1.5, 2.5]], [51.077412999812836, 57.42793578795349])\nTrue", "hidden": false, "locked": false }, { - "code": ">>> np.allclose([predict_wait(x) for x in [3.5, 4.5]], \n... 
[75.684821525023551, 81.123636593993922])\nTrue", + "code": ">>> np.allclose([predict_wait(x) for x in [3.5, 4.5]], [75.68482152502355, 81.12363659399392])\nTrue", "hidden": false, "locked": false } diff --git a/lab/lab10/lab10.ipynb b/lab/lab10/lab10.ipynb index b537504..579e067 100644 --- a/lab/lab10/lab10.ipynb +++ b/lab/lab10/lab10.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -101,7 +101,10 @@ { "cell_type": "markdown", "id": "cell-lab10-5", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.0.1.** Knowing only what we've told you so far, what's the probability that the marble you're given was a large shiny marble?\n" ] @@ -312,7 +315,10 @@ { "cell_type": "markdown", "id": "cell-lab10-20", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Knowing nothing else about the marble, it's equally likely to be any of the marbles depicted above; this is because we've assumed that the marble is selected **uniformly at random** from the bag.\n", "\n", @@ -393,7 +399,10 @@ { "cell_type": "markdown", "id": "cell-lab10-27", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.1.** What's the probability Samantha gives you a shiny marble, knowing that she gave you a large marble? \n", "\n", @@ -448,7 +457,10 @@ { "cell_type": "markdown", "id": "cell-lab10-32", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.2.** Suppose instead Samantha had said she gave you a **shiny** marble (hooray!). What's the probability that the marble given to you is large? 
\n", "\n", @@ -504,7 +516,10 @@ { "cell_type": "markdown", "id": "cell-lab10-37", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "If you can, how? If not, why not? Check with your lab peers or a staff member to see if you are on the right track. \n" ] @@ -513,14 +528,6 @@ "cell_type": "markdown", "id": "cell-lab10-38", "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab10-39", - "metadata": {}, "source": [ "# 3. Cancer screening\n", "Hopefully the icon arrays from the previous portion helped you build intuition for why conditional probabilities can be helpful. Now, let\u2019s look at a real life application.\n", @@ -537,7 +544,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-40", + "id": "cell-lab10-39", "metadata": {}, "source": [ "## 3.1. Basic cancer statistics\n", @@ -559,7 +566,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-41", + "id": "cell-lab10-40", "metadata": {}, "outputs": [], "source": [ @@ -572,8 +579,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-42", - "metadata": {}, + "id": "cell-lab10-41", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "One way to visualize this dataset is with a contingency table, which you've seen before.\n", "\n", @@ -590,7 +600,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-43", + "id": "cell-lab10-42", "metadata": { "deletable": false, "scrolled": true, @@ -605,7 +615,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-44", + "id": "cell-lab10-43", "metadata": { "deletable": false, "editable": false @@ -617,7 +627,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-45", + "id": "cell-lab10-44", "metadata": {}, "source": [ "Here is the `people` data in an icon array." 
@@ -626,7 +636,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-46", + "id": "cell-lab10-45", "metadata": {}, "outputs": [], "source": [ @@ -635,7 +645,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-47", + "id": "cell-lab10-46", "metadata": {}, "source": [ "Now let's think about how you can use this kind of information when Person X is tested for cancer.\n", @@ -648,7 +658,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-48", + "id": "cell-lab10-47", "metadata": {}, "outputs": [], "source": [ @@ -658,8 +668,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-49", - "metadata": {}, + "id": "cell-lab10-48", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.2.** What's the chance that Person X has a positive test result, knowing nothing else about them?\n", "Run the next code cell to display an icon array, then assign `probability_positive_test` to this value." @@ -668,7 +681,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-50", + "id": "cell-lab10-49", "metadata": { "deletable": false, "tags": [] @@ -685,7 +698,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-51", + "id": "cell-lab10-50", "metadata": {}, "source": [ "## 3.2. 
Interpreting test results\n", @@ -699,7 +712,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-52", + "id": "cell-lab10-51", "metadata": {}, "outputs": [], "source": [ @@ -709,8 +722,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-53", - "metadata": {}, + "id": "cell-lab10-52", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "The *conditional probability* that Person X **has cancer given their positive test result** is the chance that they're in the first group (cancer), assuming they have a positive test result.\n", "\n", @@ -733,7 +749,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-54", + "id": "cell-lab10-53", "metadata": { "deletable": false, "tags": [] @@ -747,7 +763,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-55", + "id": "cell-lab10-54", "metadata": { "deletable": false, "editable": false @@ -759,8 +775,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-56", - "metadata": {}, + "id": "cell-lab10-55", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.2.2.** Now write code to calculate that probability exactly, using the original contingency table you wrote (the `cancer` table).\n", "\n", @@ -770,7 +789,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-57", + "id": "cell-lab10-56", "metadata": { "deletable": false, "tags": [] @@ -788,7 +807,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-58", + "id": "cell-lab10-57", "metadata": { "deletable": false, "editable": false @@ -800,26 +819,21 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-59", - "metadata": {}, + "id": "cell-lab10-58", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.2.3.** Look at the full icon array again. 
Using that, how would you compute the conditional probability of cancer given a positive test?\n", "\n", "Run the next code cell to see the full icon array. \n" ] }, - { - "cell_type": "markdown", - "id": "cell-lab10-60", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-61", + "id": "cell-lab10-59", "metadata": {}, "outputs": [], "source": [ @@ -830,23 +844,18 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-62", - "metadata": {}, + "id": "cell-lab10-60", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.2.4.** Is your answer to question 3.2.2 larger than the overall proportion of people in the population who have cancer (given as 1% in 3.1.1)? Does that make sense? Check with your peers or a staff member to see if you have the right idea. \n" ] }, { "cell_type": "markdown", - "id": "cell-lab10-63", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab10-64", + "id": "cell-lab10-61", "metadata": {}, "source": [ "# 4. A Subjective Prior\n", @@ -864,7 +873,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-65", + "id": "cell-lab10-62", "metadata": {}, "source": [ "## 4.1. A New Look at the Population\n", @@ -880,7 +889,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-66", + "id": "cell-lab10-63", "metadata": {}, "outputs": [], "source": [ @@ -895,7 +904,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-67", + "id": "cell-lab10-64", "metadata": {}, "outputs": [], "source": [ @@ -909,7 +918,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-68", + "id": "cell-lab10-65", "metadata": {}, "source": [ "As before, we can present the data above in an icon array. Here is the `people_new` data in such a format." 
@@ -918,7 +927,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-69", + "id": "cell-lab10-66", "metadata": {}, "outputs": [], "source": [ @@ -927,7 +936,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-70", + "id": "cell-lab10-67", "metadata": {}, "source": [ "## 4.2. Interpretation and Comparison\n", @@ -941,7 +950,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-71", + "id": "cell-lab10-68", "metadata": {}, "outputs": [], "source": [ @@ -951,8 +960,11 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-72", - "metadata": {}, + "id": "cell-lab10-69", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.2.1.** Using the new contingency table, `cancer_new`, that was provided at the beginning of this section, write code to calculate the exact probability that an individual has cancer given that they've received a positive test result.\n", "\n", @@ -962,7 +974,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-73", + "id": "cell-lab10-70", "metadata": { "deletable": false, "tags": [] @@ -980,7 +992,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-74", + "id": "cell-lab10-71", "metadata": { "deletable": false, "editable": false @@ -992,23 +1004,18 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-75", - "metadata": {}, + "id": "cell-lab10-72", + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.2.2.** How does your probability in 3.2.2 compare to your answer from 4.2.1? Does that make sense? Check with your peers or a staff member to see if you have the right idea. \n" ] }, { "cell_type": "markdown", - "id": "cell-lab10-76", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, - { - "cell_type": "markdown", - "id": "cell-lab10-77", + "id": "cell-lab10-73", "metadata": {}, "source": [ "## 5. 
Submission\n", @@ -1030,7 +1037,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-78", + "id": "cell-lab10-74", "metadata": { "deletable": false, "editable": false @@ -1044,7 +1051,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-lab10-79", + "id": "cell-lab10-75", "metadata": { "deletable": false, "editable": false @@ -1057,7 +1064,7 @@ }, { "cell_type": "markdown", - "id": "cell-lab10-80", + "id": "cell-lab10-76", "metadata": {}, "source": [ " " @@ -1092,12 +1099,12 @@ { "cases": [ { - "code": ">>> 0 < probability_large_shiny < 1 \nTrue", + "code": ">>> 0 < probability_large_shiny < 1\nTrue", "hidden": false, "locked": false }, { - "code": ">>> \n>>> import hashlib \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(np.round(probability_large_shiny, 3))\n'8404599d79837400f000c64a4fa1cc0e'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(np.round(probability_large_shiny, 3))\n'8404599d79837400f000c64a4fa1cc0e'", "hidden": false, "locked": false } @@ -1116,7 +1123,7 @@ { "cases": [ { - "code": ">>> import hashlib \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(np.round(probability_shiny, 3))\n'a83a7d5356406fb9bdbb4d93697cee38'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(np.round(probability_shiny, 3))\n'a83a7d5356406fb9bdbb4d93697cee38'", "hidden": false, "locked": false } @@ -1135,7 +1142,7 @@ { "cases": [ { - "code": ">>> \n>>> import hashlib \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... 
return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(np.round(probability_shiny_given_large, 3))\n'57eeec0a6974ecb4e9fcf68fab052f7b'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(np.round(probability_shiny_given_large, 3))\n'57eeec0a6974ecb4e9fcf68fab052f7b'", "hidden": false, "locked": false } @@ -1154,7 +1161,7 @@ { "cases": [ { - "code": ">>> \n>>> import hashlib \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(np.round(probability_large_given_shiny, 3))\n'54fbf38cf649866815e0fefc46a1f6c7'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(np.round(probability_large_given_shiny, 3))\n'54fbf38cf649866815e0fefc46a1f6c7'", "hidden": false, "locked": false } @@ -1202,7 +1209,7 @@ { "cases": [ { - "code": ">>> \n>>> import hashlib \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(int(rough_prob_cancer_given_positive))\n'a87ff679a2f3e71d9181a67b7542122c'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(int(rough_prob_cancer_given_positive))\n'a87ff679a2f3e71d9181a67b7542122c'", "hidden": false, "locked": false } @@ -1221,7 +1228,7 @@ { "cases": [ { - "code": ">>> \n>>> import hashlib \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... 
return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(np.round(prob_cancer_given_positive, 4))\n'15432ad172f7f319fe502e6337b3f549'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(np.round(prob_cancer_given_positive, 4))\n'15432ad172f7f319fe502e6337b3f549'", "hidden": false, "locked": false } @@ -1240,7 +1247,7 @@ { "cases": [ { - "code": ">>> \n>>> import hashlib \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> \n>>> get_hash(np.round(prob_cancer_given_positive_new, 4))\n'4f59b3fd6f5001542919c239391a3278'", + "code": ">>> import hashlib\n>>> \n>>> def get_hash(num):\n... \"\"\"Helper function for assessing correctness.\"\"\"\n... return hashlib.md5(str(num).encode()).hexdigest()\n>>> get_hash(np.round(prob_cancer_given_positive_new, 4))\n'4f59b3fd6f5001542919c239391a3278'", "hidden": false, "locked": false } diff --git a/lec/lec01.ipynb b/lectures/lec01.ipynb similarity index 100% rename from lec/lec01.ipynb rename to lectures/lec01.ipynb diff --git a/lec/lec03.ipynb b/lectures/lec03.ipynb similarity index 100% rename from lec/lec03.ipynb rename to lectures/lec03.ipynb diff --git a/lec/lec03_0.ipynb b/lectures/lec03_0.ipynb similarity index 100% rename from lec/lec03_0.ipynb rename to lectures/lec03_0.ipynb diff --git a/lec/lec04.ipynb b/lectures/lec04.ipynb similarity index 100% rename from lec/lec04.ipynb rename to lectures/lec04.ipynb diff --git a/lec/lec04_0.ipynb b/lectures/lec04_0.ipynb similarity index 100% rename from lec/lec04_0.ipynb rename to lectures/lec04_0.ipynb diff --git a/lec/lec05.ipynb b/lectures/lec05.ipynb similarity index 100% rename from lec/lec05.ipynb rename to lectures/lec05.ipynb diff --git a/lec/lec05_0.ipynb b/lectures/lec05_0.ipynb similarity index 100% rename from lec/lec05_0.ipynb 
rename to lectures/lec05_0.ipynb diff --git a/lec/lec06.ipynb b/lectures/lec06.ipynb similarity index 100% rename from lec/lec06.ipynb rename to lectures/lec06.ipynb diff --git a/lec/lec07.ipynb b/lectures/lec07.ipynb similarity index 100% rename from lec/lec07.ipynb rename to lectures/lec07.ipynb diff --git a/lec/lec08.ipynb b/lectures/lec08.ipynb similarity index 100% rename from lec/lec08.ipynb rename to lectures/lec08.ipynb diff --git a/lec/lec09.ipynb b/lectures/lec09.ipynb similarity index 100% rename from lec/lec09.ipynb rename to lectures/lec09.ipynb diff --git a/lec/lec09_0.ipynb b/lectures/lec09_0.ipynb similarity index 100% rename from lec/lec09_0.ipynb rename to lectures/lec09_0.ipynb diff --git a/lec/lec10.ipynb b/lectures/lec10.ipynb similarity index 100% rename from lec/lec10.ipynb rename to lectures/lec10.ipynb diff --git a/lec/lec11.ipynb b/lectures/lec11.ipynb similarity index 100% rename from lec/lec11.ipynb rename to lectures/lec11.ipynb diff --git a/lec/lec11_0.ipynb b/lectures/lec11_0.ipynb similarity index 100% rename from lec/lec11_0.ipynb rename to lectures/lec11_0.ipynb diff --git a/lec/lec12.ipynb b/lectures/lec12.ipynb similarity index 100% rename from lec/lec12.ipynb rename to lectures/lec12.ipynb diff --git a/lec/lec13.ipynb b/lectures/lec13.ipynb similarity index 100% rename from lec/lec13.ipynb rename to lectures/lec13.ipynb diff --git a/lec/lec14.ipynb b/lectures/lec14.ipynb similarity index 100% rename from lec/lec14.ipynb rename to lectures/lec14.ipynb diff --git a/lec/lec15.ipynb b/lectures/lec15.ipynb similarity index 100% rename from lec/lec15.ipynb rename to lectures/lec15.ipynb diff --git a/lec/lec16.ipynb b/lectures/lec16.ipynb similarity index 100% rename from lec/lec16.ipynb rename to lectures/lec16.ipynb diff --git a/lec/lec17.ipynb b/lectures/lec17.ipynb similarity index 100% rename from lec/lec17.ipynb rename to lectures/lec17.ipynb diff --git a/lec/lec18.ipynb b/lectures/lec18.ipynb similarity index 100% rename from 
lec/lec18.ipynb rename to lectures/lec18.ipynb diff --git a/lec/lec19.ipynb b/lectures/lec19.ipynb similarity index 100% rename from lec/lec19.ipynb rename to lectures/lec19.ipynb diff --git a/lec/lec20.ipynb b/lectures/lec20.ipynb similarity index 100% rename from lec/lec20.ipynb rename to lectures/lec20.ipynb diff --git a/lec/lec21.ipynb b/lectures/lec21.ipynb similarity index 100% rename from lec/lec21.ipynb rename to lectures/lec21.ipynb diff --git a/lec/lec23.ipynb b/lectures/lec23.ipynb similarity index 100% rename from lec/lec23.ipynb rename to lectures/lec23.ipynb diff --git a/lec/lec24.ipynb b/lectures/lec24.ipynb similarity index 100% rename from lec/lec24.ipynb rename to lectures/lec24.ipynb diff --git a/lec/lec25.ipynb b/lectures/lec25.ipynb similarity index 100% rename from lec/lec25.ipynb rename to lectures/lec25.ipynb diff --git a/lec/lec26.ipynb b/lectures/lec26.ipynb similarity index 100% rename from lec/lec26.ipynb rename to lectures/lec26.ipynb diff --git a/lec/lec27.ipynb b/lectures/lec27.ipynb similarity index 100% rename from lec/lec27.ipynb rename to lectures/lec27.ipynb diff --git a/lec/lec28.ipynb b/lectures/lec28.ipynb similarity index 100% rename from lec/lec28.ipynb rename to lectures/lec28.ipynb diff --git a/lec/lec29.ipynb b/lectures/lec29.ipynb similarity index 100% rename from lec/lec29.ipynb rename to lectures/lec29.ipynb diff --git a/lec/lec30.ipynb b/lectures/lec30.ipynb similarity index 100% rename from lec/lec30.ipynb rename to lectures/lec30.ipynb diff --git a/lec/lec31.ipynb b/lectures/lec31.ipynb similarity index 100% rename from lec/lec31.ipynb rename to lectures/lec31.ipynb diff --git a/lec/lec32.ipynb b/lectures/lec32.ipynb similarity index 100% rename from lec/lec32.ipynb rename to lectures/lec32.ipynb diff --git a/lec/lec33.ipynb b/lectures/lec33.ipynb similarity index 100% rename from lec/lec33.ipynb rename to lectures/lec33.ipynb diff --git a/lec/lec35.ipynb b/lectures/lec35.ipynb similarity index 100% rename from 
lec/lec35.ipynb rename to lectures/lec35.ipynb diff --git a/lec/lec36.ipynb b/lectures/lec36.ipynb similarity index 100% rename from lec/lec36.ipynb rename to lectures/lec36.ipynb diff --git a/lec/lec37.ipynb b/lectures/lec37.ipynb similarity index 100% rename from lec/lec37.ipynb rename to lectures/lec37.ipynb diff --git a/lec/lec38.ipynb b/lectures/lec38.ipynb similarity index 100% rename from lec/lec38.ipynb rename to lectures/lec38.ipynb diff --git a/lec/lec38_0.ipynb b/lectures/lec38_0.ipynb similarity index 100% rename from lec/lec38_0.ipynb rename to lectures/lec38_0.ipynb diff --git a/lec/lec39.ipynb b/lectures/lec39.ipynb similarity index 100% rename from lec/lec39.ipynb rename to lectures/lec39.ipynb diff --git a/project/project1/project1.ipynb b/project/project1/project1.ipynb index cff6779..fde8d8e 100644 --- a/project/project1/project1.ipynb +++ b/project/project1/project1.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -164,7 +164,10 @@ { "cell_type": "markdown", "id": "cell-project1-10", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Create a table called `b_pop` that has two columns labeled `time` and `population_total`. The first column should contain the years from 1970 through 2020 (including both 1970 and 2020) and the second should contain the population of Bangladesh in each of those years.\n" ] @@ -225,7 +228,10 @@ { "cell_type": "markdown", "id": "cell-project1-15", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Assign `initial` to an array that contains the population for every five year interval from 1970 to 2015 (inclusive). 
Then, assign `changed` to an array that contains the population for every five year interval from 1975 to 2020 (inclusive). The first array should include both 1970 and 2015, and the second array should include both 1975 and 2020. You should use the `b_five` table to create both arrays, by first filtering the table to only contain the relevant years.\n", "\n", @@ -344,7 +350,10 @@ { "cell_type": "markdown", "id": "cell-project1-25", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -370,7 +379,10 @@ { "cell_type": "markdown", "id": "cell-project1-27", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -405,7 +417,10 @@ { "cell_type": "markdown", "id": "cell-project1-30", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** Complete the function `fertility_over_time`. It takes the Alpha-3 code of a country as `country_code` and a `start` year. It returns a two-column table with labels `Year` and `Children per woman` that can be used to generate a line chart of the country's fertility rate each year, starting at the `start` year. The plot should include the `start` year and all later years that appear in the `fertility` table. 
\n", "\n", @@ -449,7 +464,10 @@ { "cell_type": "markdown", "id": "cell-project1-33", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -468,7 +486,8 @@ "cell_type": "markdown", "id": "cell-project1-35", "metadata": { - "deletable": false + "deletable": false, + "editable": false }, "source": [ "\n", @@ -479,7 +498,10 @@ { "cell_type": "markdown", "id": "cell-project1-36", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 7.** Using both the `fertility` and `child_mortality` tables, draw a scatter diagram that has Bangladesh's total fertility on the horizontal axis and its child mortality on the vertical axis with one point for each year, starting with 1970.\n", "\n", @@ -539,7 +561,10 @@ { "cell_type": "markdown", "id": "cell-project1-40", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -557,7 +582,10 @@ { "cell_type": "markdown", "id": "cell-project1-42", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -654,7 +682,10 @@ { "cell_type": "markdown", "id": "cell-project1-50", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 9.** Assign `fertility_statements` to an array of the numbers of each statement below that can be correctly inferred from these histograms.\n", "\n", @@ -695,7 +726,10 @@ { "cell_type": "markdown", "id": "cell-project1-53", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -721,7 +755,10 @@ { "cell_type": "markdown", "id": "cell-project1-55", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -778,7 +815,10 @@ { "cell_type": "markdown", "id": "cell-project1-58", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": 
[ "**Question 12.** Create a table called `pop_by_decade` with two columns called `decade` and `population`, in this order. It has a row for each year that starts a decade, in increasing order starting with 1960 and ending with 2020. For example, 1960 is the start of the 1960's decade. The `population` column contains the total population of all countries included in the result of `stats_for_year(year)` for the first `year` of the decade. You should see that these countries contain most of the world's population.\n", "\n", @@ -819,7 +859,10 @@ { "cell_type": "markdown", "id": "cell-project1-61", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Now that you've defined your helper function (if you've chosen to do so), define the `pop_by_decade` table.\n" ] @@ -878,7 +921,10 @@ { "cell_type": "markdown", "id": "cell-project1-66", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 13.** Create a table called `region_counts`. It should contain two columns called `region` and `count`. The `region` column should contain regions of the world, and the `count` column should contain the number of countries in each region that appears in the result of `stats_for_year(2020)`. \n", "\n", @@ -959,7 +1005,10 @@ { "cell_type": "markdown", "id": "cell-project1-71", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 14.** Assign `scatter_statements` to an array of the numbers of each statement below that can be inferred from this scatter diagram for 1960. \n", "1. 
As a whole, the `europe_central_asia` region had the lowest child mortality rate.\n", @@ -1106,7 +1155,10 @@ { "cell_type": "markdown", "id": "cell-project1-82", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.** Assign `latest_poverty` to a three-column table with one row for each country that appears in the `poverty` table. The first column should contain the 3-letter code for the country. The second column should contain the most recent year for which an extreme poverty rate is available for the country. The third column should contain the poverty rate in that year. **Do not change the last line, so that the labels of your table are set correctly.**\n", "\n", @@ -1147,7 +1199,10 @@ { "cell_type": "markdown", "id": "cell-project1-85", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.** Using both `latest_poverty` and `population`, create a four-column table called `recent_poverty_total` with one row for each country in `latest_poverty`. The four columns should have the following labels and contents:\n", "1. `geo` contains the 3-letter country code,\n", @@ -1189,7 +1244,10 @@ { "cell_type": "markdown", "id": "cell-project1-88", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.** Assign the name `poverty_percent` to the known percentage of the world\u2019s 2010 population that were living in extreme poverty. Assume that the `poverty_total` numbers in the `recent_poverty_total` table describe **all** people in 2010 living in extreme poverty. 
You should get a number that is above the 2018 global estimate of 9%, since many country-specific poverty rates are older than 2018.\n", "\n", @@ -1252,7 +1310,10 @@ { "cell_type": "markdown", "id": "cell-project1-93", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.** Using both `countries` and `recent_poverty_total`, create a five-column table called `poverty_map` with one row for every country in `recent_poverty_total`. The five columns should have the following labels and contents:\n", "1. `latitude` contains the country's latitude,\n", @@ -1336,7 +1397,10 @@ { "cell_type": "markdown", "id": "cell-project1-99", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 5.** Assign `largest` to a two-column table with the `name` (not the 3-letter code) and `poverty_total` of the 10 countries with the largest number of people living in extreme poverty.\n", "\n", @@ -1373,7 +1437,10 @@ { "cell_type": "markdown", "id": "cell-project1-102", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1396,7 +1463,8 @@ "cell_type": "markdown", "id": "cell-project1-104", "metadata": { - "deletable": false + "deletable": false, + "editable": false }, "source": [ "\n", @@ -1406,18 +1474,10 @@ "*Hint:* This question is long. Feel free to create cells and experiment. You can create cells by going to the toolbar and hitting the `+` button, or by going to the `Insert` tab." 
] }, - { - "cell_type": "markdown", - "id": "cell-project1-105", - "metadata": {}, - "source": [ - "_Type your answer here, replacing this text._" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "cell-project1-106", + "id": "cell-project1-105", "metadata": { "tags": [] }, @@ -1437,7 +1497,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-project1-107", + "id": "cell-project1-106", "metadata": { "deletable": false, "tags": [] @@ -1453,7 +1513,7 @@ }, { "cell_type": "markdown", - "id": "cell-project1-108", + "id": "cell-project1-107", "metadata": { "deletable": false }, @@ -1468,7 +1528,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-project1-109", + "id": "cell-project1-108", "metadata": { "deletable": false, "editable": false @@ -1483,7 +1543,7 @@ }, { "cell_type": "markdown", - "id": "cell-project1-110", + "id": "cell-project1-109", "metadata": { "deletable": false }, @@ -1505,7 +1565,7 @@ }, { "cell_type": "markdown", - "id": "cell-project1-111", + "id": "cell-project1-110", "metadata": { "deletable": false, "editable": false @@ -1519,7 +1579,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cell-project1-112", + "id": "cell-project1-111", "metadata": { "deletable": false, "editable": false @@ -1532,7 +1592,7 @@ }, { "cell_type": "markdown", - "id": "cell-project1-113", + "id": "cell-project1-112", "metadata": {}, "source": [ " " @@ -1570,12 +1630,12 @@ { "cases": [ { - "code": ">>> # Check your column labels and spelling\n>>> b_pop.labels == ('time', 'population_total')\nTrue", + "code": ">>> b_pop.labels == ('time', 'population_total')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Times should range from 1970 through 2020\n>>> all(b_pop.sort(\"time\").column(\"time\") == np.arange(1970, 2021))\nTrue", + "code": ">>> all(b_pop.sort('time').column('time') == np.arange(1970, 2021))\nTrue", "hidden": false, "locked": false } @@ -1599,12 +1659,12 @@ { "cases": [ { - "code": ">>> # 
Incorrect labels for columns\n>>> t = stats_for_year(1990)\n>>> t.labels == ('geo', 'population_total', 'children_per_woman_total_fertility', 'child_mortality_under_5_per_1000_born')\nTrue", + "code": ">>> t = stats_for_year(1990)\n>>> t.labels == ('geo', 'population_total', 'children_per_woman_total_fertility', 'child_mortality_under_5_per_1000_born')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Incorrect number of rows\n>>> t = stats_for_year(1990)\n>>> t.num_rows\n50", + "code": ">>> t = stats_for_year(1990)\n>>> t.num_rows\n50", "hidden": false, "locked": false }, @@ -1636,12 +1696,12 @@ { "cases": [ { - "code": ">>> # Check your column labels and spelling\n>>> pop_by_decade.labels == ('decade', 'population')\nTrue", + "code": ">>> pop_by_decade.labels == ('decade', 'population')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # The first year of the 1960's is 1960.\n>>> pop_by_decade.column(0).item(0) == 1960\nTrue", + "code": ">>> pop_by_decade.column(0).item(0) == 1960\nTrue", "hidden": false, "locked": false } @@ -1702,12 +1762,12 @@ { "cases": [ { - "code": ">>> # Check your column labels and spelling\n>>> region_counts.labels == ('region', 'count')\nTrue", + "code": ">>> region_counts.labels == ('region', 'count')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Counts must sum to 50\n>>> sum(region_counts.column('count')) == 50\nTrue", + "code": ">>> sum(region_counts.column('count')) == 50\nTrue", "hidden": false, "locked": false } @@ -1728,7 +1788,7 @@ { "cases": [ { - "code": ">>> # Please use a list of integers from 1 to 5\n>>> all(x in range(1, 6) for x in set(scatter_statements))\nTrue", + "code": ">>> all((x in range(1, 6) for x in set(scatter_statements)))\nTrue", "hidden": false, "locked": false } @@ -1755,7 +1815,7 @@ "locked": false }, { - "code": ">>> # Compute the annual exponential growth rate\n>>> max(b_five_growth.column(2)) < 0.03\nTrue", + "code": ">>> max(b_five_growth.column(2)) < 
0.03\nTrue", "hidden": false, "locked": false } @@ -1778,17 +1838,17 @@ { "cases": [ { - "code": ">>> # Check your column labels and spelling\n>>> fertility_over_time('usa', 2010).labels == ('Year', 'Children per woman')\nTrue", + "code": ">>> fertility_over_time('usa', 2010).labels == ('Year', 'Children per woman')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Check that you use the start year to determine the data range.\n>>> all(fertility_over_time('usa', 2010).column('Year') == np.arange(2010, 2021))\nTrue", + "code": ">>> all(fertility_over_time('usa', 2010).column('Year') == np.arange(2010, 2021))\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Check that you use the start year to determine the data range.\n>>> all(fertility_over_time('usa', 2005).column('Year') == np.arange(2005, 2021))\nTrue", + "code": ">>> all(fertility_over_time('usa', 2005).column('Year') == np.arange(2005, 2021))\nTrue", "hidden": false, "locked": false } @@ -1810,12 +1870,12 @@ { "cases": [ { - "code": ">>> # Make sure you are using the date range 1970-2020\n>>> post_1969_fertility_and_child_mortality.num_rows\n51", + "code": ">>> post_1969_fertility_and_child_mortality.num_rows\n51", "hidden": false, "locked": false }, { - "code": ">>> # Check your column labels and spelling\n>>> all([label in post_1969_fertility_and_child_mortality.labels for label in ['Children per woman', 'Child deaths per 1000 born']])\nTrue", + "code": ">>> all([label in post_1969_fertility_and_child_mortality.labels for label in ['Children per woman', 'Child deaths per 1000 born']])\nTrue", "hidden": false, "locked": false } @@ -1836,7 +1896,7 @@ { "cases": [ { - "code": ">>> # Please use a list of integers from 1 to 6\n>>> all(x in range(1, 7) for x in set(fertility_statements))\nTrue", + "code": ">>> all((x in range(1, 7) for x in set(fertility_statements)))\nTrue", "hidden": false, "locked": false } @@ -1858,12 +1918,12 @@ { "cases": [ { - "code": ">>> # Please don't edit the 
last line.\n>>> latest_poverty.labels == ('geo', 'time', 'poverty_percent')\nTrue", + "code": ">>> latest_poverty.labels == ('geo', 'time', 'poverty_percent')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # The result should have one row per country.\n>>> latest_poverty.num_rows\n145", + "code": ">>> latest_poverty.num_rows\n145", "hidden": false, "locked": false } @@ -1886,17 +1946,17 @@ { "cases": [ { - "code": ">>> # Check your column labels and spelling\n>>> recent_poverty_total.labels == ('geo', 'poverty_percent', 'population_total', 'poverty_total')\nTrue", + "code": ">>> recent_poverty_total.labels == ('geo', 'poverty_percent', 'population_total', 'poverty_total')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Careful, the population of Australia in 2010 was 22,154,687\n>>> recent_poverty_total.where('geo', 'aus').column(2).item(0)\n22154687", + "code": ">>> recent_poverty_total.where('geo', 'aus').column(2).item(0)\n22154687", "hidden": false, "locked": false }, { - "code": ">>> # The number of people estimated to be living in extreme poverty\n>>> # in Australia should be 301,304. 
That's 22,154,687 * 0.0136\n>>> # rounded to the nearest integer.\n>>> float(recent_poverty_total.where('geo', 'aus').column(3).item(0))\n301304.0", + "code": ">>> float(recent_poverty_total.where('geo', 'aus').column(3).item(0))\n301304.0", "hidden": false, "locked": false } @@ -1939,12 +1999,12 @@ { "cases": [ { - "code": ">>> # Check your column labels and spelling\n>>> poverty_map.labels == ('latitude', 'longitude', 'name', 'region', 'poverty_total')\nTrue", + "code": ">>> poverty_map.labels == ('latitude', 'longitude', 'name', 'region', 'poverty_total')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # Something is wrong with your region column.\n>>> list(np.sort(np.unique(poverty_map.column('region'))))\n['africa', 'americas', 'asia', 'europe']", + "code": ">>> list(np.sort(np.unique(poverty_map.column('region'))))\n['africa', 'americas', 'asia', 'europe']", "hidden": false, "locked": false } @@ -1967,17 +2027,17 @@ { "cases": [ { - "code": ">>> # Check your column labels and spelling\n>>> largest.labels == ('name', 'poverty_total')\nTrue", + "code": ">>> largest.labels == ('name', 'poverty_total')\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # India is the country with the largest number of people living\n>>> # in extreme poverty.\n>>> largest.column(0).item(0)\n'India'", + "code": ">>> largest.column(0).item(0)\n'India'", "hidden": false, "locked": false }, { - "code": ">>> # The table should contain exactly 10 rows.\n>>> largest.num_rows\n10", + "code": ">>> largest.num_rows\n10", "hidden": false, "locked": false } diff --git a/project/project2/project2.ipynb b/project/project2/project2.ipynb index 3e4a93c..b62129e 100644 --- a/project/project2/project2.ipynb +++ b/project/project2/project2.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience 
ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -147,7 +147,10 @@ { "cell_type": "markdown", "id": "cell-project2-10", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -171,7 +174,10 @@ { "cell_type": "markdown", "id": "cell-project2-12", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -192,7 +198,10 @@ { "cell_type": "markdown", "id": "cell-project2-14", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -210,7 +219,10 @@ { "cell_type": "markdown", "id": "cell-project2-16", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -263,7 +275,10 @@ { "cell_type": "markdown", "id": "cell-project2-20", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.4**: Define the `coordinates_to_region` function below. It should take in two arguments, a city's latitude (`lat`) and longitude (`lon`) coordinates, and output a string representing the region it is located in.\n" ] @@ -297,7 +312,10 @@ { "cell_type": "markdown", "id": "cell-project2-23", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.1.5**: Add a new column in `cities` labeled `Region` that contains the region in which the city is located. For full credit, you must use the `coordinates_to_region` function you defined rather than reimplementing its logic.\n" ] @@ -424,7 +442,10 @@ { "cell_type": "markdown", "id": "cell-project2-33", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.1:** Assign the variable `largest_2010_range_date` to the date of the **largest temperature range** in Phoenix, Arizona for any day between January 1st, 2010 and December 31st, 2010. 
Your answer should be a string in the \"YYYY-MM-DD\" format. Feel free to use as many lines as you need. A temperature range is calculated as the difference between the max and min temperatures for the day.\n", "\n", @@ -524,7 +545,10 @@ { "cell_type": "markdown", "id": "cell-project2-41", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.2:** Add two new columns called `Year` and `Month` to the `phoenix` table that contain the year as an **integer** and the month as a **string** (such as `\"04 (Apr)\"`) for each day, respectively. \n", "\n", @@ -562,7 +586,10 @@ { "cell_type": "markdown", "id": "cell-project2-44", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -586,7 +613,10 @@ { "cell_type": "markdown", "id": "cell-project2-46", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -606,7 +636,10 @@ { "cell_type": "markdown", "id": "cell-project2-48", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -616,7 +649,10 @@ { "cell_type": "markdown", "id": "cell-project2-49", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.5:** Create a `monthly_increases` table with one row per month and the following four columns in order: \n", "1. `\"Month\"`: The month (such as `\"02 (Feb)\"`)\n", @@ -733,7 +769,10 @@ { "cell_type": "markdown", "id": "cell-project2-58", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.6.** Complete the implementation of the function `ci_lower`, which takes a one-column table `t` containing sample observations and a confidence `level` percentage such as 95 or 99. 
It returns the lower bound of a confidence interval for the population mean constructed using 5,000 bootstrap resamples.\n", "\n", @@ -780,7 +819,10 @@ { "cell_type": "markdown", "id": "cell-project2-61", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -800,7 +842,10 @@ { "cell_type": "markdown", "id": "cell-project2-63", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -810,7 +855,10 @@ { "cell_type": "markdown", "id": "cell-project2-64", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 1.2.8.** Repeat the process of comparing the **lower bound of a 99% confidence interval** to the **past average** for each month. For each month, print out the name of the month (e.g., `02 (Feb)`), the observed past average, and the lower bound of a confidence interval for the present average.\n", "\n", @@ -865,7 +913,10 @@ { "cell_type": "markdown", "id": "cell-project2-67", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -886,6 +937,8 @@ "cell_type": "markdown", "id": "cell-project2-69", "metadata": { + "deletable": false, + "editable": false, "tags": [] }, "source": [ @@ -982,7 +1035,10 @@ { "cell_type": "markdown", "id": "cell-project2-77", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.1.** Create a table `totals` that has one row for each year in chronological order. It should contain the following columns:\n", "1. 
`\"Year\"`: The year (a number)\n", @@ -1072,7 +1128,10 @@ { "cell_type": "markdown", "id": "cell-project2-85", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1095,7 +1154,10 @@ { "cell_type": "markdown", "id": "cell-project2-87", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1129,7 +1191,10 @@ { "cell_type": "markdown", "id": "cell-project2-89", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1139,7 +1204,10 @@ { "cell_type": "markdown", "id": "cell-project2-90", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.4.** Our next step is to choose a test statistic based on our alternative hypothesis in Question 2.2. Which of the following options are valid choices for the test statistic? Assign `ab_test_stat` to an array of integers corresponding to valid choices. 
Assume averages and totals are taken over the total precipitation sums for each year.\n", "\n", @@ -1179,7 +1247,10 @@ { "cell_type": "markdown", "id": "cell-project2-93", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1197,7 +1268,10 @@ { "cell_type": "markdown", "id": "cell-project2-95", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1227,7 +1301,10 @@ { "cell_type": "markdown", "id": "cell-project2-98", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.6.** For our A/B test, we'll use the difference between the average precipitation in drought years and the average precipitation in other years as our test statistic:\n", "\n", @@ -1280,7 +1357,10 @@ { "cell_type": "markdown", "id": "cell-project2-102", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.7.** Write a function to simulate the test statistic under the null hypothesis. The `simulate_precipitation_null` function should simulate the null hypothesis once (not 5,000 times) and return the value of the test statistic for that simulated sample.\n", "\n", @@ -1319,7 +1399,10 @@ { "cell_type": "markdown", "id": "cell-project2-105", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.8.** Fill in the blanks below to complete the simulation for the hypothesis test. 
Your simulation should compute 5,000 values of the test statistic under the null hypothesis and store the result in the array `sampled_stats`.\n", "\n", @@ -1365,7 +1448,10 @@ { "cell_type": "markdown", "id": "cell-project2-108", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 2.9.** Compute the p-value for this hypothesis test, and assign it to the variable `precipitation_p_val`.\n" ] @@ -1399,7 +1485,10 @@ { "cell_type": "markdown", "id": "cell-project2-111", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1417,7 +1506,10 @@ { "cell_type": "markdown", "id": "cell-project2-113", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1437,7 +1529,10 @@ { "cell_type": "markdown", "id": "cell-project2-115", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1542,7 +1637,7 @@ "locked": false }, { - "code": ">>> # Make sure that num_unique_cities is greater than zero!\n>>> num_unique_cites > 0\nTrue", + "code": ">>> num_unique_cites > 0\nTrue", "hidden": false, "locked": false } @@ -1563,7 +1658,7 @@ { "cases": [ { - "code": ">>> # Make sure the function outputs a string!\n>>> type(coordinates_to_region(50, 100)) == str\nTrue", + "code": ">>> type(coordinates_to_region(50, 100)) == str\nTrue", "hidden": false, "locked": false } @@ -1591,7 +1686,7 @@ "locked": false }, { - "code": ">>> cities.labels[-1] == \"Region\"\nTrue", + "code": ">>> cities.labels[-1] == 'Region'\nTrue", "hidden": false, "locked": false }, @@ -1683,7 +1778,7 @@ "locked": false }, { - "code": ">>> # Make sure your table is sorted from January to December.\n>>> monthly_increases.row(2).item('Month') == '03 (Mar)'\nTrue", + "code": ">>> monthly_increases.row(2).item('Month') == '03 (Mar)'\nTrue", "hidden": false, "locked": false } diff --git a/project/project3/project3.ipynb 
b/project/project3/project3.ipynb index 7b3ade0..21bd2a6 100644 --- a/project/project3/project3.ipynb +++ b/project/project3/project3.ipynb @@ -11,7 +11,7 @@ "outputs": [], "source": [ "# The pip install can take a minute\n", - "%pip install -q urllib3<2.0 otter-grader==4.4.1 datascience ipywidgets\n", + "%pip install -q urllib3<2.0 otter-grader==5.5.0 datascience ipywidgets\n", "import pyodide_http\n", "pyodide_http.patch_all()\n", "\n", @@ -185,7 +185,10 @@ { "cell_type": "markdown", "id": "cell-project3-13", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 1.0\n", "Set `expected_row_sum` to the number that you __expect__ will result from summing all proportions in each row, excluding the first five columns. Think about what any one row adds up to.\n" @@ -267,7 +270,10 @@ { "cell_type": "markdown", "id": "cell-project3-20", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 1.1.1\n", "Using `vocab_table`, find the stemmed version of the word \"elements\" and assign the value to `stemmed_message`.\n" @@ -302,7 +308,10 @@ { "cell_type": "markdown", "id": "cell-project3-23", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 1.1.2\n", "What stem in the dataset has the most words that are shortened to it? Assign `most_stem` to that stem. \n" @@ -337,7 +346,10 @@ { "cell_type": "markdown", "id": "cell-project3-26", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 1.1.3\n", "What is the longest word in the dataset whose stem wasn't shortened? Assign that to `longest_uncut`. Break ties alphabetically from Z to A (so if your options are \"cat\" or \"bat\", you should pick \"cat\"). 
Note that when sorting letters, the letter `a` is smaller than the letter `z`.\n", @@ -387,7 +399,10 @@ { "cell_type": "markdown", "id": "cell-project3-29", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 1.1.4\n", "How many stems have only one word that is shortened to them? For example, if the stem \"book\" only maps to the word \"books\" and if the stem \"a\" only maps to the word \"a,\" both should be counted as stems that map only to a single word.\n", @@ -460,7 +475,10 @@ { "cell_type": "markdown", "id": "cell-project3-35", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 1.2.1\n", "Looking at that chart it is difficult to see if there is an association. Calculate the correlation coefficient for the potential linear association between proportion of words that are \"outer\" and the proportion of words that are \"space\" for every movie in the dataset, and assign it to `outer_space_r`. 
\n", @@ -504,7 +522,10 @@ { "cell_type": "markdown", "id": "cell-project3-38", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -550,7 +571,10 @@ { "cell_type": "markdown", "id": "cell-project3-40", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -632,7 +656,10 @@ { "cell_type": "markdown", "id": "cell-project3-45", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -665,7 +692,10 @@ { "cell_type": "markdown", "id": "cell-project3-47", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -730,7 +760,10 @@ { "cell_type": "markdown", "id": "cell-project3-50", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 2.1.1\n", "\n", @@ -796,7 +829,10 @@ { "cell_type": "markdown", "id": "cell-project3-55", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 2.1.2\n", "Complete the function `distance_two_features` that computes the Euclidean distance between any two movies, using two features. The last two lines call your function to show that *Monty Python and the Holy Grail* is closer to *The Silence of the Lambs* than it is to *Clerks*. \n" @@ -842,7 +878,10 @@ { "cell_type": "markdown", "id": "cell-project3-58", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 2.1.3\n", "Define the function `distance_from_python` so that it works as described in its documentation. 
\n", @@ -890,7 +929,10 @@ { "cell_type": "markdown", "id": "cell-project3-61", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 2.1.4\n", "\n", @@ -935,7 +977,10 @@ { "cell_type": "markdown", "id": "cell-project3-64", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 2.1.5\n", "Next, we'll clasify *Monty Python and the Holy Grail* based on the genres of the closest movies. \n", @@ -1070,7 +1115,10 @@ { "cell_type": "markdown", "id": "cell-project3-74", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 3.0\n", "Write a function called `distance` to compute the Euclidean distance between two **arrays** of **numerical** features (e.g. arrays of the proportions of times that different words appear). The function should be able to calculate the Euclidean distance between two arrays of arbitrary (but equal) length.\n", @@ -1147,7 +1195,10 @@ { "cell_type": "markdown", "id": "cell-project3-80", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Questions 3.1.1 through 3.1.4 will ask you to interpret the plot above. For each question, select one of the following choices and assign its number to the provided name.\n", "1. 
The word is common in both comedy and thriller movies \n", @@ -1190,7 +1241,10 @@ { "cell_type": "markdown", "id": "cell-project3-83", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.2**\n", "\n", @@ -1226,7 +1280,10 @@ { "cell_type": "markdown", "id": "cell-project3-86", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.3**\n", "\n", @@ -1262,7 +1319,10 @@ { "cell_type": "markdown", "id": "cell-project3-89", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.4**\n", "\n", @@ -1298,7 +1358,10 @@ { "cell_type": "markdown", "id": "cell-project3-92", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.1.5**\n", "\n", @@ -1336,7 +1399,10 @@ { "cell_type": "markdown", "id": "cell-project3-95", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 3.1.6\n", "Using the plot above, make an array of at least 10 common words that you think might let you **distinguish** between comedy and thriller movies. Make sure to choose words that are **frequent enough** that every movie contains at least one of them. 
Don't just choose the most frequent words though--you can do much better.\n" @@ -1385,7 +1451,10 @@ { "cell_type": "markdown", "id": "cell-project3-99", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1404,7 +1473,10 @@ { "cell_type": "markdown", "id": "cell-project3-101", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1464,7 +1536,10 @@ { "cell_type": "markdown", "id": "cell-project3-105", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 3.1.8\n", "Use the `fast_distances` function provided above to compute the distance from the first movie in your test set to all the movies in your training set, **using your set of features**. Make a new table called `genre_and_distances` with one row for each movie in the training set and two columns:\n", @@ -1507,7 +1582,10 @@ { "cell_type": "markdown", "id": "cell-project3-108", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 3.1.9\n", "Now compute the 7-nearest neighbors classification of the first movie in the test set. That is, decide on its genre by finding the most common genre among its 7 nearest neighbors in the training set, according to the distances you've calculated. Then check whether your classifier chose the right genre. (Depending on the features you chose, your classifier might not get this movie right, and that's okay.)\n", @@ -1561,7 +1639,10 @@ { "cell_type": "markdown", "id": "cell-project3-112", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 3.2.1\n", "Write a function called `classify`. 
It should take the following four arguments:\n", @@ -1606,7 +1687,10 @@ { "cell_type": "markdown", "id": "cell-project3-115", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 3.2.2\n", "\n", @@ -1655,7 +1739,10 @@ { "cell_type": "markdown", "id": "cell-project3-119", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "#### Question 3.2.3\n", "Create a classification function that takes as its argument a row containing your 10 features and classifies that row using the 15-nearest neighbors algorithm with `train_my_features` as its training set. \n" @@ -1702,7 +1789,10 @@ { "cell_type": "markdown", "id": "cell-project3-123", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "Now that it's easy to use the classifier, let's see how accurate it is on the whole test set.\n", "\n", @@ -1746,7 +1836,10 @@ { "cell_type": "markdown", "id": "cell-project3-126", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 3.3.2** \n", "\n", @@ -1790,7 +1883,10 @@ { "cell_type": "markdown", "id": "cell-project3-129", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1810,7 +1906,10 @@ { "cell_type": "markdown", "id": "cell-project3-131", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1849,7 +1948,10 @@ { "cell_type": "markdown", "id": "cell-project3-134", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "**Question 4.1** \n", "\n", @@ -1899,7 +2001,10 @@ { "cell_type": "markdown", "id": "cell-project3-137", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1921,7 +2026,10 @@ { "cell_type": "markdown", "id": "cell-project3-139", - "metadata": {}, + "metadata": { + 
"deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -1943,7 +2051,10 @@ { "cell_type": "markdown", "id": "cell-project3-141", - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false + }, "source": [ "\n", "\n", @@ -2234,7 +2345,7 @@ { "cases": [ { - "code": ">>> 0 < one_distance < .01\nTrue", + "code": ">>> 0 < one_distance < 0.01\nTrue", "hidden": false, "locked": false } @@ -2256,12 +2367,12 @@ { "cases": [ { - "code": ">>> correct_dis = 0.001406116\n>>> dis = distance_two_features(\"clerks.\", \"the godfather\", \"water\", \"feel\")\n>>> np.isclose(np.round(dis, 9), correct_dis) # Make sure you can use any two movies\nTrue", + "code": ">>> correct_dis = 0.001406116\n>>> dis = distance_two_features('clerks.', 'the godfather', 'water', 'feel')\n>>> np.isclose(np.round(dis, 9), correct_dis)\nTrue", "hidden": false, "locked": false }, { - "code": ">>> correct_dis = 0.005647119\n>>> dis = distance_two_features(\"clerks.\", \"the godfather\", \"your\", \"that\")\n>>> np.isclose(np.round(dis, 9), correct_dis) # Make sure you can use any two features\nTrue", + "code": ">>> correct_dis = 0.005647119\n>>> dis = distance_two_features('clerks.', 'the godfather', 'your', 'that')\n>>> np.isclose(np.round(dis, 9), correct_dis)\nTrue", "hidden": false, "locked": false } @@ -2315,7 +2426,7 @@ "locked": false }, { - "code": ">>> close_movies.column(\"Title\").item(0) != \"monty python and the holy grail\" # Make sure that you are using the training set.\nTrue", + "code": ">>> close_movies.column('Title').item(0) != 'monty python and the holy grail'\nTrue", "hidden": false, "locked": false } @@ -2390,7 +2501,7 @@ { "cases": [ { - "code": ">>> bottom_left >= 1 and bottom_left <= 5 # It looks like you've chosen an illegal option (not within 1-5)\nTrue", + "code": ">>> bottom_left >= 1 and bottom_left <= 5\nTrue", "hidden": false, "locked": false } @@ -2411,7 +2522,7 @@ { "cases": [ { - "code": ">>> bottom_right >= 1 and bottom_right <= 5 # It 
looks like you've chosen an illegal option (not within 1-5)\nTrue", + "code": ">>> bottom_right >= 1 and bottom_right <= 5\nTrue", "hidden": false, "locked": false } @@ -2432,7 +2543,7 @@ { "cases": [ { - "code": ">>> top_right >= 1 and top_right <= 5 # It looks like you've chosen an illegal option (not within 1-5)\nTrue", + "code": ">>> top_right >= 1 and top_right <= 5\nTrue", "hidden": false, "locked": false } @@ -2453,7 +2564,7 @@ { "cases": [ { - "code": ">>> top_left >= 1 and top_left <= 5 # It looks like you've chosen an illegal option (not within 1-5)\nTrue", + "code": ">>> top_left >= 1 and top_left <= 5\nTrue", "hidden": false, "locked": false } @@ -2509,17 +2620,17 @@ "locked": false }, { - "code": ">>> len(set(my_features)) >= 10 # It looks like you may have duplicate words! Make sure not to!\nTrue", + "code": ">>> len(set(my_features)) >= 10\nTrue", "hidden": false, "locked": false }, { - "code": ">>> help_message = \"\"\"It looks like there are many movies in the training set that \n... don't have any of your chosen words. That will make your\n... classifier perform very poorly in some cases. Try choosing\n... at least 1 common word.\n... \"\"\"\n>>> train_f = train_movies.select(my_features)\n>>> np.count_nonzero(train_f.apply(lambda r: np.sum(np.abs(np.array(list(r)))) == 0)) < len(my_features)\nTrue", + "code": ">>> help_message = \"It looks like there are many movies in the training set that \\n don't have any of your chosen words. That will make your\\n classifier perform very poorly in some cases. Try choosing\\n at least 1 common word.\\n \"\n>>> train_f = train_movies.select(my_features)\n>>> np.count_nonzero(train_f.apply(lambda r: np.sum(np.abs(np.array(list(r)))) == 0)) < len(my_features)\nTrue", "hidden": false, "locked": false }, { - "code": ">>> help_message = \"\"\"It looks like there are many movies in the test set that \n... don't have any of your chosen words. That will make your\n... classifier perform very poorly in some cases. 
Try choosing\n... at least 1 common word.\n... \"\"\"\n>>> test_f = test_movies.select(my_features)\n>>> np.count_nonzero(test_f.apply(lambda r: np.sum(np.abs(np.array(list(r)))) == 0)) < 5\nTrue", + "code": ">>> help_message = \"It looks like there are many movies in the test set that \\n don't have any of your chosen words. That will make your\\n classifier perform very poorly in some cases. Try choosing\\n at least 1 common word.\\n \"\n>>> test_f = test_movies.select(my_features)\n>>> np.count_nonzero(test_f.apply(lambda r: np.sum(np.abs(np.array(list(r)))) == 0)) < 5\nTrue", "hidden": false, "locked": false } @@ -2607,12 +2718,12 @@ { "cases": [ { - "code": ">>> # This test just checks to see if your classify function works correctly \n>>> # with k=5 nearest neighbors.\n>>> from collections import Counter\n>>> g = train_movies.column('Genre')\n>>> def check(r, k):\n... t = test_my_features.row(r)\n... return classify(t, train_my_features, g, k) == Counter(np.take(g, np.argsort(fast_distances(t, train_my_features))[:k])).most_common(1)[0][0]\n>>> check_5_nn = [check(i, 5) for i in np.arange(11)]\n>>> all(check_5_nn)\nTrue", + "code": ">>> from collections import Counter\n>>> g = train_movies.column('Genre')\n>>> \n>>> def check(r, k):\n... t = test_my_features.row(r)\n... return classify(t, train_my_features, g, k) == Counter(np.take(g, np.argsort(fast_distances(t, train_my_features))[:k])).most_common(1)[0][0]\n>>> check_5_nn = [check(i, 5) for i in np.arange(11)]\n>>> all(check_5_nn)\nTrue", "hidden": false, "locked": false }, { - "code": ">>> # This test just checks to see if your classify function works correctly \n>>> # with k=11 nearest neighbors.\n>>> from collections import Counter\n>>> g = train_movies.column('Genre')\n>>> def check(r, k):\n... t = test_my_features.row(r)\n... 
return classify(t, train_my_features, g, k) == Counter(np.take(g, np.argsort(fast_distances(t, train_my_features))[:k])).most_common(1)[0][0]\n>>> check_11_nn = [check(i, 11) for i in np.arange(11)]\n>>> all(check_11_nn)\nTrue", + "code": ">>> from collections import Counter\n>>> g = train_movies.column('Genre')\n>>> \n>>> def check(r, k):\n... t = test_my_features.row(r)\n... return classify(t, train_my_features, g, k) == Counter(np.take(g, np.argsort(fast_distances(t, train_my_features))[:k])).most_common(1)[0][0]\n>>> check_11_nn = [check(i, 11) for i in np.arange(11)]\n>>> all(check_11_nn)\nTrue", "hidden": false, "locked": false } @@ -2633,7 +2744,7 @@ { "cases": [ { - "code": ">>> from collections import Counter\n>>> g = train_movies.column('Genre')\n>>> r = np.where(test_movies['Title'] == \"godzilla\")[0][0]\n>>> t = test_my_features.row(r)\n>>> godzilla_expected_genre = Counter(np.take(g, np.argsort(fast_distances(t, train_my_features))[:15])).most_common(1)[0][0]\n>>> godzilla_genre == godzilla_expected_genre\nTrue", + "code": ">>> from collections import Counter\n>>> g = train_movies.column('Genre')\n>>> r = np.where(test_movies['Title'] == 'godzilla')[0][0]\n>>> t = test_my_features.row(r)\n>>> godzilla_expected_genre = Counter(np.take(g, np.argsort(fast_distances(t, train_my_features))[:15])).most_common(1)[0][0]\n>>> godzilla_genre == godzilla_expected_genre\nTrue", "hidden": false, "locked": false } @@ -2654,7 +2765,7 @@ { "cases": [ { - "code": ">>> # This test just checks that your classify_feature_row works correctly.\n>>> def check(r):\n... t = test_my_features.row(r)\n... return classify(t, train_my_features, train_movies.column('Genre'), 15) == classify_feature_row(t)\n>>> all([check(i) for i in np.arange(15)])\nTrue", + "code": ">>> def check(r):\n... t = test_my_features.row(r)\n... 
return classify(t, train_my_features, train_movies.column('Genre'), 15) == classify_feature_row(t)\n>>> all([check(i) for i in np.arange(15)])\nTrue", "hidden": false, "locked": false } @@ -2708,7 +2819,7 @@ "locked": false }, { - "code": ">>> # Make sure that test_movie_correctness does not modify the original\n>>> # test_movies table.\n>>> print(test_movie_correctness.group('Genre'))\nGenre | count\ncomedy | 17\nthriller | 33\n", + "code": ">>> print(test_movie_correctness.group('Genre'))\nGenre | count\ncomedy | 17\nthriller | 33\n", "hidden": false, "locked": false } @@ -2730,12 +2841,12 @@ { "cases": [ { - "code": ">>> len(new_features) <= 5 # You have too many features\nTrue", + "code": ">>> len(new_features) <= 5\nTrue", "hidden": false, "locked": false }, { - "code": ">>> another_classifier(test_new.row(0)) in [\"comedy\", \"thriller\"] \nTrue", + "code": ">>> another_classifier(test_new.row(0)) in ['comedy', 'thriller']\nTrue", "hidden": false, "locked": false } From 7d4b7aa85a0761db1d1a4ef731f18c0db4e03863 Mon Sep 17 00:00:00 2001 From: sean-morris Date: Thu, 4 Jul 2024 10:50:50 -0700 Subject: [PATCH 2/2] Manually Graded Questions: PDF Created --- hw/hw01/hw01.ipynb | 2 +- hw/hw03/hw03.ipynb | 2 +- hw/hw04/hw04.ipynb | 2 +- hw/hw06/hw06.ipynb | 2 +- hw/hw07/hw07.ipynb | 4 ++-- hw/hw08/hw08.ipynb | 2 +- hw/hw09/hw09.ipynb | 2 +- hw/hw10/hw10.ipynb | 2 +- hw/hw11/hw11.ipynb | 2 +- hw/hw12/hw12.ipynb | 2 +- lab/lab05/lab05.ipynb | 2 +- lab/lab09/lab09.ipynb | 2 +- project/project1/project1.ipynb | 2 +- project/project2/project2.ipynb | 2 +- project/project3/project3.ipynb | 2 +- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git a/hw/hw01/hw01.ipynb b/hw/hw01/hw01.ipynb index a85b3f8..1d23d2e 100644 --- a/hw/hw01/hw01.ipynb +++ b/hw/hw01/hw01.ipynb @@ -870,7 +870,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + 
"grader.export(run_tests=True)" ] }, { diff --git a/hw/hw03/hw03.ipynb b/hw/hw03/hw03.ipynb index ac7ff79..7e1f4b5 100644 --- a/hw/hw03/hw03.ipynb +++ b/hw/hw03/hw03.ipynb @@ -1183,7 +1183,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/hw/hw04/hw04.ipynb b/hw/hw04/hw04.ipynb index 61d5091..84c5833 100644 --- a/hw/hw04/hw04.ipynb +++ b/hw/hw04/hw04.ipynb @@ -736,7 +736,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/hw/hw06/hw06.ipynb b/hw/hw06/hw06.ipynb index fe234f5..c9fafe7 100644 --- a/hw/hw06/hw06.ipynb +++ b/hw/hw06/hw06.ipynb @@ -1391,7 +1391,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/hw/hw07/hw07.ipynb b/hw/hw07/hw07.ipynb index e71d8c3..f50c0f9 100644 --- a/hw/hw07/hw07.ipynb +++ b/hw/hw07/hw07.ipynb @@ -1275,7 +1275,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { @@ -1303,7 +1303,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.1.-1" + "version": "3.11.0" }, "otter": { "OK_FORMAT": true, diff --git a/hw/hw08/hw08.ipynb b/hw/hw08/hw08.ipynb index de70053..f7b441c 100644 --- a/hw/hw08/hw08.ipynb +++ b/hw/hw08/hw08.ipynb @@ -667,7 +667,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/hw/hw09/hw09.ipynb 
b/hw/hw09/hw09.ipynb index 9c24533..53d10fa 100644 --- a/hw/hw09/hw09.ipynb +++ b/hw/hw09/hw09.ipynb @@ -992,7 +992,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/hw/hw10/hw10.ipynb b/hw/hw10/hw10.ipynb index 9e41b05..67fe217 100644 --- a/hw/hw10/hw10.ipynb +++ b/hw/hw10/hw10.ipynb @@ -1143,7 +1143,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/hw/hw11/hw11.ipynb b/hw/hw11/hw11.ipynb index cd910d6..3900118 100644 --- a/hw/hw11/hw11.ipynb +++ b/hw/hw11/hw11.ipynb @@ -900,7 +900,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/hw/hw12/hw12.ipynb b/hw/hw12/hw12.ipynb index 9243cae..1513b99 100644 --- a/hw/hw12/hw12.ipynb +++ b/hw/hw12/hw12.ipynb @@ -774,7 +774,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/lab/lab05/lab05.ipynb b/lab/lab05/lab05.ipynb index 2f764b6..8c01966 100644 --- a/lab/lab05/lab05.ipynb +++ b/lab/lab05/lab05.ipynb @@ -1049,7 +1049,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/lab/lab09/lab09.ipynb b/lab/lab09/lab09.ipynb index be3a22f..85e277b 100644 --- a/lab/lab09/lab09.ipynb +++ b/lab/lab09/lab09.ipynb @@ -1098,7 +1098,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - 
"grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/project/project1/project1.ipynb b/project/project1/project1.ipynb index fde8d8e..fd97dd6 100644 --- a/project/project1/project1.ipynb +++ b/project/project1/project1.ipynb @@ -1587,7 +1587,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/project/project2/project2.ipynb b/project/project2/project2.ipynb index b62129e..7581df9 100644 --- a/project/project2/project2.ipynb +++ b/project/project2/project2.ipynb @@ -1589,7 +1589,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, { diff --git a/project/project3/project3.ipynb b/project/project3/project3.ipynb index 21bd2a6..f071989 100644 --- a/project/project3/project3.ipynb +++ b/project/project3/project3.ipynb @@ -2150,7 +2150,7 @@ "outputs": [], "source": [ "# Save your notebook first, then run this cell to export your submission.\n", - "grader.export(pdf=False, run_tests=True)" + "grader.export(run_tests=True)" ] }, {