karpathy · ibadrather · Sep 14, 2022 · Sep 17, 2022 · Sep 17, 2022 · Sep 17, 2022
diff --git a/lectures/makemore/loss_with_correct_step.png b/lectures/makemore/loss_with_correct_step.png
diff --git a/lectures/makemore/loss_with_wrong_step.png b/lectures/makemore/loss_with_wrong_step.png
diff --git a/lectures/makemore/makemore_part2_mlp.ipynb b/lectures/makemore/makemore_part2_mlp.ipynb
diff --git a/lectures/micrograd/micrograd_lecture_second_half_roughly.ipynb b/lectures/micrograd/micrograd_lecture_second_half_roughly.ipynb
diff --git a/my_code/makemore/1_bigram_torch_exercise.ipynb b/my_code/makemore/1_bigram_torch_exercise.ipynb
diff --git a/my_code/makemore/1_bigram_torch_exercise_approach_2.ipynb b/my_code/makemore/1_bigram_torch_exercise_approach_2.ipynb
diff --git a/my_code/makemore/1_makemore_bigram_torch.ipynb b/my_code/makemore/1_makemore_bigram_torch.ipynb
diff --git a/my_code/makemore/2_makemore_mlp.ipynb b/my_code/makemore/2_makemore_mlp.ipynb
@@ -0,0 +1,249 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn.functional\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "with open(\"names.txt\", \"r\") as f:\n",
+    "    words = f.read().splitlines()\n",
+    "\n",
+    "words[:8]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total names:  32033\n",
+      "Minimum name length:  2\n",
+      "Maximum name length:  15\n"
+     ]
+    }
+   ],
+   "source": [
+    "# total words\n",
+    "print(\"Total names: \", len(words))\n",
+    "\n",
+    "print(\"Minimum name length: \", min(len(w) for w in words))\n",
+    "print(\"Maximum name length: \", max(len(w) for w in words))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Stoi:  {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26, '.': 0}\n",
+      "Itos:  {1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# making vocabulary mappings\n",
+    "chars = sorted(list(set(\"\".join(words))))\n",
+    "stoi = {c: i+1 for i, c in enumerate(chars)}\n",
+    "stoi[\".\"] = 0\n",
+    "itos = {i: c for c, i in stoi.items()}\n",
+    "print(\"Stoi: \", stoi)\n",
+    "print(\"Itos: \", itos)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### build the dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "block_size = 3 # context length: how many characters do we take to predict the next one?\n",
+    "X, Y = [], []\n",
+    "for w in words[:5]:\n",
+    "  \n",
+    "  #print(w)\n",
+    "  context = [0] * block_size\n",
+    "  for ch in w + '.':\n",
+    "    ix = stoi[ch]\n",
+    "    X.append(context)\n",
+    "    Y.append(ix)\n",
+    "    #print(''.join(itos[i] for i in context), '--->', itos[ix])\n",
+    "    context = context[1:] + [ix] # crop and append\n",
+    "  \n",
+    "X = torch.tensor(X)\n",
+    "Y = torch.tensor(Y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(torch.Size([32, 3]), torch.Size([32]), torch.int64, torch.int64)"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X.shape, Y.shape, X.dtype, Y.dtype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# In the paper, the model takes 3 words as input\n",
+    "# and embeds a vocabulary of 17000 words into 30 dimensions.\n",
+    "# We only have 27 chars here, so let's start with a small dimension for the look-up matrix C"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([27, 2])"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "C = torch.randn(27, 2)\n",
+    "C.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([0.9258, 0.9268])"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# let's embed a single character first\n",
+    "C[5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([32, 3, 2])"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Let's embed all of X at once\n",
+    "emb = C[X]\n",
+    "emb.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now let's make the hidden layer\n",
+    "W1 = torch.randn((6, 100))  # 3*2 inputs and 100 hidden units\n",
+    "b1 = torch.randn(100)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.6 ('deeply')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "8ba283c99aab3f9826c9057a6b77e5ed1375b1012fccd26237ed2bb4d681a306"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}