From 068aa4f5d2a693bcf82fa9ebcb7f0d0b1eb054ba Mon Sep 17 00:00:00 2001
From: Nero Blackstone
Date: Mon, 3 Jun 2024 16:08:42 +0800
Subject: [PATCH] update

---
 _toc.yml                                    |  10 +-
 .../chapter_recurrent-modern/deep-rnn.ipynb |  23 ---
 .../chapter_recurrent_modern/deep-rnn.ipynb | 147 ++++++++++++++++++
 .../gru.ipynb                               |   1 -
 .../lstm.ipynb                              |   1 -
 5 files changed, 152 insertions(+), 30 deletions(-)
 delete mode 100644 notebooks/chapter_recurrent-modern/deep-rnn.ipynb
 create mode 100644 notebooks/chapter_recurrent_modern/deep-rnn.ipynb
 rename notebooks/{chapter_recurrent-modern => chapter_recurrent_modern}/gru.ipynb (99%)
 rename notebooks/{chapter_recurrent-modern => chapter_recurrent_modern}/lstm.ipynb (99%)

diff --git a/_toc.yml b/_toc.yml
index 61f59fc..2211058 100644
--- a/_toc.yml
+++ b/_toc.yml
@@ -41,10 +41,10 @@ parts:
     chapters:
     - file: notebooks/chapter_recurrent_neural_networks/sequence.ipynb
     - file: notebooks/chapter_recurrent_neural_networks/text-sequence.ipynb
-  - caption: Modern Recurrent Neural Networks 
+  - caption: Modern Recurrent Neural Networks
     numbered: true
-  - chapters:
-    - file: notebooks/chapter_recurrent-modern/lstm.ipynb
-    - file: notebooks/chapter_recurrent-modern/gru.ipynb
-    - file: notebooks/chapter_recurrent-modern/deep-rnn.ipynb
+    chapters:
+    - file: notebooks/chapter_recurrent_modern/lstm.ipynb
+    - file: notebooks/chapter_recurrent_modern/gru.ipynb
+    - file: notebooks/chapter_recurrent_modern/deep-rnn.ipynb
\ No newline at end of file
diff --git a/notebooks/chapter_recurrent-modern/deep-rnn.ipynb b/notebooks/chapter_recurrent-modern/deep-rnn.ipynb
deleted file mode 100644
index 7e089d3..0000000
--- a/notebooks/chapter_recurrent-modern/deep-rnn.ipynb
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "vscode": {
-     "languageId": "julia"
-    }
-   },
-   "source": [
-    "# Deep Recurrent Neural Networks\n",
-    "\n"
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/notebooks/chapter_recurrent_modern/deep-rnn.ipynb b/notebooks/chapter_recurrent_modern/deep-rnn.ipynb
new file mode 100644
index 0000000..a1c98d7
--- /dev/null
+++ b/notebooks/chapter_recurrent_modern/deep-rnn.ipynb
@@ -0,0 +1,147 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "vscode": {
+     "languageId": "julia"
+    }
+   },
+   "source": [
+    "# Deep Recurrent Neural Networks\n",
+    "\n",
+    "## Concise Implementation\n",
+    "\n",
+    "Fortunately many of the logistical details required to implement multiple layers of an RNN are readily available in high-level APIs. Our concise implementation will use such built-in functionalities."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: Using backend: CUDA.\n",
+      "└ @ Flux /home/nero/.julia/packages/Flux/Wz6D4/src/functor.jl:662\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "predict (generic function with 1 method)"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "using Downloads,IterTools,CUDA,Flux\n",
+    "using StatsBase: wsample\n",
+    "\n",
+    "device = Flux.get_device(; verbose=true)\n",
+    "\n",
+    "file_path = Downloads.download(\"http://d2l-data.s3-accelerate.amazonaws.com/timemachine.txt\")\n",
+    "raw_text = open(io->read(io, String),file_path)\n",
+    "str = lowercase(replace(raw_text,r\"[^A-Za-z]+\"=>\" \"))\n",
+    "tokens = [str...]\n",
+    "vocab = unique(tokens)\n",
+    "vocab_len = length(vocab)\n",
+    "\n",
+    "# n*[seq_length x feature x batch_size]\n",
+    "function getdata(str::String,vocab::Vector{Char},seq_length::Int,batch_size::Int)::Tuple\n",
+    "    data = collect.(partition(str,seq_length,1))\n",
+    "    x = [[Flux.onehotbatch(i,vocab) for i in d] for d in Flux.batchseq.(Flux.chunk(data[begin:end-1];size = batch_size))]\n",
+    "    y = [[Flux.onehotbatch(i,vocab) for i in d] for d in Flux.batchseq.(Flux.chunk(data[2:end];size = batch_size))]\n",
+    "    return x,y\n",
+    "end\n",
+    "\n",
+    "function loss(model, xs, ys)\n",
+    "    Flux.reset!(model)\n",
+    "    return sum(Flux.logitcrossentropy.([model(x) for x in xs], ys))\n",
+    "end\n",
+    "\n",
+    "function predict(model::Chain, prefix::String, num_preds::Int)\n",
+    "    model = cpu(model)\n",
+    "    Flux.reset!(model)\n",
+    "    buf = IOBuffer()\n",
+    "    write(buf, prefix)\n",
+    "\n",
+    "    c = wsample(vocab, softmax([model(Flux.onehot(c, vocab)) for c in collect(prefix)][end]))\n",
+    "    for i in 1:num_preds\n",
+    "        write(buf, c)\n",
+    "        c = wsample(vocab, softmax(model(Flux.onehot(c, vocab))))\n",
+    "    end\n",
+    "    return String(take!(buf))\n",
+    "end"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Chain(\n",
+       "  Recur(\n",
+       "    GRUv3Cell(27 => 32),                \u001b[90m# 5_792 parameters\u001b[39m\n",
+       "  ),\n",
+       "  Recur(\n",
+       "    GRUv3Cell(32 => 32),                \u001b[90m# 6_272 parameters\u001b[39m\n",
+       "  ),\n",
+       "  Dense(32 => 27),                      \u001b[90m# 891 parameters\u001b[39m\n",
+       ") \u001b[90m                  # Total: 12 trainable arrays, \u001b[39m12_955 parameters,\n",
+       "\u001b[90m          # plus 2 non-trainable, 64 parameters, summarysize \u001b[39m1.938 KiB."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "model = Chain(GRUv3(vocab_len => 32),GRUv3(32 => 32),Dense(32 => vocab_len)) |> device"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "opt_state = Flux.setup(Adam(1e-2), model)\n",
+    "\n",
+    "x,y = getdata(str, vocab, 32, 1024) |> device\n",
+    "data = zip(x,y)\n",
+    "loss_train = []\n",
+    "\n",
+    "for epoch in 1:50\n",
+    "    @show \"$epoch start\"\n",
+    "    Flux.reset!(model)\n",
+    "    Flux.train!(loss,model,data,opt_state)\n",
+    "    push!(loss_train,sum(loss.(Ref(model), x, y)) / length(str)) \n",
+    "end"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Julia 1.10.3",
+   "language": "julia",
+   "name": "julia-1.10"
+  },
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.10.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/chapter_recurrent-modern/gru.ipynb b/notebooks/chapter_recurrent_modern/gru.ipynb
similarity index 99%
rename from notebooks/chapter_recurrent-modern/gru.ipynb
rename to notebooks/chapter_recurrent_modern/gru.ipynb
index 74ea86d..8520f63 100644
--- a/notebooks/chapter_recurrent-modern/gru.ipynb
+++ b/notebooks/chapter_recurrent_modern/gru.ipynb
@@ -146,7 +146,6 @@
     "loss_train = []\n",
     "\n",
     "for epoch in 1:50\n",
-    "    @show \"$epoch start\"\n",
     "    Flux.reset!(model)\n",
     "    Flux.train!(loss,model,data,opt_state)\n",
     "    push!(loss_train,sum(loss.(Ref(model), x, y)) / length(str)) \n",
diff --git a/notebooks/chapter_recurrent-modern/lstm.ipynb b/notebooks/chapter_recurrent_modern/lstm.ipynb
similarity index 99%
rename from notebooks/chapter_recurrent-modern/lstm.ipynb
rename to notebooks/chapter_recurrent_modern/lstm.ipynb
index 8252268..7850a1c 100644
--- a/notebooks/chapter_recurrent-modern/lstm.ipynb
+++ b/notebooks/chapter_recurrent_modern/lstm.ipynb
@@ -94,7 +94,6 @@
     "loss_train = []\n",
     "\n",
     "for epoch in 1:50\n",
-    "    @show \"$epoch start\"\n",
     "    Flux.reset!(model)\n",
    "    Flux.train!(loss,model,data,opt_state)\n",
     "    push!(loss_train,sum(loss.(Ref(model), x, y)) / length(str)) \n",
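
Note on the new deep-rnn notebook: the point of its "Concise Implementation" cell is that a deep RNN is simply recurrent layers stacked inside a Chain, with the hidden state of every layer advancing on each call. The following is a minimal standalone sketch of that idea, not part of the patch itself; it assumes Flux.jl with the GRUv3 layer used above and hard-codes a vocabulary size of 27, matching the GRUv3Cell(27 => 32) shown in the notebook output (26 lowercase letters plus space from the preprocessing). The variable names here are illustrative only.

    using Flux

    vocab_len = 27  # assumption: 26 lowercase letters plus space, as the preprocessing yields

    # Two stacked recurrent layers, then a Dense projection back to vocabulary
    # size; this mirrors the Chain built in the notebook's second code cell.
    model = Chain(GRUv3(vocab_len => 32), GRUv3(32 => 32), Dense(32 => vocab_len))

    Flux.reset!(model)                 # clear both hidden states before a fresh sequence
    x = Flux.onehot(1, 1:vocab_len)    # stand-in for one one-hot encoded character
    logits = model(x)                  # one time step; both GRU hidden states advance
    @assert length(logits) == vocab_len

Because every forward call mutates the hidden state of both recurrent layers, the notebook's loss function calls Flux.reset!(model) before scoring each sequence, and predict resets once before consuming the prefix.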