From be56c804ea7ec6df15daaa9c2225d030a9f05e01 Mon Sep 17 00:00:00 2001 From: Siddha Ganju Date: Mon, 23 Aug 2021 15:06:31 -0700 Subject: [PATCH] Rename 1_feature_extraction.ipynb to 1-feature-extraction.ipynb --- code/chapter-4/1-feature-extraction.ipynb | 1773 ++++++++++++++------- code/chapter-4/1_feature_extraction.ipynb | 1185 -------------- 2 files changed, 1182 insertions(+), 1776 deletions(-) delete mode 100644 code/chapter-4/1_feature_extraction.ipynb diff --git a/code/chapter-4/1-feature-extraction.ipynb b/code/chapter-4/1-feature-extraction.ipynb index 3c10b88..af92dcf 100644 --- a/code/chapter-4/1-feature-extraction.ipynb +++ b/code/chapter-4/1-feature-extraction.ipynb @@ -1,594 +1,1185 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \n", - " \n", - "
\n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
\n", - "\n", - "\n", - "This code is part of [Chapter 4 - Building a Reverse Image Search Engine: Understanding Embeddings](https://learning.oreilly.com/library/view/practical-deep-learning/9781492034858/ch04.html).\n", - "\n", - "Note: In order to run this notebook on Google Colab you need to [follow these instructions](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb#scrollTo=WzIRIt9d2huC) so that the local data such as the images are available in your Google Drive." - ] + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "colab": { + "name": "Copy of 1-feature-extraction.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "2f0c84e6e48b40cb9f1360ba4451513c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_470d8a8b00c5475684ab84b55c79b760", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_a0ffda7bcf1a46a4bdd6352c43cfda61", + "IPY_MODEL_f80ec0bf78de458995da4f9c964f3722" + ] + } + }, + "470d8a8b00c5475684ab84b55c79b760": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "width": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "overflow": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "overflow_y": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "align_self": null, + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "a0ffda7bcf1a46a4bdd6352c43cfda61": { + "model_module": "@jupyter-widgets/controls", + "model_name": "IntProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_9792cd696fa74cc68ef6b35095fb8685", + "_view_module": "@jupyter-widgets/controls", + "_dom_classes": [], + 
"orientation": "horizontal", + "min": 0, + "bar_style": "success", + "max": 8677, + "_model_name": "IntProgressModel", + "_model_module_version": "1.5.0", + "value": 8677, + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4d80db8732434d119c3f3b3bb8469ce9", + "description": "" + } + }, + "f80ec0bf78de458995da4f9c964f3722": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_fe1544db2c8e4df2a5c7097575da68ab", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "100% 8677/8677 [06:16<00:00, 23.04it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_145e7cf97a5844a48e321c8a4878d13b" + } + }, + "9792cd696fa74cc68ef6b35095fb8685": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "4d80db8732434d119c3f3b3bb8469ce9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "width": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "overflow": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "overflow_y": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "align_self": null, + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "fe1544db2c8e4df2a5c7097575da68ab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "145e7cf97a5844a48e321c8a4878d13b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "min_width": null, + "border": null, + "align_items": null, + 
"bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "width": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "overflow": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "overflow_y": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "align_self": null, + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + } + } + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Feature Extraction\n", - "\n", - "This notebook is the first among six of the follow along Jupyter Notebook for Chapter 4. We will extract features from pretrained models like VGG-16, VGG-19, ResNet-50, InceptionV3 and MobileNet and benchmark them using the Caltech101 dataset.\n", - "\n", - "## Dataset:\n", - "\n", - "In the `data` directory of the repo, download the Caltech101 dataset (or try it on your dataset). Please note (as of 01 September 2020) the Caltech 101 dataset has moved locations and now has to be downloaded through Google Drive using `gdown`.\n", - "\n", - "```\n", - "$ gdown https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp --output caltech101.tar.gz\n", - "\n", - "$ tar -xvzf caltech101.tar.gz\n", - "\n", - "$ mv 101_ObjectCategories datasets/caltech101\n", - "```\n", - "Note that there is a 102nd category called ‘BACKGROUND_Google’ consisting of random images not contained in the first 101 categories, which needs to be deleted before we start experimenting. 
\n", - "\n", - "```\n", - "$ rm -rf datasets/caltech101/BACKGROUND_Google\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "!mkdir -p ../../datasets\n", - "!pip install gdown\n", - "!gdown https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp --output ../../datasets/caltech101.tar.gz\n", - "!tar -xvzf ../../datasets/caltech101.tar.gz --directory ../../datasets\n", - "!mv ../../datasets/101_ObjectCategories ../../datasets/caltech101\n", - "!rm -rf ../../datasets/caltech101/BACKGROUND_Google" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "scrolled": false - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "from numpy.linalg import norm\n", - "import pickle\n", - "from tqdm import tqdm, tqdm_notebook\n", - "import os\n", - "import random\n", - "import time\n", - "import math\n", - "import tensorflow\n", - "from tensorflow.keras.preprocessing import image\n", - "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", - "from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input\n", - "from tensorflow.keras.applications.vgg16 import VGG16\n", - "from tensorflow.keras.applications.vgg19 import VGG19\n", - "from tensorflow.keras.applications.mobilenet import MobileNet\n", - "from tensorflow.keras.applications.inception_v3 import InceptionV3\n", - "from tensorflow.keras.models import Model\n", - "from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will define a helper function that allows us to choose any pretrained model with all the necessary details for our experiments." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def model_picker(name):\n", - " if (name == 'vgg16'):\n", - " model = VGG16(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " elif (name == 'vgg19'):\n", - " model = VGG19(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " elif (name == 'mobilenet'):\n", - " model = MobileNet(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max',\n", - " depth_multiplier=1,\n", - " alpha=1)\n", - " elif (name == 'inception'):\n", - " model = InceptionV3(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " elif (name == 'resnet'):\n", - " model = ResNet50(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " elif (name == 'xception'):\n", - " model = Xception(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " else:\n", - " print(\"Specified model not available\")\n", - " return model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let's put our function to use." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "model_architecture = 'resnet'\n", - "model = model_picker(model_architecture)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's define a function to extract image features given an image and a model. We developed a similar function in Chapter-2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def extract_features(img_path, model):\n", - " input_shape = (224, 224, 3)\n", - " img = image.load_img(img_path,\n", - " target_size=(input_shape[0], input_shape[1]))\n", - " img_array = image.img_to_array(img)\n", - " expanded_img_array = np.expand_dims(img_array, axis=0)\n", - " preprocessed_img = preprocess_input(expanded_img_array)\n", - " features = model.predict(preprocessed_img)\n", - " flattened_features = features.flatten()\n", - " normalized_features = flattened_features / norm(flattened_features)\n", - " return normalized_features" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's see the feature length the model generates. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "features = extract_features('../../sample-images/cat.jpg', model)\n", - "print(len(features))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, we will see how much time it takes to extract features of one image." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "%timeit features = extract_features('../../sample-images/cat.jpg', model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The time taken to extract features is dependent on a few factors such as image size, computing power etc. A better benchmark would be running the network over an entire dataset. A simple change to the existing code will allow this.\n", - "\n", - "Let's make a handy function to recursively get all the image files under a root directory." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']\n", - "\n", - "def get_file_list(root_dir):\n", - " file_list = []\n", - " for root, directories, filenames in os.walk(root_dir):\n", - " for filename in filenames:\n", - " if any(ext in filename for ext in extensions):\n", - " file_list.append(os.path.join(root, filename))\n", - " return file_list" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let's run the extraction over the entire dataset and time it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# path to the your datasets\n", - "root_dir = '../../datasets/caltech101'\n", - "filenames = sorted(get_file_list(root_dir))\n", - "\n", - "feature_list = []\n", - "for i in tqdm_notebook(range(len(filenames))):\n", - " feature_list.append(extract_features(filenames[i], model))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's try the same with the Keras Image Generator functions." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "batch_size = 64\n", - "datagen = tensorflow.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)\n", - "\n", - "generator = datagen.flow_from_directory(root_dir,\n", - " target_size=(224, 224),\n", - " batch_size=batch_size,\n", - " class_mode=None,\n", - " shuffle=False)\n", - "\n", - "num_images = len(generator.filenames)\n", - "num_epochs = int(math.ceil(num_images / batch_size))\n", - "\n", - "start_time = time.time()\n", - "feature_list = []\n", - "feature_list = model.predict_generator(generator, num_epochs)\n", - "end_time = time.time()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "for i, features in enumerate(feature_list):\n", - " feature_list[i] = features / norm(features)\n", - "\n", - "feature_list = feature_list.reshape(num_images, -1)\n", - "\n", - "print(\"Num images = \", len(generator.classes))\n", - "print(\"Shape of feature_list = \", feature_list.shape)\n", - "print(\"Time taken in sec = \", end_time - start_time)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### GPU Utilization's effect on time taken by varying batch size \n", - "\n", - "\n", - "GPUs are optimized to parallelize the feature generation process and hence will give better results when multiple images are passed instead of just one image.\n", - "The opportunity to improve can be seen based on GPU Utilization. Low GPU Utilization indicates an opportunity to further improve the througput.\n", - "\n", - "\n", - "GPU Utilization can be seen using the nvidia-smi command. To update it every half a second\n", - "\n", - " watch -n .5 nvidia-smi\n", - " \n", - "To pool the GPU utilization every second and dump into a file\n", - "\n", - " nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -f gpu_utilization.csv -l 1\n", - " \n", - "To calculate median GPU Utilization from the file generated\n", - "\n", - " sort -n gpu_utilization.csv | datamash median 1\n", - "\n", - "|Model |Time second (sec) | batch_size | % GPU Utilization | Implementation|\n", - "|-|-|-|\n", - "|Resnet50 | 124 | 1 | 52 | extract_features |\n", - "|Resnet50 | 98 | 1 | 72 | ImageDataGenerator |\n", - "|Resnet50 | 57 | 2 | 81 | ImageDataGenerator |\n", - "|Resnet50 | 40 | 4 | 88 | ImageDataGenerator |\n", - "|Resnet50 | 34 | 8 | 94 | ImageDataGenerator |\n", - "|Resnet50 | 29 | 16 | 97 | ImageDataGenerator |\n", - "|Resnet50 | 28 | 32 | 97 | ImageDataGenerator |\n", - "|Resnet50 | 28 | 64 | 98 | ImageDataGenerator |" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Some benchmarks on different model architectures to see relative speeds\n", - "\n", - "Keeping batch size of 64, benchmarking the different models\n", - "\n", - "|Model |items/second |\n", - "|-|-|-|\n", - "| VGG19 | 31.06 |\n", - "| VGG16 | 28.16 | \n", - "| Resnet50 | 28.48 | \n", - "| Inception | 20.07 |\n", - "| Mobilenet | 13.45 |" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's save the features as intermediate files to use later." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "filenames = [root_dir + '/' + s for s in generator.filenames]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "code_folding": [], - "collapsed": true - }, - "outputs": [], - "source": [ - "pickle.dump(generator.classes, open('./data/class_ids-caltech101.pickle',\n", - " 'wb'))\n", - "pickle.dump(filenames, open('./data/filenames-caltech101.pickle', 'wb'))\n", - "pickle.dump(\n", - " feature_list,\n", - " open('./data/features-caltech101-' + model_architecture + '.pickle', 'wb'))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's train a finetuned model as well and save the features for that as well." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "TRAIN_SAMPLES = 8677\n", - "NUM_CLASSES = 101\n", - "IMG_WIDTH, IMG_HEIGHT = 224, 224" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,\n", - " rotation_range=20,\n", - " width_shift_range=0.2,\n", - " height_shift_range=0.2,\n", - " zoom_range=0.2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "train_generator = train_datagen.flow_from_directory(root_dir,\n", - " target_size=(IMG_WIDTH,\n", - " IMG_HEIGHT),\n", - " batch_size=batch_size,\n", - " shuffle=True,\n", - " seed=12345,\n", - " class_mode='categorical')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def model_maker():\n", - " base_model = ResNet50(include_top=False,\n", - " input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))\n", - " for layer in base_model.layers[:]:\n", - " layer.trainable = False\n", - " input = Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))\n", - " custom_model = base_model(input)\n", - " custom_model = GlobalAveragePooling2D()(custom_model)\n", - " custom_model = Dense(64, activation='relu')(custom_model)\n", - " custom_model = Dropout(0.5)(custom_model)\n", - " predictions = Dense(NUM_CLASSES, activation='softmax')(custom_model)\n", - " return Model(inputs=input, outputs=predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "model_finetuned = model_maker()\n", - "model_finetuned.compile(loss='categorical_crossentropy',\n", - " optimizer=tensorflow.keras.optimizers.Adam(0.001),\n", - " metrics=['acc'])\n", - "model_finetuned.fit_generator(\n", - " train_generator,\n", - " steps_per_epoch=math.ceil(float(TRAIN_SAMPLES) / batch_size),\n", - " epochs=10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "model_finetuned.save('./data/model-finetuned.h5')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "start_time = time.time()\n", - "feature_list_finetuned = []\n", - "feature_list_finetuned = model_finetuned.predict_generator(generator, num_epochs)\n", - "end_time = time.time()\n", - "\n", - "for i, features_finetuned in enumerate(feature_list_finetuned):\n", - " feature_list_finetuned[i] 
= features_finetuned / norm(features_finetuned)\n", - "\n", - "feature_list = feature_list_finetuned.reshape(num_images, -1)\n", - "\n", - "print(\"Num images = \", len(generator.classes))\n", - "print(\"Shape of feature_list = \", feature_list.shape)\n", - "print(\"Time taken in sec = \", end_time - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "pickle.dump(\n", - " feature_list,\n", - " open('./data/features-caltech101-resnet-finetuned.pickle', 'wb'))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "xcoukqAFnz6Y" + }, + "source": [ + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n", + "\n", + "\n", + "This code is part of [Chapter 4 - Building a Reverse Image Search Engine: Understanding Embeddings](https://learning.oreilly.com/library/view/practical-deep-learning/9781492034858/ch04.html).\n", + "\n", + "Note: In order to run this notebook on Google Colab you need to [follow these instructions](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb#scrollTo=WzIRIt9d2huC) so that the local data such as the images are available in your Google Drive.\n", + "\n", + "Note 7/24/2021: This notebook has been updated as of 7/24/2021." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-UrrqCTnnz6b" + }, + "source": [ + "# Feature Extraction\n", + "\n", + "This notebook is the first among six of the follow along Jupyter Notebook for Chapter 4. We will extract features from pretrained models like VGG-16, VGG-19, ResNet-50, InceptionV3 and MobileNet and benchmark them using the Caltech101 dataset.\n", + "\n", + "## Dataset:\n", + "\n", + "In the `data` directory of the repo, download the Caltech101 dataset (or try it on your dataset). Please note (as of 01 September 2020) the Caltech 101 dataset has moved locations and now has to be downloaded through Google Drive using `gdown`.\n", + "\n", + "```\n", + "$ gdown https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp --output caltech101.tar.gz\n", + "\n", + "$ tar -xvzf caltech101.tar.gz\n", + "\n", + "$ mv 101_ObjectCategories datasets/caltech101\n", + "```\n", + "Note that there is a 102nd category called ‘BACKGROUND_Google’ consisting of random images not contained in the first 101 categories, which needs to be deleted before we start experimenting. \n", + "\n", + "```\n", + "$ rm -rf datasets/caltech101/BACKGROUND_Google\n", + "```" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "-_byh3zWnz6c" + }, + "source": [ + "!mkdir -p ../../datasets\n", + "!pip install gdown\n", + "!gdown https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp --output ../../datasets/caltech101.tar.gz\n", + "!tar -xvzf ../../datasets/caltech101.tar.gz --directory ../../datasets\n", + "!mv ../../datasets/101_ObjectCategories ../../datasets/caltech101\n", + "!rm -rf ../../datasets/caltech101/BACKGROUND_Google" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "scrolled": false, + "id": "yN9yirdLnz6d" + }, + "source": [ + "import numpy as np\n", + "from numpy.linalg import norm\n", + "import pickle\n", + "from tqdm import tqdm, tqdm_notebook\n", + "import os\n", + "import random\n", + "import time\n", + "import math\n", + "import tensorflow\n", + "from tensorflow.keras.preprocessing import image\n", + "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", + "from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input\n", + "from tensorflow.keras.applications.vgg16 import VGG16\n", + "from tensorflow.keras.applications.vgg19 import VGG19\n", + "from tensorflow.keras.applications.mobilenet import MobileNet\n", + "from tensorflow.keras.applications.inception_v3 import InceptionV3\n", + "from tensorflow.keras.models import Model\n", + "from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D\n" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LxZ73Ltmnz6e" + }, + "source": [ + "We will define a helper function that allows us to choose any 
pretrained model with all the necessary details for our experiments." + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "n39Tv0YZnz6e" + }, + "source": [ + "def model_picker(name):\n", + " if (name == 'vgg16'):\n", + " model = VGG16(weights='imagenet',\n", + " include_top=False,\n", + " input_shape=(224, 224, 3),\n", + " pooling='max')\n", + " elif (name == 'vgg19'):\n", + " model = VGG19(weights='imagenet',\n", + " include_top=False,\n", + " input_shape=(224, 224, 3),\n", + " pooling='max')\n", + " elif (name == 'mobilenet'):\n", + " model = MobileNet(weights='imagenet',\n", + " include_top=False,\n", + " input_shape=(224, 224, 3),\n", + " pooling='max',\n", + " depth_multiplier=1,\n", + " alpha=1)\n", + " elif (name == 'inception'):\n", + " model = InceptionV3(weights='imagenet',\n", + " include_top=False,\n", + " input_shape=(224, 224, 3),\n", + " pooling='max')\n", + " elif (name == 'resnet'):\n", + " model = ResNet50(weights='imagenet',\n", + " include_top=False,\n", + " input_shape=(224, 224, 3),\n", + " pooling='max')\n", + " elif (name == 'xception'):\n", + " model = Xception(weights='imagenet',\n", + " include_top=False,\n", + " input_shape=(224, 224, 3),\n", + " pooling='max')\n", + " else:\n", + " print(\"Specified model not available\")\n", + " return model" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qObR-xNenz6f" + }, + "source": [ + "Now, let's put our function to use." + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UCz180_jnz6g", + "outputId": "b6d47be1-b290-46b1-9e54-235b5ba0d2d3" + }, + "source": [ + "model_architecture = 'resnet'\n", + "model = model_picker(model_architecture)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5\n", + "94773248/94765736 [==============================] - 2s 0us/step\n", + "94781440/94765736 [==============================] - 2s 0us/step\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "za5aAGhgnz6g" + }, + "source": [ + "Let's define a function to extract image features given an image and a model. We developed a similar function in Chapter-2" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "2Fc9vtRenz6g" + }, + "source": [ + "def extract_features(img_path, model):\n", + " input_shape = (224, 224, 3)\n", + " img = image.load_img(img_path,\n", + " target_size=(input_shape[0], input_shape[1]))\n", + " img_array = image.img_to_array(img)\n", + " expanded_img_array = np.expand_dims(img_array, axis=0)\n", + " preprocessed_img = preprocess_input(expanded_img_array)\n", + " features = model.predict(preprocessed_img)\n", + " flattened_features = features.flatten()\n", + " normalized_features = flattened_features / norm(flattened_features)\n", + " return normalized_features" + ], + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "57uKAEMxnz6h" + }, + "source": [ + "Let's see the feature length the model generates by running on an example image. If you don't have the usual cat image available locally, let's download it!" 
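A quick note on the sample image: the next two cells fall back to `curl` when running on Colab. If you are running locally and `../../sample-images/cat.jpg` is missing, a minimal sketch that fetches it from Python — assuming the same raw GitHub URL used by the `curl` cell below — looks like this:

```python
import os
import tensorflow as tf

IMG_PATH = '../../sample-images/cat.jpg'
if not os.path.exists(IMG_PATH):
    # Assumption: same raw GitHub URL as the curl command in the Colab cell below.
    IMG_PATH = tf.keras.utils.get_file(
        'cat.jpg',
        origin='https://raw.githubusercontent.com/PracticalDL/'
               'Practical-Deep-Learning-Book/master/sample-images/cat.jpg')
print(IMG_PATH)
```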
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qC_isBQ7okco" + }, + "source": [ + "try:\n", + " import google.colab\n", + " IS_COLAB_ENV = True\n", + "except:\n", + " IS_COLAB_ENV = False" + ], + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jU3c7aAbqR-P", + "outputId": "7cc823ea-5d84-4eb6-ab0a-acc777ba21be" + }, + "source": [ + "IMG_PATH = '../../sample-images/cat.jpg'\n", + "if IS_COLAB_ENV:\n", + " !curl https://raw.githubusercontent.com/PracticalDL/Practical-Deep-Learning-Book/master/sample-images/cat.jpg --output cat.jpg\n", + " IMG_PATH = 'cat.jpg'" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r100 661k 100 661k 0 0 6964k 0 --:--:-- --:--:-- --:--:-- 6964k\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_USxAyninz6h", + "outputId": "c2c588db-95cc-4d55-a686-1ce5a11bc99f" + }, + "source": [ + "features = extract_features('cat.jpg', model)\n", + "print(\"Total length of features for one image: \", len(features))" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "2048\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qgih38bvnz6i" + }, + "source": [ + "Now, we will see how much time it takes to extract features of one image. For the cat image, it should ideally be around 81 ms. Depending on your image size, the final time may be different and it depends on the time to read, resize, preprocess, and extract features." + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Rs8nZZCLnz6i", + "outputId": "cb7d8dba-ec9c-4fc0-ee23-864f2e66bfb9" + }, + "source": [ + "%timeit features = extract_features('cat.jpg', model)" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "10 loops, best of 3: 81.4 ms per loop\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "esrDlqWa9sGo" + }, + "source": [ + "## Benchmarking time taken to extract features over the entire dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8jELqglInz6i" + }, + "source": [ + "The time taken to extract features is dependent on a few factors such as image size, computing power etc. A better benchmark would be running the network over an entire dataset. A simple change to the existing code will allow this.\n", + "\n", + "Let's make a handy function to recursively get all the image files under a root directory." 
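For reference: the helper in the next cell tests extensions with a substring check (`ext in filename`), which would also match names such as `notes.jpg.bak`. A slightly stricter sketch (not the notebook's original) that matches only the file suffix, case-insensitively:

```python
import os

IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')

def get_file_list_strict(root_dir):
    """Recursively collect paths whose file suffix matches one of IMAGE_SUFFIXES."""
    file_list = []
    for root, _dirs, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.lower().endswith(IMAGE_SUFFIXES):
                file_list.append(os.path.join(root, filename))
    return sorted(file_list)
```

Either version behaves the same on Caltech101, whose image files all carry a plain `.jpg` suffix.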
+ ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "biSAC49dnz6j" + }, + "source": [ + "extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']\n", + "\n", + "def get_file_list(root_dir):\n", + " file_list = []\n", + " for root, directories, filenames in os.walk(root_dir):\n", + " for filename in filenames:\n", + " if any(ext in filename for ext in extensions):\n", + " filepath = os.path.join(root, filename)\n", + " if os.path.exists(filepath):\n", + " file_list.append(filepath)\n", + " else:\n", + " print(filepath)\n", + " return file_list" + ], + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1HtebW5cnz6j" + }, + "source": [ + "Now, let's run the extraction over the entire dataset and time it." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XC75Z2Dau77Z", + "outputId": "9140e060-cd2a-42a2-fd6c-a1d782822e28" + }, + "source": [ + "# path to the your datasets\n", + "root_dir = '../../datasets/caltech101'\n", + "filenames = sorted(get_file_list(root_dir))\n", + "print(len(filenames))" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "8677\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 84, + "referenced_widgets": [ + "2f0c84e6e48b40cb9f1360ba4451513c", + "470d8a8b00c5475684ab84b55c79b760", + "a0ffda7bcf1a46a4bdd6352c43cfda61", + "f80ec0bf78de458995da4f9c964f3722", + "9792cd696fa74cc68ef6b35095fb8685", + "4d80db8732434d119c3f3b3bb8469ce9", + "fe1544db2c8e4df2a5c7097575da68ab", + "145e7cf97a5844a48e321c8a4878d13b" + ] + }, + "id": "8gfnWvUNnz6j", + "outputId": "9563b02d-607e-4b47-a823-d345875b146d" + }, + "source": [ + "standard_feature_list = []\n", + "for i in tqdm_notebook(range(len(filenames))):\n", + " standard_feature_list.append(extract_features(filenames[i], model))" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2f0c84e6e48b40cb9f1360ba4451513c", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=8677), HTML(value=u'')))" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n", + "8677\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m2NPEbnZnz6j" + }, + "source": [ + "Now let's try the same with the Keras Image Generator functions." 
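The following cell relies on `predict_generator`, which TensorFlow flags as deprecated (see the warning in its output), and it omits `batch_size` from `flow_from_directory`, so the generator yields its default batch size of 32 while the step count is derived from 128 — which is why the output further down reports 2144 feature rows for a dataset of 8677 images. A hedged equivalent using `Model.predict` with an explicit batch size — reusing `root_dir`, `model`, and the imports defined earlier in this notebook — would look roughly like this:

```python
import math
import time

batch_size = 128
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
generator = datagen.flow_from_directory(root_dir,
                                        target_size=(224, 224),
                                        batch_size=batch_size,  # explicit, so the step count lines up
                                        class_mode=None,
                                        shuffle=False)

num_steps = int(math.ceil(len(generator.filenames) / float(batch_size)))

start_time = time.time()
feature_list = model.predict(generator, steps=num_steps)  # predict_generator is deprecated
print(feature_list.shape, time.time() - start_time)
```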
+ ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gsEQDJuKnz6k", + "outputId": "255b0843-9fd2-4947-94d5-fd400f729eba" + }, + "source": [ + "batch_size = 128\n", + "datagen = tensorflow.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)\n", + "\n", + "generator = datagen.flow_from_directory(root_dir,\n", + " target_size=(224, 224),\n", + " class_mode=None,\n", + " shuffle=False)\n", + "\n", + "num_images = len(generator.filenames)\n", + "num_epochs = int(math.ceil(num_images / batch_size))\n", + "\n", + "start_time = time.time()\n", + "feature_list = []\n", + "feature_list = model.predict_generator(generator, num_epochs)\n", + "end_time = time.time()" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "WARNING: Logging before flag parsing goes to stderr.\n", + "W0725 00:00:57.965632 140262955202432 deprecation.py:323] From :16: predict_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Please use Model.predict, which supports generators.\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "Found 8677 images belonging to 101 classes.\n", + "(8677, 'num images')\n", + "(67, 'num epochs')\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oMPld5V0nz6k", + "outputId": "f552f9ef-123e-4f46-f11d-3bfa8ca96247" + }, + "source": [ + "for i, features in enumerate(feature_list):\n", + " feature_list[i] = features / norm(features)\n", + "\n", + "feature_list = feature_list.reshape(len(feature_list), -1)\n", + "\n", + "print(\"Num images = \", len(generator.classes))\n", + "print(\"Shape of feature_list = \", feature_list.shape)\n", + "print(\"Time taken in sec = \", end_time - start_time)" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "text": [ + "('Num images = ', 8677)\n", + "('Shape of feature_list = ', (2144, 2048))\n", + "('Time taken in sec = ', 8.890146017074585)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kJ9xPhBc7uGG" + }, + "source": [ + "By now, we have generated features from the entire dataset of images, using two different methods. We experimented with our own code of extracting features and compared it against the generator functionality in Keras. Below you'll see some summaries of experiments we ran, by varying the batch size parameter. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qF0q5OKPnz6k" + }, + "source": [ + "### GPU Utilization's effect on time taken by varying batch size \n", + "\n", + "\n", + "GPUs are optimized to parallelize the feature generation process and hence will give better results when multiple images are passed instead of just one image.\n", + "The opportunity to improve can be seen based on GPU Utilization. Low GPU Utilization indicates an opportunity to further improve the througput.\n", + "\n", + "\n", + "GPU Utilization can be seen using the nvidia-smi command. 
To update it every half a second\n", + "\n", + " watch -n .5 nvidia-smi\n", + " \n", + "To pool the GPU utilization every second and dump into a file\n", + "\n", + " nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -f gpu_utilization.csv -l 1\n", + " \n", + "To calculate median GPU Utilization from the file generated\n", + "\n", + " sort -n gpu_utilization.csv | datamash median 1\n", + "\n", + "|Model |Time second (sec) | batch_size | % GPU Utilization | Implementation|\n", + "|-|-|-|-|-|\n", + "|Resnet50 | 124 | 1 | 52 | extract_features |\n", + "|Resnet50 | 98 | 1 | 72 | ImageDataGenerator |\n", + "|Resnet50 | 57 | 2 | 81 | ImageDataGenerator |\n", + "|Resnet50 | 40 | 4 | 88 | ImageDataGenerator |\n", + "|Resnet50 | 34 | 8 | 94 | ImageDataGenerator |\n", + "|Resnet50 | 29 | 16 | 97 | ImageDataGenerator |\n", + "|Resnet50 | 28 | 32 | 97 | ImageDataGenerator |\n", + "|Resnet50 | 28 | 64 | 98 | ImageDataGenerator |" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UmQPJ0Qbnz6l" + }, + "source": [ + "### Some benchmarks on different model architectures to see relative speeds\n", + "\n", + "Keeping batch size of 64, benchmarking the different models\n", + "\n", + "|Model |items/second |\n", + "|-|-|\n", + "| VGG19 | 31.06 |\n", + "| VGG16 | 28.16 | \n", + "| Resnet50 | 28.48 | \n", + "| Inception | 20.07 |\n", + "| Mobilenet | 13.45 |" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0h25HAoXnz6l" + }, + "source": [ + "With the benchmarking experiments squared away, let's save the features as intermediate files to use later." + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "JXxQ9oi7nz6m" + }, + "source": [ + "filenames = [root_dir + '/' + s for s in generator.filenames]" + ], + "execution_count": 19, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "code_folding": [], + "collapsed": true, + "id": "6azYdG-Anz6m" + }, + "source": [ + "pickle.dump(generator.classes, open('./data/class_ids-caltech101.pickle',\n", + " 'wb'))\n", + "pickle.dump(filenames, open('./data/filenames-caltech101.pickle', 'wb'))\n", + "pickle.dump(\n", + " feature_list,\n", + " open('./data/features-caltech101-' + model_architecture + '.pickle', 'wb'))" + ], + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZBpWWLPNnz6m" + }, + "source": [ + "Let's train a finetuned model as well and save the features for that as well." 
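One aside before the fine-tuning cells below: the GPU-utilization benchmark above computes the median reading with `datamash`. If you would rather stay in Python, a small sketch over the same `gpu_utilization.csv` — assuming the `nvidia-smi` polling command wrote one integer value per line — is:

```python
# Assumption: gpu_utilization.csv was produced by the nvidia-smi polling
# command shown earlier, one integer utilization value per line.
with open('gpu_utilization.csv') as f:
    values = sorted(int(line) for line in f if line.strip())

n = len(values)
median = values[n // 2] if n % 2 else (values[n // 2 - 1] + values[n // 2]) / 2.0
print('Median GPU utilization: {}%'.format(median))
```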
+ ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "tdq3wz3Qnz6m" + }, + "source": [ + "TRAIN_SAMPLES = 8677\n", + "NUM_CLASSES = 101\n", + "IMG_WIDTH, IMG_HEIGHT = 224, 224" + ], + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "ePCFVIHznz6n" + }, + "source": [ + "train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,\n", + " rotation_range=20,\n", + " width_shift_range=0.2,\n", + " height_shift_range=0.2,\n", + " zoom_range=0.2)" + ], + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Yh_NVL5Tnz6n", + "outputId": "129fa193-a5fa-4e02-9db0-f74525b5f9b2" + }, + "source": [ + "train_generator = train_datagen.flow_from_directory(root_dir,\n", + " target_size=(IMG_WIDTH,\n", + " IMG_HEIGHT),\n", + " shuffle=True,\n", + " seed=12345,\n", + " class_mode='categorical')" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Found 8677 images belonging to 101 classes.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "bR6oZOPlnz6n" + }, + "source": [ + "def model_maker():\n", + " base_model = ResNet50(include_top=False,\n", + " input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))\n", + " for layer in base_model.layers[:]:\n", + " layer.trainable = False\n", + " input = Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))\n", + " custom_model = base_model(input)\n", + " custom_model = GlobalAveragePooling2D()(custom_model)\n", + " custom_model = Dense(64, activation='relu')(custom_model)\n", + " custom_model = Dropout(0.5)(custom_model)\n", + " predictions = Dense(NUM_CLASSES, activation='softmax')(custom_model)\n", + " return Model(inputs=input, outputs=predictions)" + ], + "execution_count": 24, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Sghlu4Rfnz6n", + "outputId": "19c97bb7-bc5b-4de0-e48c-60c088998823" + }, + "source": [ + "model_finetuned = model_maker()\n", + "model_finetuned.compile(loss='categorical_crossentropy',\n", + " optimizer=tensorflow.keras.optimizers.Adam(0.001),\n", + " metrics=['acc'])\n", + "model_finetuned.fit_generator(\n", + " train_generator,\n", + " steps_per_epoch=math.ceil(float(TRAIN_SAMPLES) / batch_size),\n", + " epochs=10)" + ], + "execution_count": 25, + "outputs": [ + { + "output_type": "stream", + "text": [ + "W0725 00:01:11.765636 140262955202432 deprecation.py:323] From :8: fit_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Please use Model.fit, which supports generators.\n", + "W0725 00:01:12.087389 140262955202432 data_adapter.py:1091] sample_weight modes were coerced from\n", + " ...\n", + " to \n", + " ['...']\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "Train for 68.0 steps\n", + "Epoch 1/10\n", + "68/68 [==============================] - 27s 391ms/step - loss: 3.8677 - acc: 0.2043\n", + "Epoch 2/10\n", + "68/68 [==============================] - 26s 379ms/step - loss: 2.8975 - acc: 0.3625\n", + "Epoch 3/10\n", + "68/68 [==============================] - 26s 382ms/step - loss: 2.5821 - acc: 0.4187\n", + "Epoch 4/10\n", + "68/68 [==============================] - 25s 363ms/step - loss: 2.3700 - acc: 
0.4444\n", + "Epoch 5/10\n", + "68/68 [==============================] - 26s 389ms/step - loss: 2.0201 - acc: 0.5175\n", + "Epoch 6/10\n", + "68/68 [==============================] - 26s 386ms/step - loss: 1.8416 - acc: 0.5386\n", + "Epoch 7/10\n", + "68/68 [==============================] - 25s 371ms/step - loss: 1.7514 - acc: 0.5510\n", + "Epoch 8/10\n", + "68/68 [==============================] - 25s 370ms/step - loss: 1.6617 - acc: 0.5694\n", + "Epoch 9/10\n", + "68/68 [==============================] - 26s 383ms/step - loss: 1.5282 - acc: 0.5896\n", + "Epoch 10/10\n", + "68/68 [==============================] - 25s 366ms/step - loss: 1.5246 - acc: 0.5979\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "GcPDg7Wunz6o" + }, + "source": [ + "model_finetuned.save('./data/model-finetuned.h5')" + ], + "execution_count": 26, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cSE2qbwtnz6o", + "outputId": "dd898777-459f-44f2-b561-c2ed9792c4ef" + }, + "source": [ + "start_time = time.time()\n", + "feature_list_finetuned = []\n", + "feature_list_finetuned = model_finetuned.predict_generator(generator, num_epochs)\n", + "end_time = time.time()\n", + "\n", + "for i, features_finetuned in enumerate(feature_list_finetuned):\n", + " feature_list_finetuned[i] = features_finetuned / norm(features_finetuned)\n", + "\n", + "feature_list = feature_list_finetuned.reshape(len(feature_list_finetuned), -1)\n", + "\n", + "print(\"Num images = \", len(feature_list_finetuned) )\n", + "print(\"Shape of feature_list = \", feature_list.shape)\n", + "print(\"Time taken in sec = \", end_time - start_time)" + ], + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "text": [ + "2144\n", + "('Num images = ', 2144)\n", + "('Shape of feature_list = ', (2144, 101))\n", + "('Time taken in sec = ', 8.284640073776245)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "L8FvRo-6nz6o" + }, + "source": [ + "pickle.dump(\n", + " feature_list,\n", + " open('./data/features-caltech101-resnet-finetuned.pickle', 'wb'))" + ], + "execution_count": 28, + "outputs": [] + } + ] } diff --git a/code/chapter-4/1_feature_extraction.ipynb b/code/chapter-4/1_feature_extraction.ipynb deleted file mode 100644 index af92dcf..0000000 --- a/code/chapter-4/1_feature_extraction.ipynb +++ /dev/null @@ -1,1185 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - }, - "colab": { - "name": "Copy of 1-feature-extraction.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "accelerator": "GPU", - "widgets": { - 
"application/vnd.jupyter.widget-state+json": { - "2f0c84e6e48b40cb9f1360ba4451513c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_view_name": "HBoxView", - "_dom_classes": [], - "_model_name": "HBoxModel", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.5.0", - "box_style": "", - "layout": "IPY_MODEL_470d8a8b00c5475684ab84b55c79b760", - "_model_module": "@jupyter-widgets/controls", - "children": [ - "IPY_MODEL_a0ffda7bcf1a46a4bdd6352c43cfda61", - "IPY_MODEL_f80ec0bf78de458995da4f9c964f3722" - ] - } - }, - "470d8a8b00c5475684ab84b55c79b760": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "width": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "overflow": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "overflow_y": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "align_self": null, - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "a0ffda7bcf1a46a4bdd6352c43cfda61": { - "model_module": "@jupyter-widgets/controls", - "model_name": "IntProgressModel", - "state": { - "_view_name": "ProgressView", - "style": "IPY_MODEL_9792cd696fa74cc68ef6b35095fb8685", - "_view_module": "@jupyter-widgets/controls", - "_dom_classes": [], - "orientation": "horizontal", - "min": 0, - "bar_style": "success", - "max": 8677, - "_model_name": "IntProgressModel", - "_model_module_version": "1.5.0", - "value": 8677, - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_4d80db8732434d119c3f3b3bb8469ce9", - "description": "" - } - }, - "f80ec0bf78de458995da4f9c964f3722": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_fe1544db2c8e4df2a5c7097575da68ab", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": "100% 8677/8677 [06:16<00:00, 23.04it/s]", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_145e7cf97a5844a48e321c8a4878d13b" - } - }, - "9792cd696fa74cc68ef6b35095fb8685": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_view_name": "StyleView", - "_model_name": "ProgressStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - 
"_view_module_version": "1.2.0", - "bar_color": null, - "_model_module": "@jupyter-widgets/controls" - } - }, - "4d80db8732434d119c3f3b3bb8469ce9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "width": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "overflow": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "overflow_y": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "align_self": null, - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "fe1544db2c8e4df2a5c7097575da68ab": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "145e7cf97a5844a48e321c8a4878d13b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "width": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "overflow": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "overflow_y": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "align_self": null, - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - } - } - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "xcoukqAFnz6Y" - }, - "source": [ - "\n", - " \n", - " \n", - "
\n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
\n", - "\n", - "\n", - "This code is part of [Chapter 4 - Building a Reverse Image Search Engine: Understanding Embeddings](https://learning.oreilly.com/library/view/practical-deep-learning/9781492034858/ch04.html).\n", - "\n", - "Note: In order to run this notebook on Google Colab you need to [follow these instructions](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb#scrollTo=WzIRIt9d2huC) so that the local data such as the images are available in your Google Drive.\n", - "\n", - "Note 7/24/2021: This notebook has been updated as of 7/24/2021." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-UrrqCTnnz6b" - }, - "source": [ - "# Feature Extraction\n", - "\n", - "This notebook is the first among six of the follow along Jupyter Notebook for Chapter 4. We will extract features from pretrained models like VGG-16, VGG-19, ResNet-50, InceptionV3 and MobileNet and benchmark them using the Caltech101 dataset.\n", - "\n", - "## Dataset:\n", - "\n", - "In the `data` directory of the repo, download the Caltech101 dataset (or try it on your dataset). Please note (as of 01 September 2020) the Caltech 101 dataset has moved locations and now has to be downloaded through Google Drive using `gdown`.\n", - "\n", - "```\n", - "$ gdown https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp --output caltech101.tar.gz\n", - "\n", - "$ tar -xvzf caltech101.tar.gz\n", - "\n", - "$ mv 101_ObjectCategories datasets/caltech101\n", - "```\n", - "Note that there is a 102nd category called ‘BACKGROUND_Google’ consisting of random images not contained in the first 101 categories, which needs to be deleted before we start experimenting. \n", - "\n", - "```\n", - "$ rm -rf datasets/caltech101/BACKGROUND_Google\n", - "```" - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "-_byh3zWnz6c" - }, - "source": [ - "!mkdir -p ../../datasets\n", - "!pip install gdown\n", - "!gdown https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp --output ../../datasets/caltech101.tar.gz\n", - "!tar -xvzf ../../datasets/caltech101.tar.gz --directory ../../datasets\n", - "!mv ../../datasets/101_ObjectCategories ../../datasets/caltech101\n", - "!rm -rf ../../datasets/caltech101/BACKGROUND_Google" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "scrolled": false, - "id": "yN9yirdLnz6d" - }, - "source": [ - "import numpy as np\n", - "from numpy.linalg import norm\n", - "import pickle\n", - "from tqdm import tqdm, tqdm_notebook\n", - "import os\n", - "import random\n", - "import time\n", - "import math\n", - "import tensorflow\n", - "from tensorflow.keras.preprocessing import image\n", - "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", - "from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input\n", - "from tensorflow.keras.applications.vgg16 import VGG16\n", - "from tensorflow.keras.applications.vgg19 import VGG19\n", - "from tensorflow.keras.applications.mobilenet import MobileNet\n", - "from tensorflow.keras.applications.inception_v3 import InceptionV3\n", - "from tensorflow.keras.models import Model\n", - "from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D\n" - ], - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LxZ73Ltmnz6e" - }, - "source": [ - "We will define a helper function that allows us to choose any 
pretrained model with all the necessary details for our experiments." - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "n39Tv0YZnz6e" - }, - "source": [ - "def model_picker(name):\n", - " if (name == 'vgg16'):\n", - " model = VGG16(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " elif (name == 'vgg19'):\n", - " model = VGG19(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " elif (name == 'mobilenet'):\n", - " model = MobileNet(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max',\n", - " depth_multiplier=1,\n", - " alpha=1)\n", - " elif (name == 'inception'):\n", - " model = InceptionV3(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " elif (name == 'resnet'):\n", - " model = ResNet50(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " elif (name == 'xception'):\n", - " model = Xception(weights='imagenet',\n", - " include_top=False,\n", - " input_shape=(224, 224, 3),\n", - " pooling='max')\n", - " else:\n", - " print(\"Specified model not available\")\n", - " return model" - ], - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qObR-xNenz6f" - }, - "source": [ - "Now, let's put our function to use." - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "UCz180_jnz6g", - "outputId": "b6d47be1-b290-46b1-9e54-235b5ba0d2d3" - }, - "source": [ - "model_architecture = 'resnet'\n", - "model = model_picker(model_architecture)" - ], - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5\n", - "94773248/94765736 [==============================] - 2s 0us/step\n", - "94781440/94765736 [==============================] - 2s 0us/step\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "za5aAGhgnz6g" - }, - "source": [ - "Let's define a function to extract image features given an image and a model. We developed a similar function in Chapter-2" - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "2Fc9vtRenz6g" - }, - "source": [ - "def extract_features(img_path, model):\n", - " input_shape = (224, 224, 3)\n", - " img = image.load_img(img_path,\n", - " target_size=(input_shape[0], input_shape[1]))\n", - " img_array = image.img_to_array(img)\n", - " expanded_img_array = np.expand_dims(img_array, axis=0)\n", - " preprocessed_img = preprocess_input(expanded_img_array)\n", - " features = model.predict(preprocessed_img)\n", - " flattened_features = features.flatten()\n", - " normalized_features = flattened_features / norm(flattened_features)\n", - " return normalized_features" - ], - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "57uKAEMxnz6h" - }, - "source": [ - "Let's see the feature length the model generates by running on an example image. If you don't have the usual cat image available locally, let's download it!" 
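A quick aside before downloading the test image: the final step of `extract_features` divides by the L2 norm, so every returned vector has unit length and can later be compared with cosine or Euclidean distance on an equal footing. A minimal sketch of just that step (using a made-up vector rather than a real image):

```
import numpy as np
from numpy.linalg import norm

# Stand-in for model.predict(...).flatten(); the values are arbitrary.
raw = np.array([3.0, 4.0])

normalized = raw / norm(raw)   # the same division used in extract_features
print(normalized)              # [0.6 0.8]
print(norm(normalized))        # 1.0 -> a unit-length feature vector
```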
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "qC_isBQ7okco" - }, - "source": [ - "try:\n", - " import google.colab\n", - " IS_COLAB_ENV = True\n", - "except:\n", - " IS_COLAB_ENV = False" - ], - "execution_count": 6, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jU3c7aAbqR-P", - "outputId": "7cc823ea-5d84-4eb6-ab0a-acc777ba21be" - }, - "source": [ - "IMG_PATH = '../../sample-images/cat.jpg'\n", - "if IS_COLAB_ENV:\n", - " !curl https://raw.githubusercontent.com/PracticalDL/Practical-Deep-Learning-Book/master/sample-images/cat.jpg --output cat.jpg\n", - " IMG_PATH = 'cat.jpg'" - ], - "execution_count": 7, - "outputs": [ - { - "output_type": "stream", - "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r100 661k 100 661k 0 0 6964k 0 --:--:-- --:--:-- --:--:-- 6964k\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_USxAyninz6h", - "outputId": "c2c588db-95cc-4d55-a686-1ce5a11bc99f" - }, - "source": [ - "features = extract_features('cat.jpg', model)\n", - "print(\"Total length of features for one image: \", len(features))" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "text": [ - "2048\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qgih38bvnz6i" - }, - "source": [ - "Now, let's see how long it takes to extract features from one image. For the cat image it should be around 81 ms; your timing will vary with image size and hardware, since it includes the time to read, resize, preprocess, and extract features." - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Rs8nZZCLnz6i", - "outputId": "cb7d8dba-ec9c-4fc0-ee23-864f2e66bfb9" - }, - "source": [ - "%timeit features = extract_features('cat.jpg', model)" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "text": [ - "10 loops, best of 3: 81.4 ms per loop\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "esrDlqWa9sGo" - }, - "source": [ - "## Benchmarking time taken to extract features over the entire dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8jELqglInz6i" - }, - "source": [ - "The time taken to extract features depends on a few factors, such as image size and computing power. A better benchmark is to run the network over an entire dataset, which a simple change to the existing code allows.\n", - "\n", - "Let's make a handy function to recursively get all the image files under a root directory."
- ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "biSAC49dnz6j" - }, - "source": [ - "extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']\n", - "\n", - "def get_file_list(root_dir):\n", - " file_list = []\n", - " for root, directories, filenames in os.walk(root_dir):\n", - " for filename in filenames:\n", - " if any(ext in filename for ext in extensions):\n", - " filepath = os.path.join(root, filename)\n", - " if os.path.exists(filepath):\n", - " file_list.append(filepath)\n", - " else:\n", - " print(filepath)\n", - " return file_list" - ], - "execution_count": 10, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1HtebW5cnz6j" - }, - "source": [ - "Now, let's run the extraction over the entire dataset and time it." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "XC75Z2Dau77Z", - "outputId": "9140e060-cd2a-42a2-fd6c-a1d782822e28" - }, - "source": [ - "# path to your dataset\n", - "root_dir = '../../datasets/caltech101'\n", - "filenames = sorted(get_file_list(root_dir))\n", - "print(len(filenames))" - ], - "execution_count": 11, - "outputs": [ - { - "output_type": "stream", - "text": [ - "8677\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "colab": { - "base_uri": "https://localhost:8080/", - "height": 84, - "referenced_widgets": [ - "2f0c84e6e48b40cb9f1360ba4451513c", - "470d8a8b00c5475684ab84b55c79b760", - "a0ffda7bcf1a46a4bdd6352c43cfda61", - "f80ec0bf78de458995da4f9c964f3722", - "9792cd696fa74cc68ef6b35095fb8685", - "4d80db8732434d119c3f3b3bb8469ce9", - "fe1544db2c8e4df2a5c7097575da68ab", - "145e7cf97a5844a48e321c8a4878d13b" - ] - }, - "id": "8gfnWvUNnz6j", - "outputId": "9563b02d-607e-4b47-a823-d345875b146d" - }, - "source": [ - "standard_feature_list = []\n", - "for i in tqdm_notebook(range(len(filenames))):\n", - " standard_feature_list.append(extract_features(filenames[i], model))" - ], - "execution_count": 12, - "outputs": [ - { - "output_type": "display_data", - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2f0c84e6e48b40cb9f1360ba4451513c", - "version_minor": 0, - "version_major": 2 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=8677), HTML(value=u'')))" - ] - }, - "metadata": { - "tags": [] - } - }, - { - "output_type": "stream", - "text": [ - "\n", - "8677\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "m2NPEbnZnz6j" - }, - "source": [ - "Now let's try the same with the Keras Image Generator functions."
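One caveat worth keeping in mind with `flow_from_directory` before running the next cell: it defaults to `batch_size=32` when no batch size is passed, so if the number of steps is computed from a different `batch_size` value, only part of the dataset gets traversed (67 steps at the default batch of 32 is 2,144 images, which matches the `feature_list` shape printed further down). A sketch, not the original cell, of how the call could look with the batch size passed explicitly, reusing `root_dir` and `model` from the cells above:

```
# Sketch only: pass batch_size explicitly so the computed step count matches
# what the generator actually yields per batch.
batch_size = 128
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
generator = datagen.flow_from_directory(root_dir,
                                        target_size=(224, 224),
                                        batch_size=batch_size,
                                        class_mode=None,
                                        shuffle=False)
# float() guards against integer division on Python 2.
num_steps = int(math.ceil(len(generator.filenames) / float(batch_size)))

# Recent TensorFlow versions accept generators directly in Model.predict,
# which replaces the deprecated predict_generator used below.
feature_list = model.predict(generator, steps=num_steps)
```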
- ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "gsEQDJuKnz6k", - "outputId": "255b0843-9fd2-4947-94d5-fd400f729eba" - }, - "source": [ - "batch_size = 128\n", - "datagen = tensorflow.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)\n", - "\n", - "generator = datagen.flow_from_directory(root_dir,\n", - " target_size=(224, 224),\n", - " class_mode=None,\n", - " shuffle=False)\n", - "\n", - "num_images = len(generator.filenames)\n", - "num_epochs = int(math.ceil(num_images / batch_size))\n", - "\n", - "start_time = time.time()\n", - "feature_list = []\n", - "feature_list = model.predict_generator(generator, num_epochs)\n", - "end_time = time.time()" - ], - "execution_count": 14, - "outputs": [ - { - "output_type": "stream", - "text": [ - "WARNING: Logging before flag parsing goes to stderr.\n", - "W0725 00:00:57.965632 140262955202432 deprecation.py:323] From :16: predict_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Please use Model.predict, which supports generators.\n" - ], - "name": "stderr" - }, - { - "output_type": "stream", - "text": [ - "Found 8677 images belonging to 101 classes.\n", - "(8677, 'num images')\n", - "(67, 'num epochs')\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "oMPld5V0nz6k", - "outputId": "f552f9ef-123e-4f46-f11d-3bfa8ca96247" - }, - "source": [ - "for i, features in enumerate(feature_list):\n", - " feature_list[i] = features / norm(features)\n", - "\n", - "feature_list = feature_list.reshape(len(feature_list), -1)\n", - "\n", - "print(\"Num images = \", len(generator.classes))\n", - "print(\"Shape of feature_list = \", feature_list.shape)\n", - "print(\"Time taken in sec = \", end_time - start_time)" - ], - "execution_count": 18, - "outputs": [ - { - "output_type": "stream", - "text": [ - "('Num images = ', 8677)\n", - "('Shape of feature_list = ', (2144, 2048))\n", - "('Time taken in sec = ', 8.890146017074585)\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kJ9xPhBc7uGG" - }, - "source": [ - "By now, we have generated features from the entire dataset of images using two different methods: our own feature extraction code and the generator functionality in Keras. Below you'll see summaries of experiments we ran by varying the batch size parameter." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qF0q5OKPnz6k" - }, - "source": [ - "### GPU Utilization's effect on time taken by varying batch size\n", - "\n", - "\n", - "GPUs are optimized to parallelize the feature generation process and hence give better results when multiple images are passed in at once instead of one at a time.\n", - "The headroom for improvement can be seen from GPU utilization: low GPU utilization indicates an opportunity to further improve throughput.\n", - "\n", - "\n", - "GPU utilization can be monitored using the nvidia-smi command. 
To update it every half second:\n", - "\n", - " watch -n .5 nvidia-smi\n", - " \n", - "To poll the GPU utilization every second and dump it into a file:\n", - "\n", - " nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -f gpu_utilization.csv -l 1\n", - " \n", - "To calculate the median GPU utilization from the generated file:\n", - "\n", - " sort -n gpu_utilization.csv | datamash median 1\n", - "\n", - "|Model |Time (sec) | batch_size | % GPU Utilization | Implementation|\n", - "|-|-|-|-|-|\n", - "|Resnet50 | 124 | 1 | 52 | extract_features |\n", - "|Resnet50 | 98 | 1 | 72 | ImageDataGenerator |\n", - "|Resnet50 | 57 | 2 | 81 | ImageDataGenerator |\n", - "|Resnet50 | 40 | 4 | 88 | ImageDataGenerator |\n", - "|Resnet50 | 34 | 8 | 94 | ImageDataGenerator |\n", - "|Resnet50 | 29 | 16 | 97 | ImageDataGenerator |\n", - "|Resnet50 | 28 | 32 | 97 | ImageDataGenerator |\n", - "|Resnet50 | 28 | 64 | 98 | ImageDataGenerator |" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UmQPJ0Qbnz6l" - }, - "source": [ - "### Benchmarks of different model architectures to see relative speeds\n", - "\n", - "Keeping the batch size at 64, we benchmark the different models:\n", - "\n", - "|Model |items/second |\n", - "|-|-|\n", - "| VGG19 | 31.06 |\n", - "| VGG16 | 28.16 | \n", - "| Resnet50 | 28.48 | \n", - "| Inception | 20.07 |\n", - "| Mobilenet | 13.45 |" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0h25HAoXnz6l" - }, - "source": [ - "With the benchmarking experiments squared away, let's save the features as intermediate files to use later." - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "JXxQ9oi7nz6m" - }, - "source": [ - "filenames = [root_dir + '/' + s for s in generator.filenames]" - ], - "execution_count": 19, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "code_folding": [], - "collapsed": true, - "id": "6azYdG-Anz6m" - }, - "source": [ - "pickle.dump(generator.classes, open('./data/class_ids-caltech101.pickle',\n", - " 'wb'))\n", - "pickle.dump(filenames, open('./data/filenames-caltech101.pickle', 'wb'))\n", - "pickle.dump(\n", - " feature_list,\n", - " open('./data/features-caltech101-' + model_architecture + '.pickle', 'wb'))" - ], - "execution_count": 20, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZBpWWLPNnz6m" - }, - "source": [ - "Let's also train a fine-tuned model and save its features."
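Before moving on to the fine-tuned variant, it can be worth a quick check that the pickles written above load back cleanly, since they are the intermediate files meant to be used later. A minimal sketch (paths exactly as in the `pickle.dump` calls above; `model_architecture` is 'resnet' here):

```
import pickle

# Reload the artifacts saved above; adjust the paths if they were written elsewhere.
with open('./data/filenames-caltech101.pickle', 'rb') as f:
    saved_filenames = pickle.load(f)
with open('./data/features-caltech101-' + model_architecture + '.pickle', 'rb') as f:
    saved_features = pickle.load(f)

# Ideally there is one feature vector per image file.
print(len(saved_filenames))
print(saved_features.shape)
```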
- ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "tdq3wz3Qnz6m" - }, - "source": [ - "TRAIN_SAMPLES = 8677\n", - "NUM_CLASSES = 101\n", - "IMG_WIDTH, IMG_HEIGHT = 224, 224" - ], - "execution_count": 21, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "ePCFVIHznz6n" - }, - "source": [ - "train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,\n", - " rotation_range=20,\n", - " width_shift_range=0.2,\n", - " height_shift_range=0.2,\n", - " zoom_range=0.2)" - ], - "execution_count": 22, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Yh_NVL5Tnz6n", - "outputId": "129fa193-a5fa-4e02-9db0-f74525b5f9b2" - }, - "source": [ - "train_generator = train_datagen.flow_from_directory(root_dir,\n", - " target_size=(IMG_WIDTH,\n", - " IMG_HEIGHT),\n", - " shuffle=True,\n", - " seed=12345,\n", - " class_mode='categorical')" - ], - "execution_count": 23, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Found 8677 images belonging to 101 classes.\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "bR6oZOPlnz6n" - }, - "source": [ - "def model_maker():\n", - " base_model = ResNet50(include_top=False,\n", - " input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))\n", - " for layer in base_model.layers[:]:\n", - " layer.trainable = False\n", - " input = Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))\n", - " custom_model = base_model(input)\n", - " custom_model = GlobalAveragePooling2D()(custom_model)\n", - " custom_model = Dense(64, activation='relu')(custom_model)\n", - " custom_model = Dropout(0.5)(custom_model)\n", - " predictions = Dense(NUM_CLASSES, activation='softmax')(custom_model)\n", - " return Model(inputs=input, outputs=predictions)" - ], - "execution_count": 24, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Sghlu4Rfnz6n", - "outputId": "19c97bb7-bc5b-4de0-e48c-60c088998823" - }, - "source": [ - "model_finetuned = model_maker()\n", - "model_finetuned.compile(loss='categorical_crossentropy',\n", - " optimizer=tensorflow.keras.optimizers.Adam(0.001),\n", - " metrics=['acc'])\n", - "model_finetuned.fit_generator(\n", - " train_generator,\n", - " steps_per_epoch=math.ceil(float(TRAIN_SAMPLES) / batch_size),\n", - " epochs=10)" - ], - "execution_count": 25, - "outputs": [ - { - "output_type": "stream", - "text": [ - "W0725 00:01:11.765636 140262955202432 deprecation.py:323] From :8: fit_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Please use Model.fit, which supports generators.\n", - "W0725 00:01:12.087389 140262955202432 data_adapter.py:1091] sample_weight modes were coerced from\n", - " ...\n", - " to \n", - " ['...']\n" - ], - "name": "stderr" - }, - { - "output_type": "stream", - "text": [ - "Train for 68.0 steps\n", - "Epoch 1/10\n", - "68/68 [==============================] - 27s 391ms/step - loss: 3.8677 - acc: 0.2043\n", - "Epoch 2/10\n", - "68/68 [==============================] - 26s 379ms/step - loss: 2.8975 - acc: 0.3625\n", - "Epoch 3/10\n", - "68/68 [==============================] - 26s 382ms/step - loss: 2.5821 - acc: 0.4187\n", - "Epoch 4/10\n", - "68/68 [==============================] - 25s 363ms/step - loss: 2.3700 - acc: 
0.4444\n", - "Epoch 5/10\n", - "68/68 [==============================] - 26s 389ms/step - loss: 2.0201 - acc: 0.5175\n", - "Epoch 6/10\n", - "68/68 [==============================] - 26s 386ms/step - loss: 1.8416 - acc: 0.5386\n", - "Epoch 7/10\n", - "68/68 [==============================] - 25s 371ms/step - loss: 1.7514 - acc: 0.5510\n", - "Epoch 8/10\n", - "68/68 [==============================] - 25s 370ms/step - loss: 1.6617 - acc: 0.5694\n", - "Epoch 9/10\n", - "68/68 [==============================] - 26s 383ms/step - loss: 1.5282 - acc: 0.5896\n", - "Epoch 10/10\n", - "68/68 [==============================] - 25s 366ms/step - loss: 1.5246 - acc: 0.5979\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 25 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "GcPDg7Wunz6o" - }, - "source": [ - "model_finetuned.save('./data/model-finetuned.h5')" - ], - "execution_count": 26, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "cSE2qbwtnz6o", - "outputId": "dd898777-459f-44f2-b561-c2ed9792c4ef" - }, - "source": [ - "start_time = time.time()\n", - "feature_list_finetuned = []\n", - "feature_list_finetuned = model_finetuned.predict_generator(generator, num_epochs)\n", - "end_time = time.time()\n", - "\n", - "for i, features_finetuned in enumerate(feature_list_finetuned):\n", - " feature_list_finetuned[i] = features_finetuned / norm(features_finetuned)\n", - "\n", - "feature_list = feature_list_finetuned.reshape(len(feature_list_finetuned), -1)\n", - "\n", - "print(\"Num images = \", len(feature_list_finetuned) )\n", - "print(\"Shape of feature_list = \", feature_list.shape)\n", - "print(\"Time taken in sec = \", end_time - start_time)" - ], - "execution_count": 27, - "outputs": [ - { - "output_type": "stream", - "text": [ - "2144\n", - "('Num images = ', 2144)\n", - "('Shape of feature_list = ', (2144, 101))\n", - "('Time taken in sec = ', 8.284640073776245)\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "collapsed": true, - "id": "L8FvRo-6nz6o" - }, - "source": [ - "pickle.dump(\n", - " feature_list,\n", - " open('./data/features-caltech101-resnet-finetuned.pickle', 'wb'))" - ], - "execution_count": 28, - "outputs": [] - } - ] -}