jupyter-naas · FlorentLvr · Jul 4, 2024
diff --git a/Instagram/Instagram_Explore_API.ipynb b/Instagram/Instagram_Explore_API.ipynb
@@ -0,0 +1,340 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "dbfe669a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2021-02-23T14:22:16.610471Z",
+     "iopub.status.busy": "2021-02-23T14:22:16.610129Z",
+     "iopub.status.idle": "2021-02-23T14:22:16.627784Z",
+     "shell.execute_reply": "2021-02-23T14:22:16.626866Z",
+     "shell.execute_reply.started": "2021-02-23T14:22:16.610384Z"
+    },
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "<img width=\"8%\" alt=\"Instagram.png\" src=\"https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Instagram.png\" style=\"border-radius: 15%\">"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5bbfcea2",
+   "metadata": {
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "# Instagram - Explore API\n",
+    "<a href=\"https://bit.ly/3JyWIk6\">Give Feedback</a> | <a href=\"https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=Instagram+-+Explore+API:+Error+short+description\">Bug report</a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "394838ed",
+   "metadata": {
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "**Tags:** #instagram #snippet #dataframe #content"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "370242e8",
+   "metadata": {
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fcdf88ea-b290-4dc9-8605-08c8724551fd",
+   "metadata": {
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "**Last update:** 2024-07-03 (Created: 2024-07-02)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "naas-description",
+   "metadata": {
+    "papermill": {},
+    "tags": [
+     "description"
+    ]
+   },
+   "source": [
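+    "**Description:** This notebook retrieves data from an Instagram profile through the Apify Instagram Profile Scraper and organizes it into DataFrames (posts, profiles, places, hashtags, photos and comments)."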
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "de96f02c",
+   "metadata": {
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "## Input"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "93bc8174",
+   "metadata": {
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "### Import libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f8cfa596-61fc-4135-a913-915cc5aab9e9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "import requests"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0957e2bc",
+   "metadata": {
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "### Setup variables"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a142e6f7-02d9-4fee-912a-2eb410a82b03",
+   "metadata": {},
+   "source": [
+    "- `apify_token`: personal Apify API token used to authenticate the request\n",
+    "- `instagram_profile_url`: URL of the Instagram profile to scrape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7f9f3fbb-f787-45d4-a7f0-e13662c0b736",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Read the token from the environment so the secret is never committed with the notebook.\n",
+    "# The placeholder fallback must be replaced (or APIFY_TOKEN exported) before running.\n",
+    "apify_token = os.environ.get(\"APIFY_TOKEN\") or \"YOUR_APIFY_TOKEN\"\n",
+    "instagram_profile_url = \"https://www.instagram.com/naaslife/\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9829569f",
+   "metadata": {
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "## Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40e3d074-6982-4381-bbb9-d5ee8c442c4a",
+   "metadata": {},
+   "source": [
+    "### Scrape Instagram data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "94d07b97-00e6-4204-a40e-51f3515a9138",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def get_instagram_data(apify_token, instagram_profile_url, timeout=330):\n",
+    "    \"\"\"Run the Apify Instagram Profile Scraper for one profile.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    apify_token : str\n",
+    "        Apify API token, sent as a Bearer token.\n",
+    "    instagram_profile_url : str\n",
+    "        Profile URL (with or without a trailing slash or query string)\n",
+    "        or a bare username.\n",
+    "    timeout : float, optional\n",
+    "        Seconds to wait for the HTTP response before giving up.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    dict or str\n",
+    "        The first item of the returned dataset on success, otherwise a\n",
+    "        message describing why no profile data is available.\n",
+    "    \"\"\"\n",
+    "    # Drop any query string / fragment and trailing slashes, then keep the last\n",
+    "    # path segment, so the username is found whether or not the URL ends with '/'.\n",
+    "    profile_path = instagram_profile_url.strip().split('?')[0].split('#')[0].rstrip('/')\n",
+    "    username = profile_path.split('/')[-1]\n",
+    "\n",
+    "    # Define the Apify API URL for the Instagram Profile Scraper\n",
+    "    api_url = \"https://api.apify.com/v2/acts/apify~instagram-profile-scraper/run-sync-get-dataset-items\"\n",
+    "\n",
+    "    # Define the payload with the necessary parameters\n",
+    "    payload = {\n",
+    "        \"usernames\": [username],  # Pass the username as a list\n",
+    "        \"proxyConfig\": {\n",
+    "            \"useApifyProxy\": True\n",
+    "        }\n",
+    "    }\n",
+    "\n",
+    "    # Define the headers with the Apify API token\n",
+    "    headers = {\n",
+    "        \"Authorization\": f\"Bearer {apify_token}\",\n",
+    "        \"Content-Type\": \"application/json\"\n",
+    "    }\n",
+    "\n",
+    "    # Make the request; a timeout keeps the notebook from hanging forever and\n",
+    "    # network failures are reported the same way as HTTP errors.\n",
+    "    try:\n",
+    "        response = requests.post(api_url, json=payload, headers=headers, timeout=timeout)\n",
+    "    except requests.exceptions.RequestException as error:\n",
+    "        return f\"Error: {error}\"\n",
+    "\n",
+    "    # Accept any 2xx status rather than only 200.\n",
+    "    # NOTE(review): Apify's API reference lists 201 for synchronous runs - confirm.\n",
+    "    if not 200 <= response.status_code < 300:\n",
+    "        return f\"Error: {response.status_code} - {response.text}\"\n",
+    "\n",
+    "    # Only a non-empty list of items carries profile information\n",
+    "    data = response.json()\n",
+    "    if isinstance(data, list) and data:\n",
+    "        return data[0]\n",
+    "    return \"No profile data found.\"\n",
+    "\n",
+    "def process_instagram_data(data):\n",
+    "    \"\"\"Split one scraped profile item into six DataFrames.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    data : dict\n",
+    "        Profile item to organize.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tuple of pandas.DataFrame\n",
+    "        (df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments)\n",
+    "    \"\"\"\n",
+    "    # NOTE(review): the key names used below ('posts', 'user', 'location', ...)\n",
+    "    # are kept as-is; confirm they match the scraper's actual output schema.\n",
+    "    # `or []` also covers keys that are present but explicitly null.\n",
+    "    posts = data.get('posts') or []\n",
+    "    profiles = [data.get('user', {})]\n",
+    "    places = [post.get('location', {}) for post in posts if post.get('location')]\n",
+    "    hashtags = [hashtag for post in posts for hashtag in (post.get('hashtags') or [])]\n",
+    "    photos = [post.get('images') or [] for post in posts]\n",
+    "    comments = [comment for post in posts for comment in (post.get('comments') or [])]\n",
+    "\n",
+    "    # Create DataFrames\n",
+    "    df_posts = pd.DataFrame(posts)\n",
+    "    df_profiles = pd.DataFrame(profiles)\n",
+    "    df_places = pd.DataFrame(places)\n",
+    "    df_hashtags = pd.DataFrame(hashtags, columns=['hashtag'])\n",
+    "    df_photos = pd.DataFrame(photos)\n",
+    "    df_comments = pd.DataFrame(comments)\n",
+    "\n",
+    "    return df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14696ed5",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2021-07-02T23:32:10.789097Z",
+     "iopub.status.busy": "2021-07-02T23:32:10.788829Z",
+     "iopub.status.idle": "2021-07-02T23:32:10.796900Z",
+     "shell.execute_reply": "2021-07-02T23:32:10.796358Z",
+     "shell.execute_reply.started": "2021-07-02T23:32:10.789033Z"
+    },
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "## Output"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "72557a2c",
+   "metadata": {
+    "papermill": {},
+    "tags": []
+   },
+   "source": [
+    "### Display result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1d3039cc-b820-4b02-8e01-24fd97527009",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Fetch the profile item (or a message explaining why none is available)\n",
+    "profile_data = get_instagram_data(apify_token, instagram_profile_url)\n",
+    "\n",
+    "if not isinstance(profile_data, dict):\n",
+    "    # Anything other than a dict is a message to show as-is\n",
+    "    print(profile_data)\n",
+    "else:\n",
+    "    frames = process_instagram_data(profile_data)\n",
+    "    df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments = frames\n",
+    "\n",
+    "    # Titles are listed in the same order as the returned DataFrames\n",
+    "    titles = (\n",
+    "        \"Posts DataFrame:\",\n",
+    "        \"\\nProfiles DataFrame:\",\n",
+    "        \"\\nPlaces DataFrame:\",\n",
+    "        \"\\nHashtags DataFrame:\",\n",
+    "        \"\\nPhotos DataFrame:\",\n",
+    "        \"\\nComments DataFrame:\",\n",
+    "    )\n",
+    "    for title, frame in zip(titles, frames):\n",
+    "        print(title)\n",
+    "        print(frame)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e76164f0-64be-4e45-98c8-d5be914b449d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  },
+  "naas": {
+   "notebook_id": "8c1d59ba9fc141ddf76ab615ec70620884b5be94f4cde842bd75126ac862db52",
+   "notebook_path": "Instagram/Instagram_Get_stats_from_posts.ipynb"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "environment_variables": {},
+   "parameters": {},
+   "version": "2.3.3"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "state": {},
+    "version_major": 2,
+    "version_minor": 0
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}