diff --git a/06_HTML_XML_JSON.ipynb b/06_HTML_XML_JSON.ipynb
new file mode 100644
index 0000000..dd5c5e4
--- /dev/null
+++ b/06_HTML_XML_JSON.ipynb
@@ -0,0 +1,1347 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "import requests\n",
+    "import json\n",
+    "\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "from bs4 import BeautifulSoup\n",
+    "\n",
+    "import boto3\n",
+    "\n",
+    "from IPython.display import HTML, display"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#quick warmup\n",
+    "\n",
+    "# imagine a key-value data container\n",
+    "inventory = {\n",
+    "      \"001\": [{\"name\": \"Milk\", \"quantity\": 34, \"price\": 1.99}],\n",
+    "      \"002\": [{\"name\": \"Bread\", \"quantity\": 20, \"price\": 2.49},\n",
+    "              {\"name\": \"Nutella\", \"quantity\": 5, \"price\": 2.49}]    \n",
+    "    }\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#How would a store manager find out how much is milk\n",
+    "\n",
+    "#How do we add new item? What do we need to do?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "name_to_item = {}\n",
+    "for item_id, details_list in inventory.items():\n",
+    "    for details in details_list:\n",
+    "        # Here we are assuming product names are unique\n",
+    "        name_to_item[details[\"name\"]] = {\"id\": item_id, \"quantity\": details[\"quantity\"], \"price\": details[\"price\"]}\n",
+    "#if details[\"name\"] is not unique, what happens?\n",
+    "print(name_to_item)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Lecture 05 - JSON, XML, HTML, Requests and APIs\n",
+    "by Jan Šíla, Vítek Macháček <br />\n",
+    "March 26, 2024\n",
+    "\n",
+    "### Contents\n",
+    "\n",
+    "* Standardized data representation\n",
+    "* JSON\n",
+    "* XML\n",
+    "* Introduction to BeautifulSoup\n",
+    "* Basics of HTML (+ Element Inspection)\n",
+    "* Introduction to Requests (GET vs. POST) and APIs\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Goals:\n",
+    "    \n",
+    "* work with data  online/real-time data\n",
+    "* acquisition, processing - > results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Date exchange formats - JSON, XML\n",
+    "\n",
+    "`Language of the internet`\n",
+    "\n",
+    "* You can send/receive a message with (almost) any service\n",
+    "\n",
+    "* send .docx -> what if I do not have MS Word?\n",
+    "* we need a simple data format which would work on any machine (system agnostic), is general (can write anything) and is ediatable in basic editors\n",
+    "\n",
+    "* More complex than simple tables\n",
+    "* Highly structured - if you dont follow the rules, you are out\n",
+    "* Both sides need to understand the structure (comments in yaml)\n",
+    "* only data, no code to be run (security measure)\n",
+    "* distributed as text/string (to be precise as `bytes` literals) \n",
+    "* parsed to objects - easy to work with straight away\n",
+    "* Can be persisted as special files, or some data streams from APIs. \n",
+    "* Human readable\n",
+    "* Hierarchical\n",
+    "* Can be fetched using standard web APIs\n",
+    "\n",
+    "### Purpose\n",
+    "\n",
+    "1. Communication \n",
+    "    * All imaginable communication channels\n",
+    "    * Applications within single server/machine\n",
+    "    * Only transferring of data\n",
+    "    * Both sides need to understand the structure\n",
+    "\n",
+    "2. Storing\n",
+    "    * self-descriptive\n",
+    "    * human readable\n",
+    "    * also in DBs - SQL, MongoDB etc.\n",
+    "\n",
+    "3. Standardization\n",
+    "    * predictability\n",
+    "    * cooperation\n",
+    "    * spillovers from standardization\n",
+    "\n",
+    "\n",
+    "### Dimensionality problem\n",
+    "\n",
+    "* rich information comes at costs of data complexity \n",
+    "* to interrelate information, you need to high dimensionality (or A LOT of columns) or declaratory formats such as protobuf\n",
+    "* Strongly object-oriented\n",
+    "\n",
+    "\n",
+    "### 1D:\n",
+    "* logs\n",
+    "\n",
+    "### 2D: CSVs\n",
+    "* tabular data (like pandas DFs)\n",
+    "\n",
+    "### 3+D:\n",
+    "#### XML\n",
+    "* eXtensible Markup Language is a software- and hardware-independent tool for storing and transporting data.\n",
+    "* Officialy defined at 1998, but its roots are even older.\n",
+    "* XML was designed to carry data - with focus on what data is\n",
+    "* HTML was designed to display data - with focus on what data should look like displayed \n",
+    "* XML tags are not predefined like HTML tags are\n",
+    "* more verbose than JSON\n",
+    "* can have comments !actually a really cool in useful feature!\n",
+    "* used historically as a transaction format in many areas: \n",
+    "    * Scientific measurements\n",
+    "    * News information\n",
+    "    * Wheather measurements\n",
+    "    * Financial transactions\n",
+    "* Necessary to use XML parser to use in Python or in JavaScript\n",
+    "\n",
+    "\n",
+    "### JSON\n",
+    "* JavaScript Object Notation\n",
+    "* often *.json* files\n",
+    "* but also used in the web etc.\n",
+    "* supports standard datatypes - strings, integers, floats, lists\n",
+    "* No comments\n",
+    "* More compact, less verbose\n",
+    "* No closing tags\n",
+    "* Used EVERYWHERE, BUT [NOT LICENSED FOR EVIL](https://www.json.org/license.html). If you want to do evil stuff, use XML instead.\n",
+    "* Native in JavaScript and close to native in Python (dictionary)\n",
+    "* Jupyter Notebooks\n",
+    "\n",
+    "\n",
+    "* common pitfals: properly formatted JSON is different to python dict. -> check: https://jsonlint.com/\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# JSON"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# general representation of a dictionary\n",
+    "# emphasis on accessibility -> key-value ( hash table )\n",
+    "# contains records, lists, or other dictionaries\n",
+    "\n",
+    "teachers = [\n",
+    "    {'name':'Jozef Baruník','titles':['doc.','PhDr.','Ph.D.','Bc.','Mgr.'],'ID':1234,'courses':['JEM005','JEM116','JEM059','JEM061']},\n",
+    "    {'name':'Martin Hronec','titles':['Bc.','Mgr.'],'ID':3421,'courses':['JEM005','JEM207']},\n",
+    "]\n",
+    "\n",
+    "courses = {\n",
+    "    \"JEM005\":{'name':'Advanced Econometrics','ECTS':6,'teachers':[3421,1234]},\n",
+    "    'JEM207':{'name':'Data Processing in Python','ECTS':5,'teachers':[3421]},\n",
+    "    'JEM116':{'name':'Applied Econometrics','ECTS':6,'teachers':[1234]},\n",
+    "    'JEM059':{'name':'Quantitative Finance I.','ECTS':6,'teachers':[1234,5678]},\n",
+    "    'JEM061':{'name':'Quantitative Finance II.','ECTS':6,'teachers':[1234,5678]}\n",
+    "}\n",
+    "jsondata = {'teachers':teachers,'courses':courses}\n",
+    "jsondata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "is this a valid JSON?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "https://jsonformatter.curiousconcept.com/"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![python and JSON](./06_pics/python_json.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "js = json.dumps(\n",
+    "    jsondata\n",
+    ") #json formatted string!\n",
+    "\n",
+    "isinstance(js,str)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "js"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jsondata['courses']['JEM005']['test']='test'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.DataFrame(jsondata['courses']).transpose()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfc = pd.read_json(json.dumps(jsondata['courses']),orient='index')\n",
+    "dfc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# lets come back to this a little later"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# eXtensible Markup Language (XML)\n",
+    "\n",
+    "* elements\n",
+    "* attributes\n",
+    "* tags\n",
+    "\n",
+    "### Tag\n",
+    "> <>\n",
+    "\n",
+    "### Element"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "source": [
+    "### Convert to python data-types"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#either\n",
+    "'''<element>content</element>'''\n",
+    "\n",
+    "#or self-closing (no content)\n",
+    "'''<element />''';\n",
+    "# <br />"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Attributes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''<element attr=\"value\" />''';"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![XML tree structure](./06_pics/xml_tree_structure.png)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```xml\n",
+    "<bookstore>\n",
+    "    <book category=\"fiction\">\n",
+    "        <title lang=\"ENG\">Everyday Italian</title>\n",
+    "        <title lang=\"CZE\">AAaAA</title>\n",
+    "        <author>Giada De Laurentis</author>\n",
+    "        <year>2005</year>\n",
+    "        <price>30.00</price>\n",
+    "    </book>\n",
+    "</bookstore>\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```json\n",
+    "{\n",
+    "    \"bookstore\":[\n",
+    "        {\n",
+    "            \"title\":\"Everyday Italian\",\n",
+    "            \"lang\":\"ENG\",\n",
+    "            \"author\":\"Giada de Laurentis\",\n",
+    "            \"year\":2005,\n",
+    "            \"price\":30\n",
+    "        }\n",
+    "    ]\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "\n",
+    "Takeaway: JSON and XML are not equivalents and cannot be freely mirrored. Unfortunately.\n",
+    "\n",
+    "JSON cannot have multiple tags with different properties ->title_en, title_cze  perhaps"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Navigation\n",
+    "* Xpath\n",
+    "* CSS selectors \n",
+    "* **BeautifulSoup**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### BeatifulSoup in detail\n",
+    "each BS object represents\n",
+    "* an element\n",
+    "* the position in tree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''string  on more \n",
+    "nes '''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "xml = '''\n",
+    "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
+    "<ies_data>\n",
+    "    <courses>\n",
+    "        <course id=\"JEM005\" ects=\"6\" name=\"Advanced Econometrics\">\n",
+    "           <teacher-id>3421</teacher-id>\n",
+    "           <teacher-id>1234</teacher-id>\n",
+    "        </course>\n",
+    "        <course id=\"JEM207\" ects=\"5\" name=\"Data Processing in Python\">\n",
+    "            <teacher-id>3421</teacher-id>\n",
+    "        </course>\n",
+    "            <course id=\"JEM116\" ects=\"6\" name=\"Applied Econometrics I.\">\n",
+    "            <teacher-id>1234</teacher-id>\n",
+    "        </course>\n",
+    "        <course id=\"JEM059\" ects=\"6\" name=\"Quantitative Finance I.\">\n",
+    "            <teacher-id>1234</teacher-id>\n",
+    "            <teacher-id>5678</teacher-id>\n",
+    "        </course>\n",
+    "        <course id=\"JEM061\" ects=\"6\" name=\"Quantitative Finance II.\">\n",
+    "            <teacher-id>1234</teacher-id>\n",
+    "            <teacher-id>5678</teacher-id>\n",
+    "        </course>\n",
+    "    </courses>\n",
+    "    <teachers>\n",
+    "        <teacher teacher-id=\"3421\">\n",
+    "            <name>Martin Hronec</name>\n",
+    "        </teacher>\n",
+    "        <teacher teacher-id=\"1234\">\n",
+    "            <name>Jozef Baruník</name>\n",
+    "        </teacher>\n",
+    "        <teacher teacher-id=\"5678\">\n",
+    "            <name>Lukáš Vácha</name>\n",
+    "        </teacher>\n",
+    "    </teachers>\n",
+    "</ies_data>\n",
+    "'''\n",
+    "\n",
+    "#unlike HTML, those tag names are defined by Vitek - no one else 'can' understand them -> flexibility is limited. But same issue with JSON to be fair\n",
+    "\n",
+    "soup = BeautifulSoup(xml)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dir(soup)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```find()``` will find a **first** element given the input\n",
+    "\n",
+    "```find_all()``` or ```findAll()```  finds a **all** elements given the input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "soup.find_all('course')[0].find('teacher-id')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jem059 = soup.find('course',{'id':'JEM059'}) #looking for a tag with attrbitues (optional)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jem059"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "soup.findAll('teacher-id')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`soup['attr']` will return the value of attribute"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(jem059['ects'])\n",
+    "print(jem059['name'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "soup.findAll('teacher-id')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jem059"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "you can also navigate horizontally"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jem059.findNext('course').findNext('course')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jem059.findPrevious('course').findPrevious('course')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "and even upstream!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jem059.parent.parent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#get all teacher ids\n",
+    "teacher_ids = [int(t.text) for t in soup.findAll('teacher-id')]\n",
+    "print(teacher_ids)\n",
+    "#get unique\n",
+    "set(teacher_ids)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "course = soup.find('course')\n",
+    "d = {\n",
+    "    'id':course['id'],\n",
+    "    'name':course['name'],\n",
+    "    'ects':course['ects'],\n",
+    "    'teachers':[int(t.text) for t in course.findAll('teacher-id')]\n",
+    "}\n",
+    "d"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Can convert to JSON-like"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "l = []\n",
+    "for course in soup.findAll('course'):\n",
+    "    d = {'id':course['id'],\n",
+    "         'name':course['name'],\n",
+    "         'ects':course['ects'],\n",
+    "         'teachers':[int(t.text) for t in course.findAll('teacher-id')]}\n",
+    "    l.append(d)\n",
+    "l"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Or in list-comprehension syntax"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "l = [{\n",
+    "    'id':course['id'],\n",
+    "    'name':course['name'],\n",
+    "    'ects':course['ects'],\n",
+    "    'teachers':[int(t.text) for t in course.findAll('teacher-id')]\n",
+    "} for course in soup.findAll('course')]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "l"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.DataFrame(l)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# HTML\n",
+    "standard web-page consists of:\n",
+    "\n",
+    "* Browser-executed code (`front-end`)\n",
+    "    * HTML \"DOM\" structure - the website content\n",
+    "        * List of elements that are on website\n",
+    "        * Links to CSS classes, ids and\n",
+    "    * CSS stylesheets - website graphics\n",
+    "    * JavaScripts - website interactivity    \n",
+    "\n",
+    "* Server-executed (`back-end`)\n",
+    "    * Server, database, app logic etc.\n",
+    "    * Not available for scraping!\n",
+    "    * May be available as API\n",
+    "\n",
+    "\n",
+    "## Web-scraping\n",
+    "* client side only\n",
+    "* Navigating HTML DOM by taking advantage of CSS structure\n",
+    "\n",
+    "## DOM (Document Object Module):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "html = '''\n",
+    "<html>\n",
+    "    <head>\n",
+    "        <title>Sample page</title>\n",
+    "    <script>\n",
+    "        function click_button() {\n",
+    "            alert('Hi guys, still enjoying Python?!')\n",
+    "        }\n",
+    "    </script>\n",
+    "    <style>\n",
+    "        #content div {\n",
+    "            color:black;\n",
+    "        }\n",
+    "        .firstRow {\n",
+    "            background-color:#ddd;\n",
+    "        }\n",
+    "\n",
+    "        .normalRow {\n",
+    "            background-color:white;\n",
+    "        }\n",
+    "    </style>\n",
+    "    </head>\n",
+    "    \n",
+    "    <body>\n",
+    "        <div id=\"header\">\n",
+    "            My page header\n",
+    "        </div>\n",
+    "        <div id=\"table_container\">\n",
+    "            <table>\n",
+    "                <tr class=\"firstRow\">\n",
+    "                    <td>name</td>\n",
+    "                    <td>number</td>\n",
+    "                </tr>\n",
+    "                <tr class=\"normalRow\">\n",
+    "                    <td>B</td>\n",
+    "                    <td>2</td>\n",
+    "                </tr>\n",
+    "                <tr class=\"normalRow\">\n",
+    "                    <td>C</td>\n",
+    "                    <td>3</td>\n",
+    "                </tr>\n",
+    "            </table>\n",
+    "        </div>\n",
+    "        <div id=\"button_container\">\n",
+    "            <button id=\"btn\" onclick=\"click_button()\">Click Me!</button>\n",
+    "        </div\n",
+    "    </body>\n",
+    "</html>\n",
+    "'''\n",
+    "display(HTML(html))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "soup = BeautifulSoup(html,'html')\n",
+    "soup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rows = soup.findAll('tr',{'class','normalRow'})\n",
+    "rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d = {}\n",
+    "\n",
+    "for row in rows:\n",
+    "    key = row.findAll('td')[0].text\n",
+    "    val = int(row.findAll('td')[1].text)\n",
+    "    d[key] = val\n",
+    "pd.Series(d)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.Series({\n",
+    "    row.findAll('td')[0].text:int(row.findAll('td')[1].text) \n",
+    "    for row in BeautifulSoup(html).findAll('tr',{'class':'normalRow'})})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "soup = BeautifulSoup(html)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "row = soup.findAll('tr',{'class':'normalRow'})[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "row"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "row.findAll('td')[0].text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "int(row.findAll('td')[1].text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "{row.findAll('td')[0].text:int(row.findAll('td')[1].text) for row in soup.findAll('tr',{'class':'normalRow'})}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## HTML Inspection\n",
+    "http://ies.fsv.cuni.cz/cs/node/51"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# requests and internet communication\n",
+    "\n",
+    "* `Client` asks/requests questions (your Jupyter client)\n",
+    "* `Server` replies/serve answers (your Jupyter server)\n",
+    "\n",
+    "\n",
+    "API = *Application Programming Interface*\n",
+    "\n",
+    "very general term! Not only used in web communication\n",
+    "\n",
+    "## HTTP requests\n",
+    "\n",
+    "A most standard webserver communication channel around\n",
+    "\n",
+    "A standard HTTP request contains:\n",
+    "\n",
+    "* URL \n",
+    "\n",
+    "    * domain\n",
+    "    * route\n",
+    "    * parameters\n",
+    "\n",
+    "* Request Type - GET, POST, PUT, DELETE (see below)\n",
+    "\n",
+    "* Content specification - \n",
+    "    * Application/JSON\n",
+    "    * Application/XML\n",
+    "    * text/html\n",
+    "    * text/css\n",
+    "\n",
+    "* Content\n",
+    "\n",
+    "* Outcoming data (will see below)\n",
+    "\n",
+    "* Cookies \n",
+    "\n",
+    "* Status Code:\n",
+    "\n",
+    "    * 200 - success\n",
+    "    * 404 - resource does not exist\n",
+    "    * 500 - the server failed during processing your request\n",
+    "\n",
+    "\n",
+    "1) REST API - use HTTP request and returns JSON\n",
+    "\n",
+    "2) SOAP API - use HTTP request and returns XML\n",
+    "\n",
+    "3) Website - use HTTP request and returns set of HTML, JavaScript, CSS and other files\n",
+    "\n",
+    "### When to use?\n",
+    "* whenever more applications need to communicate\n",
+    "* user-friendly interface for complicated tasks - DEEP AI, Google Maps\n",
+    "* Data - Golemio, OpenStreetMaps\n",
+    "\n",
+    "### GET request\n",
+    "* fast\n",
+    "* public\n",
+    "* data flow only one direction\n",
+    "* parameters via request adress\n",
+    "\n",
+    "> https://www.google.com/search?q=how+to+understand+url+parameters&rlz=1C1GCEU_csCZ860CZ860&oq=how+to+understand+url+parameters&aqs=chrome..69i57j33i22i29i30l7.5237j0j4&sourceid=chrome&ie=UTF-8\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "r = requests.get('https://cs.wikipedia.org/wiki/Institut_ekonomick%C3%BDch_studi%C3%AD_Fakulty_soci%C3%A1ln%C3%ADch_v%C4%9Bd_Univerzity_Karlovy')\n",
+    "#plain request - like browser\n",
+    "r.text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "soup = BeautifulSoup(r.text,'html')\n",
+    "tags=soup.findAll('span', {'class':\"wd\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tags"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### POST request\n",
+    "* slow\n",
+    "* private\n",
+    "* both sides can send data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Static pages x Dynamic pages x JavaScript-rendered pages\n",
+    "\n",
+    "### Static\n",
+    "\n",
+    "* pages that do not get updated instantly\n",
+    "* all information necessary for rendering a website is available after entering the URL\n",
+    "* It may ask the database, but the output is stable.\n",
+    "* all parameters within the adress!\n",
+    "* Typical example:\n",
+    "    \n",
+    "### JavaScript rendered: \n",
+    "* Defacto static, but you cannot take advantage of HTML/CSS structure\n",
+    "\n",
+    "### Dynamic content\n",
+    "* webpage instantly communicates with the webserver and the database\n",
+    "* \n",
+    "* solution -> Selenium!\n",
+    "\n",
+    "### Is this website static or dynamic?\n",
+    "\n",
+    "1. Facebook\n",
+    "2. Sreality.cz\n",
+    "3. IES website\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## How to chose data source for project\n",
+    "\n",
+    "You need to know in advance what data you will download:\n",
+    "\n",
+    "1. full or satisfactory access to API\n",
+    "2. the web-page is parsable (prefer not too much javascript)\n",
+    "3. plan to generate all requests"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# APIs Example\n",
+    "### Get wiki data using GET"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#if time, return to geodata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Lets start with a basic request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "api_url = 'https://krcgc3uqga.execute-api.eu-central-1.amazonaws.com'\n",
+    "#this api implements three routers\n",
+    "# GET /time\n",
+    "# GET /stocks\n",
+    "# POST /hashme"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "route = 'time'\n",
+    "# route /ruːt/\n",
+    "response = requests.get(f'{api_url}/{route}')\n",
+    "response.json()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# route = stocks\n",
+    "\n",
+    "route = 'stocks'\n",
+    "# route /ruːt/\n",
+    "response = requests.get(f'{api_url}/{route}')\n",
+    "print(response.json())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "route = \"hashme\"\n",
+    "url = f\"https://krcgc3uqga.execute-api.eu-central-1.amazonaws.com/{route}\"\n",
+    "\n",
+    "payload = json.dumps({\n",
+    "  \"name\": \"Jan Sila\"\n",
+    "})\n",
+    "headers = {\n",
+    "  'Content-Type': 'application/json'\n",
+    "}\n",
+    "\n",
+    "response = requests.post(url, headers=headers, data=payload)\n",
+    "\n",
+    "print(response.json())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response = requests.get('https://en.wikipedia.org/wiki/Charles_University')\n",
+    "soup = BeautifulSoup(response.text)\n",
+    "div = soup.find('div',{'id':'mw-content-text'}) #  #mw-content-text > div > p:nth-child(10)texts)\n",
+    "article = ' '.join([p.text for p in div.find_all('p')])\n",
+    "print(article)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Bonus example:\n",
+    "\n",
+    "## GeoJSON\n",
+    "\n",
+    "* One standardized data format for transferring geodata\n",
+    "* Plenty of geodata out there\n",
+    "* see for example http://opendata.iprpraha.cz/CUR/OVZ/OVZ_Klima_ZnecOvzdusi_p/WGS_84/OVZ_Klima_ZnecOvzdusi_p.json\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "verbose_request = requests.get('http://opendata.iprpraha.cz/CUR/OVZ/OVZ_Klima_ZnecOvzdusi_p/WGS_84/OVZ_Klima_ZnecOvzdusi_p.json')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "print(verbose_request.status_code)\n",
+    "dir(verbose_request)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "verbose_request.json()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "#get already json\n",
+    "d = requests.get('http://opendata.iprpraha.cz/CUR/OVZ/OVZ_Klima_ZnecOvzdusi_p/WGS_84/OVZ_Klima_ZnecOvzdusi_p.json').json()\n",
+    "\n",
+    "d['features'][0]['properties']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import branca\n",
+    "import folium\n",
+    "\n",
+    "colorscale = branca.colormap.linear.YlOrRd_09.scale(0, 5)\n",
+    "\n",
+    "def style_function(feature):\n",
+    "    gridvalue = feature['properties']['GRIDVALUE']\n",
+    "    return {\n",
+    "        'fillOpacity': 0.5,\n",
+    "        'weight': 0,\n",
+    "        'fillColor': colorscale(gridvalue)\n",
+    "    }\n",
+    "\n",
+    "m = folium.Map(location=[50.085,14.45],zoom_start=11)\n",
+    "folium.GeoJson('http://opendata.iprpraha.cz/CUR/OVZ/OVZ_Klima_ZnecOvzdusi_p/WGS_84/OVZ_Klima_ZnecOvzdusi_p.json',style_function=style_function).add_to(m)\n",
+    "m"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/06_pics/python_json.png b/06_pics/python_json.png
new file mode 100644
index 0000000..49153dd
Binary files /dev/null and b/06_pics/python_json.png differ
diff --git a/06_pics/xml_tree_structure.png b/06_pics/xml_tree_structure.png
new file mode 100644
index 0000000..2ab6456
Binary files /dev/null and b/06_pics/xml_tree_structure.png differ