From 469f9c8e0f3a4fd760e1c051a463ab943b82a9fe Mon Sep 17 00:00:00 2001 From: Jozef Volak Date: Mon, 20 Sep 2021 12:17:42 +0200 Subject: [PATCH] Update README and CHANGELOG, fix minor bugs --- CHANGELOG.md | 48 +++++ README.md | 167 ++++++++++++++---- .../grafana/db_swarm_monitoring.json | 18 +- generate_uc_compose.sh | 2 +- 4 files changed, 196 insertions(+), 39 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..02b1e29f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,48 @@ +# Frinx Machine 1.7 RELEASE NOTE + +## Uniconfig + +* Using Uniconfig 4.2.8 - stateless model +* Multi-zone with multi-instance uniconfig deployment +* Use Traefik as a load balancer in front of Uniconfig instances in each zone + +## Conductor + +* Update conductor server to 3.0.5 +* Disabled ack check in client +* Changed health-check API +* Optimise Frinx Conductor Client - PyPi version 1.0.3 + +## Conductor workers + +* Install/Uninstall RPCs +* Frinx Conductor Workers - PyPi version 1.0.2 + +## Device inventory + +* Replace old inventory with new Device Inventory + +## KrakenD + +* Update KrakenD image to v1.4.0 +* Add default certificates to docker image +* No local building required +* Add log filtering plugin + +## FM-Workflows + +* Clean obsolete workflows +* Add device inventory worker +* Optimise tasts definitions for conductor client v 1.3.0 +* Mock IOS02 as a separate device + +## Docker secrets + +* Store certificates in docker secrets + +## Monitoring services + +* Collecting swarm statistics +* Collecting node statistics +* Collecting logs of FM services +* Grafana visualisation diff --git a/README.md b/README.md index 7f90060b..1681230f 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,10 @@ https://github.com/FRINXio/FRINX-machine/releases > Master branch is used for development and not recommended for deployment +
+For migration from Frinx Machine 1.6 to Frinx Machine 1.7, see [Migration from Frinx Machine 1.6](#migration-from-frinx-machine-16).
+ ## Requirements Minimal hardware requirements (See [resource limitation](#resource-limitation)) @@ -19,10 +22,13 @@ Development: - 16GB RAM - 4x CPU + +For long-term monitoring is good to have a minimal 30Gb of free space + To deploy an FM swarm cluster you need at least one machine with Ubuntu 18.04/20.04 installed. # Starting Frinx Machine -You can deploy the FM either locally with all services running on a single node, or you can split UniFlow and UniConfig instances among multiple nodes. UniFlow is always running on the docker swarm manager node. In the case of multi-node deployment, it is necessary to modify the .env file ( Follow [Preparing Environment](#preparing-environment) ). +You can deploy the FM either locally with all services running on a single node, or you can split UniFlow and UniConfig instances among multiple nodes. UniFlow is always running on the docker swarm manager node. * [Installation](#installation) @@ -32,12 +38,14 @@ You can deploy the FM either locally with all services running on a single node, * [Resource limitation](#resource-limitation) * [Maintaining](#maintaining) * [TLS Certificated](#tls-certificates) - +

## Installation Run the install script, this will check and download the neccessary prerequisities. ```sh -$ sudo ./install.sh +$ ./install.sh # will ask password sudo +$ ./install.sh --skip # skip dependeny installing +$ ./install.sh --update-secrets # update certificates to docker secrets frm ./config/certificates folder ``` Automatically installed software: @@ -49,16 +57,23 @@ Automatically installed software: NOTE: It may happen that swarm initialization will fail during install. Most likely due to multiple network interfaces present. In that case run `docker swarm init --advertise-addr ` command to tell swarm which ip address to use for inter-manager communication and overlay networking +
+ NOTE: As FM is designed to run as non-root user, you need the user to be in `docker` group, this is done automatically during the installation process. Use newgrp or reboot the system for changes to take effect **BEFORE** running ```./startup.sh```
See: https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user +
+ + +### Install Frinx Machine with proxy If you want to configure docker to use a proxy server, use: + ```sh # Create folder for docker proxy config file $ mkdir "${USER}/.docker" -$ sudo ./install.sh \ +$ ./install.sh \ --proxy-conf "${USER}/.docker/config.json" \ --http-proxy "ip:port" \ --https-proxy "ip:port" \ @@ -66,10 +81,13 @@ $ sudo ./install.sh \ ``` For disabling proxy, the config.json must be removed and content of UC_PROXY_* variables in .env file must be erased! For example: UC_PROXY_HTTP_ENV="". -For more info see: https://docs.docker.com/network/proxy/ +For more info see: https://docs.docker.com/network/proxy/ +

### Install/Update docker secrets (KrakenD HTTPS/TLS) -During installation, docker secrets are created and are used for establishing HTTPS/TLS connections. These secrets contain private and public keys and are generated from files in the ./config/certificates folder. These certificates can be replaced by custom certificates (use the same name) before execution of installation script or re-execution with the `--update-secrets` flag. +During installation, docker secrets are created and are used for establishing HTTPS/TLS connections. These secrets contain private and public keys and are generated from files in the ./config/certificates folder. + +These certificates can be replaced by custom certificates (use the same name) before execution of installation script or re-execution with the `--update-secrets` flag.

## Single-node deployment Installation and running of UniFlow and UniConfig on the same machine. To deploy both UniFlow and UniConfig locally (for testing and demo purposes), run `startup.sh`: @@ -85,9 +103,12 @@ $ ./startup.sh --uniflow # To uniconfig only, use: $ ./startup.sh --uniconfig -# To metric services only, use: +# To monitoring services only, use: $ ./startup.sh --monitoring +# To Frinx Machine without monitoring services, use: +$ ./startup.sh --no-monitoring + ``` The FRINX Machine services will now be started. @@ -101,18 +122,23 @@ Each service will show as 'REPLICAS 1/1' when the service is up (it may take sev UniFlow dashboard is accessible via web browser by visiting: ```sh http:// + +# monitoring dashboard +http://:3000 ``` + In some cases the self signed certificate of the FM dashboard creates an NET_ERR_CERT_INVALID error message in your browser. Follow [these](https://stackoverflow.com/questions/35274659/does-using-badidea-or-thisisunsafe-to-bypass-a-chrome-certificate-hsts-error) steps to proceed. +
+ ### Demo workflows & sample topology Once all services are started, please clone https://github.com/FRINXio/fm-workflows and follow the instructions to load demo workflows and a sample topolgy.
You can find a collection of demo use cases here https://docs.frinx.io/frinx-machine/use-cases/index.html - +
## Multi-node deployment UniFlow services are deployed on swarm manager node and UniConifig services are deployed on swarm worker nodes. -NOTE: Before starting multi-node deployment, it is **necessary to generate uniconfig compose files** with `generate_uc_compose.sh` script! ### Preparing worker nodes for UniConfig services Install and set-up docker-ce on worker node: @@ -127,9 +153,11 @@ $ sudo apt-get install docker-ce=5:18.09.9~3-0~ubuntu-bionic $ sudo apt-get install docker-ce=5:20.10.5~3-0~ubuntu-focal $ sudo usermod -aG docker $USER $ newgrp docker -$ docker plugin install grafana/loki-docker-driver:latest --alias loki --grant-all-permissions +$ docker plugin install grafana/loki-docker-driver:main-20515a2 --alias loki --grant-all-permissions ``` +### Connect workers to Docker Swarm + Run following command on manager node to determine the swarm token ```sh # on manager node @@ -142,36 +170,48 @@ And then join the worker node to the swarm with token provided by the manager: $ docker swarm join --token SWMTKN- IP:PORT ``` -To deploy UniConfig to a worker node, distribute the default UniConfig configuration to `/opt` directory on the worker node (SCP used as an example). - -From the manager node: -```sh -$ scp -r ./config/uniconfig/frinx username@host:/home/username/ -``` -Log into remote node and copy the files: -```sh -$ sudo cp -r /home/username/frinx /opt -$ sudo chmod a+w /opt/frinx/uniconfig/cache/ -``` - -### Generating uniconfig compose files - Now is possible to check all swarm nodes with and find worker node IDs. ```sh #List all swarm nodes $ docker node ls ``` -For generating of uniconfig (uniconfig-postgresql) compose files use `generate_uc_compose.sh`. +### Generate configuration files for multi-node deployment + +Frinx Machine supports Uniconfig deployment in multi-zone mode (multiple uniconfig zones). +Before Frinx Machine is started, it is necessary to generate unique configuration files for each zone separately. 
+To generate these files, use `generate_uc_compose.sh`. + You need to define: -- uniconfig service name: must be unique name -- swarm node-id: where will be deployed (find from previous command output) +- uniconfig zone name: must be a unique name +- swarm node-id: the node where the zone will be deployed (use `docker node ls`) - folder path: where are stored composefiles for multinode deployment +- instances: how many uniconfig instances will be started per zone (redundancy) + +The default folder path is `./composefiles/uniconfig`, but it can be different (outside of the FM repo folder). +```sh +$ ./generate_uc_compose.sh -s <zone_name> -n <node_id> -f <path_to_folder> -i <instances> +``` +
+ +### Upload configuration files on worker node + +To deploy UniConfig to a worker node, distribute the default UniConfig configuration to the `/opt` directory on the worker node (SCP is used as an example). -Preffered folder path is `./composefiles/uniconfig`, but can be differend (outside from FM repo folder). +On the worker node: ```sh -$ ./generate_uc_compose.sh -s -n -f +$ sudo install -o $USER -g $USER -m 755 -d /opt/frinx + +# if an older FM was started on this node, remove docker persistent volumes +$ docker volume prune -f +``` + +From the manager node: +```sh +# path_to_folder contains the files generated in the previous steps +$ scp -r <path_to_folder>/opt/frinx/* username@host:/opt/frinx ``` +
### Deploying services @@ -186,6 +226,8 @@ $ ./startup.sh --multinode --uniconfig --dev NOTE: The deployment might take a while as the worker node needs to download all necessary images first. +
+ ## Preparing Environment The FRINX-Machine repository contains a **env.template** (used for creating .env) and **.env** file in which the default FM configuration settings are stored. In .env file, the settings are divided to these groups: @@ -204,16 +246,50 @@ The FRINX-Machine repository contains a **env.template** (used for creating .env > * UC_PROXY_* : use docker proxy in Uniconfig Service ( See [Installation](#installation) ) Default settings are prepared for deployment without docker proxy. + +
+ ## Resource limitation Default resource limitation is configured for production but can be changed to development. ```sh -$ ./startup.sh --dev +$ ./startup.sh --dev # ./config/dev_settings.txt +$ ./startup.sh --prod # ./config/prod_settings.txt, same as ./startup.sh ``` -Template for production settings is stored in `./config/prod_settings.txt`.
In this file, these values can be changed by profiled requirements. +These values can be adjusted to match the requirements of your deployment. + +
## Maintaining +### Migration from Frinx Machine 1.6 + +- Remove old docker volumes: + +```sh +$ ./teardown.sh -v +# or +$ docker volume prune -f +``` + +- Update / remove the .env file from the Frinx Machine 1.6 deployment: + +If you are deploying Frinx Machine 1.7 from the same directory that was used for Frinx Machine 1.6, you need to update or remove the .env file.
+To update it, replace these lines in the .env file: +```sh +# Remove: +LOCAL_KRAKEND_IMAGE_TAG="with_certificates" +# Add +BASE_KRAKEND_IMAGE_TAG="1.0.0" +LOCAL_KRAKEND_IMAGE_TAG="" +``` + +- Build KrakenD image with custom certificates (optional): + +Add custom certificates to ./krakend/certs and use `build_krakend.sh` + +
+ ### Checking You can check the status of the swarm cluster by running: ```sh @@ -222,6 +298,8 @@ $ docker stack ps fm ``` Where 'fm' (FRINX Machine) is the name of the swarm stack configured during deployment, the name is assigned automatically. +
+ ### Bench Security For security improvement of dockerized Frinx Machine, therse docker settings can be configured to `/etc/docker/daemon.json`. @@ -242,6 +320,8 @@ Bench security analysis can be performed with this command $ ./config/docker-security/bench_security.sh ``` +
+ ### Monitoring services Frinx Machine is collecting logs and metrics/statistics from services. @@ -250,7 +330,11 @@ Frinx Machine is collecting logs and metrics/statistics from services. * Node monitoring: node-exporter * Swarm monitoring: google/cadvisor * Visualization: Grafana (url 127.0.0.1:3000) -
+ +NOTE: Be aware that the monitoring system consumes a lot of disk space. For long-term monitoring, make sure there is enough free disk space. +At least 30 GB is recommended. + +
### ElasticSearch disk flood stage prevention ElasticSearch changes the disk permissions to read-only if the disk free space drops below 512Mb.. This setting is a last resort to prevent nodes from running out of disk space. The index block must be released manually when the disk utilization falls below the high watermark. @@ -259,6 +343,8 @@ ElasticSearch changes the disk permissions to read-only if the disk free space d curl -XPUT -H "Content-Type: application/json" http://localhost:9200/_all/_settings -d '{"index.blocks read_only_allow_delete": null}' ``` +
+ ### List of deployed uniconfig services KrakenD provice API, which return list of deployed uniconfig services: @@ -267,6 +353,8 @@ $ curl -X GET 127.0.0.1/static/list/uniconfig {"instances":["uniconfig1","uniconfig2"]} ``` +
+ ### Shutdown To stop all services, simply remove the stack from the swarm: ```sh @@ -284,6 +372,7 @@ For see more options run: $ ./teardown.sh -h ``` +
### Log collections To collect logs, run: @@ -292,7 +381,16 @@ $ ./collectlogs.sh ``` This will collect all docker related logs and compresses it into an archive. Logs are collected from local machine only, if you want logs from remote node (e.g. worker) you must copy and run the script on the remote node. -# TLS certificates +
+ +## TLS certificates + +All certificates that are stored in docker secrets can be found in the `./config/certificates` folder. + +* frinx_krakend_*: Enabling HTTPS for api-gateway +* frinx_uniconfig_tls_cert*: Enabling HTTPS connection for traefik (uniconfig zone load balancer) + +
In the demo deployment the setup has already been done and uniconfig is running under https(not suitable for production). To set it up with own certificates please follow the next steps: @@ -309,9 +407,12 @@ To set it up with own certificates please follow the next steps: In case a new certificate is generated for uniconfig When prompted for `What is your first and last name?` put docker dns name of uniconfig container (Default: uniconfig). Also will need to modify `/home/test/FRINX-machine/config/uniconfig/frinx/uniconfig/config/lighty-uniconfig-config.json` based on the new keystore setup. + 3. In case self signed certificate is used please add ca's certificate to `karakend/certs` folder in `.crt` format. For changes to be propagated run `./build_krakend.sh` (for more run with -h) in case of deployed stack `startup.sh` as well. +
+ ## For developers If you need to modify and rebuild modules, you can use `pullmodules.sh` script to download up-to-date modules from FRINX's public GitHub repositories. Then you can use standard docker utilities to build and distribute them, e.g.: ```sh diff --git a/config/monitoring/grafana/db_swarm_monitoring.json b/config/monitoring/grafana/db_swarm_monitoring.json index 59cc8bbf..35a59f46 100644 --- a/config/monitoring/grafana/db_swarm_monitoring.json +++ b/config/monitoring/grafana/db_swarm_monitoring.json @@ -553,16 +553,24 @@ "mode": "absolute", "steps": [ { - "color": "rgba(245, 54, 54, 0.9)", + "color": "dark-green", "value": null }, { - "color": "rgba(237, 122, 40, 0.89)", + "color": "semi-dark-green", "value": 0.1 }, { "color": "rgba(14, 211, 40, 0.97)", "value": 0.25 + }, + { + "color": "rgba(237, 122, 40, 0.89)", + "value": 0.7 + }, + { + "color": "dark-red", + "value": 0.9 } ] }, @@ -867,7 +875,7 @@ { "evaluator": { "params": [ - 524287 + 1024000000 ], "type": "lt" }, @@ -892,7 +900,7 @@ "for": "1m", "frequency": "1m", "handler": 1, - "message": "Hahaha", + "message": "Disk space is low. Please remove old data and check elasticsearch volume permissions in logs!", "name": "Free disc space alert", "noDataState": "keep_state", "notifications": [] @@ -963,7 +971,7 @@ "fill": true, "line": true, "op": "lt", - "value": 524287, + "value": 1024000000, "visible": true } ], diff --git a/generate_uc_compose.sh b/generate_uc_compose.sh index a636ee89..54af97b8 100755 --- a/generate_uc_compose.sh +++ b/generate_uc_compose.sh @@ -183,7 +183,7 @@ function generateUcCompose { function prepareFolder { if [ -d ${__FOLDER_PATH} ]; then - rm -rf ${__FOLDER_PATH}/* + rm -rf "${__FOLDER_PATH}/swarm-uniconfig.yml" "${__FOLDER_PATH}/opt" mkdir -p ${__FOLDER_PATH}/${__DEF_CONFIG_PATH} fi }