Skip to content

Commit

Permalink
Manikin: update performance
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexeyManikin committed Apr 29, 2020
1 parent 21206d6 commit 0d01d5b
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 95 deletions.
26 changes: 6 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Статистика доменов
Скрипт для сбора статистики для зон ru/su/rf. Собираются все записи c DNS,
Автономная система, переод делегирования
Скрипт для сбора статистики для зон ru/su/rf. Собираются все записи c DNS (A, AAAA, NS, MX, TXT),
данные об автономной системе, к которой принадлежит домен, периоду делегирования и так далее. На основе этих данных можно
строить статистику.

Сайт проекта http://firststat.ru

Статья с описанием https://habrahabr.ru/post/301894/
* Сайт проекта http://firststat.ru
* Статья с описанием https://habrahabr.ru/post/301894/

Для работы необходимы модули:
- mysqlclient==1.4.6
Expand Down Expand Up @@ -42,21 +42,7 @@

Далее каждую ночь база данных доменов будет обновляться. На двух
процессорах E5-2690v2 с 225 гигабайтами памяти процесс обновления БД з
анимает 6-8 часов, плюс еще несколько часов агрегирование данных.

# TODO

* https://habr.com/ru/post/66151/
* собирать статистику за сегодня из domain, а не из domain_history
* подключить RPKI
* скачивание и unzip сделать параллельно
* randomize_servers = on; ?? и в принципе разобраться с рекурсером, можно ли его ускорить или все запросы так и пересылать в google
* pdnsd-ctl status

# Пример отчета (1 августа 2015 года)

![example](https://scontent.xx.fbcdn.net/hphotos-xpt1/t31.0-8/11779902_855515371153091_8587193411725580989_o.png)

занимает 3-6 часов.



18 changes: 9 additions & 9 deletions classes/asInet.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import urllib.parse
import traceback
import time
import typing

from helpers.helperUnicode import as_default_string
from helpers.helpers import get_mysql_connection
Expand Down Expand Up @@ -106,10 +107,10 @@ def parsing_as(self, show_log: bool = False, max_as: int = MAX_AS_NUMBER) -> Non
as_data = self._get_all_as_info()

for i in range(1, max_as):
print("Update as %s" % i)
self.update_as(i, as_data, show_log=show_log)

self.update_as(198610, as_data, show_log=show_log)
if i in as_data:
self.update_as(i, as_data=as_data[i], show_log=show_log)
else:
self.update_as(i, as_data=None, show_log=show_log)

def _get_asn_description(self, number: int) -> dict:
"""
Expand Down Expand Up @@ -142,7 +143,7 @@ def _get_asn_description(self, number: int) -> dict:
'DESCRIPTION': description,
'USE_FAST': 0}

def update_as(self, number: int, as_data: dict, show_log: bool = False) -> bool:
def update_as(self, number: int, as_data: typing.Union[dict, None], show_log: bool = False) -> bool:
"""
Обновляем информацию об AS в базе данных
"""
Expand All @@ -157,16 +158,15 @@ def update_as(self, number: int, as_data: dict, show_log: bool = False) -> bool:

count = cursor.fetchone()

if number in as_data and count['count'] != 0:
if as_data and count['count'] != 0:
as_info = {'AS': number,
'COUNTRY': as_data[number]['country'],
'COUNTRY': as_data['country'],
'ORGANIZATION': '',
'DATE_REGISTER': '',
'DESCRIPTION': as_data[number]['descriptions'],
'DESCRIPTION': as_data['descriptions'],
'USE_FAST': 1}
else:
try:
time.sleep(.2)
as_info = self._get_asn_description(number)
except:
as_info = {'AS': number,
Expand Down
9 changes: 5 additions & 4 deletions classes/command/bgpdump.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@ class Bgpdump(Command):
}
':type : dict'

def __init__(self, path: str):
def __init__(self, path_in: str, path_out: str):
"""
:type path: unicode
:type path_in: unicode
:return:
"""
super(Bgpdump, self).__init__("bgpdump")
self.binary = [os.path.abspath(CURRENT_PATH+'/bin/bgpdump')]
self.path = path
self.path_in = path_in
self.path_out = path_out

def get_command(self) -> list:
"""
Expand All @@ -31,4 +32,4 @@ def get_command(self) -> list:
:rtype: list
"""

return self.binary + [self.path]
return self.binary + ["-O", self.path_out, self.path_in]
14 changes: 5 additions & 9 deletions classes/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,13 @@ def parse_file_rib_file_to(self, path_rib_file: str or None = None, path_to: str
path_rib_file = os.path.abspath(os.path.join(self.path, 'rib.bz2'))
path_to = os.path.abspath(os.path.join(self.work_path, 'rib'))

bgp_dump = Bgpdump(path_rib_file)
command = bgp_dump.get_command()

shutil.rmtree(path_to, ignore_errors=True)
file_rib = open(path_to, 'w')
bgp_dump = Bgpdump(path_rib_file, path_to)
command = bgp_dump.get_command()

p = SubprocessRunner(command=command, stdout=file_rib)
p = SubprocessRunner(command=command)
p.run()
p.wait(write_output_in_log=False)
file_rib.close()

return path_to

Expand All @@ -116,7 +113,6 @@ def convert_rib_to_net_as(self, path_rib_file: str or bool = False) -> dict:
# NEXT_HOP: 80.91.255.62
# AGGREGATOR: AS24940 213.133.96.18
:type path_rib_file: unicode
:return:
"""

Expand All @@ -131,8 +127,8 @@ def convert_rib_to_net_as(self, path_rib_file: str or bool = False) -> dict:
prefix = ''
as_path = ''

file_rib_data = open(path_rib_file)
line = file_rib_data.readline()
file_rib_data = open(path_rib_file, 'r')
line: str = file_rib_data.readline()
while line:
symbol = line[0]
if symbol == 'T' or symbol == 'S' or symbol == 'F' or symbol == 'O' or symbol == 'N':
Expand Down
37 changes: 29 additions & 8 deletions classes/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from classes.command.wget import Wget
from helpers.helpers import *
import shutil
import concurrent.futures
from helpers.helpersCollor import BColor


Expand Down Expand Up @@ -47,6 +48,24 @@ def download_file(url: str, data_dir: str) -> bool:

return True

@staticmethod
def download(path: str, item: dict) -> int:
    """
    Download one file described by *item* into the directory *path*.

    :param path: destination directory for the downloaded file
    :param item: dict with keys 'file_name' (target name) and 'url' (source)
    :return: size in bytes of the downloaded file (always > 0)
    :raises Exception: if the file could not be downloaded or is empty
    """
    file_name = item['file_name']
    url = item['url']
    path_file = os.path.abspath(os.path.join(path, file_name))

    BColor.process("Download %s to %s " % (url, path_file))
    # Remove any stale copy before downloading. The previous code used
    # shutil.rmtree(path_file, ignore_errors=True), but rmtree only removes
    # directory trees — on a regular file it raises, and ignore_errors=True
    # silently swallowed that, so the cleanup was a no-op for files.
    if os.path.isfile(path_file):
        os.remove(path_file)
    else:
        shutil.rmtree(path_file, ignore_errors=True)

    Downloader.download_file(url, path_file)

    # Stat the file once and reuse the result instead of calling getsize twice.
    size = os.path.getsize(path_file)
    if size == 0:
        BColor.error("Can`t download file %s to %s" % (url, path_file))
        raise Exception("Can`t download file %s to %s" % (url, path_file))

    return size

@staticmethod
def download_data_for_current_date() -> str:
"""
Expand All @@ -73,13 +92,15 @@ def download_data_for_current_date() -> str:

path = Downloader.create_data_dir()

for item in files_list:
path_file = os.path.abspath(os.path.join(path, item['file_name']))
BColor.process("Download %s to %s " % (item['url'], path_file))
shutil.rmtree(path_file, ignore_errors=True)
Downloader.download_file(item['url'], path_file)
if os.path.getsize(path_file) == 0:
BColor.error("Can`t download file %s to %s" % (item['url'], path_file))
raise Exception("Can`t download file %s to %s" % (item['url'], path_file))
with concurrent.futures.ThreadPoolExecutor(max_workers=len(files_list)) as executor:
future_to_download = {executor.submit(Downloader.download,
path,
item): item for item in files_list}
for future in concurrent.futures.as_completed(future_to_download, timeout=1800):
item = future_to_download[future]
file_name = item['file_name']
url = item['url']
array_data = future.result()
BColor.ok("Download url %s to %s, size is %i" % (url, file_name, array_data))

return path
23 changes: 0 additions & 23 deletions doc/structure.sql
Original file line number Diff line number Diff line change
Expand Up @@ -633,29 +633,6 @@ CREATE TABLE `ns_domain_old_count_statistic` (
) ENGINE=InnoDB AUTO_INCREMENT=9396429 DEFAULT CHARSET=utf8;
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Table structure for table `providers_like_statistic`
--

DROP TABLE IF EXISTS `providers_like_statistic`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `providers_like_statistic` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`date` date NOT NULL,
`name` varchar(70) DEFAULT NULL,
`tld` varchar(32) DEFAULT NULL,
`count` bigint(20) NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `uniq` (`date`,`name`,`tld`),
KEY `date` (`date`),
KEY `date_tld` (`date`,`tld`),
KEY `i_ns` (`name`),
KEY `i_ns_tld` (`name`,`tld`),
KEY `i_date_ns` (`name`,`date`)
) ENGINE=InnoDB AUTO_INCREMENT=3392465 DEFAULT CHARSET=utf8;
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Table structure for table `registrant`
--
Expand Down
4 changes: 2 additions & 2 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ runner:
volumes:
- ../:/home/domain_statistic:rw
- ./counteiner_data/download/:/home/domain_statistic/download:rw
- ./runner/config/root:/var/spool/cron/crontabs/root:rw
- ./runner/config/root:/var/spool/cron/crontabs/root:ro

#rpki:
# build: rpki/
Expand All @@ -42,7 +42,7 @@ devrunner:
links:
- "mysql:db"
- "recurcer:resolver"
# - "rpki:rpki"
# - "rpki:rpki"
mem_limit: "200G"
ports:
- "2222:2222"
Expand Down
2 changes: 1 addition & 1 deletion docker/mysql/create_base.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash

echo "create database domain_statistic;" | mysql mysql;
echo "GRANT ALL PRIVILEGES ON domain_statistic.* TO domain_statistic@'%' IDENTIFIED BY '0123456789';" | mysql mysql;
echo "GRANT ALL PRIVILEGES ON domain_statistic.* TO domain_statistic@'%' IDENTIFIED BY '120686120686120686DePole12';" | mysql mysql;
echo "GRANT SELECT ON domain_statistic.* TO readonlyqweqweqwe@'%' IDENTIFIED BY 'readonlyqweqweqwe';" | mysql mysql;
echo "FLUSH PRIVILEGES;" | mysql mysql;
MYPASSWD=$RANDOM$RANDOM$RANDOM
Expand Down
13 changes: 0 additions & 13 deletions docker/rpki/install_minit.sh

This file was deleted.

13 changes: 8 additions & 5 deletions docker/runner/config/root
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
SHELL=/bin/sh
# DO NOT EDIT THIS FILE - edit the master and reinstall.
# (/tmp/crontab.XlhJVj/crontab installed on Sat Apr 11 19:42:02 2020)
# (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $)
SHELL=/bin/bash
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

10 1 */5 * * /usr/local/bin/python3.7 /home/domain_statistic/update_as_info.py >> /home/domain_statistic/download/update_as_info_`date +%Y-%m-%d`.log
0 6 * * * /usr/local/bin/python3.7 /home/domain_statistic/update_domain.py -n `cat /etc/resolv.conf | awk '{print $2}'` -u 2>&1 &>/home/domain_statistic/download/update_domain_`date +%Y-%m-%d`.log
#1 1 * * * /usr/local/bin/python3.7 /home/domain_statistic/update_rpki_history.py >> /home/domain_statistic/download/update_rpki.log
#1 10 */7 * * /usr/local/bin/python3.7 /home/domain_statistic/normalization.py
10 1 */5 * * /usr/local/bin/python3.7 /home/domain_statistic/update_as_info.py >> /home/domain_statistic/download/update_as_info_`/bin/date +\%Y-\%m-\%d`.log
10 6 * * * /usr/local/bin/python3.7 /home/domain_statistic/update_domain.py -n `cat /etc/resolv.conf | awk '{print $2}'` -u 2>&1 &> /home/domain_statistic/download/update_domain_`/bin/date +\%Y-\%m-\%d`.log
#1 1 * * * /usr/local/bin/python3.7 /home/domain_statistic/update_rpki_history.py >> /home/domain_statistic/download/update_rpki.log
#1 10 */7 * * /usr/local/bin/python3.7 /home/domain_statistic/normalization.py
2 changes: 1 addition & 1 deletion update_domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def load_prefix_list_from_var(prefix_list: dict) -> SubnetTree.SubnetTree:
BColor.process("Unzip file")
converter = Converter(path, delete_work_dir=(not args.show_verbose))

BColor.process("Parsing rib file")
BColor.process("Parsing rib file (run bgpdump)")
converter.parse_file_rib_file_to()

BColor.process("Get AS list")
Expand Down

0 comments on commit 0d01d5b

Please sign in to comment.