-
Notifications
You must be signed in to change notification settings - Fork 1
/
darwinex_data.py
124 lines (99 loc) · 4.87 KB
/
darwinex_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 29 17:24:20 2018
Script: dwx_tickdata_download.py (Python 3)
--
Downloads tick data from the Darwinex tick data server. This code demonstrates
how to download data for one specific date/hour combination, but can be
extended easily to downloading entire assets over user-specified start/end
datetime ranges.
Requirements: Your Darwinex FTP credentials.
Result: Dictionary of pandas DataFrame objects by date/hour.
(columns: float([ask, size]), index: millisecond timestamp)
Example code:
> td = DWX_Tick_Data(dwx_ftp_user='very_secure_username',
dwx_ftp_pass='extremely_secure_password',
dwx_ftp_hostname='mystery_ftp.server.com',
dwx_ftp_port=21)
> td._download_hour_(_asset='EURNOK', _date='2018-10-22', _hour='00')
> td._asset_db['EURNOK-2018-10-22-00']
ask size
2018-10-22 00:00:07.097000+00:00 9.47202 1000000.0
2018-10-22 00:00:07.449000+00:00 9.47188 750000.0
2018-10-22 00:01:08.123000+00:00 9.47201 250000.0
2018-10-22 00:01:10.576000+00:00 9.47202 1000000.0
... ...
@author: Darwinex Labs
@twitter: https://twitter.com/darwinexlabs
@web: http://blog.darwinex.com/category/labs
"""
from ftplib import FTP
import ftplib
from io import BytesIO
import pandas as pd
import gzip
import os
import sys
class DWX_Tick_Data():
    """Download hourly tick-data files over FTP into pandas DataFrames.

    An FTP session is opened and logged in on construction. Every hour
    downloaded with ``_download_hour_`` is cached in ``self._asset_db``
    under the key ``ASSET-YYYY-MM-DD-HH``.

    The instance can be used as a context manager so the FTP session is
    closed on exit; ``close()`` does the same explicitly.
    """

    def __init__(self, dwx_ftp_user='<insert your Darwinex username>',
                 dwx_ftp_pass='<insert your Darwinex password>',
                 dwx_ftp_hostname='<insert Darwinex Tick Data FTP host>',
                 dwx_ftp_port=21):
        """Connect to the FTP server and log in.

        Args:
            dwx_ftp_user: FTP user name.
            dwx_ftp_pass: FTP password.
            dwx_ftp_hostname: FTP host to connect to.
            dwx_ftp_port: FTP port to connect to.
        """
        # Dictionary DB holding one DataFrame per ASSET-YYYY-MM-DD-HH key
        self._asset_db = {}

        # Connect explicitly: FTP(host) always uses the default port and
        # would silently ignore dwx_ftp_port.
        self._ftpObj = FTP()
        self._ftpObj.connect(dwx_ftp_hostname, dwx_ftp_port)
        self._ftpObj.login(dwx_ftp_user, dwx_ftp_pass)

        # In-memory buffer holding the most recently downloaded raw file
        self._virtual_dl = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the FTP session.

        Sends QUIT first; if that fails (e.g. the connection is already
        gone) the socket is closed unilaterally instead.
        """
        try:
            self._ftpObj.quit()
        except ftplib.all_errors + (AttributeError,):
            self._ftpObj.close()

    def _list_entries_(self):
        """Return the names in the current FTP directory, minus '.' and '..'."""
        return [_name for _name in self._ftpObj.nlst()
                if _name not in ('.', '..')]

    def walk_dir(self):
        """List the contents of every entry in the current FTP directory.

        Changes into each entry of the current directory in turn, records
        the names found there, and finally changes back to the starting
        directory (also when a listing fails part-way). The starting
        directory and every visited path are printed.

        Returns:
            dict mapping each entry name to the list of names inside it.
        """
        original_dir = self._ftpObj.pwd()
        print(original_dir)

        file_dir = {}
        try:
            # The '.' and '..' pseudo-entries are filtered by name rather
            # than by position, since NLST ordering is server-dependent.
            for name in self._list_entries_():
                # rstrip avoids a '//name' path when starting from '/'
                path = '{}/{}'.format(original_dir.rstrip('/'), name)
                print(path)
                self._ftpObj.cwd(path)
                file_dir[name] = self._list_entries_()
        finally:
            # Get back to the original dir
            self._ftpObj.cwd(original_dir)

        return file_dir

    #########################################################################
    # Function: Downloads and stores currency tick data from Darwinex FTP
    #           Server. Object stores data in a dictionary, keys being of the
    #           format: CURRENCYPAIR-YYYY-MM-DD-HH
    #########################################################################
    def _download_hour_(self, _asset='EURUSD', _date='2017-10-01', _hour='22',
                        _ftp_loc_format='{}/{}_ASK_{}_{}.log.gz',
                        _verbose=False):
        """Download one gzip-compressed hourly tick file and cache it.

        Each non-blank line of the file is split on commas; field 0 is
        used as a millisecond epoch timestamp, field 1 as 'ask' and
        field 2 as 'size'.

        Args:
            _asset: Asset symbol, e.g. 'EURUSD'.
            _date: Date string, e.g. '2017-10-01'.
            _hour: Hour string, e.g. '22'.
            _ftp_loc_format: Remote path template filled with
                (_asset, _asset, _date, _hour). If it cannot be formatted
                with those four values it is used verbatim as the path.
            _verbose: Print progress messages when true.

        Returns:
            The float DataFrame (columns 'ask' and 'size', UTC datetime
            index), also stored in ``self._asset_db``. On any failure the
            exception is printed, ``self._asset_db`` is left untouched and
            None is returned.
        """
        try:
            _file = _ftp_loc_format.format(_asset, _asset, _date, _hour)
        except (IndexError, KeyError, ValueError):
            # Template does not fit the four positional fields: treat it
            # as a literal remote path.
            _file = _ftp_loc_format

        _key = '{}-{}-{}'.format(_asset, _date, _hour)
        self._virtual_dl = BytesIO()

        if _verbose:
            print("\n[INFO] Retrieving file '{}' from Darwinex Tick Data Server..".format(_file))

        try:
            self._ftpObj.retrbinary("RETR {}".format(_file), self._virtual_dl.write)
            self._virtual_dl.seek(0)

            # Get bytes into a local list of lists, skipping blank lines
            # (including whitespace-only ones such as a lone '\r\n').
            _rows = []
            with gzip.open(self._virtual_dl) as _log:
                for _line in _log:
                    _line = _line.strip()
                    if _line:
                        _rows.append(_line.decode().split(','))

            # Timestamps are cast to numbers and converted in one
            # vectorised call; parsing *strings* together with `unit` is
            # deprecated in pandas and per-row conversion is slow.
            _index = pd.to_datetime(pd.to_numeric([_r[0] for _r in _rows]),
                                    unit='ms', utc=True)

            # Construct DataFrame and sanitize types
            _ticks = pd.DataFrame({'ask': [_r[1] for _r in _rows],
                                   'size': [_r[2] for _r in _rows]},
                                  index=_index).astype(float)

            # Store only once fully built, so a failed parse never leaves
            # a half-processed entry in the cache.
            self._asset_db[_key] = _ticks

            if _verbose:
                print('\n[SUCCESS] {} tick data for {} (hour {}) stored in self._asset_db dict object.\n'.format(_asset,
                                                                                                                 _date,
                                                                                                                 _hour))
            return _ticks

        # Case: file not found, corrupt archive, malformed line, ...
        # Deliberately best-effort: report and carry on.
        except Exception as ex:
            _exstr = "Exception Type {0}. Args:\n{1!r}"
            _msg = _exstr.format(type(ex).__name__, ex.args)
            print(_msg)
            return None
#########################################################################