forked from Jack-Cherish/python-spider
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfinancical.py
193 lines (180 loc) · 7.32 KB
/
financical.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#-*- coding:UTF-8 -*-
import sys
import pymysql
import requests
import json
import re
from bs4 import BeautifulSoup
"""
类说明:获取财务数据
Author:
Jack Cui
Blog:
http://blog.csdn.net/c406495762
Zhihu:
https://www.zhihu.com/people/Jack--Cui/
Modify:
2017-08-31
"""
class FinancialData():
def __init__(self):
#服务器域名
self.server = 'http://quotes.money.163.com/'
self.cwnb = 'http://quotes.money.163.com/hkstock/cwsj_'
#主要财务指标
self.cwzb_dict = {'EPS':'基本每股收益','EPS_DILUTED':'摊薄每股收益','GROSS_MARGIN':'毛利率',
'CAPITAL_ADEQUACY':'资本充足率','LOANS_DEPOSITS':'贷款回报率','ROTA':'总资产收益率',
'ROEQUITY':'净资产收益率','CURRENT_RATIO':'流动比率','QUICK_RATIO':'速动比率',
'ROLOANS':'存贷比','INVENTORY_TURNOVER':'存货周转率','GENERAL_ADMIN_RATIO':'管理费用比率',
'TOTAL_ASSET2TURNOVER':'资产周转率','FINCOSTS_GROSSPROFIT':'财务费用比率','TURNOVER_CASH':'销售现金比率','YEAREND_DATE':'报表日期'}
#利润表
self.lrb_dict = {'TURNOVER':'总营收','OPER_PROFIT':'经营利润','PBT':'除税前利润',
'NET_PROF':'净利润','EPS':'每股基本盈利','DPS':'每股派息',
'INCOME_INTEREST':'利息收益','INCOME_NETTRADING':'交易收益','INCOME_NETFEE':'费用收益','YEAREND_DATE':'报表日期'}
#资产负债表
self.fzb_dict = {
'FIX_ASS':'固定资产','CURR_ASS':'流动资产','CURR_LIAB':'流动负债',
'INVENTORY':'存款','CASH':'现金及银行存结','OTHER_ASS':'其他资产',
'TOTAL_ASS':'总资产','TOTAL_LIAB':'总负债','EQUITY':'股东权益',
'CASH_SHORTTERMFUND':'库存现金及短期资金','DEPOSITS_FROM_CUSTOMER':'客户存款',
'FINANCIALASSET_SALE':'可供出售之证券','LOAN_TO_BANK':'银行同业存款及贷款',
'DERIVATIVES_LIABILITIES':'金融负债','DERIVATIVES_ASSET':'金融资产','YEAREND_DATE':'报表日期'}
#现金流表
self.llb_dict = {
'CF_NCF_OPERACT':'经营活动产生的现金流','CF_INT_REC':'已收利息','CF_INT_PAID':'已付利息',
'CF_INT_REC':'已收股息','CF_DIV_PAID':'已派股息','CF_INV':'投资活动产生现金流',
'CF_FIN_ACT':'融资活动产生现金流','CF_BEG':'期初现金及现金等价物','CF_CHANGE_CSH':'现金及现金等价物净增加额',
'CF_END':'期末现金及现金等价物','CF_EXCH':'汇率变动影响','YEAREND_DATE':'报表日期'}
#总表
self.table_dict = {'cwzb':self.cwzb_dict,'lrb':self.lrb_dict,'fzb':self.fzb_dict,'llb':self.llb_dict}
#请求头
self.headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.8',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36'}
"""
函数说明:获取股票页面信息
Author:
Jack Cui
Parameters:
url - 股票财务数据界面地址
Returns:
name - 股票名
table_name_list - 财务报表名称
table_date_list - 财务报表年限
url_list - 财务报表查询连接
Blog:
http://blog.csdn.net/c406495762
Zhihu:
https://www.zhihu.com/people/Jack--Cui/
Modify:
2017-08-31
"""
def get_informations(self, url):
req = requests.get(url = url, headers = self.headers)
req.encoding = 'utf-8'
html = req.text
page_bf = BeautifulSoup(html, 'lxml')
#股票名称,股票代码
name = page_bf.find_all('span', class_ = 'name')[0].string
# code = page_bf.find_all('span', class_ = 'code')[0].string
# code = re.findall('\d+',code)[0]
#存储各个表名的列表
table_name_list = []
table_date_list = []
each_date_list = []
url_list = []
#表名和表时间
table_name = page_bf.find_all('div', class_ = 'titlebar3')
for each_table_name in table_name:
#表名
table_name_list.append(each_table_name.span.string)
#表时间
for each_table_date in each_table_name.div.find_all('select', id = re.compile('.+1$')):
url_list.append(re.findall('(\w+)1',each_table_date.get('id'))[0])
for each_date in each_table_date.find_all('option'):
each_date_list.append(each_date.string)
table_date_list.append(each_date_list)
each_date_list = []
return name,table_name_list,table_date_list,url_list
"""
函数说明:财务报表入库
Author:
Jack Cui
Parameters:
name - 股票名
table_name_list - 财务报表名称
table_date_list - 财务报表年限
url_list - 财务报表查询连接
Returns:
无
Blog:
http://blog.csdn.net/c406495762
Zhihu:
https://www.zhihu.com/people/Jack--Cui/
Modify:
2017-08-31
"""
def insert_tables(self, name, table_name_list,table_date_list, url_list):
#打开数据库连接:host-连接主机地址,port-端口号,user-用户名,passwd-用户密码,db-数据库名,charset-编码
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='yourpasswd',db='financialdata',charset='utf8')
#使用cursor()方法获取操作游标
cursor = conn.cursor()
#插入信息
for i in range(len(table_name_list)):
sys.stdout.write(' [正在下载 ] %s' % table_name_list[i] + '\r')
#获取数据地址
url = self.server + 'hk/service/cwsj_service.php?symbol={}&start={}&end={}&type={}&unit=yuan'.format(code,table_date_list[i][-1],table_date_list[i][0],url_list[i])
req_table = requests.get(url = url, headers = self.headers)
table = req_table.json()
nums = len(table)
value_dict = {}
for num in range(nums):
sys.stdout.write(' [正在下载 %.2f%%] ' % (((num+1) / nums)*100) + '\r')
sys.stdout.flush()
value_dict['股票名'] = name
value_dict['股票代码'] = code
for key, value in table[i].items():
if key in self.table_dict[url_list[i]]:
value_dict[self.table_dict[url_list[i]][key]] = value
sql1 = """
INSERT INTO %s (`股票名`,`股票代码`,`报表日期`) VALUES ('%s','%s','%s')""" % (url_list[i],value_dict['股票名'],value_dict['股票代码'],value_dict['报表日期'])
try:
cursor.execute(sql1)
# 执行sql语句
conn.commit()
except:
# 发生错误时回滚
conn.rollback()
for key, value in value_dict.items():
if key not in ['股票名','股票代码','报表日期']:
sql2 = """
UPDATE %s SET %s='%s' WHERE `股票名`='%s' AND `报表日期`='%s'""" % (url_list[i],key,value,value_dict['股票名'],value_dict['报表日期'])
try:
cursor.execute(sql2)
# 执行sql语句
conn.commit()
except:
# 发生错误时回滚
conn.rollback()
value_dict = {}
print(' [下载完成 ')
# 关闭数据库连接
cursor.close()
conn.close()
if __name__ == '__main__':
print('*' * 100)
print('\t\t\t\t\t财务数据下载助手\n')
print('作者:Jack-Cui\n')
print('About Me:\n')
print(' 知乎:https://www.zhihu.com/people/Jack--Cui')
print(' Blog:http://blog.csdn.net/c406495762')
print(' Gihub:https://github.com/Jack-Cherish\n')
print('*' * 100)
fd = FinancialData()
#上市股票地址
code = input('请输入股票代码:')
name,table_name_list,table_date_list,url_list = fd.get_informations(fd.cwnb + code + '.html')
print('\n %s:(%s)财务数据下载中!\n' % (name,code))
fd.insert_tables(name,table_name_list,table_date_list,url_list)
print('\n %s:(%s)财务数据下载完成!' % (name,code))