Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

修改 #1

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 41 additions & 79 deletions app/spider/spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,43 +207,49 @@ def get_professional_courses_info_from_table(soup, table_id, sess):
# 判断是否有课程信息
if tr_list:
for tr in tr_list:
td_list = tr.find_all('td')
# 依次提取:课程名称、上课老师、上课时间、上课地点、起止周、容量、选上、本轮已选、选课方式、学分、备注、双语等级
tmp_dict = dict()
# 选课链接
tmp_dict[LESSON_URL] = '{}/{}'.format(COURSE_SYSTEM_URL, add_url.format(suid_obj=tr['rel']))
# 课程名称
tmp_dict[LESSON_NAME] = td_list[1].find('a').string
# 老师
tmp_dict[TEACHER] = td_list[2].find('a').string
# 上课时间
tmp_dict[TIME] = td_list[3].string
# 上课地点
tmp_dict[CLASSROOM] = td_list[4].string
# 起始周
# tmp_dict['period'] = td_list[5].string
# 课程容量
tmp_dict[CAPACITY] = td_list[6].string
# 已选上
tmp_dict[SELECTED] = td_list[7].string
# 本轮已选
tmp_dict[THIS_SELECTED] = td_list[8].string
# 选课方式 推荐课程 or 跨专业选课
tmp_dict[LESSON_TYPE] = td_list[9].string
# 学分
tmp_dict[CREDIT] = td_list[10].string
# 备注
tmp_dict[REMARK] = td_list[11].string
# 双语等级
a_tag = td_list[12].find('a')
if a_tag:
tmp_dict[LANG_LEVEL] = a_tag.string
else:
tmp_dict[LANG_LEVEL] = td_list[12].string.strip()
course_info.append(tmp_dict)
course_info.append(add_tmp_dict(tr, True))
return course_info


def add_tmp_dict(tr, professional=False):
    """Build the course-info dict for a single table row.

    Args:
        tr: Row element of a course table. It must support
            ``find_all('td')`` and item access ``tr['rel']``
            (presumably a BeautifulSoup ``Tag`` -- confirm against callers).
        professional: Whether the row comes from the professional-course
            table. Those rows carry one extra cell (start/end weeks) at
            index 5, which is dropped so that both table layouts share the
            cell indices used below.

    Returns:
        dict: Course fields keyed by the module constants ``LESSON_URL``,
        ``LESSON_NAME``, ``TEACHER``, ``TIME``, ``CLASSROOM``, ``CAPACITY``,
        ``SELECTED``, ``THIS_SELECTED``, ``LESSON_TYPE``, ``CREDIT``,
        ``REMARK`` and ``LANG_LEVEL``.
    """
    cells = tr.find_all('td')
    if professional:
        # Drop the start/end-week cell that only professional courses have,
        # so the remaining indices line up with the other course tables.
        cells.pop(5)

    # NOTE(review): add_url is not a parameter of this helper, so it must
    # resolve as a module-level name at call time -- confirm it is not just
    # a local variable of the calling functions.
    info = {
        # Link used to select (add) this course.
        LESSON_URL: '{}/{}'.format(COURSE_SYSTEM_URL, add_url.format(suid_obj=tr['rel'])),
        # Course name.
        LESSON_NAME: cells[1].find('a').string,
        # Teacher.
        TEACHER: cells[2].find('a').string,
        # Class time.
        TIME: cells[3].string,
        # Classroom.
        CLASSROOM: cells[4].string,
        # Course capacity.
        CAPACITY: cells[5].string,
        # Number of students already enrolled.
        SELECTED: cells[6].string,
        # Number of students selected in the current round.
        THIS_SELECTED: cells[7].string,
        # Selection type (recommended course or cross-major selection).
        LESSON_TYPE: cells[8].string,
        # Credits.
        CREDIT: cells[9].string,
        # Remark.
        REMARK: cells[10].string,
    }

    # Bilingual level: taken from the link inside the cell when there is
    # one, otherwise from the cell's own text.
    lang_cell = cells[11]
    link = lang_cell.find('a')
    if link:
        info[LANG_LEVEL] = link.string
    else:
        # ``.string`` is None for an empty cell or a cell with several
        # children; keep None (like the other fields do) instead of
        # crashing on ``None.strip()``.
        text = lang_cell.string
        info[LANG_LEVEL] = text.strip() if text is not None else None
    return info


def get_courses_info_from_table(soup, table_id, sess):
"""
由于专业课选课界面与其他课程选课不一样,专业课选课会有起始周信息,而其他没有
Expand All @@ -262,19 +268,6 @@ def get_courses_info_from_table(soup, table_id, sess):
# 获取到底有多少门课程信息
item_num = int(PAGE_MSG_RE_PATTERN.search(page_msg)[1])
if item_num >= 10:
# 选课网站默认一页 10 条数据,所以这里出现了分页,需要重新发送请求获取所有 items
# 从 add_url 中获取以下数据
# {'flag': '2',
# 'gsdm': '',
# 'jxjhh': '2015',
# 'kcdm': '4210085150',
# 'numPerPage': 50,
# 'orderDirection': 'asc',
# 'orderField': 'jsxm,sksj',
# 'pageNum': 1,
# 'temp': 'true',
# 'xnxq': '2017-2018-2'}
# 其中 gsdm / jxjhh / kcdm / xnxq 是可以通过 add_url 获取的
parse_result = parse.urlparse(add_url)
# 解析 add_url 中的参数
url_params = parse.parse_qs(parse_result.query)
Expand Down Expand Up @@ -303,38 +296,7 @@ def get_courses_info_from_table(soup, table_id, sess):
# 判断是否有课程信息
if tr_list:
for tr in tr_list:
td_list = tr.find_all('td')
# 依次提取:课程名称、上课老师、上课时间、上课地点、起止周、容量、选上、本轮已选、选课方式、学分、备注、双语等级
tmp_dict = dict()
# 选课链接
tmp_dict[LESSON_URL] = '{}/{}'.format(COURSE_SYSTEM_URL, add_url.format(suid_obj=tr['rel']))
# 课程名称
tmp_dict[LESSON_NAME] = td_list[1].find('a').string
# 老师
tmp_dict[TEACHER] = td_list[2].find('a').string
# 上课时间
tmp_dict[TIME] = td_list[3].string
# 上课地点
tmp_dict[CLASSROOM] = td_list[4].string
# 课程容量
tmp_dict[CAPACITY] = td_list[5].string
# 已选上
tmp_dict[SELECTED] = td_list[6].string
# 本轮已选
tmp_dict[THIS_SELECTED] = td_list[7].string
# 选课方式 推荐课程 or 跨专业选课
tmp_dict[LESSON_TYPE] = td_list[8].string
# 学分
tmp_dict[CREDIT] = td_list[9].string
# 备注
tmp_dict[REMARK] = td_list[10].string
# 双语等级
a_tag = td_list[11].find('a')
if a_tag:
tmp_dict[LANG_LEVEL] = a_tag.string
else:
tmp_dict[LANG_LEVEL] = td_list[11].string.strip()
course_info.append(tmp_dict)
course_info.append(add_tmp_dict(tr))
return course_info


Expand Down
27 changes: 9 additions & 18 deletions app/spider/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,16 @@ def validate_user(account, password):
"password": password,
"type": "xs",
}
keep_request = True
while keep_request:
while 1:
try:
response = requests.post(url=JWC_LOGIN_URL, data=post_data, headers=headers)
except requests.exceptions.ConnectionError as e:
# 如果没有连接上教务处就继续请求,知道教务处能够被连接上
# 尝试连接 baidu.com 如果不能够连接就判断无法连接网络,提醒用户连接网络
# 教务处登陆失败也是返回 200 状态码,通过cookie中是否有 CERLOGIN 来判断是否成功登陆
if 'CERLOGIN' in response.cookies.keys():
return 1
return 0
except requests.exceptions.ConnectionError:
try:
baidu_response = requests.get(BAIDU_INDEX_URL)
except requests.exceptions.ConnectionError as e:
# 确实没有网络连接,假设 baidu.com 挂掉的可能性很低
# 尝试连接 baidu.com 如果不能够连接就判断无法连接网络
_baidu_response = requests.get(BAIDU_INDEX_URL)
except requests.exceptions.ConnectionError:
return 2
else:
# 如果没有发生异常就可以结束
keep_request = False
# 连接不上网站,不断尝试
# 因为教务处登陆失败也是返回 200 状态码,但是能够通过cookie中是否有 CERLOGIN 来判断是否成功登陆
if 'CERLOGIN' in response.cookies.keys():
# 如果返回值中有该 cookie,则判断用户身份正确
return 1
else:
return 0
85 changes: 22 additions & 63 deletions app/spider/whut_add_lession_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ def request_index(account, password):
}
response = requests.post(url=JWC_LOGIN_URL, data=post_data, headers=headers)
if 'CERLOGIN' in response.cookies.keys():
for name in response.cookies.keys():
local_thread.index_cookie[name] = response.cookies.get(name)
for name, value in response.cookies.items():
local_thread.index_cookie[name] = value
return 'success login'
else:
print('账号或者密码错误')
return '账号或者密码错误'

print('账号或者密码错误')
return '账号或者密码错误'


def request_courses():
Expand Down Expand Up @@ -74,34 +74,16 @@ def request_add_lesson(add_lession_url):
6 ==> 达到学分上限
7 ==> 课程容量不足
"""
tag = False
if tag:
# 模拟退出登录状态,也就是登录超时
r = requests.get(url='http://202.114.90.180/Course/logout.do', cookies=local_thread.course_cookie, headers=headers)
# tag = False
# if tag:
# # 模拟退出登录状态,也就是登录超时
# _r = requests.get(url='http://202.114.90.180/Course/logout.do', cookies=local_thread.course_cookie, headers=headers)
try:
response = requests.get(url=add_lession_url, cookies=local_thread.course_cookie, headers=headers)
except requests.exceptions.ConnectionError as e:
# 连接不上异常
except requests.exceptions.ConnectionError:
return 4
try:
response_data = json.loads(response.text)
except json.decoder.JSONDecodeError as e:
# 在成功选课的时候返回一个 JSONP,是 (js/css/html) 的集合体,不是 json 对象无法解析
return 0
else:
# 成功解析为 json,那么证明没有 成功选课
# 无论是否抢到课,返回的 http 状态码都是 200
# 如果抢课失败,(登陆超时、重复选课、未到选课时间)返回的都是 200 状态码
# 但是会返回一个 JSON {"message": "xxx", "statusCode": "300"}
# 如果成功抢课,那么返回一个 JSONP (js / html / css)
# {"message": "登录超时,请重新登录!", "statusCode": "300"}
# {"message": "课程重复,不能选已选课程", "statusCode": "300"}
# {"message": "目前不在选课时间,不能选课", "statusCode": "300"}
# {"message": "该课程与已选课程上课时间冲突", "statusCode": "300"}
# {"message": "该门课程容量不足,选课失败", "statusCode": "300"}
# {"message": "你所选的课程的课程性质已超出了限制的可选门数,不能选择此课程性质的课程!", "statusCode": "300"}
# http 返回状态码为 200 则成功选到了课程
# 进一步分析 message 信息
response_message = response_data.get(SELECT_COURSE_MESSAGE)
if response_message == LOGIN_TIMEOUT_MESSAGE:
# 登陆超时
Expand All @@ -121,7 +103,10 @@ def request_add_lesson(add_lession_url):
elif response_message == NO_ENOUGH_POSITION:
# 课程容量不足
return 7
return -1
return -1
except json.decoder.JSONDecodeError:
# 在成功选课的时候返回一个 JSONP,是 (js/css/html) 的集合体,不是 json 对象无法解析
return 0


def start_request(username, password, lesson_url, tasks):
Expand All @@ -131,46 +116,20 @@ def start_request(username, password, lesson_url, tasks):
local_thread.course_cookie = {}
# 默认初始状态为 1, 为登陆超时,需要重新登陆操作
status = 1
while True:
while 1:
if status in (0, 3, 5, 6):
# 0:成功 3:重复选课 5:时间冲突 6:学分上限
# 4:在连接不上、1:登陆超时、2:尚未开始抢课,需要继续不断重复循环
break
# 还没有获得登陆状态,在登陆超时、未连接上服务器情况下需要重新模拟登陆
# 其他状态不需要重新模拟登陆
if status in (1, 4, -1):
elif status in (1, 4, -1):
try:
request_index(username, password)
request_courses()
except requests.exceptions.ConnectionError as e:
# 途中发生了连接不上,重新请求连接
continue
# 抢课逻辑
except requests.exceptions.ConnectionError:
pass
status = request_add_lesson(lesson_url)
# 在连接不上、登陆超时、尚未开始抢课的情况下,需要继续不断重复循环
if status == 0:
# 成功抢课
break
elif status == 1:
# 登陆超时
pass
elif status == 2:
# 未到抢课时间
pass
elif status == 3:
# 重复选课
break
elif status == 4:
# 连接不上选课页面
pass
elif status == 5:
# 选课时间冲突
break
elif status == 6:
# 达到学分上限
break
elif status == 7:
# 课程容量不足,一直发送请求,等候课程有名额
pass
else:
# 遇到了不能够处理的状态码,一直循环
pass
# 更新状态是当前线程的最后一步
call_back_update_manager(username, lesson_url, tasks, status)

Expand Down
10 changes: 3 additions & 7 deletions app/tools/courses_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,8 @@ def remove_space(string):
:param string:
:return:
"""
new = []
empty_ch = {' ', '\t', '\n', '\r'}
for ch in string:
if ch not in empty_ch:
new.append(ch)
return ''.join(new)
empty_ch = (' ', '\t', '\n', '\r')
return ''.join([ch for ch in string if ch not in empty_ch])


def clean_lesson_name(courses):
Expand All @@ -30,7 +26,7 @@ def clean_lesson_name(courses):
:param courses: 课程 list
:return:
"""
for k, v in courses.items():
for _k, v in courses.items():
for item in v:
item[LESSON_NAME] = remove_space(item[LESSON_NAME])

Expand Down