-
Notifications
You must be signed in to change notification settings - Fork 2
/
gate_exam_papersg4g.py
98 lines (95 loc) · 4.02 KB
/
gate_exam_papersg4g.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import re # Ignore PyLintBear (C0111)
import requests # Ignore PyLintBear (C0111)
from bs4 import * # Ignore PyLintBear (W0401)
# Scrape the GATE CS practice papers linked from GeeksforGeeks and save each
# paper as a plain-text file named gateprep-<n> in the current directory.

HOME_URL = 'http://www.geeksforgeeks.org/'
# Seconds to wait on a request, so a stalled connection cannot hang the run.
REQUEST_TIMEOUT = 30
# Link texts that identify the topic-wise and the year-wise papers.
TOPIC_PAPER_TEXT = re.compile('^GATE-CS')
# Raw string: '\s' inside a normal literal is an invalid escape sequence.
YEAR_PAPER_TEXT = re.compile(r'^GATE\sCS\s20..$')


def _fetch_soup(url):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    Raises requests.RequestException on connection errors, timeouts and
    HTTP error statuses.
    """
    response = requests.get(str(url), timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _find_gate_index_url(home_soup):
    """Return the href of the 'GATE CS' link inside the second <hgroup>.

    Raises RuntimeError when the home page does not have that structure,
    instead of failing later with an unrelated IndexError or NameError.
    """
    hgroups = home_soup.find_all('hgroup')
    if len(hgroups) < 2:
        raise RuntimeError('Expected at least two <hgroup> elements on '
                           + HOME_URL)
    links = hgroups[1].find_all('a', href=True, text='GATE CS')
    if not links:
        raise RuntimeError('No "GATE CS" link found on ' + HOME_URL)
    # When several links match, the last one wins.
    return str(links[-1]['href'])


def _write_question(handle, question):
    """Write one question, its numbered answers and its explanation."""
    for css_class in ('mtq_question_label', 'mtq_question_text'):
        node = question.find('div', {'class': css_class})
        if node is not None:
            handle.write(node.text + '\n')

    answers = question.find_all('div', {'class': 'mtq_answer_text'})
    for number, answer in enumerate(answers, 1):
        handle.write('(' + str(number) + ')' + answer.text + '\n')
    handle.write('\n\n\n')

    # The explanation is optional; test for it explicitly rather than
    # swallowing the AttributeError a missing element would cause.
    label = question.find('div', {'class': 'mtq_explanation-label'})
    if label is not None:
        handle.write(label.text + '\n')
        text = question.find('div', {'class': 'mtq_explanation-text'})
        if text is not None:
            handle.write(text.text + '\n')
    handle.write('\n\n\n')


def _save_paper(url, filename):
    """Download the quiz page at *url* and write its questions to *filename*.

    The page is fetched and checked before the file is opened, so a failed
    download does not leave an empty file behind.

    Returns True when the file was written, False when the page could not
    be fetched or does not contain the expected quiz markup.
    """
    try:
        page = _fetch_soup(url)
    except requests.RequestException as error:
        print(error)
        return False

    container = page.find('div', id='mtq_question_container-1')
    wrapper = container.find('div') if container is not None else None
    if wrapper is None:
        print('No quiz questions found at ' + str(url))
        return False

    questions = wrapper.find_all(
        'div', {'class': 'mtq_question mtq_scroll_item-1'})
    # Explicit encoding: question text is not limited to the platform's
    # default code page.
    with open(filename, 'w', encoding='utf-8') as handle:
        for question in questions:
            _write_question(handle, question)
    return True


def main():
    """Locate every GATE CS paper and save each one to its own file.

    Topic-wise papers are saved first, then the year-wise ones; the file
    counter runs across both groups and only advances for saved papers.
    """
    index_page = _fetch_soup(_find_gate_index_url(_fetch_soup(HOME_URL)))
    topic_links = index_page.find_all('a', href=True, text=TOPIC_PAPER_TEXT)
    year_links = index_page.find_all('a', href=True, text=YEAR_PAPER_TEXT)

    file_number = 1
    skipped = 0
    for link in list(topic_links) + list(year_links):
        # Each paper is fetched from its own href (the year-wise loop used
        # to re-download the last topic-wise paper instead).
        if _save_paper(link['href'], 'gateprep-' + str(file_number)):
            file_number += 1
        else:
            skipped += 1

    if skipped:
        print('Saved {} file(s); skipped {} paper(s)'.format(
            file_number - 1, skipped))
    else:
        print('Successfully saved all the files :)')


if __name__ == '__main__':
    main()