-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathtest.py
79 lines (67 loc) · 2.38 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from collections import Counter

import matplotlib.pyplot as plt
import praw
# Reddit API credentials. These are placeholders: substitute real values
# locally and never commit them.
CLIENT_ID = 'id'
CLIENT_SECRET = 'secret'
USER_AGENT = 'Scraper'
USERNAME = 'username'
PASSWORD = 'password'

SUBREDDIT_NAME = "politics"
SUBMISSION_LIMIT = 500   # number of top submissions to walk
MAX_COMMENTS = 10000     # stop after this many top-level comments
TOP_N = 10               # number of slices in the pie chart

# Filler words excluded from the tally. Entries are lowercase; tokens are
# lowercased before the lookup, so "The" and "THE" are filtered like "the".
STOP_WORDS = frozenset({
    'that', 'this', 'and', 'of', 'the', 'for', 'i', 'it', 'has', 'in',
    'you', 'to', 'was', 'but', 'have', 'they', 'a', 'is', 'be', 'on',
    'are', 'an', 'or', 'at', 'as', 'do', 'if', 'your', 'not', 'can', 'my',
    'their', 'them', 'with', 'about', 'would', 'like', 'there', 'from',
    'get', 'just', 'more', 'so', 'me', 'out', 'up', 'some', 'will', 'how',
    'one', 'what', "don't", 'should', 'could', 'did', 'no', 'know', 'were',
    "it's", 'he', 'we', 'all', 'when', 'had', 'see', 'his', 'him', 'who',
    'by', 'her', 'she', 'our', 'thing', 'now', 'going', 'been', "i'm",
    'than', 'any', 'because', 'even', 'said', 'only', 'want', 'other',
    'into', 'thought', 'think', "that's", 'much',
})


def strip_punctuation(token):
    """Return *token* without leading or trailing non-alphanumeric characters.

    Characters inside the token (such as the apostrophe in "don't") are
    kept. A token made up only of punctuation becomes the empty string.
    """
    start, end = 0, len(token)
    while start < end and not token[start].isalnum():
        start += 1
    while end > start and not token[end - 1].isalnum():
        end -= 1
    return token[start:end]


def extract_words(text, stop_words=STOP_WORDS):
    """Split *text* into countable words.

    The text is split on any whitespace (spaces, tabs, newlines), so the
    final word of a comment is included and words on adjacent lines are not
    fused together. Each token has its surrounding punctuation removed;
    empty tokens and stop words are dropped.

    Args:
        text: Raw comment body.
        stop_words: Lowercase words to ignore; matching is case-insensitive.

    Returns:
        A list of the remaining words, in order, with original casing.
    """
    tokens = (strip_punctuation(raw) for raw in text.split())
    return [tok for tok in tokens if tok and tok.lower() not in stop_words]


def count_top_words(words, limit=TOP_N):
    """Return the *limit* most frequent words and their counts.

    Args:
        words: Iterable of words to tally.
        limit: Maximum number of entries to return.

    Returns:
        A ``(labels, counts)`` pair of equal-length lists, most frequent
        first. Words with equal counts keep first-seen order.
    """
    ranked = Counter(words).most_common(limit)
    labels = [word for word, _ in ranked]
    counts = [count for _, count in ranked]
    return labels, counts


def iter_comment_bodies(subreddit, submission_limit=SUBMISSION_LIMIT,
                        max_comments=MAX_COMMENTS):
    """Yield bodies of top-level comments from the subreddit's top posts.

    Args:
        subreddit: A PRAW subreddit object.
        submission_limit: How many top submissions to request.
        max_comments: Total number of comment bodies to yield at most.

    Yields:
        The ``body`` text of each top-level comment.
    """
    remaining = max_comments
    if remaining <= 0:
        return
    for submission in subreddit.top(limit=submission_limit):
        # limit=0 drops the "load more comments" placeholders so every item
        # iterated below is a real comment with a body (per PRAW docs).
        submission.comments.replace_more(limit=0)
        for comment in submission.comments:
            yield comment.body
            remaining -= 1
            # Stop right after the last wanted comment so that no further
            # submission is fetched needlessly.
            if remaining == 0:
                return


def main():
    """Scrape comments, tally their words and show the top ones as a pie."""
    reddit = praw.Reddit(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        user_agent=USER_AGENT,
        username=USERNAME,
        password=PASSWORD,
    )
    subreddit = reddit.subreddit(SUBREDDIT_NAME)
    # NOTE(review): this only confirms the client objects were constructed;
    # presumably no request has been sent yet (PRAW fetches lazily) - confirm.
    print('success')

    words = []
    for body in iter_comment_bodies(subreddit):
        words.extend(extract_words(body))

    labels, sizes = count_top_words(words)
    if not labels:
        # An empty pie chart carries no information; say so instead.
        print('No words found; nothing to plot.')
        return

    plt.title('Top comments for: r/' + SUBREDDIT_NAME)
    plt.pie(sizes, labels=labels, autopct='%1.1f%%',
            shadow=True, startangle=90)
    plt.axis('equal')  # Equal aspect ratio draws the pie as a circle.
    plt.show()


if __name__ == "__main__":
    main()