-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmarkov.py
102 lines (77 loc) · 2.98 KB
/
markov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import secret # this has our API keys
import tweepy
from random import randrange
# (first_word, second_word) pairs that open a sentence in the corpus.
# create_word_map() appends to this list; gen_message() draws a tweet's
# opening words from it.
sentence_starters = []


def create_word_map(word_lines):
    """Build a second-order Markov map from the corpus lines.

    Each line is split on whitespace. For every three consecutive words
    (a, b, c) on a line, c is appended to the list stored under the key
    (a, b). Repeated followers are kept, so more frequent continuations are
    proportionally more likely to be drawn later.

    As a side effect, the pair that opens each line and each pair that
    directly follows a stand-alone "?", "." or "!" token is appended to the
    module-level ``sentence_starters`` list. The list is not cleared first,
    so repeated calls accumulate starters.

    Args:
        word_lines: Iterable of strings, one per corpus line.

    Returns:
        dict mapping (word, word) tuples to the list of words seen right
        after that pair.
    """
    sentence_enders = ("?", ".", "!")
    word_map = dict()

    for line in word_lines:
        words = line.split()
        # Every corpus line is treated as opening a new sentence.
        end_of_sentence = True

        # Stop two short of the end so words[index + 2] always exists; lines
        # with fewer than three words contribute nothing.
        for index in range(len(words) - 2):
            pair = (words[index], words[index + 1])

            if words[index] in sentence_enders:
                # The current token closes a sentence, so the pair starting
                # at the *next* index is the one that opens a sentence.
                # (Testing words[index + 1] here would record the
                # punctuation token itself as the first word of a starter.)
                end_of_sentence = True
            elif end_of_sentence:
                sentence_starters.append(pair)
                end_of_sentence = False

            # Record the word that follows this pair.
            word_map.setdefault(pair, []).append(words[index + 2])

    return word_map
def under_limit(array, limit=280):
    """Report whether the words in ``array`` fit under a character limit.

    The words are joined with single spaces, the same way the final tweet
    text is assembled, and the length of that string is compared to
    ``limit``.

    Args:
        array: Sequence of word strings.
        limit: Exclusive upper bound on the joined length. Defaults to 280,
            the value this file uses as Twitter's character limit.

    Returns:
        True if the joined text is strictly shorter than ``limit``,
        otherwise False.
    """
    return len(' '.join(array)) < limit
def gen_message(word_map):
    """Generate the words of one tweet by walking the Markov map.

    A random pair from the module-level ``sentence_starters`` list seeds the
    tweet. While the text is under the limit checked by ``under_limit``, a
    random follower of the last two words is appended. Generation stops when
    the last pair has no recorded follower or when the next word would no
    longer fit; in the latter case that word is dropped so the result stays
    under the limit.

    Args:
        word_map: dict mapping (word, word) tuples to lists of follower
            words, as returned by ``create_word_map``.

    Returns:
        list of word strings; join with spaces to obtain the tweet text.

    Raises:
        ValueError: if ``sentence_starters`` is empty.
    """
    if not sentence_starters:
        raise ValueError("no sentence starters available; build the word map first")

    # randrange(n) already excludes n, so every starter (including the last
    # one, and the only one when there is a single starter) can be chosen.
    first_word, second_word = sentence_starters[randrange(len(sentence_starters))]
    tweet_array = [first_word, second_word]

    while under_limit(tweet_array):
        followers = word_map.get((tweet_array[-2], tweet_array[-1]))
        if not followers:
            # Nothing is known to follow the last two words.
            break

        tweet_array.append(followers[randrange(len(followers))])

        if not under_limit(tweet_array):
            # The new word pushed the text over the limit; take it back.
            tweet_array.pop()
            break

    # Always return a list, even if the starter pair alone is over the limit.
    return tweet_array
def main():
    """Read the corpus, generate a tweet and post it to Twitter.

    Reads ``simple-corpus.txt`` from the current working directory, builds
    the word map, generates a message and posts it using the credentials in
    the ``secret`` module. A failure reported by tweepy while posting is
    printed rather than raised.
    """
    # Open and read corpus; the context manager closes the file even if
    # reading fails.
    with open("simple-corpus.txt", "r") as corpus:
        word_lines = corpus.readlines()

    # Create word map and generate the tweet text
    word_map = create_word_map(word_lines)
    tweet_array = gen_message(word_map)
    tweet = ' '.join(tweet_array)

    # Twitter authentication
    auth = tweepy.OAuthHandler(secret.TWITTER_CLIENT, secret.TWITTER_SECRET)
    auth.set_access_token(secret.TWITTER_ACCESS, secret.TWITTER_ACCESS_SECRET)
    api = tweepy.API(auth)

    # tweepy's base exception is TweepError in 3.x and TweepyException in
    # 4.x; the bare name TweepyError is not defined anywhere, so resolve the
    # class from the tweepy module instead.
    tweet_error = (getattr(tweepy, "TweepyException", None)
                   or getattr(tweepy, "TweepError", Exception))

    # Try to tweet
    try:
        api.update_status(tweet)
    except tweet_error:
        # Parenthesised single-argument form works on Python 2 and 3.
        print("Could not tweet")
# Post a tweet only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()