forked from ptwobrussell/Recipes-for-Mining-Twitter
-
Notifications
You must be signed in to change notification settings - Fork 3
/
recipe__get_rt_origins.py
67 lines (45 loc) · 1.67 KB
/
recipe__get_rt_origins.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# -*- coding: utf-8 -*-
import re
# Legacy retweet marker ("RT" or "via", case-insensitive) followed by a run of
# one or more @mentions. Regex adapted from
# http://stackoverflow.com/questions/655903/python-regular-expression-for-retweets
# The leading \b stops the marker from matching the tail of a longer word,
# e.g. the "rt" in "smart @bob" or the "via" in "Olivia @bob".
_RT_PATTERNS = re.compile(r"\b(RT|via)((?:\b\W*@\w+)+)", re.IGNORECASE)

# Pulls the bare names out of a captured run of mentions such as ": @a, @b",
# leaving separators and punctuation behind.
_MENTION = re.compile(r"@(\w+)")


def get_rt_origins(tweet):
    """Return the names a tweet appears to have been retweeted from.

    Two sources are combined:

    * If ``tweet['retweet_count']`` is present and greater than zero, the
      value of ``tweet['user']['name']`` is included.
    * Every "RT @name ..." / "via @name ..." pattern found in
      ``tweet['text']`` contributes each @mention that directly follows
      the marker.

    Args:
        tweet: A dict-like tweet. ``'text'`` is required; ``'user'`` (with a
            ``'name'`` entry) is only read when ``'retweet_count'`` is
            positive.

    Returns:
        A sorted list of unique, lowercased names without the leading "@".
        The list is empty when no origin is found.

    Raises:
        KeyError: If ``'text'`` is missing, or if ``'retweet_count'`` is
            positive but ``'user'``/``'name'`` is missing.
    """
    origins = set()

    # Inspect the tweet to see if it was produced with /statuses/retweet/:id
    # See http://dev.twitter.com/doc/post/statuses/retweet/:id
    # A missing or None count is treated as zero so the comparison cannot
    # raise on Python 3.
    if (tweet.get('retweet_count') or 0) > 0:
        origins.add(tweet['user']['name'].strip("@").lower())

    # Also, inspect the tweet for "legacy" retweet patterns such as "RT" and
    # "via". Every occurrence is considered, not only the first one.
    for _marker, mentions in _RT_PATTERNS.findall(tweet['text']):
        origins.update(name.lower() for name in _MENTION.findall(mentions))

    # The set removes duplicates; sorting makes the output deterministic.
    return sorted(origins)
def main():
    """Print the retweet origins found in a small mocked-up list of tweets."""
    # A mocked up array of tweets for purposes of illustration.
    # Assume tweets have been fetched from the /search resource or elsewhere.
    tweets = [
        {
            'text': 'RT @ptowbrussell Get @SocialWebMining at http://bit.ly/biais2 #w00t',
            # ... more tweet fields ...
        },
        {
            'text': 'Get @SocialWebMining example code at http://bit.ly/biais2 #w00t',
            'retweet_count': 1,
            'user': {
                'name': 'ptwobrussell',
                # ... more user fields ...
            },
            # ... more tweet fields ...
        },
        # ... more tweets ...
    ]

    for tweet in tweets:
        # The call form of print works on both Python 2 and Python 3.
        print(get_rt_origins(tweet))


if __name__ == '__main__':
    main()