-
Notifications
You must be signed in to change notification settings - Fork 7
/
twContrib.py
192 lines (167 loc) · 6.1 KB
/
twContrib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import simplejson,urllib2
import md5, tempfile, time
import argparse,os
import networkx as nx
# Command-line interface for the snowball crawl: seed with either the
# accounts whose contributors we want, or the users whose contributions
# we want to trace.
parser = argparse.ArgumentParser(description='Mine Twitter account contributions')
parser.add_argument('-contributeto',nargs='*', help="A space separated list of account names (without the @) for whom you want to find the contributors.")
parser.add_argument('-contributeby',nargs='*', help="A space separated list of account names (without the @) whom you believe contributes to other accounts.")
# Number of alternating contributor/contributee passes to run.
parser.add_argument('-depth',default=3,type=int,metavar='N',help='Snowball search depth.')
args=parser.parse_args()
# Directed graph accumulating contributor -> account edges.
DG=nx.DiGraph()
def checkDir(dirpath):
    """Ensure that directory dirpath exists, creating it (and any missing
    parents) if needed.

    Uses try/except rather than exists()+makedirs() so that a directory
    created concurrently between the check and the call (TOCTOU race)
    does not crash the script.
    """
    try:
        os.makedirs(dirpath)
    except OSError:
        # Re-raise only if the directory genuinely could not be created.
        if not os.path.isdir(dirpath):
            raise
def getContributors(user,userlist):
net=[]
print 'Getting contributors to',user
try:
data= simplejson.load(urllib2.urlopen('https://api.twitter.com/1/users/contributors.json?screen_name='+user))
print data
for d in data:
net.append(d['screen_name'])
if d['screen_name'] not in userlist: userlist.append(d['screen_name'])
except:
print 'oops 1'
return net,userlist
def getContributees(user,accountlist):
print 'Getting contributions of',user
net=[]
try:
data= simplejson.load(urllib2.urlopen('https://api.twitter.com/1/users/contributees.json?screen_name='+user))
for d in data:
net.append(d['screen_name'])
if d['screen_name'] not in accountlist: accountlist.append(d['screen_name'])
except:
pass
return net,accountlist
#accountlist=['twitterapi']
accountlist=args.contributeto
userlist=args.contributeby
contributors={}
contributees={}
depth=args.depth
# Seed the shared 'data' accumulator from whichever direction was
# requested on the command line, and pick a matching report directory.
if args.contributeto and len(args.contributeto):
    print "finding contributors to..."
    fpath='/'.join(['reports','contributors','_'.join(args.contributeto)])
    typ='contributors'
    data={'accountlist':args.contributeto,'userlist':[],'contributors':{},'contributees':{},'graph':DG}
elif args.contributeby and len(args.contributeby):
    print "finding contributions by..."
    fpath='/'.join(['reports','contributees','_'.join(args.contributeby)])
    typ='contributees'
    data={'accountlist':[],'userlist':args.contributeby,'contributors':{},'contributees':{},'graph':DG}
else:
    # Neither seed list supplied: nothing to crawl.
    exit(-1)
checkDir(fpath)
#==
#tweak of http://developer.yahoo.com/python/python-caching.html
class DiskCacheFetcherfname:
def __init__(self, cache_dir=None):
# If no cache directory specified, use system temp directory
if cache_dir is None:
cache_dir = tempfile.gettempdir()
self.cache_dir = cache_dir
def fetch(self, url, max_age=0):
# Use MD5 hash of the URL as the filename
filename = md5.new(url).hexdigest()
filepath = os.path.join(self.cache_dir, filename)
if os.path.exists(filepath):
if int(time.time()) - os.path.getmtime(filepath) < max_age:
#return open(filepath).read()
print "using cached copy of fetched url: ",url
return filepath
print "fetching fresh copy of fetched url: ",url
# Retrieve over HTTP and cache, using rename to avoid collisions
tempdata = urllib2.urlopen(url).read()
fd, temppath = tempfile.mkstemp()
fp = os.fdopen(fd, 'w')
fp.write(tempdata)
fp.close()
os.rename(temppath, filepath)
return filepath
def getTwCachedData(url, cachetime=144000):
    """Return the JSON-decoded body of `url`, served from the disk cache.

    Cached responses newer than `cachetime` seconds are reused. If the
    API reports a rate-limit error, the cached file is removed so the
    next run refetches, but the error payload is still returned to the
    caller.
    """
    fetcher = DiskCacheFetcherfname('cache')
    cached_path = fetcher.fetch(url, cachetime)
    with open(cached_path) as cached:
        raw = cached.read()
    jdata = simplejson.loads(raw)
    if 'error' in jdata and jdata['error'].startswith('Rate limit exceeded'):
        os.remove(cached_path)
    return jdata
def rgetContributors(user,bigdata):
net=[]
print 'Getting contributors to',user
bigdata['graph'].add_node(user.lower(),label=user)
try:
url='https://api.twitter.com/1/users/contributors.json?screen_name='+user
print 'trying',url
#data= simplejson.load(urllib2.urlopen(url))
data=getTwCachedData(url)
#print data
for d in data:
if 'screen_name' in d:
dsname=d['screen_name']
net.append(dsname)
if dsname not in bigdata['userlist']:
bigdata['userlist'].append(dsname)
bigdata['graph'].add_node(dsname.lower(),label=dsname)
bigdata['graph'].add_edge(dsname.lower(),user.lower())
except:
print 'oops 2'
bigdata['contributors'][user]=net
return bigdata
def rgetContributees(user,bigdata):
print 'Getting contributions of',user
bigdata['graph'].add_node(user.lower(),label=user)
net=[]
try:
url='https://api.twitter.com/1/users/contributees.json?screen_name='+user
print 'trying',url
#data= simplejson.load(urllib2.urlopen(url))
data=getTwCachedData(url)
for d in data:
if 'screen_name' in d:
dsname=d['screen_name']
net.append(dsname)
if dsname not in bigdata['accountlist']:
bigdata['accountlist'].append(dsname)
bigdata['graph'].add_node(dsname.lower(),label=dsname)
bigdata['graph'].add_edge(user.lower(),dsname.lower())
except:
print 'oops2'
bigdata['contributees'][user]=net
return bigdata
#via mhawksey - google: site:twitter.com "via web by"
#twitterapi, starbucks, HuffingtonPost,sportscenter,todayshow,reelseo,qualcomm,DefJamRecords,HornitosTequila,googletalks,salesforce,noh8campaign,chevron,mtv,jangomail,ESPNCFB,noh8campaign,playstation,mail
#Originally inspired by http://www.drewconway.com/zia/?p=345
def snowball_build(bigdata,rounds,typ='contributors'):
print 'Starting...'
if typ=='contributors':
offset=0
else:
offset=1
for r in range(0,rounds):
print "STARTING PASS",str(r)
if (r+offset) % 2:
print "Finding contributees...",str(r)
for user in bigdata['userlist']:
if user not in bigdata['contributees']:
bigdata=rgetContributees(user,bigdata)
else:
# THis includes first pass
print "Finding contributors...",str(r)
for account in bigdata['accountlist']:
if account not in bigdata['contributors']:
bigdata=rgetContributors(account,bigdata)
return bigdata
# Run the crawl, dump the accumulated results to stdout, then write the
# graph under the report directory in both GraphML and edge-list form.
data=snowball_build(data,depth,typ)
print data
print 'contributors',data['contributors']
print 'contributees',data['contributees']
print 'accountlist',data['accountlist']
print 'userlist',data['userlist']
nx.write_graphml(data['graph'], fpath+"/graph.graphml")
nx.write_edgelist(data['graph'], fpath+"/graph.txt",data=False)