-
Notifications
You must be signed in to change notification settings - Fork 0
/
db.py
75 lines (61 loc) · 2.3 KB
/
db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import sys, re, json
sys.path.append("/my/proj/sparqlhttp/build/lib.linux-x86_64-2.6")
from sparqlhttp.graph2 import SyncGraph
from memoize import lru_cache
from rdflib import RDFS, Literal, Namespace
# RDF vocabularies referenced by the queries in this module.
SKIP = Namespace(
    'http://skipforward.net/skipforward/resource/seeder/skipinions/')
DBT = Namespace('http://dbtropes.org/ont/')
MAIN = Namespace('http://dbtropes.org/resource/Main/')
FILM = Namespace('http://dbtropes.org/resource/Film/')

# Shared handle on the SPARQL endpoint. The rdfs:, skip: and dbt: prefixes
# used inside the query strings in this module are passed here as initNs.
graph = SyncGraph(
    "sesame",
    "http://bang:9080/sparql/",
    initNs={"rdfs": RDFS.RDFSNS, "skip": SKIP, "dbt": DBT})
def findMovie(name):
    """Return the URI of a resource whose rdfs:label equals `name`.

    The label is bound as an English-tagged literal. If the query yields
    several rows, the first one is used.

    Raises ValueError when the query yields no rows.
    """
    matches = graph.queryd(
        "SELECT DISTINCT ?uri WHERE { ?uri rdfs:label ?name }",
        initBindings={"name": Literal(name, lang="en")})
    try:
        first = matches[0]
    except IndexError:
        raise ValueError("movie %r not found" % name)
    return first['uri']
@lru_cache(10000)
def movieName(uri):
    """Return the rdfs:label of `uri`, taken from the first result row.

    Wrapped in lru_cache(10000), so repeated lookups of the same URI are
    expected to skip the query. An empty result is not handled here:
    indexing the first row will fail (presumably with IndexError).
    """
    labelRows = graph.queryd(
        "SELECT ?label WHERE { ?uri rdfs:label ?label }",
        initBindings={"uri": uri})
    firstRow = labelRows[0]
    return firstRow['label']
def findMoviesByRegex(s):
    """Return the dbt:TVTItem resources whose rdfs:label matches `s`.

    `s` is treated as literal text, not as a regular expression: it is
    passed through re.escape before being handed to the SPARQL regex()
    filter, so the filter matches labels that contain `s` verbatim.

    Returns a list of the ?movie values, one per distinct match.
    """
    # Bind the pattern as a Literal rather than pasting it into the query
    # text. re.escape emits backslash sequences (for spaces, dots, ...)
    # that are not legal escapes inside a quoted SPARQL string, so the
    # interpolated form produced an invalid query for any input containing
    # punctuation or whitespace, and let quotes in `s` alter the query.
    pattern = Literal(re.escape(s))
    rows = graph.queryd("""
      SELECT DISTINCT ?movie WHERE {
        ?movie a dbt:TVTItem;
               rdfs:label ?label .
        FILTER (regex(?label, ?pattern))
      }
    """, initBindings={"pattern": pattern})
    return [row['movie'] for row in rows]
def allItems(limit=10):
    """Return up to `limit` distinct resources typed dbt:TVTItem.

    `limit` is formatted into the query with %d, so it must be a number.
    """
    query = "SELECT DISTINCT ?i WHERE { ?i a dbt:TVTItem } LIMIT %d" % limit
    items = []
    for row in graph.queryd(query):
        items.append(row['i'])
    return items
# Feature types that movieFeatures() filters out of its results.
stopFeatures = set([SKIP['ItemName']])


@lru_cache(10000)
def movieFeatures(movie):
    """Return the set of types of the features attached to `movie`.

    Follows skip:hasFeature from `movie` and collects the rdf:type of each
    feature node, leaving out anything listed in stopFeatures. Wrapped in
    lru_cache(10000).
    """
    rows = graph.queryd("""
SELECT DISTINCT ?trope WHERE {
?movie skip:hasFeature ?f .
?f a ?trope .
}
""", initBindings={"movie": movie})
    tropes = set()
    for row in rows:
        trope = row['trope']
        if trope in stopFeatures:
            continue
        tropes.add(trope)
    return tropes
def countFeatures(m1, m2):
    """Compare the feature sets of two movies.

    Returns a 3-tuple: (number of features of m1, number of features of
    m2, number of features present in both).
    """
    first = movieFeatures(m1)
    second = movieFeatures(m2)
    shared = first.intersection(second)
    return len(first), len(second), len(shared)
def top500():
    """Resolve every title listed in the file "top500" to a URI.

    The file is opened by relative path and read one title per line, with
    surrounding whitespace stripped. Titles for which findMovie raises
    KeyError or ValueError are reported on stdout and skipped.

    Returns the list of URIs that were found, in file order.
    """
    uris = []
    # Use a context manager so the file handle is always closed, including
    # when a lookup raises something other than the handled exceptions.
    with open("top500") as titles:
        for title in titles:
            title = title.strip()
            try:
                uris.append(findMovie(title))
            except (KeyError, ValueError):
                # Parenthesized form prints the same text on Python 2 and
                # also parses on Python 3.
                print("couldn't find uri for %s" % title)
    return uris
#print findMoviesByRegex("shrek")
#print movieFeatures(findMovie("Lord"))
#print countFeatures(MAIN['Shrek'], MAIN['TheMatrix'])
#print countFeatures(FILM['DieHard'], MAIN['TheMatrix'])