Skip to content

Commit

Permalink
Merge pull request #269 from rice-crc/develop
Browse files Browse the repository at this point in the history
Merge develop into main
  • Loading branch information
derekjkeller authored Oct 30, 2024
2 parents ff6e8e8 + 164050e commit 7aee723
Show file tree
Hide file tree
Showing 8 changed files with 368 additions and 381 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ data/*

src/api/static/*
!src/api/static/empty
src/api/common/static/

src/geo-networks/tmp/*.pickle
src/geo-networks/tmp/*.json
Expand Down
3 changes: 2 additions & 1 deletion src/api/common/reqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def post_req(orig_queryset,s,r,options_dict,auto_prefetch=True,paginate=False):
"<class 'past.models.Enslaved'>":'enslaved',
"<class 'blog.models.Post'>":'blog'
}
dedupe=False
if 'global_search' in params:
core_name=solrcorenamedict[qsetclassstr]
if DEBUG:
Expand Down Expand Up @@ -251,7 +252,7 @@ def post_req(orig_queryset,s,r,options_dict,auto_prefetch=True,paginate=False):
# Specifically, we noticed that
## when searching for voyage years simultaneously with other variables like ports of embarkation
## despite indexing, and only on staging, it kicked off a hugely inefficient db query
dedupe=False

for item in filter_obj:
# print("FILTER ITEM OBJECT--->",item)
if ids is not None:
Expand Down
4 changes: 2 additions & 2 deletions src/api/geo/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class ChildOfInLine(admin.TabularInline):
model=Location
verbose_name = "Child"
readonly_fields=['latitude','longitude','name','location_type','value','spatial_extent']
# readonly_fields=['latitude','longitude','name','location_type','value','spatial_extent']
verbose_name_plural="Children"
can_delete=False
classes=["collapse"]
Expand All @@ -27,7 +27,7 @@ class LocationAdmin(admin.ModelAdmin):
]
list_display=('name','value','longitude','latitude','location_type')
search_fields=('name','value')
readonly_fields=['value','name','parent','children','location_type']
# readonly_fields=['value','name','parent','children','location_type']
model=Location

admin.site.register(Location,LocationAdmin)
Expand Down
16 changes: 14 additions & 2 deletions src/api/voyage/serializers_READONLY.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,21 +238,33 @@ class Meta:
model=VoyageCargoConnection
fields='__all__'


class LinkedVoyageSerializer(serializers.Serializer):
    """Minimal serializer exposing only the id of a voyage linked to another voyage."""
    # Integer primary key of the linked voyage; consumed by
    # VoyageSerializer.get_linked_voyages further down this module.
    voyage_id=serializers.IntegerField()

class VoyageSerializer(serializers.ModelSerializer):
sources=serializers.SerializerMethodField()
voyage_itinerary=VoyageItinerarySerializer(many=False,read_only=True)
voyage_dates=VoyageDatesSerializer(many=False,read_only=True)
enslavers=serializers.SerializerMethodField()
named_enslaved_people=serializers.SerializerMethodField()


voyage_crew=VoyageCrewSerializer(many=False,read_only=True)
voyage_ship=VoyageShipSerializer(many=False,read_only=True)
voyage_slaves_numbers=VoyageSlavesNumbersSerializer(many=False,read_only=True)
voyage_outcome=VoyageOutcomeSerializer(many=False,read_only=True)
voyage_groupings=VoyageGroupingsSerializer(many=False,read_only=True)
cargo=VoyageCargoConnectionSerializer(many=True,read_only=True)
african_info=AfricanInfoSerializer(many=True,read_only=True)
linked_voyages=serializers.SerializerMethodField()

def get_linked_voyages(self, instance) -> LinkedVoyageSerializer(many=True):
    """Return the ids of every voyage linked to this one, in either direction.

    Merges incoming and outgoing voyage links, de-duplicates them, and
    serializes each id as {'voyage_id': <int>}.

    BUG FIX: the return annotation previously named VoyageSourceSerializer,
    which mis-documents the schema (e.g. for drf-spectacular); it now names
    LinkedVoyageSerializer, which is what is actually returned.
    """
    incoming = instance.incoming_from_other_voyages.all()
    outgoing = instance.outgoing_to_other_voyages.all()
    # sorted() gives a deterministic order (a bare set's order is arbitrary).
    linked_voyage_ids = sorted(
        {i.voyage_id for i in incoming} | {o.voyage_id for o in outgoing}
    )
    # Wrap ids in dicts: LinkedVoyageSerializer reads a `voyage_id`
    # attribute/key, which a bare int does not provide.
    rows = [{'voyage_id': vid} for vid in linked_voyage_ids]
    return LinkedVoyageSerializer(rows, many=True, read_only=True).data

##DIDN'T DO LINKED VOYAGES YET
def get_sources(self,instance) -> VoyageSourceSerializer(many=True):
vscs=instance.voyage_source_connections.all()
Expand Down
26 changes: 0 additions & 26 deletions src/api/voyage/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import hashlib
from rest_framework import filters
from common.reqs import autocomplete_req,post_req,get_fieldstats,paginate_queryset,clean_long_df
# from common.serializers import autocompleterequestserializer, autocompleteresponseserializer,crosstabresponseserializer,crosstabrequestserializer
from geo.common import GeoTreeFilter
from geo.serializers_READONLY import LocationSerializerDeep
import collections
Expand All @@ -35,7 +34,6 @@
import pickle
from voyage.cross_filter_fields import VoyageBasicFilterVarNames


redis_cache = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)

class VoyageList(generics.GenericAPIView):
Expand Down Expand Up @@ -674,30 +672,6 @@ class VoyageGET(generics.RetrieveAPIView):
queryset=Voyage.objects.all()
serializer_class=VoyageSerializer
lookup_field='voyage_id'

# authentication_classes=[TokenAuthentication]
# permission_classes=[IsAuthenticated]
# class VoyageCreate(generics.CreateAPIView):
# '''
# Create a Voyage. You MUST supply a voyage_id
# '''
# queryset=Voyage.objects.all()
# serializer_class=VoyageSerializerCRUD
# lookup_field='voyage_id'
# authentication_classes=[TokenAuthentication]
# permission_classes=[IsAdminUser]
#
# class VoyageRetrieveUpdateDestroy(generics.RetrieveUpdateDestroyAPIView):
# '''
# Retrieve, Update, or Delete a Voyage
# '''
# queryset=Voyage.objects.all()
# serializer_class=VoyageSerializerCRUD
# lookup_field='voyage_id'
# authentication_classes=[TokenAuthentication]
# permission_classes=[IsAdminUser]

######## READ-ONLY CONTROLLED VOCAB ENDPOINTS

class RigOfVesselList(generics.ListAPIView):
'''
Expand Down
171 changes: 58 additions & 113 deletions src/geo-networks/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,103 +13,96 @@
import pickle
import os
import pandas as pd
from flask_login import login_user,LoginManager,UserMixin,login_required
from flask.cli import AppGroup

app = Flask(__name__,template_folder='./templates/')
app.config['JSON_SORT_KEYS'] = False
app.config.from_object(__name__)
app.secret_key = FLASK_SECRET_KEY
login_manager = LoginManager()

class User(UserMixin):
    """Minimal in-memory user object for flask_login.

    Instances are created up front and looked up via the USERS / USER_NAMES
    module-level maps rather than loaded from a database.
    """
    def __init__(self, name, id, active=True):
        self.name = name      # login/display name (key into USER_NAMES)
        self.id = id          # id stored in the session cookie (see load_user)
        self.active = active  # whether the account is allowed to log in
    def is_active(self):
        # Here you should write whatever the code is
        # that checks the database if your user is active
        return self.active
    def is_anonymous(self):
        return False
    def is_authenticated(self):
        # NOTE(review): recent flask_login releases define is_active /
        # is_authenticated / is_anonymous as *properties* on UserMixin;
        # overriding them as plain methods may misbehave there -- confirm
        # the pinned flask_login version.
        return True
app = Flask(__name__)

@login_manager.user_loader
def load_user(userid):
    """flask_login callback: resolve a session-stored user id to a User.

    Returns None (treated as anonymous) when the id is unknown.
    """
    # The session stores the id as a string; USERS is keyed by int.
    return USERS.get(int(userid))
def kickoff():
    """Load every registered graph index at startup."""
    print('LOADING GRAPHS')
    # Enumerate every (cache, graph) pair once, then load each index.
    pairs = (
        (cache_name, graph_conf['name'])
        for cache_name in rcnames
        for graph_conf in registered_caches[cache_name]['graph_params']
    )
    for cache_name, graph_name in pairs:
        load_index(cache_name, graph_name)

# CLI group exposed as `flask pickle ...`.
pickle_cli = AppGroup('pickle')
@pickle_cli.command('rebuild')
def rebuild_pickles():
    """CLI command (`flask pickle rebuild`): rebuild every graph-index pickle from the API.

    For each registered cache and each of its graphs, fetch the dataframe from
    the django endpoint, build the network index, and write it to
    TMP_PATH/<rcname>__<graphname>.pickle, overwriting any existing file.
    """
    for rcname in rcnames:
        for graph_params in registered_caches[rcname]['graph_params']:

            graphname=graph_params['name']

            rc=registered_caches[rcname]
            dataframe_endpoint=rc['endpoint']
            if 'graphs' not in rc:
                rc['graphs']={}

            # Pickle path uses the *configured* graph name.
            # NOTE(review): load_graph below rebinds `graphname`; presumably it
            # returns the same name, otherwise the file name and the indices
            # lookup would disagree -- confirm against load_graph's contract.
            picklefilepath=f'{TMP_PATH}/{rcname}__{graphname}.pickle'

            graph,oceanic_subgraph_view,graphname=load_graph(dataframe_endpoint,graph_params,rc)
            linklabels=rc['indices']['linklabels']
            nodelabels=rc['indices']['nodelabels']
            graph_idx=rc['indices'][graphname]
            pk_var=graph_idx['pk']
            itinerary_vars=graph_idx['itinerary']
            weight_vars=graph_idx['weight']
            graph_index=build_index(
                dataframe_endpoint,
                graph,
                oceanic_subgraph_view,
                pk_var,
                itinerary_vars,
                weight_vars,
                linklabels,
                nodelabels
            )
            with open(picklefilepath, 'wb') as f:
                pickle.dump(graph_index, f, pickle.HIGHEST_PROTOCOL)
            print(f"PICKLE BUILT: {rcname} -- {graphname}")

login_manager.setup_app(app)
app.cli.add_command(pickle_cli)

def load_index(rcname,graphname):
    """Populate registered_caches[rcname]['graphs'][graphname]['index'].

    Loads the prebuilt pickle from TMP_PATH when present; otherwise falls back
    (see NOTE in the else branch below).
    """
    rc=registered_caches[rcname]
    dataframe_endpoint=rc['endpoint']
    if 'graphs' not in rc:
        rc['graphs']={}
    # Ensure the pickle cache directory exists before touching it.
    if not os.path.exists(TMP_PATH):
        os.makedirs(TMP_PATH)

    picklefilepath=f'{TMP_PATH}/{rcname}__{graphname}.pickle'
    # Config for this graph; assumes graph names are unique within a cache
    # (IndexError if the name is unknown).
    graph_params=[gp for gp in registered_caches[rcname]['graph_params'] if gp['name']==graphname][0]
    if os.path.exists(picklefilepath):
        # Fast path: deserialize the prebuilt index.
        with open(picklefilepath, 'rb') as f:
            graph_index = pickle.load(f)
    else:
        # NOTE(review): this branch first rebuilds and pickles the index, then
        # prints a "does not exist" warning and overwrites graph_index with an
        # empty placeholder. It reads like two alternative implementations
        # merged together (likely a diff/merge artifact) -- confirm which
        # behavior is intended before relying on it.
        graph,oceanic_subgraph_view,graphname=load_graph(dataframe_endpoint,graph_params,rc)
        linklabels=rc['indices']['linklabels']
        nodelabels=rc['indices']['nodelabels']
        graph_idx=rc['indices'][graphname]
        pk_var=graph_idx['pk']
        itinerary_vars=graph_idx['itinerary']
        weight_vars=graph_idx['weight']
        graph_index=build_index(
            dataframe_endpoint,
            graph,
            oceanic_subgraph_view,
            pk_var,
            itinerary_vars,
            weight_vars,
            linklabels,
            nodelabels
        )
        with open(picklefilepath, 'wb') as f:
            pickle.dump(graph_index, f, pickle.HIGHEST_PROTOCOL)
        print(f"WARNING. MAP PICKLE DOES NOT EXIST: {rcname} -- {graphname}")
        # Empty placeholder index so the app can still start without a pickle.
        graph_index={
            'nodes':pd.DataFrame.from_records({}),
            'edges':pd.DataFrame.from_records({}),
            'nodesdata':{},
            'edgesdata':{}
        }

    # Install (or refresh) the index on the registered cache.
    if graphname not in rc['graphs']:
        rc['graphs'][graphname]={'index':graph_index}
    else:
        rc['graphs'][graphname]['index']=graph_index
    # print("test node record-->",graph_index['nodes'].loc[[0,2]].to_dict())
    # print("test edge record-->",graph_index['edges'].loc[[0,2]].to_dict())

def kickoff():
    """Build every registered graph index at startup, retrying with backoff.

    Each (cache, graph) pair is loaded via load_index(). If every cache fails
    in a pass, wait standoff_base**standoff_count seconds and retry; otherwise
    exit the loop and report total build time.

    BUG FIX: failures_count was initialized and compared but never
    incremented, so the retry/backoff branch was unreachable and any
    exception from load_index crashed startup. Failures are now caught and
    counted per graph, matching the "failed on %d of %d caches" report.
    """
    standoff_base=4
    standoff_count=0
    st=time.time()
    while True:
        failures_count=0
        print('BUILDING GRAPHS')
        for rcname in rcnames:
            for graph_params in registered_caches[rcname]['graph_params']:
                graphname=graph_params['name']
                try:
                    load_index(rcname,graphname)
                except Exception as e:
                    # Count the failure instead of crashing; the retry loop
                    # below decides whether to back off and try again.
                    print("failed to load %s -- %s: %s" %(rcname,graphname,e))
                    failures_count+=1
        print("failed on %d of %d caches" %(failures_count,len(rcnames)))
        if failures_count>=len(rcnames):
            standoff_time=standoff_base**standoff_count
            print("retrying after %d seconds" %(standoff_time))
            time.sleep(standoff_time)
            standoff_count+=1
        else:
            break
    print("finished building graphs in %d seconds" %int(time.time()-st))

#on initialization, load every index as a graph, via a call to the django api
#SEE INDEX_VARS.PY FOR THE MAPPINGS OF THE DJANGO API FIELDS TO THE DATA DICTIONARIES USED BY THIS APP
# Registry of the map caches this service serves; each entry supplies the
# django endpoint and the graph parameters used to build its network index.
registered_caches={
    'ao_maps':ao_maps,
    'voyage_maps':voyage_maps,
    'estimate_maps':estimate_maps
}

# Cache names, iterated by kickoff()/rebuild commands.
rcnames=list(registered_caches.keys())

# Ensure the pickle cache directory exists before the initial build.
if not os.path.exists(TMP_PATH):
    os.makedirs(TMP_PATH)

# Build/load all graph indices at import time, before any request is served.
kickoff()

@app.route('/network_maps/',methods=['POST'])
Expand Down Expand Up @@ -182,51 +175,3 @@ def network_maps():
# print(node)

return(jsonify({'nodes':finalnodes,'edges':finaledges}))

@app.route('/rebuild_indices/<indexname>', methods=['GET'])
@login_required
def rebuild_index(indexname):
    """Delete one index's on-disk pickle, rebuild it, and redirect to the index list.

    indexname is '<rcname>__<graphname>' -- the same key used for pickle
    filenames (see load_index).
    """

    if not os.path.exists(TMP_PATH):
        os.makedirs(TMP_PATH)

    picklepath=f"{TMP_PATH}/{indexname}.pickle"
    # "tmp/"+indexname+".pickle"
    if os.path.exists(picklepath):
        os.remove(picklepath)
    # NOTE(review): assumes exactly one '__' in indexname; a name containing
    # extra '__' makes this unpack raise ValueError.
    rcname,graphname=indexname.split("__")
    load_index(rcname,graphname)
    # Presumably lets the rebuild settle before the redirected page reads
    # the pickle directory -- confirm this delay is still needed.
    time.sleep(2)
    return redirect('/displayindices')

@app.route('/displayindices', methods=['GET'])
@login_required
def displayindices():
    """Render the admin page listing each registered index and whether its pickle exists."""
    if not os.path.exists(TMP_PATH):
        os.makedirs(TMP_PATH)

    # One [index_key, pickle_exists] row per registered (cache, graph) pair.
    rows = []
    for rcname in rcnames:
        for graph_params in registered_caches[rcname]['graph_params']:
            index_key = '__'.join([rcname, graph_params['name']])
            pickled = os.path.exists(TMP_PATH + "/" + index_key + ".pickle")
            rows.append([index_key, pickled])
    return render_template(
        'displayindices.html',
        indices=rows
    )
# Here we use a class of some kind to represent and validate our

@app.route('/login', methods=['GET','POST'])
def login():
    """Shared-password login form backing the @login_required admin pages.

    POST with a known username and the correct password logs the user in and
    redirects to /displayindices; GET (or a failed POST) renders the form.
    """
    if request.method == 'POST':
        username = request.form['username']
        pw = request.form['password']
        # NOTE(review): plaintext comparison against one shared PW constant;
        # consider per-user hashed passwords and hmac.compare_digest --
        # confirm this is acceptable for the deployment.
        if pw == PW and username in USER_NAMES:
            # Login and validate the user.
            # user should be an instance of your `User` class
            login_user(USER_NAMES[username])
            flash('Logged in successfully.')
            return redirect('/displayindices')
    # Fall through: show the form (no error message on a failed attempt).
    return render_template('login.html')
4 changes: 3 additions & 1 deletion src/geo-networks/localsettings.py-default.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,6 @@ def is_authenticated(self):

# DEBUG=True

TMP_PATH="/mnt/geo_networks_tmp"
TMP_PATH="/mnt/geo_networks_tmp"

rebuilder_number_of_workers=1
Loading

0 comments on commit 7aee723

Please sign in to comment.