Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving anon user tracking from image element to form submit #433

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions hasjob/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ def as_dict(self):
def save_to_cache(self, key):
# Use cache instead of redis_store because we're too lazy to handle type marshalling
# manually. Redis only stores string values in a hash and we have some integer data.
cache.set('anon/' + key, self.as_dict(), timeout=120)
cache.set('anon/' + str(key), self.as_dict(), timeout=300)

def load_from_cache(self, key, eventclass):
result = cache.get('anon/' + key)
result = cache.get('anon/' + str(key))
if result:
for key in result:
if key != 'events':
Expand Down
8 changes: 3 additions & 5 deletions hasjob/templates/layout.html.jinja2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{%- extends "baseframe.html.jinja2" -%}
{%- from "baseframe/components.html.jinja2" import hgnav -%}
{%- from "macros.html.jinja2" import campaign_header, campaign_script, filters_setup_script -%}
{%- from "macros.html.jinja2" import campaign_header, campaign_script, filters_setup_script, anon_user_script -%}

{%- block doctypehtml -%}
<!DOCTYPE html>
Expand Down Expand Up @@ -65,7 +65,7 @@
{{ filters_setup_script(job_filters, data_filters) }}
{%- else %}
{{ filters_setup_script(job_filters) }}
{%- endif %}
{%- endif %}
{%- endif %}
{%- endblock %}

Expand Down Expand Up @@ -239,9 +239,6 @@
to find out when new jobs are posted. Hosted by
<a href="http://e2enetworks.com/">E2E Networks</a>.
{%- endif %}
{%- if not g.user and not g.anon_user %}
<img src="{{ url_for('sniffle') }}" width="1" height="1" alt=""/>
{%- endif %}
</p>
{% endblock %}

Expand Down Expand Up @@ -274,4 +271,5 @@
}
</script>
{% block footerscripts %}{% endblock %}
{{ anon_user_script() }}
{% endblock %}
34 changes: 34 additions & 0 deletions hasjob/templates/macros.html.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -224,3 +224,37 @@
</script>
{%- endwith %}
{%- endmacro -%}

{%- macro anon_user_script() -%}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be moved into app.js? It adds bulk to every page otherwise (to pages served to bots).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But if we move it to app.js, won't it then load on every page load regardless of whether the visitor is a bot or a human? Also, we will have to hardcode the API endpoint.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will load only once because app.js is cached.

{%- if not g.user and not g.anon_user %}
<script type="text/javascript">
  $(function () {
    // Report the first sign of human interaction (scroll or pointer movement)
    // to the anon session endpoint, exactly once per page load.
    var anonymized = false;
    var csrf_token = $('meta[name="csrf-token"]').attr('content');
    var trackedEvents = ['scroll', 'pointermove'];

    var anonymize = function (e) {
      if (anonymized) {
        return;
      }
      anonymized = true;
      // Detach once fired so the handlers stop running on every scroll/move.
      trackedEvents.forEach(function (name) {
        window.removeEventListener(name, anonymize);
      });
      $.post(
        "{{ url_for('anon_session') }}",
        {
          // e.type is "scroll" or "pointermove", matching the names sent before.
          event: e.type,
          csrf_token: csrf_token
        });
    };

    // addEventListener is used instead of assigning window.onscroll /
    // window.onpointermove so that other handlers on the page are not replaced.
    trackedEvents.forEach(function (name) {
      window.addEventListener(name, anonymize);
    });
  });
</script>
{%- endif %}
{%- endmacro -%}

40 changes: 36 additions & 4 deletions hasjob/views/api.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-

from flask import jsonify
from coaster.views import requestargs
from ..models import Tag, Domain
from datetime import datetime
from flask import jsonify, g, session, request, Response
from flask_wtf import FlaskForm
from coaster.views import requestargs, render_with
from .helper import rq_save_impressions
from ..models import db, Tag, Domain, AnonUser, EventSession, UserEvent
from .. import app, lastuser


Expand All @@ -18,3 +20,33 @@ def tag_autocomplete(q):
@requestargs('q')
def domain_autocomplete(q):
return jsonify({'domains': [d.name for d in Domain.autocomplete(q)]})


@app.route('/api/1/anonsession', methods=['POST'])
@render_with(json=True)
def anon_session():
"""
Load anon user:

1. If client sends valid csrf token, create and set g.anon_user
2. if g.anon_user exists, set session['au'] to anon_user.id
"""
now = datetime.utcnow()

csrf_form = FlaskForm()
if not g.user and not g.anon_user and csrf_form.validate_on_submit():
# This client sent us valid csrf token
g.anon_user = AnonUser()
db.session.add(g.anon_user)
g.esession = EventSession.new_from_request(request)
g.esession.anon_user = g.anon_user
g.esession.load_from_cache(session['es'], UserEvent)
db.session.add(g.esession)
db.session.commit()

if g.anon_user:
session['au'] = g.anon_user.id
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we using session['au'] for two different sorts of content? That doesn't seem clean.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm using session['au'] only to store the anon user ID now. If it exists in the cookie, it'll contain the anon user ID.

if 'impression' in session:
rq_save_impressions(g.esession.id, session.pop('impressions').values(), now, delay=False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo. impression or impressions?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the main save_impressions function.


return Response({'status': 'ok'})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be jsonified.

114 changes: 39 additions & 75 deletions hasjob/views/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from os import path
from datetime import datetime, timedelta
from uuid import uuid4
from urllib import quote, quote_plus
import hashlib
import bleach
Expand All @@ -29,16 +28,6 @@
MAX_COUNTS_KEY = u'maxcounts'


@app.route('/_sniffle.gif')
def sniffle():
    """Serve ``gif1x1`` as an image/gif response that clients must not cache."""
    no_cache_headers = {
        'Content-Type': 'image/gif',
        'Cache-Control': 'no-cache, no-store, must-revalidate',
        'Pragma': 'no-cache',
        'Expires': '0',
    }
    return gif1x1, 200, no_cache_headers


def index_is_paginated():
    """Return True when the current request is a POST that carries ``startdate``."""
    if request.method != 'POST':
        return False
    return 'startdate' in request.values

Expand Down Expand Up @@ -67,18 +56,12 @@ def load_user_data(user):
"""
All pre-request utilities, run after g.user becomes available.

Part 1: Load anon user:

1. If there's g.user and session['anon_user'], it loads that anon_user and tags with user=g.user, then removes anon
2. If there's no g.user and no session['anon_user'], sets session['anon_user'] = 'test'
3. If there's no g.user and there is session['anon_user'] = 'test', creates a new anon user, then saves to cookie
4. If there's no g.user and there is session['anon_user'] != 'test', loads g.anon_user

Part 1: If session['au'] exists, either set g.anon_user or set anon_user.user (if g.user exists).
Part 2: Are we in kiosk mode? Is there a preview campaign?
Part 3: Look up user's IP address location as geonameids for use in targeting.
"""
g.anon_user = None # Could change below
g.event_data = {} # Views can add data to the current pageview event
g.event_data = {} # Views can add data to the current pageview event
g.esession = None
g.viewcounts = {}
g.impressions = session.pop('impressions', {}) # Retrieve from cookie session if present there
Expand All @@ -88,65 +71,40 @@ def load_user_data(user):
now = datetime.utcnow()

if request.endpoint not in ('static', 'baseframe.static'):
# Loading an anon user only if we're not rendering static resources
if user:
if 'au' in session and session['au'] is not None and not unicode(session['au']).startswith(u'test'):
if 'au' in session and session['au'] is not None:
if unicode(session['au']).startswith('test'):
# old test token that we no longer need
session.pop('au', None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

session.pop('au') since it's guaranteed present. If it's not, that's definitely an error worth raising for us to investigate.

else:
# fetch anon user and set anon_user.user if g.user exists
anon_user = AnonUser.query.get(session['au'])
if anon_user:
anon_user.user = user
session.pop('au', None)
else:
if not session.get('au'):
session['au'] = u'test-' + unicode(uuid4())
g.esession = EventSessionBase.new_from_request(request)
g.event_data['anon_cookie_test'] = session['au']
# elif session['au'] == 'test': # Legacy test cookie, original request now lost
# g.anon_user = AnonUser()
# db.session.add(g.anon_user)
# g.esession = EventSession.new_from_request(request)
# g.esession.anon_user = g.anon_user
# db.session.add(g.esession)
# # We'll update session['au'] below after database commit
# elif unicode(session['au']).startswith('test-'): # Newer redis-backed test cookie
# # This client sent us back our test cookie, so set a real value now
# g.anon_user = AnonUser()
# db.session.add(g.anon_user)
# g.esession = EventSession.new_from_request(request)
# g.esession.anon_user = g.anon_user
# db.session.add(g.esession)
# g.esession.load_from_cache(session['au'], UserEvent)
# # We'll update session['au'] below after database commit
else:
anon_user = None # AnonUser.query.get(session['au'])
if not anon_user:
# XXX: We got a fake value? This shouldn't happen
g.event_data['anon_cookie_test'] = session['au']
session['au'] = u'test-' + unicode(uuid4()) # Try again
g.esession = EventSessionBase.new_from_request(request)
if g.user:
anon_user.user = g.user
g.anon_user = None
session.pop('au', None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Under if g.user: add g.anon_user = None, and then add an else clause before the following line. You don't want to have both g.user and g.anon_user set.

else:
g.anon_user = anon_user
else:
g.anon_user = anon_user

# Prepare event session if it's not already present
if g.user or g.anon_user and not g.esession:
g.esession = EventSession.get_session(uuid=session.get('es'), user=g.user, anon_user=g.anon_user)
if g.esession:
session['es'] = g.esession.uuid

# Don't commit here. It flushes SQLAlchemy's session cache and forces
# fresh database hits. Let after_request commit. (Commented out 30-03-2016)
# db.session.commit()
g.db_commit_needed = True

if g.anon_user:
session['au'] = g.anon_user.id
session.permanent = True
if 'impressions' in session:
# Run this in the foreground since we need this later in the request for A/B display consistency.
# This is most likely being called from the UI-non-blocking sniffle.gif anyway.
save_impressions(g.esession.id, session.pop('impressions').values(), now)
# the AnonUser record has been deleted for some reason,
# this should not happen.
session.pop('au', None)
elif not g.user:
# g.user, g.anon_user, session['au'], none of them are set
g.esession = EventSessionBase.new_from_request(request)

# Prepare event session if it's not already present
if g.user or g.anon_user and not g.esession:
g.esession = EventSession.get_session(uuid=session.get('es'), user=g.user, anon_user=g.anon_user)
if g.esession:
session['es'] = g.esession.uuid

if g.anon_user and 'impressions' in session:
# Run this in the foreground
# since we need this later in the request for A/B display consistency.
rq_save_impressions(g.esession.id, session.pop('impressions').values(), now, delay=False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use regular save_impressions.


# We have a user, now look up everything else

if session.get('kiosk'):
g.kiosk = True
else:
Expand Down Expand Up @@ -192,7 +150,7 @@ def load_user_data(user):

@app.after_request
def record_views_and_events(response):
if hasattr(g, 'db_commit_needed') and g.db_commit_needed:
if len(db.session.dirty) > 0 or len(db.session.new) > 0:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd move this into a generic function in Coaster so that this call can be commit_if_necessary(db.session)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or we could just do db.session.commit() without an if condition. What is the impact of committing an empty transaction?

db.session.commit()

# We had a few error reports with g.* variables missing in this function, so now
Expand Down Expand Up @@ -301,7 +259,7 @@ def record_views_and_events(response):
jobpost_id=g.jobpost_viewed[0],
bgroup=g.jobpost_viewed[1])
else:
g.esession.save_to_cache(session['au'])
g.esession.save_to_cache(session['es'])
if g.impressions:
# Save impressions to user's cookie session to write to db later
session['impressions'] = g.impressions
Expand Down Expand Up @@ -625,6 +583,12 @@ def save_impressions(session_id, impressions, viewed_time):
mark_dirty_impression_counts([postid for pinned, postid, bgroup in impressions])


def rq_save_impressions(session_id, impressions, viewed_time, delay=True):
    """
    Call ``save_impressions`` either through its ``.delay`` attribute or directly.

    :param session_id: passed through unchanged to ``save_impressions``
    :param impressions: passed through unchanged to ``save_impressions``
    :param viewed_time: passed through unchanged to ``save_impressions``
    :param delay: when true (the default) ``save_impressions.delay`` is called
        (presumably enqueueing a background job -- confirm against the job
        decorator on ``save_impressions``); when false the function is called
        in the foreground
    """
    func = save_impressions.delay if delay else save_impressions
    func(session_id, impressions, viewed_time)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need this function. Call save_impressions or save_impressions.delay directly from wherever you need it.



@job('hasjob')
def campaign_view_count_update(campaign_id, user_id=None, anon_user_id=None):
if not user_id and not anon_user_id:
Expand Down
1 change: 0 additions & 1 deletion hasjob/views/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,6 @@ def index(basequery=None, filters={}, md5sum=None, tag=None, domain=None, locati
BoardJobPost.board == g.board, JobPost.state.LISTED).options(
db.load_only('jobpost_id', 'pinned')).all()
}

else:
board_jobs = {}

Expand Down