Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving anon user tracking from image element to form submit #433

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions hasjob/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ def as_dict(self):
def save_to_cache(self, key):
# Use cache instead of redis_store because we're too lazy to handle type marshalling
# manually. Redis only stores string values in a hash and we have some integer data.
cache.set('anon/' + key, self.as_dict(), timeout=120)
cache.set('anon/' + str(key), self.as_dict(), timeout=300)

def load_from_cache(self, key, eventclass):
result = cache.get('anon/' + key)
result = cache.get('anon/' + str(key))
if result:
for key in result:
if key != 'events':
Expand Down
8 changes: 3 additions & 5 deletions hasjob/templates/layout.html.jinja2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{%- extends "baseframe.html.jinja2" -%}
{%- from "baseframe/components.html.jinja2" import hgnav -%}
{%- from "macros.html.jinja2" import campaign_header, campaign_script, filters_setup_script -%}
{%- from "macros.html.jinja2" import campaign_header, campaign_script, filters_setup_script, anon_user_script -%}

{%- block doctypehtml -%}
<!DOCTYPE html>
Expand Down Expand Up @@ -65,7 +65,7 @@
{{ filters_setup_script(job_filters, data_filters) }}
{%- else %}
{{ filters_setup_script(job_filters) }}
{%- endif %}
{%- endif %}
{%- endif %}
{%- endblock %}

Expand Down Expand Up @@ -239,9 +239,6 @@
to find out when new jobs are posted. Hosted by
<a href="http://e2enetworks.com/">E2E Networks</a>.
{%- endif %}
{%- if not g.user and not g.anon_user %}
<img src="{{ url_for('sniffle') }}" width="1" height="1" alt=""/>
{%- endif %}
</p>
{% endblock %}

Expand Down Expand Up @@ -274,4 +271,5 @@
}
</script>
{% block footerscripts %}{% endblock %}
{{ anon_user_script() }}
{% endblock %}
34 changes: 34 additions & 0 deletions hasjob/templates/macros.html.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -224,3 +224,37 @@
</script>
{%- endwith %}
{%- endmacro -%}

{%- macro anon_user_script() -%}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be moved into app.js? It adds bulk to every page otherwise (to pages served to bots).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But if we move it to app.js, won't it then load on every page load, regardless of whether the visitor is a bot or a human? Also, we will have to hardcode the API endpoint.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will load only once because app.js is cached.

{%- if not g.user and not g.anon_user %}
  {#- Only emitted for visitors that have neither a logged-in user nor an anon user.
      On the first scroll or pointer movement, POST the event name and the CSRF token
      to the anon_session endpoint. -#}
  <script type="text/javascript">
    $(function () {
      var csrf_token = $('meta[name="csrf-token"]').attr('content');
      var anonymize = function (e) {
        // Detach both listeners before posting so the request is sent at most once
        // and nothing keeps running on later scroll/pointer events.
        $(window).off('.anonuser');
        $.post(
          "{{ url_for('anon_session') }}",
          {
            event: e.type,
            csrf_token: csrf_token
          });
      };
      // Namespaced listeners are used instead of assigning window.onscroll /
      // window.onpointermove, which would overwrite handlers set by other scripts.
      $(window).on('scroll.anonuser pointermove.anonuser', anonymize);
    });
  </script>
{%- endif %}
{%- endmacro -%}

40 changes: 36 additions & 4 deletions hasjob/views/api.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-

from flask import jsonify
from coaster.views import requestargs
from ..models import Tag, Domain
from datetime import datetime
from flask import jsonify, g, session, request, Response
from flask_wtf import FlaskForm
from coaster.views import requestargs, render_with
from .helper import save_impressions
from ..models import db, Tag, Domain, AnonUser, EventSession, UserEvent
from .. import app, lastuser


Expand All @@ -18,3 +20,33 @@ def tag_autocomplete(q):
@requestargs('q')
def domain_autocomplete(q):
return jsonify({'domains': [d.name for d in Domain.autocomplete(q)]})


@app.route('/api/1/anonsession', methods=['POST'])
@render_with(json=True)
def anon_session():
    """
    Load anon user:

    1. If the client sends a valid CSRF token, create and set g.anon_user
    2. If g.anon_user exists, set session['au'] to anon_user.id

    Returns a JSON response with the body ``{"status": "ok"}``.
    """
    now = datetime.utcnow()

    csrf_form = FlaskForm()
    if not g.user and not g.anon_user and csrf_form.validate_on_submit():
        # This client sent us a valid CSRF token
        g.anon_user = AnonUser()
        db.session.add(g.anon_user)
        g.esession = EventSession.new_from_request(request)
        g.esession.anon_user = g.anon_user
        # Events are looked up under the event session key stored in the cookie session
        g.esession.load_from_cache(session['es'], UserEvent)
        db.session.add(g.esession)
        db.session.commit()

    if g.anon_user:
        session['au'] = g.anon_user.id
        if 'impressions' in session:
            save_impressions(g.esession.id, session.pop('impressions').values(), now)

    # Response() treats its first argument as the body iterable, so wrapping a dict
    # would emit only the dict's keys as text. Serialize with jsonify instead, as
    # the other API views in this module do.
    return jsonify({'status': 'ok'})
116 changes: 41 additions & 75 deletions hasjob/views/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from os import path
from datetime import datetime, timedelta
from uuid import uuid4
from urllib import quote, quote_plus
import hashlib
import bleach
Expand All @@ -29,16 +28,6 @@
MAX_COUNTS_KEY = u'maxcounts'


@app.route('/_sniffle.gif')
def sniffle():
    """Return ``gif1x1`` as an image/gif response with caching disabled."""
    no_cache_headers = {
        'Content-Type': 'image/gif',
        'Cache-Control': 'no-cache, no-store, must-revalidate',
        'Pragma': 'no-cache',
        'Expires': '0',
    }
    return gif1x1, 200, no_cache_headers


def index_is_paginated():
    """Return True when the current request is a POST that carries a 'startdate' value."""
    if request.method != 'POST':
        return False
    return 'startdate' in request.values

Expand Down Expand Up @@ -67,18 +56,12 @@ def load_user_data(user):
"""
All pre-request utilities, run after g.user becomes available.

Part 1: Load anon user:

1. If there's g.user and session['anon_user'], it loads that anon_user and tags with user=g.user, then removes anon
2. If there's no g.user and no session['anon_user'], sets session['anon_user'] = 'test'
3. If there's no g.user and there is session['anon_user'] = 'test', creates a new anon user, then saves to cookie
4. If there's no g.user and there is session['anon_user'] != 'test', loads g.anon_user

Part 1: If session['au'] exists, either set g.anon_user or set anon_user.user (if g.user exists).
Part 2: Are we in kiosk mode? Is there a preview campaign?
Part 3: Look up user's IP address location as geonameids for use in targeting.
"""
g.anon_user = None # Could change below
g.event_data = {} # Views can add data to the current pageview event
g.event_data = {} # Views can add data to the current pageview event
g.esession = None
g.viewcounts = {}
g.impressions = session.pop('impressions', {}) # Retrieve from cookie session if present there
Expand All @@ -88,65 +71,45 @@ def load_user_data(user):
now = datetime.utcnow()

if request.endpoint not in ('static', 'baseframe.static'):
# Loading an anon user only if we're not rendering static resources
if user:
if 'au' in session and session['au'] is not None and not unicode(session['au']).startswith(u'test'):
if 'au' in session and session['au'] is not None:
if unicode(session['au']).startswith('test'):
# old test token that we no longer need
session.pop('au')
else:
# fetch anon user and set anon_user.user if g.user exists
anon_user = AnonUser.query.get(session['au'])
if anon_user:
anon_user.user = user
session.pop('au', None)
else:
if not session.get('au'):
session['au'] = u'test-' + unicode(uuid4())
g.esession = EventSessionBase.new_from_request(request)
g.event_data['anon_cookie_test'] = session['au']
# elif session['au'] == 'test': # Legacy test cookie, original request now lost
# g.anon_user = AnonUser()
# db.session.add(g.anon_user)
# g.esession = EventSession.new_from_request(request)
# g.esession.anon_user = g.anon_user
# db.session.add(g.esession)
# # We'll update session['au'] below after database commit
# elif unicode(session['au']).startswith('test-'): # Newer redis-backed test cookie
# # This client sent us back our test cookie, so set a real value now
# g.anon_user = AnonUser()
# db.session.add(g.anon_user)
# g.esession = EventSession.new_from_request(request)
# g.esession.anon_user = g.anon_user
# db.session.add(g.esession)
# g.esession.load_from_cache(session['au'], UserEvent)
# # We'll update session['au'] below after database commit
else:
anon_user = None # AnonUser.query.get(session['au'])
if not anon_user:
# XXX: We got a fake value? This shouldn't happen
g.event_data['anon_cookie_test'] = session['au']
session['au'] = u'test-' + unicode(uuid4()) # Try again
g.esession = EventSessionBase.new_from_request(request)
if g.user:
anon_user.user = g.user
g.anon_user = None
session.pop('au', None)
else:
g.anon_user = anon_user
else:
# the AnonUser record has been deleted for some reason,
# this should not happen.
session.pop('au')

# Prepare event session if it's not already present
if g.esession is None:
if 'es' in session and session['es'] is not None:
# it's in cache or the db, load it
if g.user or g.anon_user:
g.esession = EventSession.get_session(uuid=session['es'], user=g.user, anon_user=g.anon_user)
else:
g.anon_user = anon_user

# Prepare event session if it's not already present
if g.user or g.anon_user and not g.esession:
g.esession = EventSession.get_session(uuid=session.get('es'), user=g.user, anon_user=g.anon_user)
if g.esession:
session['es'] = g.esession.uuid

# Don't commit here. It flushes SQLAlchemy's session cache and forces
# fresh database hits. Let after_request commit. (Commented out 30-03-2016)
# db.session.commit()
g.db_commit_needed = True

if g.anon_user:
session['au'] = g.anon_user.id
session.permanent = True
if 'impressions' in session:
# Run this in the foreground since we need this later in the request for A/B display consistency.
# This is most likely being called from the UI-non-blocking sniffle.gif anyway.
g.esession = EventSessionBase.new_from_request(request)
g.esession.load_from_cache(session['es'], UserEvent)
else:
# create a new session
g.esession = EventSessionBase.new_from_request(request)
session['es'] = g.esession.uuid

if g.anon_user and 'impressions' in session:
# Run this in the foreground
# since we need this later in the request for A/B display consistency.
save_impressions(g.esession.id, session.pop('impressions').values(), now)

# We have a user, now look up everything else

if session.get('kiosk'):
g.kiosk = True
else:
Expand Down Expand Up @@ -192,8 +155,11 @@ def load_user_data(user):

@app.after_request
def record_views_and_events(response):
if hasattr(g, 'db_commit_needed') and g.db_commit_needed:
db.session.commit()
# if there were any transaction changes in load_user_data(), they'll get committed here.
# commit() is supposed to raise an error if no transaction exists,
# but the default behavior of the Session is that a transaction is always present;
# http://docs.sqlalchemy.org/en/latest/orm/session_basics.html#committing
db.session.commit()

# We had a few error reports with g.* variables missing in this function, so now
# we look again and make note if something is missing. We haven't encountered
Expand Down Expand Up @@ -301,7 +267,7 @@ def record_views_and_events(response):
jobpost_id=g.jobpost_viewed[0],
bgroup=g.jobpost_viewed[1])
else:
g.esession.save_to_cache(session['au'])
g.esession.save_to_cache(session['es'])
if g.impressions:
# Save impressions to user's cookie session to write to db later
session['impressions'] = g.impressions
Expand Down
1 change: 0 additions & 1 deletion hasjob/views/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,6 @@ def index(basequery=None, filters={}, md5sum=None, tag=None, domain=None, locati
BoardJobPost.board == g.board, JobPost.state.LISTED).options(
db.load_only('jobpost_id', 'pinned')).all()
}

else:
board_jobs = {}

Expand Down