Upgrade to Python 3.8 #145

Closed · wants to merge 8 commits

2 changes: 0 additions & 2 deletions .gitignore
@@ -10,7 +10,6 @@ db.sqlite3
__pycache__
.DS_Store
tags
api/migrations/
api/places365/model/
Conv2d.patch
Linear.patch
@@ -31,7 +30,6 @@ api/places365/*.tar.gz
*.db
media*
*.log
api/migrations_dev
protected_media
.vscode
.coverage
22 changes: 13 additions & 9 deletions Dockerfile
@@ -1,4 +1,4 @@
FROM ubuntu:16.04
FROM ubuntu:20.10
MAINTAINER Hooram Nam <[email protected]>

ENV MAPZEN_API_KEY mapzen-XXXX
@@ -13,14 +13,14 @@ RUN apt-get update && \
libxrender-dev \
wget \
curl \
nginx
nginx \
cmake

RUN apt-get install -y bzip2


RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
RUN bash Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda
# RUN apt-get install libopenblas-dev liblapack-dev

RUN /miniconda/bin/conda install -y faiss-cpu -c pytorch
RUN /miniconda/bin/conda install -y cython

@@ -33,19 +33,24 @@ RUN apt-get update && \
cmake .. -DDLIB_USE_CUDA=0 -DUSE_AVX_INSTRUCTIONS=0 && \
cmake --build . && \
cd /dlib && \
/miniconda/bin/python setup.py install --no USE_AVX_INSTRUCTIONS --no DLIB_USE_CUDA
/miniconda/bin/python setup.py install --no USE_AVX_INSTRUCT


RUN /miniconda/bin/conda install -y pytorch=0.4.1 -c pytorch
# RUN /venv/bin/pip install http://download.pytorch.org/whl/cpu/torch-0.4.1-cp35-cp35m-linux_x86_64.whl && /venv/bin/pip install torchvision
RUN /miniconda/bin/conda install -y pytorch torchvision torchaudio cpuonly -c pytorch
RUN /miniconda/bin/conda install -y psycopg2

RUN /miniconda/bin/conda install -y numpy -c pytorch
RUN /miniconda/bin/conda install -y pandas -c pytorch
RUN /miniconda/bin/conda install -y scikit-learn -c pytorch
RUN /miniconda/bin/conda install -y scikit-image -c pytorch

RUN mkdir /code
WORKDIR /code
COPY requirements.txt /code/
RUN /miniconda/bin/pip install -r requirements.txt

RUN /miniconda/bin/python -m spacy download en_core_web_sm

RUN apt-get install -y libgl1-mesa-glx
WORKDIR /code/api/places365
RUN wget https://s3.eu-central-1.amazonaws.com/ownphotos-deploy/places365_model.tar.gz
RUN tar xf places365_model.tar.gz
@@ -93,7 +98,6 @@ ENV TIME_ZONE UTC
EXPOSE 80
COPY . /code


RUN mv /code/config_docker.py /code/config.py

WORKDIR /code
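The base image moves from ubuntu:16.04 to ubuntu:20.10, and the pinned pytorch=0.4.1 is replaced by current pytorch/torchvision/torchaudio from the cpuonly channel. A quick way to sanity-check the built image is a throwaway snippet like the one below (a minimal sketch, not part of this PR; it assumes only that the conda installs above succeeded):

    # check_env.py -- run as /miniconda/bin/python check_env.py inside the image
    import sys
    import torch
    import torchvision

    print(sys.version)                       # expect Python 3.8+ from Miniconda
    print(torch.__version__, torchvision.__version__)
    assert not torch.cuda.is_available()     # cpuonly build exposes no CUDA devices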
14 changes: 3 additions & 11 deletions api/autoalbum.py
@@ -9,7 +9,7 @@
import os
import shutil
import numpy as np

import uuid
import ipdb

from django_rq import job
@@ -20,9 +20,7 @@
import pytz

@job
def regenerate_event_titles(user):
job_id = rq.get_current_job().id

def regenerate_event_titles(user,job_id):
if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.now().replace(tzinfo=pytz.utc)
@@ -35,11 +33,7 @@ def regenerate_event_titles(user):
started_at=datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_GENERATE_AUTO_ALBUM_TITLES)
lrj.save()



try:

aus = AlbumAuto.objects.filter(owner=user).prefetch_related('photos')
target_count = len(aus)
for idx,au in enumerate(aus):
@@ -74,9 +68,7 @@ def regenerate_event_titles(user):


@job
def generate_event_albums(user):
job_id = rq.get_current_job().id

def generate_event_albums(user, job_id):
if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.now().replace(tzinfo=pytz.utc)
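Note the pattern change: regenerate_event_titles and generate_event_albums now take job_id as an explicit argument instead of reading it from rq.get_current_job() inside the worker, and the new import uuid suggests callers mint the id themselves. A hedged sketch of what the calling side presumably looks like (the view code is not part of this diff, so the call below is illustrative):

    # Hypothetical caller; django_rq's @job decorator provides .delay()
    import uuid
    from api.autoalbum import generate_event_albums

    job_id = uuid.uuid4()   # same id is used for LongRunningJob bookkeeping
    generate_event_albums.delay(user, job_id)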
98 changes: 23 additions & 75 deletions api/directory_watcher.py
@@ -2,44 +2,18 @@
import datetime
import hashlib
import pytz
import time
import traceback
from joblib import Parallel, delayed
import multiprocessing

from api.models import (Photo, Person, LongRunningJob)

from tqdm import tqdm
from config import image_dirs
from api.models import (Photo, LongRunningJob)

import api.util as util
from api.image_similarity import build_image_similarity_index

import ipdb
from django_rq import job
import time
import numpy as np
import rq


from django.db.models import Q
import json


def is_new_image(existing_hashes, image_path):
hash_md5 = hashlib.md5()
with open(image_path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
image_hash = hash_md5.hexdigest()
if image_hash not in existing_hashes or (
not Photo.objects.filter(image_path=image_path).exists()):
return image_path
return


def handle_new_image(user, image_path, job_id):
if image_path.lower().endswith('.jpg'):

if image_path.lower().endswith('.jpg') or image_path.lower().endswith('.jpeg') :
try:
elapsed_times = {
'md5':None,
@@ -81,69 +55,53 @@ def handle_new_image(user, image_path, job_id):
added_on=datetime.datetime.now().replace(tzinfo=pytz.utc),
geolocation_json={})
#photo._generate_md5()



start = datetime.datetime.now()
photo._generate_thumbnail()
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['thumbnails'] = elapsed
# util.logger.info('thumbnail get took %.2f' % elapsed)
util.logger.info('thumbnail get took %.2f' % elapsed)

start = datetime.datetime.now()
photo._generate_captions()
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['captions'] = elapsed
# util.logger.info('caption generation took %.2f' % elapsed)
util.logger.info('caption generation took %.2f' % elapsed)

# start = datetime.datetime.now()
# photo._save_image_to_db()
# elapsed = (datetime.datetime.now() - start).total_seconds()
# elapsed_times['image_save'] = elapsed
# util.logger.info('image save took %.2f' % elapsed)
util.logger.info('image save took %.2f' % elapsed)

start = datetime.datetime.now()
photo._extract_exif()
photo.save()
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['exif'] = elapsed
# util.logger.info('exif extraction took %.2f' % elapsed)

start = datetime.datetime.now()
util.logger.info('add to AlbumPlace took %.2f' % elapsed)
photo._geolocate_mapbox()
photo.save()
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['geolocation'] = elapsed
# util.logger.info('geolocation took %.2f' % elapsed)

start = datetime.datetime.now()
photo._add_to_album_place()
photo.save()
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['album_place'] = elapsed
# util.logger.info('add to AlbumPlace took %.2f' % elapsed)

start = datetime.datetime.now()
photo._extract_faces()
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['faces'] = elapsed
# util.logger.info('face extraction took %.2f' % elapsed)
util.logger.info('face extraction took %.2f' % elapsed)

start = datetime.datetime.now()
photo._add_to_album_date()
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['album_date'] = elapsed
# util.logger.info('adding to AlbumDate took %.2f' % elapsed)
util.logger.info('adding to AlbumDate took %.2f' % elapsed)

start = datetime.datetime.now()
photo._add_to_album_thing()
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['album_thing'] = elapsed
# util.logger.info('adding to AlbumThing took %.2f' % elapsed)
util.logger.info('adding to AlbumThing took %.2f' % elapsed)

start = datetime.datetime.now()
photo._im2vec()
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['im2vec'] = elapsed
# util.logger.info('im2vec took %.2f' % elapsed)
util.logger.info('im2vec took %.2f' % elapsed)

util.logger.info("job {}: image processed: {}, elapsed: {}".format(job_id,img_abs_path,json.dumps(elapsed_times)))

@@ -154,17 +112,15 @@ def handle_new_image(user, image_path, job_id):

except Exception as e:
try:
util.logger.error("job {}: could not load image {}. reason: {}".format(
util.logger.exception("job {}: could not load image {}. reason: {}".format(
job_id,image_path, str(e)))
except:
util.logger.error("job {}: could not load image {}".format(job_id,image_path))
return

util.logger.exception("job {}: could not load image {}".format(job_id,image_path))


#job is currently not used, because the model.eval() doesn't execute when it is running as a job
@job
def scan_photos(user):
job_id = rq.get_current_job().id

def scan_photos(user, job_id):
if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.datetime.now().replace(tzinfo=pytz.utc)
@@ -178,9 +134,6 @@ def scan_photos(user):
job_type=LongRunningJob.JOB_SCAN_PHOTOS)
lrj.save()




added_photo_count = 0
already_existing_photo = 0

@@ -194,7 +147,7 @@

image_paths = [
p for p in image_paths
if p.lower().endswith('.jpg') and 'thumb' not in p.lower()
if (p.lower().endswith('.jpg') or p.lower().endswith('.jpeg')) and 'thumb' not in p.lower()
]
image_paths.sort()

@@ -216,26 +169,21 @@
}
}
lrj.save()
'''
image_paths_to_add = Parallel(n_jobs=multiprocessing.cpu_count(), backend="multiprocessing")(delayed(is_new_image)(existing_hashes, image_path) for image_path in tqdm(image_paths))
image_paths_to_add = filter(None, image_paths_to_add)
Parallel(n_jobs=multiprocessing.cpu_count(), backend="multiprocessing")(delayed(handle_new_image)(user, image_path) for image_path in tqdm(image_paths_to_add))
'''

util.logger.info("Added {} photos".format(len(image_paths_to_add)))
build_image_similarity_index(user)

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.finished = True
lrj.finished_at = datetime.datetime.now().replace(tzinfo=pytz.utc)
prev_result = lrj.result
next_result = prev_result
next_result['new_photo_count'] = added_photo_count
lrj.result = next_result
lrj.save()
except Exception as e:
util.logger.error(str(e))
lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
except Exception:
util.logger.exception("An error occured:")
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.finished = True
lrj.failed = True
lrj.finished_at = datetime.datetime.now().replace(tzinfo=pytz.utc)
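Beyond the same job_id refactor as in autoalbum.py, the scanner now accepts .jpeg alongside .jpg. One small aside: str.endswith takes a tuple of suffixes, so the chained checks could be written more compactly (an equivalent alternative, not what the PR actually does):

    # Equivalent to the two chained endswith() calls in this diff
    image_paths = [
        p for p in image_paths
        if p.lower().endswith(('.jpg', '.jpeg')) and 'thumb' not in p.lower()
    ]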
41 changes: 5 additions & 36 deletions api/face_classify.py
@@ -3,26 +3,12 @@
from api.models import LongRunningJob
from api.util import logger

import base64
import pickle
import itertools
import ipdb

from scipy import linalg
from sklearn.decomposition import PCA
import numpy as np
from sklearn import cluster
from sklearn import mixture
from scipy.spatial import distance
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn import svm
from sklearn.manifold import TSNE

import seaborn as sns
from django_rq import job
import rq
import pytz

import datetime
@@ -69,9 +55,7 @@ def cluster_faces(user):


@job
def train_faces(user):
job_id = rq.get_current_job().id

def train_faces(user, job_id):
if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.datetime.now().replace(tzinfo=pytz.utc)
@@ -125,25 +109,13 @@ def train_faces(user, job_id):
[f['encoding'] for f in id2face_known.values()])
person_names_known = np.array(
[f['person_name'] for f in id2face_known.values()])

n_clusters = len(set(person_names_known.tolist()))

# clf = SGDClassifier(loss='log',penalty='l2')
logger.info("Before fitting")
clf = MLPClassifier(
solver='adam', alpha=1e-5, random_state=1, max_iter=1000)
# clf = svm.SVC(kernel='linear')
# scaler = StandardScaler()
# scaler.fit(face_encodings_all)
# X = scaler.transform(face_encodings_known)
X = face_encodings_known
Y = person_names_known
clf.fit(X, person_names_known)

solver='adam', alpha=1e-5, random_state=1, max_iter=1000).fit(face_encodings_known, person_names_known)
logger.info("After fitting")
face_encodings_unknown = np.array(
[f['encoding'] for f in id2face_unknown.values()])
face_paths_unknown = [
f['image_path'] for f in id2face_unknown.values()
]

face_ids_unknown = [f['id'] for f in id2face_unknown.values()]
pred = clf.predict(face_encodings_unknown)
probs = np.max(clf.predict_proba(face_encodings_unknown), 1)
@@ -166,9 +138,6 @@ def train_faces(user, job_id):
}
lrj.save()

# res = cluster_faces()
# print(res)

lrj.finished = True
lrj.failed = False
lrj.finished_at = datetime.datetime.now()
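train_faces now constructs and fits the MLPClassifier in a single chained call, dropping the unused SGD/SVM/scaler experiments. Stripped of the Django models, the classification step is a plain scikit-learn fit/predict flow; the toy arrays below stand in for the 128-dimensional face encodings and person names (a self-contained sketch, not the PR's code):

    import numpy as np
    from sklearn.neural_network import MLPClassifier

    rng = np.random.default_rng(0)
    X_known = rng.random((10, 128))          # stand-in for labeled face encodings
    y_known = ['alice'] * 5 + ['bob'] * 5    # stand-in for person names

    clf = MLPClassifier(solver='adam', alpha=1e-5, random_state=1,
                        max_iter=1000).fit(X_known, y_known)

    X_unknown = rng.random((3, 128))                      # unlabeled faces
    pred = clf.predict(X_unknown)                         # best-guess person per face
    probs = np.max(clf.predict_proba(X_unknown), axis=1)  # confidence of each guess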