Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Improve perf random logo search #1397

Merged
merged 3 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions migrations/007_add_logo_annotation_server_type_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import peewee as pw
from peewee_migrate import Migrator


def migrate(migrator: Migrator, database: pw.Database, *, fake=False):
migrator.sql(
"CREATE INDEX CONCURRENTLY IF NOT EXISTS logo_annotation_server_type ON logo_annotation (server_type)"
)


def rollback(migrator: Migrator, database: pw.Database, *, fake=False):
migrator.sql("DROP INDEX IF EXISTS logo_annotation_server_type")
41 changes: 21 additions & 20 deletions robotoff/app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,38 +1217,39 @@
es_client = get_es_client()

if logo_id is None:
logo_embeddings = list(
LogoEmbedding.select()
.join(LogoAnnotation)
.join(ImagePrediction)
.join(ImageModel)
.where(
ImageModel.server_type == server_type.name,
# Don't include logos from deleted images
ImageModel.deleted == False, # noqa
)
.order_by(peewee.fn.Random())
.limit(1)
)

if not logo_embeddings:
# To fetch a random logo that has an embedding, we use
# TABLESAMPLE SYSTEM. The parameter in parentheses is the
# percentage of rows to sample.
# Here, we sample 20% of the rows in the logo_embedding table.
# See https://www.postgresql.org/docs/current/sql-select.html
# for more information.
result = db.execute_sql(

Check warning on line 1226 in robotoff/app/api.py

View check run for this annotation

Codecov / codecov/patch

robotoff/app/api.py#L1226

Added line #L1226 was not covered by tests
"""
SELECT logo_id, embedding
FROM embedding.logo_embedding as t1 TABLESAMPLE SYSTEM (20)
JOIN logo_annotation AS t2 ON t1.logo_id = t2.id
WHERE t2.server_type = %s
LIMIT 1;
""",
(server_type.name,),
).fetchone()

if not result:

Check warning on line 1237 in robotoff/app/api.py

View check run for this annotation

Codecov / codecov/patch

robotoff/app/api.py#L1237

Added line #L1237 was not covered by tests
resp.media = {"results": [], "count": 0, "query_logo_id": None}
return

logo_embedding = logo_embeddings[0]
logo_id = logo_embedding.logo_id
logo_id, embedding = result

Check warning on line 1241 in robotoff/app/api.py

View check run for this annotation

Codecov / codecov/patch

robotoff/app/api.py#L1241

Added line #L1241 was not covered by tests
else:
logo_embedding = LogoEmbedding.get_or_none(logo_id=logo_id)

if logo_embedding is None:
resp.status = falcon.HTTP_404
return
embedding = logo_embedding.embedding

Check warning on line 1248 in robotoff/app/api.py

View check run for this annotation

Codecov / codecov/patch

robotoff/app/api.py#L1248

Added line #L1248 was not covered by tests

raw_results = [
item
for item in knn_search(
es_client, logo_embedding.embedding, count, server_type=server_type
)
for item in knn_search(es_client, embedding, count, server_type=server_type)
if item[0] != logo_id
][:count]
results = [{"logo_id": item[0], "distance": item[1]} for item in raw_results]
Expand Down
1 change: 1 addition & 0 deletions robotoff/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@
seen = set(int(x) for x in text_file_iter(existing_ids_path))
else:
seen = get_stored_logo_ids(es_client)
logger.info("Number of existing logos: %d", len(seen))

Check warning on line 657 in robotoff/cli/main.py

View check run for this annotation

Codecov / codecov/patch

robotoff/cli/main.py#L657

Added line #L657 was not covered by tests

added = 0

Expand Down
Loading