From 7551e4780f52cbe1fd8cd020b1a95163fc7acba3 Mon Sep 17 00:00:00 2001
From: Ayush Shukla
Date: Tue, 21 Apr 2026 15:00:54 +0530
Subject: [PATCH] perf(backend): optimize Vote initialization using bulk_save_objects (#518)

Build Vote rows from plain ids (user_id / round_entry_id) and persist
them with a single bulk_save_objects() call in create_initial_tasks().

Also add a nullable, indexed Entry.file_id column, expose it in the
details and export dicts, and let add_entries() match existing entries
by file_id before falling back to the name, renaming the stored entry
or backfilling its file_id as needed.
---
Review notes (text in this section is dropped by `git am`):
- Line breaks and indentation were reconstructed from a copy of this
  patch whose whitespace had been collapsed; check the context lines
  against montage/rdb.py before applying. The blob ids on the `index`
  line are those of the original patch.
- NOTE(review): votes built from bare ids are persisted only by the
  bulk_save_objects() call in create_initial_tasks(); confirm that no
  other caller of create_ranking_tasks() / create_initial_rating_tasks()
  relied on the votes reaching the session through the user /
  round_entry relationships.
- NOTE(review): Entry.file_id is a new column; existing databases
  presumably need a schema migration -- confirm.

 montage/rdb.py | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/montage/rdb.py b/montage/rdb.py
index b7052ff6..88a0baf5 100644
--- a/montage/rdb.py
+++ b/montage/rdb.py
@@ -18,6 +18,7 @@
                         Column,
                         String,
                         Integer,
+                        BigInteger,
                         Float,
                         Boolean,
                         DateTime,
@@ -561,6 +562,7 @@ class Entry(Base):
     upload_user_id = Column(Integer, index=True)
     upload_user_text = Column(String(255), index=True)
     upload_date = Column(DateTime, index=True)
+    file_id = Column(BigInteger, nullable=True, index=True)
 
     # TODO: img_sha1/page_touched for updates?
     create_date = Column(TIMESTAMP, server_default=func.now())
@@ -586,7 +588,8 @@ def to_details_dict(self, **kw):
                     'url': make_mw_img_url(self.name),
                     'url_sm': make_mw_img_url(self.name, size='small'),
                     'url_med': make_mw_img_url(self.name, size='medium'),
-                    'resolution': self.resolution})
+                    'resolution': self.resolution,
+                    'file_id': self.file_id})
         if with_uploader:
             ret['upload_user_text'] = self.upload_user_text
         return ret
@@ -600,7 +603,8 @@ def to_export_dict(self):
                'img_height': self.height,
                'img_user': self.upload_user_id,
                'img_user_text': self.upload_user_text,
-               'img_timestamp': format_date(self.upload_date)}
+               'img_timestamp': format_date(self.upload_date),
+               'file_id': self.file_id}
         return ret
 
 
@@ -1196,6 +1200,17 @@ def get_entry_name_map(self, filenames):
             ret[name] = entry
         return ret
 
+    def get_entry_file_id_map(self, file_ids):
+        """Return a dict mapping file_id to the stored Entry with that id."""
+        file_ids = list(file_ids)
+        if not file_ids:
+            # Nothing to look up: skip the query instead of sending IN ().
+            return {}
+        entries = self.query(Entry)\
+                      .filter(Entry.file_id.in_(file_ids))\
+                      .all()
+        return {entry.file_id: entry for entry in entries}
+
     def get_grouped_flags(self, round_id):
         flagged_entries = (self.query(RoundEntry)
                            .filter_by(round_id=round_id)
@@ -1637,10 +1652,29 @@ def add_entries(self, rnd, entries):
 
         for entry_chunk in entry_chunks:
             entry_names = [to_unicode(e.name) for e in entry_chunk]
-            db_entries = self.get_entry_name_map(entry_names)
+            entry_file_ids = [e.file_id for e in entry_chunk if e.file_id]
+
+            db_entries_by_name = self.get_entry_name_map(entry_names)
+            db_entries_by_file_id = self.get_entry_file_id_map(entry_file_ids)
 
             for entry in entry_chunk:
-                db_entry = db_entries.get(to_unicode(entry.name))
+                entry_name = to_unicode(entry.name)
+                db_entry = None
+
+                # 1. Try file_id match (stable identity)
+                if entry.file_id:
+                    db_entry = db_entries_by_file_id.get(entry.file_id)
+                    if db_entry and to_unicode(db_entry.name) != entry_name:
+                        # Rename detected: update the existing entry's name.
+                        db_entry.name = entry_name
+
+                # 2. Try name match (fallback / backfill)
+                if not db_entry:
+                    db_entry = db_entries_by_name.get(entry_name)
+                    if db_entry and entry.file_id and not db_entry.file_id:
+                        # Backfill file_id on the existing name-matched entry.
+                        db_entry.file_id = entry.file_id
+
                 if db_entry:
                     entry = db_entry
                 else:
@@ -2931,6 +2965,11 @@ def create_initial_tasks(rdb_session, rnd):
         ret = create_ranking_tasks(rdb_session, rnd)
     else:
         raise ValueError('invalid round vote method: %r' % rnd.vote_method)
+
+    if ret:
+        # The votes are built from bare ids rather than related objects,
+        # so they are written here explicitly, in one batch.
+        rdb_session.bulk_save_objects(ret, return_defaults=False)
     return ret
 
 
@@ -2960,7 +2999,7 @@ def create_ranking_tasks(rdb_session, rnd, jurors=None):
 
     for juror in jurors:
         for entry in shuffled_entries:
-            vote = Vote(user=juror, round_entry=entry, status=ACTIVE_STATUS)
+            vote = Vote(user_id=juror.id, round_entry_id=entry.id, status=ACTIVE_STATUS)
             ret.append(vote)
 
     return ret
@@ -3018,7 +3057,6 @@ def create_initial_rating_tasks(rdb_session, rnd, tasks_per_entry=None):
                 break
 
-            # TODO: bulk_save_objects
-            vote = Vote(user=juror, round_entry=entry, status=ACTIVE_STATUS)
+            vote = Vote(user_id=juror.id, round_entry_id=entry.id, status=ACTIVE_STATUS)
             ret.append(vote)
 
     return ret