-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18493 from jdavcs/dev_user_db_fields
Add unique constraints to the email and username fields in the galaxy_user table
- Loading branch information
Showing
19 changed files
with
462 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
...laxy/model/migrations/alembic/versions_gxy/1cf595475b58_email_column_unique_constraint.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
"""Email column unique constraint | ||
Revision ID: 1cf595475b58 | ||
Revises: d619fdfa6168 | ||
Create Date: 2024-07-03 19:53:22.443016 | ||
""" | ||
|
||
from alembic import op | ||
|
||
from galaxy.model.database_object_names import build_index_name | ||
from galaxy.model.migrations.data_fixes.user_table_fixer import EmailDeduplicator | ||
from galaxy.model.migrations.util import ( | ||
create_index, | ||
drop_index, | ||
index_exists, | ||
transaction, | ||
) | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "1cf595475b58" | ||
down_revision = "d619fdfa6168" | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
table_name = "galaxy_user" | ||
column_name = "email" | ||
index_name = build_index_name(table_name, [column_name]) | ||
|
||
|
||
def upgrade(): | ||
with transaction(): | ||
_fix_duplicate_emails() | ||
# Existing databases may have an existing index we no longer need | ||
# New databases will not have that index, so we must check. | ||
if index_exists(index_name, table_name, False): | ||
drop_index(index_name, table_name) | ||
# Create a UNIQUE index | ||
create_index(index_name, table_name, [column_name], unique=True) | ||
|
||
|
||
def downgrade(): | ||
with transaction(): | ||
drop_index(index_name, table_name) | ||
# Restore a non-unique index | ||
create_index(index_name, table_name, [column_name]) | ||
|
||
|
||
def _fix_duplicate_emails(): | ||
"""Fix records with duplicate usernames""" | ||
connection = op.get_bind() | ||
EmailDeduplicator(connection).run() |
51 changes: 51 additions & 0 deletions
51
...y/model/migrations/alembic/versions_gxy/d619fdfa6168_username_column_unique_constraint.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
"""Username column unique constraint | ||
Revision ID: d619fdfa6168 | ||
Revises: d2d8f51ebb7e | ||
Create Date: 2024-07-02 13:13:10.325586 | ||
""" | ||
|
||
from alembic import op | ||
|
||
from galaxy.model.database_object_names import build_index_name | ||
from galaxy.model.migrations.data_fixes.user_table_fixer import UsernameDeduplicator | ||
from galaxy.model.migrations.util import ( | ||
create_index, | ||
drop_index, | ||
index_exists, | ||
transaction, | ||
) | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "d619fdfa6168" | ||
down_revision = "d2d8f51ebb7e" | ||
branch_labels = None | ||
depends_on = None | ||
|
||
table_name = "galaxy_user" | ||
column_name = "username" | ||
index_name = build_index_name(table_name, [column_name]) | ||
|
||
|
||
def upgrade(): | ||
with transaction(): | ||
_fix_duplicate_usernames() | ||
# Existing databases may have an existing index we no longer need | ||
# New databases will not have that index, so we must check. | ||
if index_exists(index_name, table_name, False): | ||
drop_index(index_name, table_name) | ||
# Create a UNIQUE index | ||
create_index(index_name, table_name, [column_name], unique=True) | ||
|
||
|
||
def downgrade(): | ||
with transaction(): | ||
drop_index(index_name, table_name) | ||
# Restore a non-unique index | ||
create_index(index_name, table_name, [column_name]) | ||
|
||
|
||
def _fix_duplicate_usernames(): | ||
"""Fix records with duplicate usernames""" | ||
connection = op.get_bind() | ||
UsernameDeduplicator(connection).run() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
""" | ||
Package contains code for fixing inconsistent data in the database that must be | ||
run together with a migration script. | ||
""" |
112 changes: 112 additions & 0 deletions
112
lib/galaxy/model/migrations/data_fixes/user_table_fixer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import uuid | ||
|
||
from sqlalchemy import ( | ||
func, | ||
Result, | ||
select, | ||
text, | ||
update, | ||
) | ||
|
||
from galaxy.model import User | ||
|
||
|
||
class UsernameDeduplicator: | ||
|
||
def __init__(self, connection): | ||
self.connection = connection | ||
|
||
def run(self): | ||
""" | ||
Deduplicate usernames by generating a unique value for all duplicates, keeping | ||
the username of the most recently created user unchanged. | ||
Records updated with the generated value are marked as deleted. | ||
""" | ||
duplicates = self._get_duplicate_username_data() | ||
prev_username = None | ||
for id, username, _ in duplicates: | ||
if username == prev_username: | ||
new_username = self._generate_next_available_username(username) | ||
stmt = update(User).where(User.id == id).values(username=new_username, deleted=True) | ||
self.connection.execute(stmt) | ||
else: | ||
prev_username = username | ||
|
||
def _get_duplicate_username_data(self) -> Result: | ||
# Duplicate usernames | ||
duplicates_stmt = select(User.username).group_by(User.username).having(func.count() > 1) | ||
# User data for records with duplicate usernames (ordering: newest to oldest) | ||
stmt = ( | ||
select(User.id, User.username, User.create_time) | ||
.where(User.username.in_(duplicates_stmt)) | ||
.order_by(User.username, User.create_time.desc()) | ||
) | ||
return self.connection.execute(stmt) | ||
|
||
def _generate_next_available_username(self, username): | ||
i = 1 | ||
while self.connection.execute(select(User).where(User.username == f"{username}-{i}")).first(): | ||
i += 1 | ||
return f"{username}-{i}" | ||
|
||
|
||
class EmailDeduplicator: | ||
|
||
def __init__(self, connection): | ||
self.connection = connection | ||
|
||
def run(self): | ||
""" | ||
Deduplicate user emails by generating a unique value for all duplicates, keeping | ||
the email of the most recently created user that has one or more history unchanged. | ||
If such a user does not exist, keep the oldest user. | ||
Records updated with the generated value are marked as deleted (we presume them | ||
to be invalid, and the user should not be able to login). | ||
""" | ||
stmt = select(User.email).group_by(User.email).having(func.count() > 1) | ||
duplicate_emails = self.connection.scalars(stmt) | ||
for email in duplicate_emails: | ||
users = self._get_users_with_same_email(email) | ||
user_with_history = self._find_oldest_user_with_history(users) | ||
duplicates = self._get_users_to_deduplicate(users, user_with_history) | ||
self._deduplicate_users(email, duplicates) | ||
|
||
def _get_users_with_same_email(self, email: str): | ||
sql = text( | ||
""" | ||
SELECT u.id, EXISTS(SELECT h.id FROM history h WHERE h.user_id = u.id) | ||
FROM galaxy_user u | ||
WHERE u.email = :email | ||
ORDER BY u.create_time | ||
""" | ||
) | ||
params = {"email": email} | ||
return self.connection.execute(sql, params).all() | ||
|
||
def _find_oldest_user_with_history(self, users): | ||
for user_id, exists in users: | ||
if exists: | ||
return user_id | ||
return None | ||
|
||
def _get_users_to_deduplicate(self, users, user_with_history): | ||
if user_with_history: | ||
# Preserve the oldest user with a history | ||
return [user_id for user_id, _ in users if user_id != user_with_history] | ||
else: | ||
# Preserve the oldest user | ||
return [user_id for user_id, _ in users[1:]] | ||
|
||
def _deduplicate_users(self, email, to_deduplicate): | ||
for id in to_deduplicate: | ||
new_email = self._generate_replacement_for_duplicate_email(email) | ||
stmt = update(User).where(User.id == id).values(email=new_email, deleted=True) | ||
self.connection.execute(stmt) | ||
|
||
def _generate_replacement_for_duplicate_email(self, email: str) -> str: | ||
""" | ||
Generate a replacement for a duplicate email value. The new value consists of the original | ||
email and a unique suffix. Since the original email is part of the new value, it will be | ||
possible to retrieve the user record based on this value, if needed. | ||
""" | ||
return f"{email}-{uuid.uuid4()}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import random | ||
import string | ||
|
||
|
||
def random_str() -> str: | ||
alphabet = string.ascii_lowercase + string.digits | ||
size = random.randint(5, 10) | ||
return "".join(random.choices(alphabet, k=size)) | ||
|
||
|
||
def random_email() -> str: | ||
text = random_str() | ||
return f"{text}@galaxy.testing" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
PRIVATE_OBJECT_STORE_ID = "my_private_data" | ||
|
||
|
||
class MockObjectStore: | ||
|
||
def is_private(self, object): | ||
if object.object_store_id == PRIVATE_OBJECT_STORE_ID: | ||
return True | ||
else: | ||
return False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.