From fa1d6e7ec39c3ed06e8a5ad38e1f0f2fd8340de2 Mon Sep 17 00:00:00 2001
From: Ewout Verlinde
Date: Thu, 11 Apr 2024 09:47:29 +0200
Subject: [PATCH] Deployment: seeders (#291)

* fix: glob imports
* chore: linting
* Update README.md
* fix: better glob indexing
* Update deployement.yml
* chore: seeding config
* fix: seeder
* chore: linting
* Glob import fix (#284)
* fix: glob imports
* chore: linting
* Glob import fix (#285)
* fix: glob imports
* chore: linting
* fix: better glob indexing
* Glob import fix (#286)
* fix: glob imports
* chore: linting
* fix: better glob indexing
* chore: seeding config
* Glob import fix (#287)
* fix: glob imports
* chore: linting
* fix: better glob indexing
* chore: seeding config
* fix: seeder
* chore: native SQL seeder queries
* chore: removed db files
* chore: some small cleanup

---------

Co-authored-by: tyboro2002
Co-authored-by: Tybo Verslype <97916632+tyboro2002@users.noreply.github.com>
---
 .github/workflows/deployement.yml             |   2 +-
 README.md                                     |   2 +-
 backend/.gitignore                            |   2 +-
 .../api/management/commands/seed_db_new.py    | 135 +++++++
 backend/api/seeders/faker.py                  |  25 ++
 backend/api/seeders/providers/__init__.py     |   0
 .../api/seeders/providers/fake_provider.py    |  88 +++++
 .../api/seeders/providers/real_provider.py    |  82 +++++
 backend/api/seeders/seeder.py                 | 314 +++++++++++++++++
 backend/ypovoli/settings.py                   |   3 +-
 10 files changed, 649 insertions(+), 4 deletions(-)
 create mode 100644 backend/api/management/commands/seed_db_new.py
 create mode 100644 backend/api/seeders/faker.py
 create mode 100644 backend/api/seeders/providers/__init__.py
 create mode 100644 backend/api/seeders/providers/fake_provider.py
 create mode 100644 backend/api/seeders/providers/real_provider.py
 create mode 100644 backend/api/seeders/seeder.py

diff --git a/.github/workflows/deployement.yml b/.github/workflows/deployement.yml
index eb61c1ac..e3f0e3ce 100644
--- a/.github/workflows/deployement.yml
+++ b/.github/workflows/deployement.yml
@@ -1,4 +1,4 @@
-name: Deploy
+name: deploy
 on:
   push:
     branches:
diff --git a/README.md b/README.md
index 4c81c019..d8869a27 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Ypovoli
 
 ![backend linting](https://github.com/SELab-2/UGent-7/actions/workflows/backend-linting.yaml/badge.svg)
-![backend tests](https://github.com/SELab-2/UGent-7/actions/workflows/backend-tests.yaml/badge.svg)
+![backend tests](https://github.com/SELab-2/UGent-7/actions/workflows/tests.yaml/badge.svg)
 
 This application was developed within the framework of the course "Software Engineering Lab 2" within the Computer Science program at Ghent University.
 
diff --git a/backend/.gitignore b/backend/.gitignore
index edbe73cb..eefa3cf2 100644
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -1,6 +1,6 @@
 .venv
 .idea
 staticfiles
-db.sqlite3
+db.sqlite3*
 __pycache__
 *.mo
\ No newline at end of file
diff --git a/backend/api/management/commands/seed_db_new.py b/backend/api/management/commands/seed_db_new.py
new file mode 100644
index 00000000..64c07536
--- /dev/null
+++ b/backend/api/management/commands/seed_db_new.py
@@ -0,0 +1,135 @@
+from random import choices, randint
+from django.core.management.base import BaseCommand
+from authentication.models import User
+from api.seeders.faker import faker
+from api.seeders.seeder import seed_students, seed_assistants, seed_teachers, seed_courses, seed_projects, seed_groups, \
+    seed_submissions, format_time
+from api.models.course import Course
+from api.models.group import Group
+from api.models.project import Project
+from api.models.teacher import Teacher
+from api.models.student import Student
+from api.models.assistant import Assistant
+from api.models.submission import Submission
+
+import time
+
+
+def _seed_users(num: int = 500, student_prob: int = 70, assistant_prob: int = 20, teacher_prob: int = 10):
+    """Recreate `num` users through the ORM and give each a weighted random role and one or two faculties"""
+    fake = faker()
+    User.objects.all().delete()
+
+    users: list[User] = User.objects.bulk_create(
+        [fake.fake_user(fake, user_id) for user_id in range(num)]
+    )
+
+    roles = choices(['student'] * student_prob + ['assistant'] * assistant_prob + ['teacher'] * teacher_prob, k=num)
+
+    for user, role in zip(users, roles):
+        if role == 'assistant':
+            Assistant.create(user)
+        elif role == 'teacher':
+            Teacher.create(user)
+        elif role == 'student':
+            Student.create(user, student_id=user.id)
+
+        user.faculties.add(
+            *[fake.real_faculty() for _ in range(randint(1, 2))]
+        )
+
+
+def _seed_courses(num: int = 200):
+    """Recreate `num` courses through the ORM and attach random students, teachers and assistants to each"""
+    fake = faker()
+    Course.objects.all().delete()
+
+    courses: list[Course] = Course.objects.bulk_create(
+        [fake.fake_course(fake) for _ in range(num)]
+    )
+
+    for course in courses:
+        course.students.add(
+            *[fake.real_student() for _ in range(randint(10, 100))]
+        )
+        course.teachers.add(
+            *[fake.real_teacher() for _ in range(randint(1, 3))]
+        )
+        course.assistants.add(
+            *[fake.real_assistant() for _ in range(randint(2, 5))]
+        )
+
+
+def _seed_projects(num: int = 1_000):
+    """Recreate `num` projects through the ORM"""
+    fake = faker()
+    Project.objects.all().delete()
+
+    Project.objects.bulk_create(
+        [fake.fake_project(fake) for _ in range(num)]
+    )
+
+
+def _seed_groups(num: int = 3_000):
+    """Recreate `num` groups through the ORM and add up to the project's group_size random students to each"""
+    fake = faker()
+    Group.objects.all().delete()
+
+    groups = Group.objects.bulk_create(
+        [fake.fake_group(fake) for _ in range(num)]
+    )
+
+    for group in groups:
+        group.students.add(
+            *[fake.real_student() for _ in range(randint(1, group.project.group_size))]
+        )
+
+
+class Command(BaseCommand):
+    help = 'seed the db with data'
+
+    amount_of_students = 50_000
+    amount_of_assistants = 5_000
+    amount_of_teachers = 1_500
+    amount_of_courses = 1_500
+    amount_of_projects = 3_000
+    amount_of_groups = 3_000
+    amount_of_submissions = 3_000
+
+    def handle(self, *args, **options):
+        """Clear the existing rows, run every native SQL seeder and report the elapsed seeding time"""
+        # Reset DB
+        User.objects.all().delete()
+        Student.objects.all().delete()
+        Assistant.objects.all().delete()
+        Teacher.objects.all().delete()
+        Course.objects.all().delete()
+        Project.objects.all().delete()
+        Submission.objects.all().delete()
+
+        # Seed students (user ids start at 0)
+        fake = faker()
+        start_time = time.time()
+        seed_students(fake, self.amount_of_students, 0)
+
+        # Seed assistants (user ids continue after the students)
+        seed_assistants(fake, self.amount_of_assistants, self.amount_of_students)
+
+        # Seed teachers (user ids continue after the assistants)
+        seed_teachers(fake, self.amount_of_teachers, self.amount_of_students + self.amount_of_assistants)
+
+        # Seed courses
+        seed_courses(faker(), self.amount_of_courses)
+
+        # Seed projects
+        seed_projects(faker(), self.amount_of_projects)
+
+        # Seed groups
+        seed_groups(faker(), self.amount_of_groups)
+
+        # Seed submissions
+        seed_submissions(faker(), self.amount_of_submissions)
+
+        end_time = time.time()
+        execution_time = end_time - start_time
+        self.stdout.write(self.style.SUCCESS(f"Successfully seeded db in {format_time(execution_time)}!"))
diff --git a/backend/api/seeders/faker.py b/backend/api/seeders/faker.py
new file mode 100644
index 00000000..9b9d0398
--- /dev/null
+++ b/backend/api/seeders/faker.py
@@ -0,0 +1,25 @@
+from faker import Faker
+from api.seeders.providers.real_provider import real_group_provider, real_course_provider, real_faculty_provider, \
+    real_file_extension_provider, real_structure_check_provider, real_project_provider, real_student_provider, \
+    real_teacher_provider, real_assistant_provider, real_submission_provider
+from api.seeders.providers.fake_provider import ModelProvider
+
+
+def faker():
+    """Build a Faker instance with every real_* dynamic provider and the ModelProvider registered"""
+    fake = Faker()
+
+    fake.add_provider(real_faculty_provider())
+    fake.add_provider(real_student_provider())
+    fake.add_provider(real_assistant_provider())
+    fake.add_provider(real_teacher_provider())
+    fake.add_provider(real_course_provider())
+    fake.add_provider(real_project_provider())
+    fake.add_provider(real_group_provider())
+    fake.add_provider(real_submission_provider())
+    fake.add_provider(real_file_extension_provider())
+    fake.add_provider(real_structure_check_provider())
+
+    fake.add_provider(ModelProvider)
+
+    return fake
diff --git a/backend/api/seeders/providers/__init__.py b/backend/api/seeders/providers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/seeders/providers/fake_provider.py b/backend/api/seeders/providers/fake_provider.py
new file mode 100644
index 00000000..c0a5074a
--- /dev/null
+++ b/backend/api/seeders/providers/fake_provider.py
@@ -0,0 +1,88 @@
+from faker.providers import BaseProvider
+from django.utils import timezone
+from api.models.course import Course
+from api.models.project import Project
+from api.models.group import Group
+from authentication.models import User
+
+
+class ModelProvider(BaseProvider):
+    """Faker provider that builds unsaved model instances filled with fake data"""
+
+    min_salt = 1
+    max_salt = 100_000
+
+    def fake_username(self, first_name: str, last_name: str) -> str:
+        """Fake username for users"""
+        rand = self.random_int(min=self.min_salt, max=self.max_salt)
+        return f"{first_name.lower()}{last_name.lower()}{rand}"[:12]
+
+    def fake_user(self, fake, id: int, staff_probability: float = 0.001) -> User:
+        """Create a fake user"""
+        return User(
+            id=id,
+            first_name=fake.first_name(),
+            last_name=fake.last_name(),
+            username=fake.unique.user_name(),
+            email=fake.unique.email(),
+            create_time=timezone.now(),
+            last_enrolled=timezone.now().year,
+            is_staff=fake.boolean(chance_of_getting_true=staff_probability)
+        )
+
+    def fake_course(self, fake, min_year=2022, max_year=2025) -> Course:
+        """Create a fake course"""
+        course = Course(
+            name=fake.catch_phrase(),
+            academic_startyear=fake.random_int(min=min_year, max=max_year),
+            faculty=fake.real_faculty(),
+            description=fake.paragraph()
+        )
+
+        return course
+
+    def fake_project(self,
+                     fake,
+                     min_start_date_dev=-100,
+                     max_start_date_dev=100,
+                     min_deadline_dev=1,
+                     max_deadline_dev=100,
+                     visible_prob=80,
+                     archived_prob=10,
+                     score_visible_prob=30,
+                     locked_groups_prob=30,
+                     min_max_score=1,
+                     max_max_score=100,
+                     min_group_size=1,
+                     max_group_size=15):
+        """Create a fake project"""
+        start_date = timezone.now() + timezone.timedelta(
+            days=fake.random_int(min=min_start_date_dev, max=max_start_date_dev)
+        )
+
+        course = fake.real_course()
+
+        return Project(
+            name=fake.catch_phrase(),
+            description=fake.paragraph(),
+            visible=fake.boolean(chance_of_getting_true=visible_prob),
+            archived=fake.boolean(chance_of_getting_true=archived_prob),
+            score_visible=fake.boolean(chance_of_getting_true=score_visible_prob),
+            locked_groups=fake.boolean(chance_of_getting_true=locked_groups_prob),
+            deadline=start_date + timezone.timedelta(days=fake.random_int(min=min_deadline_dev, max=max_deadline_dev)),
+            course=course,
+            start_date=start_date,
+            max_score=fake.random_int(min=min_max_score, max=max_max_score),
+            group_size=fake.random_int(min=min_group_size, max=max_group_size)
+        )
+
+    def fake_group(self, fake, min_score: int = 0):
+        """Create a fake group whose score is bounded by the max_score of its own project"""
+        project: Project = fake.real_project()
+
+        group: Group = Group(
+            project=project,
+            score=fake.random_int(min=min_score, max=project.max_score)
+        )
+
+        return group
diff --git a/backend/api/seeders/providers/real_provider.py b/backend/api/seeders/providers/real_provider.py
new file mode 100644
index 00000000..5eaf80f9
--- /dev/null
+++ b/backend/api/seeders/providers/real_provider.py
@@ -0,0 +1,82 @@
+"""Factories that wrap all rows of one model in a Faker DynamicProvider named real_<model>"""
+
+from faker.providers import DynamicProvider
+from authentication.models import Faculty
+from api.models.student import Student
+from api.models.assistant import Assistant
+from api.models.teacher import Teacher
+from api.models.course import Course
+from api.models.group import Group
+from api.models.project import Project
+from api.models.submission import Submission
+from api.models.checks import FileExtension, StructureCheck
+
+
+def real_faculty_provider():
+    return DynamicProvider(
+        provider_name="real_faculty",
+        elements=Faculty.objects.all(),
+    )
+
+
+def real_student_provider():
+    return DynamicProvider(
+        provider_name="real_student",
+        elements=Student.objects.all(),
+    )
+
+
+def real_assistant_provider():
+    return DynamicProvider(
+        provider_name="real_assistant",
+        elements=Assistant.objects.all(),
+    )
+
+
+def real_teacher_provider():
+    return DynamicProvider(
+        provider_name="real_teacher",
+        elements=Teacher.objects.all(),
+    )
+
+
+def real_course_provider():
+    return DynamicProvider(
+        provider_name="real_course",
+        elements=Course.objects.all(),
+    )
+
+
+def real_project_provider():
+    return DynamicProvider(
+        provider_name="real_project",
+        elements=Project.objects.all(),
+    )
+
+
+def real_group_provider():
+    return DynamicProvider(
+        provider_name="real_group",
+        elements=Group.objects.all(),
+    )
+
+
+def real_submission_provider():
+    return DynamicProvider(
+        provider_name="real_submission",
+        elements=Submission.objects.all(),
+    )
+
+
+def real_file_extension_provider():
+    return DynamicProvider(
+        provider_name="real_file_extension",
+        elements=FileExtension.objects.all(),
+    )
+
+
+def real_structure_check_provider():
+    return DynamicProvider(
+        provider_name="real_structure_check",
+        elements=StructureCheck.objects.all(),
+    )
diff --git a/backend/api/seeders/seeder.py b/backend/api/seeders/seeder.py
new file mode 100644
index 00000000..8911d69f
--- /dev/null
+++ b/backend/api/seeders/seeder.py
@@ -0,0 +1,314 @@
+"""Native SQL seeders that bulk insert fake data through the raw database cursor.
+
+NOTE(review): the queries use qmark (?) placeholders and chain .fetchall() onto cursor.execute(), which
+presumably ties these seeders to the SQLite backend -- confirm before running them against another database.
+"""
+from random import choice, randint, shuffle
+from functools import wraps
+from time import time
+from django.db import connection
+from django.utils import timezone
+
+
+def format_time(execution_time):
+    """Format a duration in seconds as milliseconds, seconds, minutes or hours with two decimals"""
+    if execution_time < 1:
+        return f"{execution_time * 1000:.2f} milliseconds"
+    elif execution_time < 60:
+        return f"{execution_time:.2f} seconds"
+    elif execution_time < 3600:
+        return f"{execution_time / 60:.2f} minutes"
+    else:
+        return f"{execution_time / 3600:.2f} hours"
+
+
+def timer(func):
+    """Decorator that prints how long each call of the wrapped seeder took"""
+
+    @wraps(func)
+    def handle(*args, **kwargs):
+        start = time()
+
+        result = func(*args, **kwargs)
+
+        print('Seeder {} took {}'.format(
+            func.__name__, format_time((time() - start))
+        ))
+
+        return result
+
+    return handle
+
+
+def _draw(pool: list, amount: int) -> list:
+    """Remove and return up to `amount` items from the end of a pre-shuffled pool"""
+    start = max(len(pool) - amount, 0)
+    drawn = pool[start:]
+    del pool[start:]
+
+    return drawn
+
+
+@timer
+def seed_users(faker, count: int, offset: int = 0, staff_prob: float = 0.001) -> list[list]:
+    """Seed users with ids offset..offset+count-1 into the database and return the inserted rows
+
+    NOTE(review): Faker's boolean() appears to compare an integer roll in 1..100 against the chance,
+    so the default staff_prob of 0.001 likely never yields a staff user -- confirm the intended unit.
+    """
+    with connection.cursor() as cursor:
+        users = [
+            [
+                user_id,
+                faker.unique.user_name(),
+                faker.unique.email(),
+                faker.first_name(),
+                faker.last_name(),
+                timezone.now().year,
+                timezone.now(),
+                timezone.now(),
+                faker.boolean(chance_of_getting_true=staff_prob),
+            ] for user_id in range(offset, count + offset)
+        ]
+
+        cursor.executemany(
+            "INSERT INTO authentication_user"
+            "(id, username, email, first_name, last_name, last_enrolled, last_login, create_time, is_staff)"
+            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
+            users
+        )
+
+    return users
+
+
+@timer
+def seed_students(faker, count: int = 1_000, offset: int = 0) -> None:
+    """Seed students into the database, reusing each user id as the student id"""
+    users = seed_users(faker, count, offset)
+
+    with connection.cursor() as cursor:
+        students = [
+            [user[0], user[0], True] for user in users
+        ]
+
+        cursor.executemany(
+            "INSERT INTO api_student(user_ptr_id, student_id, is_active) VALUES (?, ?, ?)", students
+        )
+
+
+@timer
+def seed_assistants(faker, count: int = 500, offset: int = 0) -> None:
+    """Seed assistants into the database"""
+    users = seed_users(faker, count, offset)
+
+    with connection.cursor() as cursor:
+        assistants = [
+            [user[0], True] for user in users
+        ]
+
+        cursor.executemany(
+            "INSERT INTO api_assistant(user_ptr_id, is_active) VALUES (?, ?)", assistants
+        )
+
+
+@timer
+def seed_teachers(faker, count: int = 250, offset: int = 0) -> None:
+    """Seed teachers into the database"""
+    users = seed_users(faker, count, offset)
+
+    with connection.cursor() as cursor:
+        teachers = [
+            [user[0], True] for user in users
+        ]
+
+        cursor.executemany(
+            "INSERT INTO api_teacher(user_ptr_id, is_active) VALUES (?, ?)", teachers
+        )
+
+
+@timer
+def seed_courses(faker,
+                 count: int = 1_000,
+                 year_dev: int = 1,
+                 max_students: int = 100,
+                 max_teachers: int = 3,
+                 max_assistants: int = 5,
+                 min_students: int = 1,
+                 min_teachers: int = 1,
+                 min_assistants: int = 1
+                 ) -> None:
+    """Seed courses into the database and link each student, teacher and assistant to at most one course"""
+    with connection.cursor() as cursor:
+        # Fetch existing faculties.
+        faculties = [row[0] for row in cursor.execute("SELECT id FROM authentication_faculty").fetchall()]
+
+        # Create courses.
+        courses = [
+            [
+                faker.catch_phrase(),
+                faker.paragraph(),
+                timezone.now().year + faker.random_int(min=-year_dev, max=year_dev),
+                choice(faculties)
+            ] for _ in range(count)
+        ]
+
+        # Insert courses
+        cursor.executemany(
+            "INSERT INTO api_course(name, description, academic_startyear, faculty_id) VALUES (?, ?, ?, ?)", courses
+        )
+
+        # Link students, teachers, assistants to courses
+        student_course = []
+        teacher_course = []
+        assistant_course = []
+
+        courses = [row[0] for row in cursor.execute("SELECT id FROM api_course").fetchall()]
+        students = [row[0] for row in cursor.execute("SELECT user_ptr_id FROM api_student").fetchall()]
+        teachers = [row[0] for row in cursor.execute("SELECT user_ptr_id FROM api_teacher").fetchall()]
+        assistants = [row[0] for row in cursor.execute("SELECT user_ptr_id FROM api_assistant").fetchall()]
+
+        # Shuffle each pool once so that drawing from its end is a random pick without replacement.
+        shuffle(students)
+        shuffle(teachers)
+        shuffle(assistants)
+
+        for course in courses:
+            chosen_students = _draw(students, randint(min_students, max_students))
+            student_course.extend((course, student) for student in chosen_students)
+
+            chosen_assistants = _draw(assistants, randint(min_assistants, max_assistants))
+            assistant_course.extend((course, assistant) for assistant in chosen_assistants)
+
+            chosen_teachers = _draw(teachers, randint(min_teachers, max_teachers))
+            teacher_course.extend((course, teacher) for teacher in chosen_teachers)
+
+        cursor.executemany(
+            "INSERT INTO api_student_courses(course_id, student_id) VALUES (?, ?)", student_course
+        )
+
+        cursor.executemany(
+            "INSERT INTO api_teacher_courses(course_id, teacher_id) VALUES (?, ?)", teacher_course
+        )
+
+        cursor.executemany(
+            "INSERT INTO api_assistant_courses(course_id, assistant_id) VALUES (?, ?)", assistant_course
+        )
+
+
+@timer
+def seed_projects(
+        faker,
+        count: int = 1_500,
+        min_start_date_dev=-100,
+        max_start_date_dev=100,
+        min_deadline_dev=1,
+        max_deadline_dev=100,
+        visible_prob=80,
+        archived_prob=10,
+        score_visible_prob=30,
+        locked_groups_prob=30,
+        min_max_score=1,
+        max_max_score=100,
+        min_group_size=1,
+        max_group_size=15
+) -> None:
+    """Seed projects into the database, each with a deadline that lies after its start date"""
+    with connection.cursor() as cursor:
+        # Fetch existing courses.
+        courses = [row[0] for row in cursor.execute("SELECT id FROM api_course").fetchall()]
+
+        # Create projects
+        projects = []
+
+        for _ in range(count):
+            start_date = timezone.now() + timezone.timedelta(
+                days=faker.random_int(min=min_start_date_dev, max=max_start_date_dev)
+            )
+            # Offset the deadline from the start date (as fake_project does) so it can never precede it.
+            deadline = start_date + timezone.timedelta(
+                days=faker.random_int(min=min_deadline_dev, max=max_deadline_dev)
+            )
+
+            projects.append([
+                faker.catch_phrase(),
+                faker.paragraph(),
+                start_date,
+                deadline,
+                faker.random_int(min=min_max_score, max=max_max_score),
+                faker.random_int(min=min_group_size, max=max_group_size),
+                faker.boolean(chance_of_getting_true=score_visible_prob),
+                faker.boolean(chance_of_getting_true=locked_groups_prob),
+                faker.boolean(chance_of_getting_true=visible_prob),
+                faker.boolean(chance_of_getting_true=archived_prob),
+                choice(courses)
+            ])
+
+        # Insert projects
+        cursor.executemany(
+            "INSERT INTO api_project(name, description, start_date, deadline, "
+            "max_score, group_size, score_visible, locked_groups, visible, archived, course_id)"
+            " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+            projects
+        )
+
+
+@timer
+def seed_groups(faker, count: int = 4_000, min_score: int = 0) -> None:
+    """Seed groups into the database and put each student in at most one group"""
+    with connection.cursor() as cursor:
+        # Fetch existing projects.
+        projects = cursor.execute("SELECT id, max_score FROM api_project").fetchall()
+
+        # Create groups
+        groups = []
+
+        for _ in range(count):
+            project = choice(projects)
+
+            groups.append([
+                faker.random_int(min=min_score, max=project[1]), project[0]
+            ])
+
+        # Insert groups
+        cursor.executemany(
+            "INSERT INTO api_group(score, project_id)"
+            " VALUES (?, ?)",
+            groups
+        )
+
+        # Add students to groups
+        student_group = []
+
+        groups = [row[0] for row in cursor.execute("SELECT id FROM api_group").fetchall()]
+        students = [row[0] for row in cursor.execute("SELECT user_ptr_id FROM api_student").fetchall()]
+
+        # Shuffle once so that drawing from the end of the pool is a random pick without replacement.
+        shuffle(students)
+
+        for group in groups:
+            chosen_students = _draw(students, randint(0, 6))
+            student_group.extend((group, student) for student in chosen_students)
+
+        cursor.executemany(
+            "INSERT INTO api_group_students(group_id, student_id) VALUES (?, ?)", student_group
+        )
+
+
+@timer
+def seed_submissions(faker, count: int = 4_000, struct_check_passed_prob: float = 70):
+    """Seed submissions into the database, each linked to a random existing group"""
+    with connection.cursor() as cursor:
+        # Fetch existing groups.
+        groups = [group[0] for group in cursor.execute("SELECT id FROM api_group").fetchall()]
+
+        # Create submissions
+        submissions = [
+            [faker.date_this_month(), faker.boolean(chance_of_getting_true=struct_check_passed_prob), choice(groups)]
+            for _ in range(count)
+        ]
+
+        # Insert submissions
+        cursor.executemany(
+            "INSERT INTO api_submission(submission_time, structure_checks_passed, group_id) VALUES (?, ?, ?)",
+            submissions
+        )
diff --git a/backend/ypovoli/settings.py b/backend/ypovoli/settings.py
index f136aad2..9dad0656 100644
--- a/backend/ypovoli/settings.py
+++ b/backend/ypovoli/settings.py
@@ -45,9 +45,10 @@
     "rest_framework",  # Django rest framework
     "drf_yasg",  # Yet Another Swagger generator
     "sslserver",  # Used for local SSL support (needed by CAS)
+    "django_seed",
     "authentication",  # Ypovoli authentication
     "api",  # Ypovoli logic of the base application
-    "notifications",  # Ypovoli notifications
+    "notifications",  # Ypovoli notifications,
 ]
 
 MIDDLEWARE = [