Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Seeder improvements #290

Merged
merged 15 commits into from
Apr 10, 2024
2 changes: 1 addition & 1 deletion backend/.gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.venv
.idea
staticfiles
db.sqlite3
db.sqlite3*
__pycache__
*.mo
141 changes: 141 additions & 0 deletions backend/api/management/commands/seed_db_new.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
from random import choices, randint
from django.core.management.base import BaseCommand
from authentication.models import User
from api.seeders.faker import faker
from api.seeders.seeder import seed_students, seed_assistants, seed_teachers, seed_courses, seed_projects, seed_groups, \
seed_submissions
from api.models.course import Course
from api.models.group import Group
from api.models.project import Project
from api.models.teacher import Teacher
from api.models.student import Student
from api.models.assistant import Assistant
from api.models.submission import Submission

import time


def _seed_users(num: int = 500, student_prob: int = 70, assistant_prob: int = 20, teacher_prob: int = 10):
    """Delete every user, then create ``num`` new users with a random role each.

    Roles are drawn with the relative weights ``student_prob``,
    ``assistant_prob`` and ``teacher_prob``. After its role is created, each
    user gets one or two faculties picked through the ``real_faculty``
    provider (the picks are independent, so they may coincide).
    """
    generator = faker()
    User.objects.all().delete()

    pending_users = [generator.fake_user(generator, user_id) for user_id in range(num)]
    users: list[User] = User.objects.bulk_create(pending_users)

    # Repeating each role name by its weight turns a uniform draw from the
    # pool into a weighted draw.
    role_pool = ['student'] * student_prob + ['assistant'] * assistant_prob + ['teacher'] * teacher_prob
    roles = choices(role_pool, k=num)

    for user, role in zip(users, roles):
        if role == 'student':
            Student.create(user, student_id=user.id)
        elif role == 'teacher':
            Teacher.create(user)
        elif role == 'assistant':
            Assistant.create(user)

        faculty_count = randint(1, 2)
        user.faculties.add(*(generator.real_faculty() for _ in range(faculty_count)))


def _seed_courses(num: int = 200):
    """Delete every course, create ``num`` new ones and populate their members.

    Each course receives randomly picked students (10-100 picks), teachers
    (1-3 picks) and assistants (2-5 picks), in that order.
    """
    generator = faker()
    Course.objects.all().delete()

    pending_courses = [generator.fake_course(generator) for _ in range(num)]
    courses: list[Course] = Course.objects.bulk_create(pending_courses)

    for course in courses:
        # (relation to fill, provider used to pick a member, min picks, max picks)
        memberships = (
            (course.students, generator.real_student, 10, 100),
            (course.teachers, generator.real_teacher, 1, 3),
            (course.assistants, generator.real_assistant, 2, 5),
        )
        for relation, pick_member, fewest, most in memberships:
            pick_count = randint(fewest, most)
            relation.add(*(pick_member() for _ in range(pick_count)))


def _seed_projects(num: int = 1_000):
    """Delete every project and bulk-insert ``num`` freshly generated ones."""
    generator = faker()
    Project.objects.all().delete()

    pending_projects = [generator.fake_project(generator) for _ in range(num)]
    Project.objects.bulk_create(pending_projects)


def _seed_groups(num: int = 3_000):
    """Delete every group, create ``num`` new ones and add students to each.

    The number of student picks per group is random, between 1 and the
    ``group_size`` of the group's project.
    """
    generator = faker()
    Group.objects.all().delete()

    pending_groups = [generator.fake_group(generator) for _ in range(num)]
    groups = Group.objects.bulk_create(pending_groups)

    for group in groups:
        member_count = randint(1, group.project.group_size)
        members = [generator.real_student() for _ in range(member_count)]
        group.students.add(*members)


def format_time(execution_time):
    """Format a duration in seconds as text, using the largest unit that fits.

    Values below one second are shown in milliseconds, below one minute in
    seconds, below one hour in minutes, and anything else in hours. The
    number is always printed with two decimals.
    """
    if execution_time < 1:
        amount, unit = execution_time * 1000, "milliseconds"
    elif execution_time < 60:
        amount, unit = execution_time, "seconds"
    elif execution_time < 3600:
        amount, unit = execution_time / 60, "minutes"
    else:
        amount, unit = execution_time / 3600, "hours"

    return f"{amount:.2f} {unit}"


class Command(BaseCommand):
    """Management command that empties the seeded tables and refills them."""

    help = 'seed the db with data'

    # How many rows each seeding step is asked to generate.
    amount_of_students = 50_000
    amount_of_assistants = 5_000
    amount_of_teachers = 1_500
    amount_of_courses = 1_500
    amount_of_projects = 3_000
    amount_of_groups = 3_000
    amount_of_submissions = 3_000

    def handle(self, *args, **options):
        """Reset the database, run every seeder in order and report the duration."""
        # Reset DB
        for model in (User, Student, Assistant, Teacher, Course, Project, Submission):
            model.objects.all().delete()

        fake = faker()
        start_time = time.time()

        # The three user seeders share one generator. The last argument grows
        # by the number of users already requested -- presumably the first id
        # to hand out; confirm against api.seeders.seeder.
        assistant_offset = self.amount_of_students
        teacher_offset = assistant_offset + self.amount_of_assistants
        seed_students(fake, self.amount_of_students, 0)
        seed_assistants(fake, self.amount_of_assistants, assistant_offset)
        seed_teachers(fake, self.amount_of_teachers, teacher_offset)

        # NOTE(review): each remaining step gets its own faker() instance,
        # presumably so the real_* providers see rows created by the previous
        # steps -- confirm before sharing one instance.
        later_steps = (
            (seed_courses, self.amount_of_courses),
            (seed_projects, self.amount_of_projects),
            (seed_groups, self.amount_of_groups),
            (seed_submissions, self.amount_of_submissions),
        )
        for run_seeder, amount in later_steps:
            run_seeder(faker(), amount)

        elapsed = time.time() - start_time
        self.stdout.write(self.style.SUCCESS(f"Successfully seeded db in {format_time(elapsed)}!"))
24 changes: 24 additions & 0 deletions backend/api/seeders/faker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from faker import Faker
from api.seeders.providers.real_provider import real_group_provider, real_course_provider, real_faculty_provider, \
real_file_extension_provider, real_structure_check_provider, real_project_provider, real_student_provider, \
real_teacher_provider, real_assistant_provider, real_submission_provider
from api.seeders.providers.fake_provider import ModelProvider


def faker():
    """Create a ``Faker`` instance with all project providers registered.

    The ``real_*`` providers are added first, in a fixed order, followed by
    ``ModelProvider``.
    """
    fake = Faker()

    provider_factories = (
        real_faculty_provider,
        real_student_provider,
        real_assistant_provider,
        real_teacher_provider,
        real_course_provider,
        real_project_provider,
        real_group_provider,
        real_submission_provider,
        real_file_extension_provider,
        real_structure_check_provider,
    )
    for make_provider in provider_factories:
        fake.add_provider(make_provider())

    # ModelProvider is handed over as a class, not as an instance.
    fake.add_provider(ModelProvider)

    return fake
Empty file.
86 changes: 86 additions & 0 deletions backend/api/seeders/providers/fake_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from faker.providers import BaseProvider
from django.utils import timezone
from api.models.course import Course
from api.models.project import Project
from api.models.group import Group
from authentication.models import User


class ModelProvider(BaseProvider):
    """Faker provider that builds unsaved model instances for the seeders.

    Every ``fake_*`` method only constructs an object; saving it is left to
    the caller.
    """

    # Bounds of the random number appended to generated usernames.
    min_salt = 1
    max_salt = 100_000

    def fake_username(self, first_name: str, last_name: str) -> str:
        """Fake username for users.

        Joins the lower-cased names with a random number between ``min_salt``
        and ``max_salt`` and keeps the first 12 characters of the result.
        """
        rand = self.random_int(min=self.min_salt, max=self.max_salt)
        return f"{first_name.lower()}{last_name.lower()}{rand}"[:12]

    def fake_user(self, fake, id: int, staff_probability: float = 0.001) -> User:
        """Create a fake user with the given primary key.

        :param fake: Faker instance used to generate the field values.
        :param id: primary key assigned to the user.
        :param staff_probability: forwarded to ``fake.boolean`` as
            ``chance_of_getting_true`` to decide ``is_staff``.
        """
        # NOTE(review): Faker's boolean() treats chance_of_getting_true as a
        # percentage compared against an integer in 1..100, so the default of
        # 0.001 looks like it can never produce a staff user -- confirm intent.
        return User(
            id=id,
            first_name=fake.first_name(),
            last_name=fake.last_name(),
            username=fake.unique.user_name(),
            email=fake.unique.email(),
            create_time=timezone.now(),
            last_enrolled=timezone.now().year,
            is_staff=fake.boolean(chance_of_getting_true=staff_probability)
        )

    def fake_course(self, fake, min_year=2022, max_year=2025) -> Course:
        """Create a fake course.

        The academic start year is drawn from ``min_year``..``max_year`` and
        the faculty is picked through the ``real_faculty`` provider.
        """
        return Course(
            name=fake.catch_phrase(),
            academic_startyear=fake.random_int(min=min_year, max=max_year),
            faculty=fake.real_faculty(),
            description=fake.paragraph()
        )

    def fake_project(self,
                     fake,
                     min_start_date_dev=-100,
                     max_start_date_dev=100,
                     min_deadline_dev=1,
                     max_deadline_dev=100,
                     visible_prob=80,
                     archived_prob=10,
                     score_visible_prob=30,
                     locked_groups_prob=30,
                     min_max_score=1,
                     max_max_score=100,
                     min_group_size=1,
                     max_group_size=15):
        """Create a fake project attached to a randomly picked course.

        The start date is "now" shifted by a random number of days between
        ``min_start_date_dev`` and ``max_start_date_dev``. The deadline lies
        ``min_deadline_dev``..``max_deadline_dev`` days after the start date.
        The ``*_prob`` arguments are passed to ``fake.boolean`` as
        ``chance_of_getting_true``; the remaining min/max pairs bound the
        random ``max_score`` and ``group_size``.
        """
        # Imported here rather than read from django.utils.timezone, which
        # only exposes timedelta as a side effect of its own imports.
        from datetime import timedelta

        start_date = timezone.now() + timedelta(
            days=fake.random_int(min=min_start_date_dev, max=max_start_date_dev)
        )

        course = fake.real_course()

        return Project(
            name=fake.catch_phrase(),
            description=fake.paragraph(),
            visible=fake.boolean(chance_of_getting_true=visible_prob),
            archived=fake.boolean(chance_of_getting_true=archived_prob),
            score_visible=fake.boolean(chance_of_getting_true=score_visible_prob),
            locked_groups=fake.boolean(chance_of_getting_true=locked_groups_prob),
            deadline=start_date + timedelta(days=fake.random_int(min=min_deadline_dev, max=max_deadline_dev)),
            course=course,
            start_date=start_date,
            max_score=fake.random_int(min=min_max_score, max=max_max_score),
            group_size=fake.random_int(min=min_group_size, max=max_group_size)
        )

    def fake_group(self, fake, min_score: int = 0):
        """Create a fake group for a randomly picked project.

        The score is drawn between ``min_score`` and the ``max_score`` of the
        project the group belongs to.
        """
        # Pick the project once, so the score bound and the assigned project
        # are the same object.
        project: Project = fake.real_project()

        return Group(
            project=project,
            score=fake.random_int(min=min_score, max=project.max_score)
        )
80 changes: 80 additions & 0 deletions backend/api/seeders/providers/real_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from faker.providers import DynamicProvider
from authentication.models import Faculty
from api.models.student import Student
from api.models.assistant import Assistant
from api.models.teacher import Teacher
from api.models.course import Course
from api.models.group import Group
from api.models.project import Project
from api.models.submission import Submission
from api.models.checks import FileExtension, StructureCheck


def real_faculty_provider():
    """Return a ``real_faculty`` provider over the ``Faculty.objects.all()`` queryset."""
    faculties = Faculty.objects.all()
    return DynamicProvider(provider_name="real_faculty", elements=faculties)


def real_student_provider():
    """Return a ``real_student`` provider over the ``Student.objects.all()`` queryset."""
    students = Student.objects.all()
    return DynamicProvider(provider_name="real_student", elements=students)


def real_assistant_provider():
    """Return a ``real_assistant`` provider over the ``Assistant.objects.all()`` queryset."""
    assistants = Assistant.objects.all()
    return DynamicProvider(provider_name="real_assistant", elements=assistants)


def real_teacher_provider():
    """Return a ``real_teacher`` provider over the ``Teacher.objects.all()`` queryset."""
    # The provider must be returned: faker() passes this function's result
    # straight to add_provider().
    return DynamicProvider(
        provider_name="real_teacher",
        elements=Teacher.objects.all(),
    )


def real_course_provider():
    """Return a ``real_course`` provider over the ``Course.objects.all()`` queryset."""
    # The provider must be returned: faker() passes this function's result
    # straight to add_provider().
    return DynamicProvider(
        provider_name="real_course",
        elements=Course.objects.all(),
    )


def real_project_provider():
    """Return a ``real_project`` provider over the ``Project.objects.all()`` queryset."""
    # The provider must be returned: faker() passes this function's result
    # straight to add_provider().
    return DynamicProvider(
        provider_name="real_project",
        elements=Project.objects.all(),
    )


def real_group_provider():
    """Return a ``real_group`` provider over the ``Group.objects.all()`` queryset."""
    # The provider must be returned: faker() passes this function's result
    # straight to add_provider().
    return DynamicProvider(
        provider_name="real_group",
        elements=Group.objects.all(),
    )


def real_submission_provider():
    """Return a ``real_submission`` provider over the ``Submission.objects.all()`` queryset."""
    # The provider must be returned: faker() passes this function's result
    # straight to add_provider().
    return DynamicProvider(
        provider_name="real_submission",
        elements=Submission.objects.all(),
    )


def real_file_extension_provider():
    """Return a ``real_file_extension`` provider over the ``FileExtension.objects.all()`` queryset."""
    # The provider must be returned: faker() passes this function's result
    # straight to add_provider().
    return DynamicProvider(
        provider_name="real_file_extension",
        elements=FileExtension.objects.all(),
    )


def real_structure_check_provider():
    """Return a ``real_structure_check`` provider over the ``StructureCheck.objects.all()`` queryset."""
    # The provider must be returned: faker() passes this function's result
    # straight to add_provider().
    return DynamicProvider(
        provider_name="real_structure_check",
        elements=StructureCheck.objects.all(),
    )
Loading
Loading