Skip to content

Commit

Permalink
Merge pull request #290 from SELab-2/seeder-improvements
Browse files Browse the repository at this point in the history
Seeder improvements
  • Loading branch information
tyboro2002 authored Apr 10, 2024
2 parents 615ad75 + 530ac65 commit be64b5a
Show file tree
Hide file tree
Showing 8 changed files with 668 additions and 2 deletions.
2 changes: 1 addition & 1 deletion backend/.gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.venv
.idea
staticfiles
db.sqlite3
db.sqlite3*
__pycache__
*.mo
141 changes: 141 additions & 0 deletions backend/api/management/commands/seed_db_new.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
from random import choices, randint
from django.core.management.base import BaseCommand
from authentication.models import User
from api.seeders.faker import faker
from api.seeders.seeder import seed_students, seed_assistants, seed_teachers, seed_courses, seed_projects, seed_groups, \
seed_submissions
from api.models.course import Course
from api.models.group import Group
from api.models.project import Project
from api.models.teacher import Teacher
from api.models.student import Student
from api.models.assistant import Assistant
from api.models.submission import Submission

import time


def _seed_users(num: int = 500, student_prob: int = 70, assistant_prob: int = 20, teacher_prob: int = 10):
    """Replace all users with `num` fake ones and give each a random role.

    Every existing `User` row is deleted first. Roles are drawn from a pool
    in which each role name is repeated `*_prob` times, so the three
    probabilities act as relative weights. Each user additionally gets one
    or two faculties picked by the `real_faculty` provider.
    """
    fake = faker()
    User.objects.all().delete()

    new_users = [fake.fake_user(fake, user_id) for user_id in range(num)]
    created_users: list[User] = User.objects.bulk_create(new_users)

    # Weighted pool: a role appears once per "probability point".
    role_pool = ['student'] * student_prob + ['assistant'] * assistant_prob + ['teacher'] * teacher_prob
    assigned_roles = choices(role_pool, k=num)

    for account, role in zip(created_users, assigned_roles):
        if role == 'student':
            Student.create(account, student_id=account.id)
        elif role == 'teacher':
            Teacher.create(account)
        elif role == 'assistant':
            Assistant.create(account)

        faculty_count = randint(1, 2)
        account.faculties.add(
            *(fake.real_faculty() for _ in range(faculty_count))
        )


def _seed_courses(num: int = 200):
    """Replace all courses with `num` fake ones and populate their members.

    Every existing `Course` row is deleted first. Each new course receives
    10-100 students, 1-3 teachers and 2-5 assistants, all drawn through the
    corresponding `real_*` providers.
    """
    fake = faker()
    Course.objects.all().delete()

    new_courses = [fake.fake_course(fake) for _ in range(num)]
    created_courses: list[Course] = Course.objects.bulk_create(new_courses)

    for created in created_courses:
        student_count = randint(10, 100)
        created.students.add(*(fake.real_student() for _ in range(student_count)))

        teacher_count = randint(1, 3)
        created.teachers.add(*(fake.real_teacher() for _ in range(teacher_count)))

        assistant_count = randint(2, 5)
        created.assistants.add(*(fake.real_assistant() for _ in range(assistant_count)))


def _seed_projects(num: int = 1_000):
    """Replace all projects with `num` fake ones.

    Every existing `Project` row is deleted, then the new instances built
    by `fake_project` are inserted with a single `bulk_create` call.
    """
    fake = faker()
    Project.objects.all().delete()

    new_projects = [fake.fake_project(fake) for _ in range(num)]
    Project.objects.bulk_create(new_projects)


def _seed_groups(num: int = 3_000):
    """Replace all groups with `num` fake ones and fill them with students.

    Every existing `Group` row is deleted first. Each new group gets between
    one and `group.project.group_size` students drawn through the
    `real_student` provider.
    """
    fake = faker()
    Group.objects.all().delete()

    new_groups = [fake.fake_group(fake) for _ in range(num)]
    created_groups = Group.objects.bulk_create(new_groups)

    for created in created_groups:
        member_count = randint(1, created.project.group_size)
        created.students.add(*(fake.real_student() for _ in range(member_count)))


def format_time(execution_time):
    """Render a duration given in seconds as a human-readable string.

    The unit is chosen by magnitude: milliseconds below one second, seconds
    below one minute, minutes below one hour, and hours otherwise. The value
    is always formatted with two decimals.
    """
    if execution_time < 1:
        millis = execution_time * 1000
        return f"{millis:.2f} milliseconds"

    if execution_time < 60:
        return f"{execution_time:.2f} seconds"

    if execution_time < 3600:
        minutes = execution_time / 60
        return f"{minutes:.2f} minutes"

    hours = execution_time / 3600
    return f"{hours:.2f} hours"


class Command(BaseCommand):
    """Management command that wipes the seeded tables and refills them.

    The `amount_of_*` class attributes control how many rows of each kind
    are requested from the seeder functions.
    """
    help = 'seed the db with data'

    amount_of_students = 50_000
    amount_of_assistants = 5_000
    amount_of_teachers = 1_500
    amount_of_courses = 1_500
    amount_of_projects = 3_000
    amount_of_groups = 3_000
    amount_of_submissions = 3_000

    def handle(self, *args, **options):
        """Reset the database, run every seeder in order and report the time taken."""
        # Reset DB (same deletion order as the seeding dependencies require).
        for model in (User, Student, Assistant, Teacher, Course, Project, Submission):
            model.objects.all().delete()

        # The user-like seeders share one generator; timing starts once it exists.
        fake = faker()
        started = time.time()

        # The third argument is presumably the id offset for the batch, so
        # the three user kinds do not overlap -- TODO confirm against seeder.
        assistant_offset = self.amount_of_students
        teacher_offset = self.amount_of_students + self.amount_of_assistants

        seed_students(fake, self.amount_of_students, 0)
        seed_assistants(fake, self.amount_of_assistants, assistant_offset)
        seed_teachers(fake, self.amount_of_teachers, teacher_offset)

        # Each remaining seeder gets a freshly built generator.
        seed_courses(faker(), self.amount_of_courses)
        seed_projects(faker(), self.amount_of_projects)
        seed_groups(faker(), self.amount_of_groups)
        seed_submissions(faker(), self.amount_of_submissions)

        execution_time = time.time() - started
        message = f"Successfully seeded db in {format_time(execution_time)}!"
        self.stdout.write(self.style.SUCCESS(message))
24 changes: 24 additions & 0 deletions backend/api/seeders/faker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from faker import Faker
from api.seeders.providers.real_provider import real_group_provider, real_course_provider, real_faculty_provider, \
real_file_extension_provider, real_structure_check_provider, real_project_provider, real_student_provider, \
real_teacher_provider, real_assistant_provider, real_submission_provider
from api.seeders.providers.fake_provider import ModelProvider


def faker():
    """Build a `Faker` generator with all seeding providers registered.

    The `real_*` provider factories are invoked and registered in a fixed
    order, followed by the `ModelProvider` class itself.
    """
    generator = Faker()

    provider_factories = (
        real_faculty_provider,
        real_student_provider,
        real_assistant_provider,
        real_teacher_provider,
        real_course_provider,
        real_project_provider,
        real_group_provider,
        real_submission_provider,
        real_file_extension_provider,
        real_structure_check_provider,
    )
    for build_provider in provider_factories:
        generator.add_provider(build_provider())

    # ModelProvider is passed as a class, not as an instance.
    generator.add_provider(ModelProvider)

    return generator
Empty file.
86 changes: 86 additions & 0 deletions backend/api/seeders/providers/fake_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from faker.providers import BaseProvider
from django.utils import timezone
from api.models.course import Course
from api.models.project import Project
from api.models.group import Group
from authentication.models import User


class ModelProvider(BaseProvider):
    """Faker provider that builds unsaved model instances for seeding.

    None of the `fake_*` methods save anything; they only construct
    instances, which callers are expected to persist themselves.
    """
    # Inclusive bounds of the random numeric suffix used by fake_username.
    min_salt = 1
    max_salt = 100_000

    def fake_username(self, first_name: str, last_name: str) -> str:
        """Return a lowercase username of at most 12 characters.

        The name is the lowercased first and last name followed by a random
        number in [min_salt, max_salt], truncated to 12 characters.
        """
        rand = self.random_int(min=self.min_salt, max=self.max_salt)
        return f"{first_name.lower()}{last_name.lower()}{rand}"[:12]

    def fake_user(self, fake, id: int, staff_probability: float = 0.001) -> User:
        """Build an unsaved `User` with the given primary key.

        Args:
            fake: Faker generator used to draw the field values.
            id: Primary key assigned to the user.
            staff_probability: Passed to `fake.boolean` as
                `chance_of_getting_true`.
        """
        # NOTE(review): Faker's boolean() interprets chance_of_getting_true as
        # a percentage (0-100), so the default 0.001 practically never yields
        # a staff user -- confirm this is intended.
        return User(
            id=id,
            first_name=fake.first_name(),
            last_name=fake.last_name(),
            username=fake.unique.user_name(),
            email=fake.unique.email(),
            create_time=timezone.now(),
            last_enrolled=timezone.now().year,
            is_staff=fake.boolean(chance_of_getting_true=staff_probability)
        )

    def fake_course(self, fake, min_year=2022, max_year=2025) -> Course:
        """Build an unsaved `Course` starting in a year in [min_year, max_year].

        The faculty is drawn through the `real_faculty` provider.
        """
        course = Course(
            name=fake.catch_phrase(),
            academic_startyear=fake.random_int(min=min_year, max=max_year),
            faculty=fake.real_faculty(),
            description=fake.paragraph()
        )

        return course

    def fake_project(self,
                     fake,
                     min_start_date_dev=-100,
                     max_start_date_dev=100,
                     min_deadline_dev=1,
                     max_deadline_dev=100,
                     visible_prob=80,
                     archived_prob=10,
                     score_visible_prob=30,
                     locked_groups_prob=30,
                     min_max_score=1,
                     max_max_score=100,
                     min_group_size=1,
                     max_group_size=15) -> Project:
        """Build an unsaved `Project` attached to a course from `real_course`.

        The start date is now plus a random number of days in
        [min_start_date_dev, max_start_date_dev]; the deadline lies a random
        number of days in [min_deadline_dev, max_deadline_dev] after the
        start date. The `*_prob` arguments are passed to `fake.boolean` as
        `chance_of_getting_true`; the remaining min/max pairs bound the
        random `max_score` and `group_size`.
        """
        start_date = timezone.now() + timezone.timedelta(
            days=fake.random_int(min=min_start_date_dev, max=max_start_date_dev)
        )

        course = fake.real_course()

        return Project(
            name=fake.catch_phrase(),
            description=fake.paragraph(),
            visible=fake.boolean(chance_of_getting_true=visible_prob),
            archived=fake.boolean(chance_of_getting_true=archived_prob),
            score_visible=fake.boolean(chance_of_getting_true=score_visible_prob),
            locked_groups=fake.boolean(chance_of_getting_true=locked_groups_prob),
            deadline=start_date + timezone.timedelta(days=fake.random_int(min=min_deadline_dev, max=max_deadline_dev)),
            course=course,
            start_date=start_date,
            max_score=fake.random_int(min=min_max_score, max=max_max_score),
            group_size=fake.random_int(min=min_group_size, max=max_group_size)
        )

    def fake_group(self, fake, min_score: int = 0) -> Group:
        """Build an unsaved `Group` with a score valid for its project.

        A single project is drawn through `real_project` and used both as
        the group's project and as the source of the score's upper bound,
        so the score always lies in [min_score, project.max_score].
        """
        project: Project = fake.real_project()

        # Reuse the project drawn above: drawing a second one here would
        # bound the score by a different project's max_score.
        group: Group = Group(
            project=project,
            score=fake.random_int(min=min_score, max=project.max_score)
        )

        return group
80 changes: 80 additions & 0 deletions backend/api/seeders/providers/real_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from faker.providers import DynamicProvider
from authentication.models import Faculty
from api.models.student import Student
from api.models.assistant import Assistant
from api.models.teacher import Teacher
from api.models.course import Course
from api.models.group import Group
from api.models.project import Project
from api.models.submission import Submission
from api.models.checks import FileExtension, StructureCheck


def real_faculty_provider():
    """Return a `DynamicProvider` named "real_faculty" over `Faculty.objects.all()`."""
    provider = DynamicProvider(
        provider_name="real_faculty",
        elements=Faculty.objects.all(),
    )
    return provider


def real_student_provider():
    """Return a `DynamicProvider` named "real_student" over `Student.objects.all()`."""
    provider = DynamicProvider(
        provider_name="real_student",
        elements=Student.objects.all(),
    )
    return provider


def real_assistant_provider():
    """Return a `DynamicProvider` named "real_assistant" over `Assistant.objects.all()`."""
    provider = DynamicProvider(
        provider_name="real_assistant",
        elements=Assistant.objects.all(),
    )
    return provider


def real_teacher_provider():
    """Return a `DynamicProvider` named "real_teacher" over `Teacher.objects.all()`."""
    # The provider must be returned; without `return` this function yields None.
    return DynamicProvider(
        provider_name="real_teacher",
        elements=Teacher.objects.all(),
    )


def real_course_provider():
    """Return a `DynamicProvider` named "real_course" over `Course.objects.all()`."""
    # The provider must be returned; without `return` this function yields None.
    return DynamicProvider(
        provider_name="real_course",
        elements=Course.objects.all(),
    )


def real_project_provider():
    """Return a `DynamicProvider` named "real_project" over `Project.objects.all()`."""
    # The provider must be returned; without `return` this function yields None.
    return DynamicProvider(
        provider_name="real_project",
        elements=Project.objects.all(),
    )


def real_group_provider():
    """Return a `DynamicProvider` named "real_group" over `Group.objects.all()`."""
    # The provider must be returned; without `return` this function yields None.
    return DynamicProvider(
        provider_name="real_group",
        elements=Group.objects.all(),
    )


def real_submission_provider():
    """Return a `DynamicProvider` named "real_submission" over `Submission.objects.all()`."""
    # The provider must be returned; without `return` this function yields None.
    return DynamicProvider(
        provider_name="real_submission",
        elements=Submission.objects.all(),
    )


def real_file_extension_provider():
    """Return a `DynamicProvider` named "real_file_extension" over `FileExtension.objects.all()`."""
    # The provider must be returned; without `return` this function yields None.
    return DynamicProvider(
        provider_name="real_file_extension",
        elements=FileExtension.objects.all(),
    )


def real_structure_check_provider():
    """Return a `DynamicProvider` named "real_structure_check" over `StructureCheck.objects.all()`."""
    # The provider must be returned; without `return` this function yields None.
    return DynamicProvider(
        provider_name="real_structure_check",
        elements=StructureCheck.objects.all(),
    )
Loading

0 comments on commit be64b5a

Please sign in to comment.