Skip to content

Commit

Permalink
feat: kiwix library scraping, remove canvas seeder, make all hot relo…
Browse files Browse the repository at this point in the history
…adable

---------

Co-authored-by: calisio <calisio@wustl.edu>
Co-authored-by: jtucholski <josh@unlockedlabs.org>
  • Loading branch information
3 people committed Oct 16, 2024
1 parent 7a06003 commit d5287a3
Show file tree
Hide file tree
Showing 41 changed files with 666 additions and 3,269 deletions.
33 changes: 33 additions & 0 deletions .middleware.air.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# air live-reload configuration for the provider-middleware binary.
# NOTE(review): .tasks.air.toml uses the same tmp_dir and also sets
# clean_on_exit = true; if both watchers run from the same root they may
# remove each other's temp files — confirm this is intended.
root = "."
tmp_dir = "tmp"

[build]
# Compile ./provider-middleware into bin/provider-middleware and run that binary.
cmd = "go build -o ./bin/provider-middleware ./provider-middleware/."
bin = "bin/provider-middleware"
# Watch the shared backend code as well as the middleware itself.
include_dir = ["backend", "provider-middleware"]
include_file = []
# backend/tasks is built by its own air config, so it is excluded here.
exclude_dir = ["frontend", "config", "backend/tasks"]
# Changes to Go test files do not trigger a rebuild.
exclude_regex = ["_test\\.go"]
exclude_unchanged = true

# NOTE(review): per air's example config this path is relative to tmp_dir — confirm.
log = "logs/middleware.air.log"
stop_on_error = true
send_interrupt = true
# NOTE(review): presumably milliseconds, as in air's example config — confirm.
rerun_delay = 500

[log]
time = false
main_only = false

[color]
main = "magenta"
watcher = "cyan"
build = "yellow"
runner = "green"

[misc]
clean_on_exit = true

[screen]
clear_on_rebuild = true
keep_scroll = true
33 changes: 33 additions & 0 deletions .tasks.air.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# air live-reload configuration for the cron_tasks binary.
# NOTE(review): .middleware.air.toml uses the same tmp_dir and also sets
# clean_on_exit = true; if both watchers run from the same root they may
# remove each other's temp files — confirm this is intended.
root = "."
tmp_dir = "tmp"

[build]
# Compile ./backend/tasks into bin/cron_tasks and run that binary.
cmd = "go build -o ./bin/cron_tasks ./backend/tasks/."
bin = "bin/cron_tasks"
# Only the backend tree is watched; frontend and provider-middleware are excluded.
include_dir = ["backend"]
include_file = []
exclude_dir = ["frontend", "provider-middleware"]
# Changes to Go test files do not trigger a rebuild.
exclude_regex = ["_test\\.go"]
exclude_unchanged = true

# NOTE(review): per air's example config this path is relative to tmp_dir — confirm.
log = "logs/tasks.air.log"
stop_on_error = true
send_interrupt = true
# NOTE(review): presumably milliseconds, as in air's example config — confirm.
rerun_delay = 500

[log]
time = false
main_only = false

[color]
main = "magenta"
watcher = "cyan"
build = "yellow"
runner = "green"

[misc]
clean_on_exit = true

[screen]
clear_on_rebuild = true
keep_scroll = true
13 changes: 13 additions & 0 deletions backend/migrations/00010_add_open_content_provider_id_to_tasks.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- +goose Up
-- Let a runnable task belong to an open content provider instead of a
-- provider platform: provider_platform_id becomes nullable and a nullable
-- foreign key to open_content_providers(id) is added.
-- +goose StatementBegin
ALTER TABLE public.runnable_tasks
ALTER COLUMN provider_platform_id DROP NOT NULL,
ADD COLUMN open_content_provider_id integer REFERENCES open_content_providers(id);
-- +goose StatementEnd

-- +goose Down
-- Rows inserted while provider_platform_id was nullable would make
-- SET NOT NULL fail, so they are removed first. Those rows can only be
-- identified by the open_content_provider_id column that is dropped below,
-- so they have no valid representation in the previous schema.
-- +goose StatementBegin
DELETE FROM public.runnable_tasks WHERE provider_platform_id IS NULL;
-- +goose StatementEnd
-- +goose StatementBegin
ALTER TABLE public.runnable_tasks
ALTER COLUMN provider_platform_id SET NOT NULL,
DROP COLUMN open_content_provider_id;
-- +goose StatementEnd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- +goose Up
-- Rename the URL columns to say what they hold:
-- open_content_providers.url -> base_url and libraries.url -> path.
-- +goose StatementBegin
ALTER TABLE public.open_content_providers RENAME COLUMN url TO base_url;
ALTER TABLE public.libraries RENAME COLUMN url TO path;
-- +goose StatementEnd

-- +goose Down
-- Restore the original column names.
-- +goose StatementBegin
ALTER TABLE public.open_content_providers RENAME COLUMN base_url TO url;
ALTER TABLE public.libraries RENAME COLUMN path TO url;
-- +goose StatementEnd
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- +goose Up
-- Widen libraries.language from 255 to 512 characters.
-- +goose StatementBegin
ALTER TABLE public.libraries ALTER COLUMN language TYPE VARCHAR(512);
-- +goose StatementEnd

-- +goose Down
-- Narrow libraries.language back to 255 characters. Values stored while the
-- column was 512 wide may be longer than that; without USING the implicit
-- assignment cast raises an error on them and the rollback aborts, so they
-- are truncated explicitly. LEFT(NULL, 255) is NULL, so NULLs are preserved.
-- +goose StatementBegin
ALTER TABLE public.libraries ALTER COLUMN language TYPE VARCHAR(255) USING LEFT(language, 255);
-- +goose StatementEnd
22 changes: 7 additions & 15 deletions backend/seeder/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,31 +70,26 @@ func seedTestData(db *gorm.DB) {
State: models.Enabled,
AccessKey: "testing_key_replace_me",
}, {
Name: "kolibri_testing",
Name: "Kolibri",
BaseUrl: "https://kolibri.staging.unlockedlabs.xyz",
AccountID: "1234567890",
Type: models.Kolibri,
State: models.Enabled,
AccessKey: "testing_key_replace_me",
}}
for idx := range platforms {
accessKey, err := platforms[idx].EncryptAccessKey()
platforms[idx].AccessKey = accessKey
if err != nil {
log.Printf("Failed to create access key")
}
if err := db.Create(&platforms[idx]).Error; err != nil {
log.Printf("Failed to create platform: %v", err)
}
}
kiwix := models.OpenContentProvider{
Url: "unlockedlabs.org",
Name: "Kiwix",
BaseUrl: "https://library.kiwix.org",
Name: models.Kiwix,
Thumbnail: "https://images.fineartamerica.com/images/artworkimages/mediumlarge/3/llamas-wearing-party-hats-in-a-circle-looking-down-john-daniels.jpg",
CurrentlyEnabled: true,
Description: "Kiwix open content provider",
}
log.Printf("Creating Open Content Provider %s", kiwix.Url)
log.Printf("Creating Open Content Provider %s", kiwix.BaseUrl)
if err := db.Create(&kiwix).Error; err != nil {
log.Printf("Failed to create open content provider: %v", err)
}
Expand All @@ -105,32 +100,29 @@ func seedTestData(db *gorm.DB) {
Name: "TED ted connects",
Language: models.StringPtr("eng,spa,ara"),
Description: models.StringPtr("A collection of TED videos about ted connects"),
Url: "/content/ted_mul_ted-connects_2024-08",
Path: "/content/ted_mul_ted-connects_2024-08",
ImageUrl: models.StringPtr("/catalog/v2/illustration/67440563-a62b-fabe-415c-4c3ee4546f78/?size=48"),
VisibilityStatus: true,
OpenContentProvider: &kiwix,
},
{
OpenContentProviderID: kiwix.ID,
ExternalID: models.StringPtr("urn:uuid:84812c13-fa65-feb7-c206-4f22cc2e0f9a"),
Name: "Python Documentation",
Language: models.StringPtr("eng"),
Description: models.StringPtr("All documentation for Python"),
Url: "/content/docs.python.org_en_2024-09",
Path: "/content/docs.python.org_en_2024-09",
ImageUrl: models.StringPtr("/catalog/v2/illustration/84812c13-fa65-feb7-c206-4f22cc2e0f9a/?size=48"),
VisibilityStatus: true,
OpenContentProvider: &kiwix,
},
{
OpenContentProviderID: kiwix.ID,
ExternalID: models.StringPtr("urn:uuid:19e6fe12-09a9-0a38-5be4-71c0eba0a72d"),
Name: "Finiki",
Language: models.StringPtr("eng"),
Description: models.StringPtr("The Canadian financial wiki"),
Url: "/content/finiki_en_all_maxi_2024-06",
Path: "/content/finiki_en_all_maxi_2024-06",
ImageUrl: models.StringPtr("/catalog/v2/illustration/19e6fe12-09a9-0a38-5be4-71c0eba0a72d/?size=48"),
VisibilityStatus: true,
OpenContentProvider: &kiwix,
}}
for idx := range kiwixLibraries {
log.Printf("Creating library %s", kiwixLibraries[idx].Name)
Expand Down
2 changes: 1 addition & 1 deletion backend/src/database/open_content.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func (db *DB) ToggleContentProvider(id int) error {

func (db *DB) CreateContentProvider(url, thumbnail, description string, id int) error {
provider := models.OpenContentProvider{
Url: url,
BaseUrl: url,
Thumbnail: thumbnail,
Description: description,
}
Expand Down
5 changes: 2 additions & 3 deletions backend/src/database/provider_platforms.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ func (db *DB) GetAllProviderPlatforms(page, perPage int) (int64, []models.Provid
Offset(offset).Limit(perPage).Find(&platforms).Error; err != nil {
return 0, nil, newGetRecordsDBError(err, "provider_platforms")
}

toReturn := iterMap(func(prov models.ProviderPlatform) models.ProviderPlatform {
if prov.OidcClient != nil {
prov.OidcID = prov.OidcClient.ID
Expand Down Expand Up @@ -68,11 +67,11 @@ func (db *DB) CreateProviderPlatform(platform *models.ProviderPlatform) (*models
}
if platform.Type == models.Kolibri {
contentProv := models.OpenContentProvider{
Url: platform.BaseUrl,
BaseUrl: platform.BaseUrl,
ProviderPlatformID: &platform.ID,
CurrentlyEnabled: true,
Description: models.KolibriDescription,
Thumbnail: "https://learningequality.org/static/assets/kolibri-ecosystem-logos/blob-logo.svg",
Thumbnail: models.KolibriThumbnailUrl,
}
if err := db.Create(&contentProv).Error; err != nil {
log.Errorln("unable to create relevant content provider for new kolibri instance")
Expand Down
16 changes: 0 additions & 16 deletions backend/src/handlers/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ func (srv *Server) registerAuthRoutes() {
srv.Mux.Handle("POST /api/reset-password", srv.applyMiddleware(srv.handleResetPassword))
/* only use auth middleware, user activity bloats the database + results */
srv.Mux.Handle("GET /api/auth", srv.applyMiddleware(srv.handleCheckAuth))
srv.Mux.Handle("PUT /api/admin/facility-context/{id}", srv.applyAdminMiddleware(srv.handleChangeAdminFacility))
}

func (claims *Claims) getTraits() map[string]interface{} {
Expand Down Expand Up @@ -80,21 +79,6 @@ func (s *Server) authMiddleware(next http.Handler) http.Handler {
})
}

// handleChangeAdminFacility switches the facility stored on the requesting
// user's claims. It parses the {id} path value as the facility ID, assigns it
// to claims.FacilityID and calls srv.updateUserTraitsInKratos with the
// updated claims.
//
// It returns an invalid-ID service error when {id} is not a valid unsigned
// integer, and an internal-server service error when the Kratos update
// fails. On success it writes an empty 200 response.
func (srv *Server) handleChangeAdminFacility(w http.ResponseWriter, r *http.Request, log sLog) error {
	// ParseUint rejects negative values; strconv.Atoi would accept them and
	// the uint conversion below would silently wrap them into a huge ID.
	id, err := strconv.ParseUint(r.PathValue("id"), 10, strconv.IntSize)
	if err != nil {
		return newInvalidIdServiceError(err, "facility ID")
	}
	// NOTE(review): unchecked type assertion — presumably the middleware
	// always stores *Claims under ClaimsKey; confirm, otherwise this panics.
	claims := r.Context().Value(ClaimsKey).(*Claims)
	claims.FacilityID = uint(id)
	if err := srv.updateUserTraitsInKratos(claims); err != nil {
		log.add("facilityId", id)
		return newInternalServerServiceError(err, "error updating user traits in kratos")
	}
	w.WriteHeader(http.StatusOK)
	return nil
}

func (s *Server) clearKratosCookies(w http.ResponseWriter, r *http.Request) {
cookies := r.Cookies()
for _, cookie := range cookies {
Expand Down
16 changes: 16 additions & 0 deletions backend/src/handlers/facilities_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ func (srv *Server) registerFacilitiesRoutes() {
srv.Mux.Handle("POST /api/facilities", srv.applyAdminMiddleware(srv.handleCreateFacility))
srv.Mux.Handle("DELETE /api/facilities/{id}", srv.applyAdminMiddleware(srv.handleDeleteFacility))
srv.Mux.Handle("PATCH /api/facilities/{id}", srv.applyAdminMiddleware(srv.handleUpdateFacility))
srv.Mux.Handle("PUT /api/admin/facility-context/{id}", srv.applyAdminMiddleware(srv.handleChangeAdminFacility))
}

func (srv *Server) handleIndexFacilities(w http.ResponseWriter, r *http.Request, log sLog) error {
Expand All @@ -37,6 +38,21 @@ func (srv *Server) handleShowFacility(w http.ResponseWriter, r *http.Request, lo
return writeJsonResponse(w, http.StatusOK, facility)
}

// handleChangeAdminFacility switches the facility stored on the requesting
// user's claims. It parses the {id} path value as the facility ID, assigns it
// to claims.FacilityID and calls srv.updateUserTraitsInKratos with the
// updated claims.
//
// It returns an invalid-ID service error when {id} is not a valid unsigned
// integer, and an internal-server service error when the Kratos update
// fails. On success it writes an empty 200 response.
func (srv *Server) handleChangeAdminFacility(w http.ResponseWriter, r *http.Request, log sLog) error {
	// ParseUint rejects negative values; strconv.Atoi would accept them and
	// the uint conversion below would silently wrap them into a huge ID.
	id, err := strconv.ParseUint(r.PathValue("id"), 10, strconv.IntSize)
	if err != nil {
		return newInvalidIdServiceError(err, "facility ID")
	}
	// NOTE(review): unchecked type assertion — presumably the middleware
	// always stores *Claims under ClaimsKey; confirm, otherwise this panics.
	claims := r.Context().Value(ClaimsKey).(*Claims)
	claims.FacilityID = uint(id)
	if err := srv.updateUserTraitsInKratos(claims); err != nil {
		log.add("facilityId", id)
		return newInternalServerServiceError(err, "error updating user traits in kratos")
	}
	w.WriteHeader(http.StatusOK)
	return nil
}

func (srv *Server) handleCreateFacility(w http.ResponseWriter, r *http.Request, log sLog) error {
var facility models.Facility
err := json.NewDecoder(r.Body).Decode(&facility)
Expand Down
57 changes: 41 additions & 16 deletions backend/src/models/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,28 @@ type (

// TableName returns the name of the table that stores CronJob rows.
func (CronJob) TableName() string {
	return "cron_jobs"
}

// BeforeCreate fills in defaults on the job: an empty ID is replaced with a
// freshly generated UUID string, and an empty Schedule is read from the
// MIDDLEWARE_CRON_SCHEDULE environment variable. It always returns nil.
func (cj *CronJob) BeforeCreate(tx *gorm.DB) error {
	if cj.ID == "" {
		cj.ID = uuid.NewString()
	}
	if cj.Schedule == "" {
		cj.Schedule = os.Getenv("MIDDLEWARE_CRON_SCHEDULE")
	}
	return nil
}

type RunnableTask struct {
ID uint `gorm:"primaryKey" json:"id"`
JobID string `gorm:"size 50" json:"job_id"`
Parameters map[string]interface{} `gorm:"-" json:"parameters"`
LastRun time.Time `json:"last_run"`
ProviderPlatformID uint `json:"provider_platform_id"`
Status JobStatus `json:"status"`
ID uint `gorm:"primaryKey" json:"id"`
JobID string `gorm:"size 50" json:"job_id"`
Parameters map[string]interface{} `gorm:"-" json:"parameters"`
LastRun time.Time `json:"last_run"`
ProviderPlatformID *uint `json:"provider_platform_id"`
OpenContentProviderID *uint `json:"open_content_provider_id"`
Status JobStatus `json:"status"`

Provider *ProviderPlatform `gorm:"foreignKey:ProviderPlatformID" json:"-"`
Job *CronJob `gorm:"foreignKey:JobID" json:"-"`
Provider *ProviderPlatform `gorm:"foreignKey:ProviderPlatformID" json:"-"`
ContentProvider *OpenContentProvider `gorm:"foreignKey:OpenContentProviderID" json:"-"`
Job *CronJob `gorm:"foreignKey:JobID" json:"-"`
}

// TableName returns the name of the table that stores RunnableTask rows.
func (RunnableTask) TableName() string {
	return "runnable_tasks"
}
Expand All @@ -45,19 +57,28 @@ const (
GetActivityJob JobType = "get_activity"
// GetOutcomesJob JobType = "get_outcomes"

ScrapeKiwixJob JobType = "scrape_kiwix"

StatusPending JobStatus = "pending"
StatusRunning JobStatus = "running"
)

func (jt JobType) GetParams(db *gorm.DB, provId uint) (map[string]interface{}, error) {
// provider id can be nil pointer
func (jt JobType) GetParams(db *gorm.DB, provId *uint, jobId string) (map[string]interface{}, error) {
var skip bool
if jt == ScrapeKiwixJob {
return map[string]interface{}{
"open_content_provider_id": *provId,
"job_id": jobId,
}, nil
}
users := []map[string]interface{}{}
if err := db.Model(ProviderUserMapping{}).Select("user_id, external_user_id").Find(&users, "provider_platform_id = ?", provId).Error; err != nil {
if err := db.Model(ProviderUserMapping{}).Select("user_id, external_user_id").Joins("JOIN users u on provider_user_mappings.user_id = u.id").Find(&users, "provider_platform_id = ? AND u.role = 'student'", *provId).Error; err != nil {
log.Errorf("failed to fetch users: %v", err)
skip = true
}
courses := []map[string]interface{}{}
if err := db.Model(Course{}).Select("id as course_id, external_id as external_course_id").Find(&courses, "provider_platform_id = ?", provId).Error; err != nil {
if err := db.Model(Course{}).Select("id as course_id, external_id as external_course_id").Find(&courses, "provider_platform_id = ?", *provId).Error; err != nil {
log.Errorf("failed to fetch courses: %v", err)
skip = true
}
Expand All @@ -69,39 +90,43 @@ func (jt JobType) GetParams(db *gorm.DB, provId uint) (map[string]interface{}, e
return map[string]interface{}{
"user_mappings": users,
"courses": courses,
"provider_platform_id": provId,
"provider_platform_id": *provId,
"job_type": jt,
"job_id": jobId,
}, nil
case GetCoursesJob:
return map[string]interface{}{
"provider_platform_id": provId,
"job_type": jt,
"job_id": jobId,
}, nil
case GetActivityJob:
if skip {
return nil, errors.New("no users or courses found for provider platform")
}
return map[string]interface{}{
"provider_platform_id": provId,
"provider_platform_id": *provId,
"courses": courses,
"user_mappings": users,
"job_type": jt,
"job_id": jobId,
}, nil
// case GetOutcomesJob:
// if skip {
// return nil, errors.New("no users or courses found for provider platform")
// }
// return map[string]interface{}{
// "provider_platform_id": provId,
// "provider_platform_id": *provId,
// "user_mappings": users,
// "courses": courses,
// "job_type": jt,
// }, nil
}
return nil, nil
return nil, errors.New("job type not found")
}

var AllDefaultJobs = []JobType{GetCoursesJob, GetMilestonesJob, GetActivityJob /* GetOutcomesJob */}
var AllDefaultProviderJobs = []JobType{GetCoursesJob, GetMilestonesJob, GetActivityJob /* GetOutcomesJob */}
var AllOtherJobs = []JobType{ScrapeKiwixJob}

func NewCronJob(name JobType) *CronJob {
return &CronJob{
Expand Down
2 changes: 1 addition & 1 deletion backend/src/models/library.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ type Library struct {
Name string `gorm:"size:255;not null" json:"name"`
Language *string `gorm:"size:255" json:"language"`
Description *string `json:"description"`
Url string `gorm:"not null" json:"url"`
Path string `gorm:"not null" json:"url"`
ImageUrl *string `json:"image_url"`
VisibilityStatus bool `gorm:"default:false;not null" json:"visibility_status"`

Expand Down
Loading

0 comments on commit d5287a3

Please sign in to comment.