From 85c2a2c383995bfb287e299d8f79c364a16ea1b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Palma?= Date: Sat, 24 Aug 2024 15:32:22 +0100 Subject: [PATCH] feat: scrape teacher sigarra url --- src/scrapper/database/dbs/create_db_sqlite3.sql | 14 ++++++++++++-- src/scrapper/items.py | 5 +++++ src/scrapper/spiders/slot_spider.py | 13 ++++++++++--- src/scripts/dump/schema/create_db_sqlite3.sql | 10 +++++++++- src/scripts/dump/schema/schema_mysql.sql | 10 +++++++++- 5 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/scrapper/database/dbs/create_db_sqlite3.sql b/src/scrapper/database/dbs/create_db_sqlite3.sql index fd70f35..a6ccd6f 100644 --- a/src/scrapper/database/dbs/create_db_sqlite3.sql +++ b/src/scrapper/database/dbs/create_db_sqlite3.sql @@ -112,18 +112,27 @@ CREATE TABLE `slot_class` ( PRIMARY KEY (`slot_id`, `class_id`) ); +CREATE TABLE `professor_link` ( + `id` INTEGER PRIMARY KEY, + `link` varchar(256) +); + -- -------------------------------------------------------- + -- --- Table structure for table `class_professor` +-- Table structure for table `slot_professor` -- CREATE TABLE `slot_professor` ( `slot_id` INTEGER NOT NULL, `professor_id` INTEGER NOT NULL, + `professor_link_id` INTEGER NOT NULL, FOREIGN KEY (`slot_id`) REFERENCES `slot` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, FOREIGN KEY (`professor_id`) REFERENCES `professor` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (`professor_link_id`) REFERENCES `professor_link` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, PRIMARY KEY (`slot_id`, `professor_id`) ); + -- -------------------------------------------------------- -- @@ -133,7 +142,8 @@ CREATE TABLE `slot_professor` ( CREATE TABLE `professor` ( `id` INTEGER PRIMARY KEY, `professor_acronym` varchar(16), - `professor_name` varchar(100) + `professor_name` varchar(100), + `professor_url` varchar(128) 
); -- -------------------------------------------------------- diff --git a/src/scrapper/items.py b/src/scrapper/items.py index 3e777d8..6321e10 100644 --- a/src/scrapper/items.py +++ b/src/scrapper/items.py @@ -73,9 +73,14 @@ class SlotClass(scrapy.Item): class SlotProfessor(scrapy.Item): slot_id = scrapy.Field() professor_id = scrapy.Field() + professor_link_id = scrapy.Field() +class ProfessorLink(scrapy.Item): + id = scrapy.Field() + link = scrapy.Field() class Professor(scrapy.Item): id = scrapy.Field() professor_acronym = scrapy.Field() professor_name = scrapy.Field() + professor_url = scrapy.Field() diff --git a/src/scrapper/spiders/slot_spider.py b/src/scrapper/spiders/slot_spider.py index 46ca5da..382e1bd 100644 --- a/src/scrapper/spiders/slot_spider.py +++ b/src/scrapper/spiders/slot_spider.py @@ -12,7 +12,7 @@ from scrapper.settings import CONFIG, PASSWORD, USERNAME from ..database.Database import Database -from ..items import Slot, Class, SlotProfessor, Professor, SlotClass +from ..items import Slot, Class, SlotProfessor, Professor, SlotClass, ProfessorLink def get_class_id(course_unit_id, class_name): @@ -180,7 +180,8 @@ def extractSchedule(self, response): yield Professor( id=sigarra_id, professor_acronym=teacher["acronym"], - professor_name=name + professor_name=name, + professor_url=teacher["sigarra_url"] ) for current_class in schedule["classes"]: @@ -206,9 +207,15 @@ def extractSchedule(self, response): (sigarra_id, name) = self.get_professor_info( teacher) + yield ProfessorLink( + id=schedule["id"], + link=teacher["sigarra_url"] + ) + yield SlotProfessor( slot_id=schedule["id"], - professor_id=sigarra_id + professor_id=sigarra_id, + professor_link_id=schedule["id"] ) for current_class in schedule["classes"]: diff --git a/src/scripts/dump/schema/create_db_sqlite3.sql b/src/scripts/dump/schema/create_db_sqlite3.sql index 3fb45b5..5845cea 100644 --- a/src/scripts/dump/schema/create_db_sqlite3.sql +++ 
b/src/scripts/dump/schema/create_db_sqlite3.sql @@ -119,6 +119,11 @@ CREATE TABLE `slot_class` ( PRIMARY KEY (`slot_id`, `class_id`) ) ENGINE=InnoDB CHARSET = utf8 COLLATE = utf8_general_ci; +CREATE TABLE `professor_link` ( + `id` INTEGER PRIMARY KEY, + `link` varchar(256) +) ENGINE=InnoDB CHARSET = utf8 COLLATE = utf8_general_ci; + -- -------------------------------------------------------- -- @@ -128,8 +133,10 @@ CREATE TABLE `slot_class` ( CREATE TABLE `slot_professor` ( `slot_id` INTEGER NOT NULL, `professor_id` INTEGER NOT NULL, + `professor_link_id` INTEGER NOT NULL, FOREIGN KEY (`slot_id`) REFERENCES `slot` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, FOREIGN KEY (`professor_id`) REFERENCES `professor` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (`professor_link_id`) REFERENCES `professor_link` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, PRIMARY KEY (`slot_id`, `professor_id`) ) ENGINE=InnoDB CHARSET = utf8 COLLATE = utf8_general_ci; @@ -142,7 +149,8 @@ CREATE TABLE `slot_professor` ( CREATE TABLE `professor` ( `id` INTEGER PRIMARY KEY, `professor_acronym` varchar(16), - `professor_name` varchar(100) + `professor_name` varchar(100), + `professor_url` varchar(128) ) ENGINE=InnoDB CHARSET = utf8 COLLATE = utf8_general_ci; -- -------------------------------------------------------- diff --git a/src/scripts/dump/schema/schema_mysql.sql b/src/scripts/dump/schema/schema_mysql.sql index 836b5a8..e606705 100644 --- a/src/scripts/dump/schema/schema_mysql.sql +++ b/src/scripts/dump/schema/schema_mysql.sql @@ -132,7 +132,13 @@ CREATE TABLE `slot_class` ( CREATE TABLE `professor` ( `id` INTEGER PRIMARY KEY, `professor_acronym` varchar(16), - `professor_name` varchar(100) + `professor_name` varchar(100), + `professor_url` varchar(128) +) ENGINE=InnoDB CHARSET = utf8 COLLATE = utf8_general_ci; + +CREATE TABLE `professor_link` ( + `id` INTEGER PRIMARY KEY, + `link` varchar(256) ) ENGINE=InnoDB CHARSET = utf8 COLLATE = utf8_general_ci; -- 
-------------------------------------------------------- @@ -144,8 +150,10 @@ CREATE TABLE `professor` ( CREATE TABLE `slot_professor` ( `slot_id` INTEGER NOT NULL, `professor_id` INTEGER NOT NULL, + `professor_link_id` INTEGER NOT NULL, FOREIGN KEY (`slot_id`) REFERENCES `slot` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, FOREIGN KEY (`professor_id`) REFERENCES `professor` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (`professor_link_id`) REFERENCES `professor_link` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, PRIMARY KEY (`slot_id`, `professor_id`) ) ENGINE=InnoDB CHARSET = utf8 COLLATE = utf8_general_ci;