add function for getting page content and unit test for it

vlaszdunov · Jun 4, 2024 · da2eea6 · da2eea6
1 parent ec46770
commit da2eea6
Show file tree

Hide file tree

Showing 7 changed files with 34 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,9 @@
+#byte-compiled
+__pycache__
+
 # Environment
 .venv
+.env
 
 # ide-stuff
 .vscode
diff --git a/src/dockerfile_creator.py b/src/dockerfile_creator.py
diff --git a/src/request_header.json b/src/request_header.json
@@ -0,0 +1,3 @@
+{
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
+}
diff --git a/src/parse.py → src/source.py b/src/parse.py → src/source.py
@@ -12,6 +12,7 @@
 response = requests.get(URL, request_headers).text
 
 page_data = bs(response, 'lxml')
+page_data.con
 actual_versions_raw_list = page_data.find_all(
     'span', attrs={'class': 'release-version'})
 

diff --git a/src/version_parser.py b/src/version_parser.py
@@ -0,0 +1,17 @@
+import json
+from pathlib import Path
+
+import requests
+from bs4 import BeautifulSoup as bs
+
+# Root URL that get_page_content() prepends to the requested page path.
+BASE_URL = 'https://www.python.org/'
+# The header file sits next to this module; resolving it relative to
+# __file__ keeps the import working regardless of the current working
+# directory (a bare 'src/...' path only resolves from the repository root).
+headers = json.loads(
+    Path(__file__).with_name('request_header.json').read_text(encoding='utf-8'))
+
+
+def get_page_content(internal_page: str = '') -> bs:
+    """Fetch a page under ``BASE_URL`` and return it parsed with lxml.
+
+    Args:
+        internal_page: Path appended verbatim to ``BASE_URL``. The default
+            empty string requests ``BASE_URL`` itself.
+
+    Returns:
+        The response body parsed into a ``BeautifulSoup`` tree. HTTP error
+        statuses are not raised here; the body of an error page is parsed
+        like any other response.
+
+    Raises:
+        requests.exceptions.Timeout: If connecting to the server or waiting
+            for its data exceeds the timeout.
+    """
+    # Without an explicit timeout, requests can block indefinitely on a
+    # stalled connection.
+    response = requests.get(
+        BASE_URL + internal_page, headers=headers, timeout=10)
+    return bs(response.text, 'lxml')
+
+
+def get_active_versions(versions_page_internal_url: str):
+    """Placeholder that is not implemented yet; always returns ``None``."""
+    return None
diff --git a/tests/test_dockerfile_creator.py b/tests/test_dockerfile_creator.py
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -0,0 +1,9 @@
+import pytest
+from src.version_parser import *
+
+BASE_URL = 'https://www.python.org/'
+
+
+def test_get_page_content():
+    """The site's root page should parse into a non-empty document."""
+    # get_page_content() prepends BASE_URL itself, so passing BASE_URL as the
+    # argument would request 'https://www.python.org/https://www.python.org/'.
+    page_content = get_page_content()
+    assert page_content.contents