Skip to content

Commit

Permalink
add function for getting page content and unit test for it
Browse files Browse the repository at this point in the history
  • Loading branch information
vlaszdunov committed Jun 4, 2024
1 parent ec46770 commit da2eea6
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#byte-compiled
__pycache__

# Environment
.venv
.env

# IDE stuff
.vscode
Empty file added src/dockerfile_creator.py
Empty file.
3 changes: 3 additions & 0 deletions src/request_header.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
}
1 change: 1 addition & 0 deletions src/parse.py → src/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# Headers must be passed by keyword: the second positional parameter of
# ``requests.get`` is ``params`` (the query string), so passing the mapping
# positionally would append it to the URL instead of sending it as headers.
# NOTE(review): assumes ``request_headers`` is a header mapping — confirm
# against its definition earlier in this file.
response = requests.get(URL, headers=request_headers).text

# Parse the downloaded markup with the lxml backend.
page_data = bs(response, 'lxml')
# Collect every <span class="release-version"> element from the page.
actual_versions_raw_list = page_data.find_all(
    'span', attrs={'class': 'release-version'})

Expand Down
17 changes: 17 additions & 0 deletions src/version_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import json
from pathlib import Path

import requests
from bs4 import BeautifulSoup as bs

# Root URL that every internal page path is appended to (ends with "/").
BASE_URL = 'https://www.python.org/'
# Request headers are loaded once at import time. The JSON file lives in the
# same directory as this module, so resolve it relative to ``__file__``
# rather than the current working directory; otherwise importing the module
# from anywhere but the repository root raises FileNotFoundError.
headers = json.loads(
    Path(__file__).with_name('request_header.json').read_text(encoding='utf-8'))


def get_page_content(internal_page: str = '', *, timeout: float = 10.0) -> bs:
    """Download a page under ``BASE_URL`` and return it parsed with lxml.

    Args:
        internal_page: Path relative to ``BASE_URL``. The default empty
            string requests the site root. A leading slash is tolerated.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body parsed into a BeautifulSoup document.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # BASE_URL already ends with "/", so drop any leading slash to avoid
    # building "https://www.python.org//path".
    url = BASE_URL + internal_page.lstrip('/')
    # Without an explicit timeout, requests may block indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were content.
    response.raise_for_status()
    return bs(response.text, 'lxml')


def get_active_versions(versions_page_internal_url: str):
    """Placeholder for active-version extraction; not implemented yet.

    Args:
        versions_page_internal_url: Accepted but currently unused.

    Returns:
        None. The body performs no work.
    """
    return None
Empty file.
9 changes: 9 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import pytest
from src.version_parser import *

# Root URL of the site under test.
# NOTE(review): the star import from src.version_parser above appears to
# bring in an identical BASE_URL already — this re-declaration looks
# redundant; confirm before removing.
BASE_URL = 'https://www.python.org/'


def test_get_page_content():
    """The site root should be fetched and parsed into a non-empty document.

    ``get_page_content`` prepends the base URL itself, so it must receive
    only the internal path. Passing the full base URL again would request
    "https://www.python.org/https://www.python.org/". The default argument
    targets the root page.
    """
    page_content = get_page_content()
    assert len(page_content.contents) != 0

0 comments on commit da2eea6

Please sign in to comment.