-
Notifications
You must be signed in to change notification settings - Fork 1
/
datasets.py
68 lines (55 loc) · 1.82 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import json
import requests
def flatten(*, list_):
    """Flatten one level of nesting and strip whitespace from every item.

    Args:
        list_: An iterable of iterables whose items provide ``.strip()``
            (e.g. a list of lists of strings).

    Returns:
        A new list with the stripped items of every sub-iterable, in their
        original order.
    """
    stripped = []
    for group in list_:
        for entry in group:
            stripped.append(entry.strip())
    return stripped
class Builder:
    """Base request builder that produces the HTTP headers for a request."""

    def headers(self, *, cookies):
        """Build the request headers from a cookie mapping.

        Args:
            cookies: Mapping of cookie name to cookie value. It must contain
                an ``'XSRF-TOKEN'`` entry.

        Returns:
            A dict with a ``'cookie'`` header (all cookies rendered as
            ``name=value`` and joined with ``'; '``) and an
            ``'x-xsrf-token'`` header holding the ``'XSRF-TOKEN'`` value.

        Raises:
            KeyError: If ``cookies`` is a dict without an ``'XSRF-TOKEN'`` key.
        """
        cookie_pairs = []
        for name, value in cookies.items():
            cookie_pairs.append(f'{name}={value}')
        cookie_header = '; '.join(cookie_pairs)
        return {
            'cookie': cookie_header,
            'x-xsrf-token': cookies['XSRF-TOKEN'],
        }
class BuilderDataset(Builder):
    """Builder whose ``body`` returns a fixed dataset-listing payload.

    NOTE(review): presumably meant for the ``SearchDatasetsRequest`` endpoint;
    the visible code never passes this builder to ``request`` -- confirm.
    """

    def body(self):
        """Return a fresh payload dict with the default listing parameters.

        Key order matches the original literal so the serialized JSON is
        unchanged. A new dict (and a new ``categoryIds`` list) is created on
        every call.
        """
        payload = {'page': 1, 'group': 'public'}
        # These four filters all use the value 'all'.
        payload.update(
            dict.fromkeys(('size', 'fileType', 'license', 'viewed'), 'all')
        )
        payload['categoryIds'] = []
        payload['search'] = ''
        payload['sortBy'] = 'hottest'
        # Identifier and size-bound fields are all sent as null.
        payload.update(
            dict.fromkeys(
                (
                    'userId',
                    'competitionId',
                    'organizationId',
                    'maintainerOrganizationId',
                    'minSize',
                    'maxSize',
                ),
                None,
            )
        )
        payload['isUserQuery'] = False
        payload['hasTasks'] = False
        payload['topicalDataset'] = None
        payload['includeTopicalDatasets'] = True
        return payload
class BuilderTags(Builder):
    """Builder whose ``body`` returns a fixed tag-search payload."""

    def body(self):
        """Return a fresh payload dict for a tag search of type ``"dataset"``.

        Key order matches the original literal so the serialized JSON is
        unchanged.
        """
        skip_take = {"skip": 0, "take": 10}
        return {
            "searchType": "dataset",
            "searchQuery": None,
            "showSystemTags": False,
            **skip_take,
        }
def request(*, url, builder, timeout=30):
    """POST the builder's JSON payload to ``url`` and return the decoded reply.

    The Kaggle datasets page is fetched first on the same session; the cookies
    that GET leaves on the session are then handed to ``builder.headers`` to
    build the headers for the POST.

    Args:
        url: Endpoint that receives the POST.
        builder: Object providing ``body()`` (a JSON-serializable payload) and
            ``headers(cookies=...)`` (the request headers).
        timeout: Timeout in seconds passed to each HTTP call. ``None`` waits
            indefinitely, which was the behavior before this parameter existed.

    Returns:
        The POST response body decoded from JSON.

    Raises:
        AssertionError: If the initial GET does not return HTTP 200.
        requests.RequestException: On connection errors or timeouts.
    """
    landing_url = 'https://www.kaggle.com/datasets'
    with requests.Session() as session:
        landing = session.get(landing_url, timeout=timeout)
        # Explicit raise instead of an `assert` statement so the check is not
        # stripped under `python -O`; AssertionError is kept so callers see
        # the same exception type as before.
        if landing.status_code != 200:
            raise AssertionError(
                f'GET {landing_url} returned HTTP {landing.status_code}, '
                'expected 200'
            )
        cookies = session.cookies.get_dict()
        payload = builder.body()
        headers = builder.headers(cookies=cookies)
        resp = session.post(
            url,
            headers=headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
        return resp.json()
# Kaggle endpoints. Only `url_search_tags` is used by the statements below;
# `url_search_datasets` is defined but not referenced in the visible code.
url_search_datasets = 'https://www.kaggle.com/requests/SearchDatasetsRequest'
url_search_tags = 'https://www.kaggle.com/requests/SearchTagsRequest'
# NOTE: the statements below run at module level, so the network request and
# the print happen whenever this file is executed or imported.
b = BuilderTags()
resp = request(url=url_search_tags, builder=b)
print(resp)