-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from TheScienceMuseum/develop
0.3.0
- Loading branch information
Showing
13 changed files
with
248 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Changelog | ||
|
||
All notable changes documented below. | ||
|
||
## 0.3.0 | ||
|
||
- add changeable timeout for `wbgetentities` GET request | ||
- handle more Wikidata claims than just QIDs | ||
- generate User Agent from request in line with Wikidata guidelines | ||
- make Wikidata-related methods importable (rather than just runnable from CLI) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,7 @@ | ||
[ELASTIC] | ||
ELASTIC_SEARCH_CLUSTER = | ||
ELASTIC_SEARCH_USER = | ||
ELASTIC_SEARCH_PASSWORD = | ||
ELASTIC_SEARCH_PASSWORD = | ||
|
||
[HTTP] | ||
CONTACT_DETAILS = |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from elastic_wikidata.__metadata__ import __version__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__version__ = "0.3.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
class RuntimeConfig:
    """
    Minimal key/value store for settings that are only known at run time.

    Values live in the plain dict `self.items`.
    """

    def __init__(self):
        self.items = {}

    def add_item(self, item: dict):
        """
        Merge the key/value pairs of `item` into the config.
        Keys that already exist are overwritten.
        """

        self.items.update(item)

    def get(self, key: str):
        """
        Look up a single config value. Returns None when `key` is not set.
        """

        try:
            return self.items[key]
        except KeyError:
            return None

    def get_all(self) -> dict:
        """
        Return the dict holding every config item (the stored dict itself, not a copy).
        """

        return self.items


# Module-level instance imported by other modules.
runtime_config = RuntimeConfig()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import requests | ||
import sys | ||
from urllib.parse import quote | ||
from elastic_wikidata import __version__ as ew_version | ||
from elastic_wikidata.config import runtime_config | ||
|
||
|
||
def generate_user_agent():
    """
    Generates user agent string according to Wikidata User Agent Guidelines (https://meta.wikimedia.org/wiki/User-Agent_policy).
    Uses contact information from `runtime_config.get('user_agent_contact')`.

    Contact information that is missing, empty or made up only of whitespace is
    treated as absent: no `(...)` comment is added to the user agent, and a
    warning is printed when `runtime_config.get('cli')` is truthy.

    Returns:
        str: user agent string
    """
    v_params = {
        "python": "Python/" + ".".join(str(i) for i in sys.version_info),
        "http_backend": "requests/" + requests.__version__,
        "ew": "Elastic Wikidata bot/" + ew_version,
    }

    contact_information = runtime_config.get("user_agent_contact")

    # str.split() with no separator drops empty tokens, so repeated or
    # surrounding whitespace cannot produce blank entries (and a blank value,
    # e.g. an unset config.ini field, yields no tokens at all).
    contact_tokens = contact_information.split() if contact_information else []

    if contact_tokens:
        contact_information = " ".join(
            process_user_agent_username(token) for token in contact_tokens
        )
        return f"{v_params['ew']} ({contact_information}) {v_params['http_backend']} {v_params['python']}"

    if runtime_config.get("cli"):
        print(
            "WARNING: please consider adding contact information through config.ini or the -contact flag to improve the User Agent header for Wikidata requests."
        )
    return f"{v_params['ew']} {v_params['http_backend']} {v_params['python']}"
|
||
|
||
def process_user_agent_username(username=None):
    """
    **Credit to [pywikibot](https://www.mediawiki.org/wiki/Manual:Pywikibot)**
    Reduce username to a representation permitted in HTTP headers.
    To achieve that, this function:
    1) replaces spaces (' ') with '_'
    2) if the result is not pure ASCII, encodes it as UTF-8 and URL-encodes the bytes
    3) if the result is ASCII but contains '%', URL-encodes it

    Returns an empty string when `username` is None or empty.
    """
    if not username:
        return ""

    # Underscores instead of spaces, so neither ' ' nor '%20' ends up in the header.
    underscored = username.replace(" ", "_")

    is_ascii = all(ord(char) < 128 for char in underscored)
    if not is_ascii:
        return quote(underscored.encode("utf-8"))

    # '%' is legal in the default $wgLegalTitleChars, so an ASCII name may
    # carry a hand-coded percent-encoded value; quoting escapes the '%' itself
    # so such a value is not passed through verbatim.
    if "%" in underscored:
        return quote(underscored)

    return underscored
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.