Skip to content

Commit

Permalink
add python 3.5 support (#26)
Browse files Browse the repository at this point in the history
* add python 3.5 support
  • Loading branch information
galtay authored Apr 11, 2019
1 parent 56b1797 commit 8438c88
Show file tree
Hide file tree
Showing 14 changed files with 136 additions and 84 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
dist: xenial
language: python
python:
- "3.5"
- "3.6"
- "3.7"

Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Quick Install
Requirements
------------

* python >= 3.6
* python >= 3.5

Install with pip
----------------
Expand Down
2 changes: 1 addition & 1 deletion docs/readme.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Quick Install
Requirements
------------

* python >= 3.6
* python >= 3.5

Install with pip
----------------
Expand Down
2 changes: 1 addition & 1 deletion examples/basic_json_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def has_occupation_politician(item: WikidataItem, truthy: bool = True) -> bool:
)
)

if ii > 10_000:
if ii > 10000:
break

# write the iterable of WikidataItem to disk as JSON
Expand Down
2 changes: 1 addition & 1 deletion qwikidata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"""Metadata for this package."""

__package_name__ = "qwikidata"
__version__ = "0.3.2"
__version__ = "0.4.0"
44 changes: 27 additions & 17 deletions qwikidata/claim.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, reference_dict: typedefs.ReferenceDict) -> None:
self._reference_dict = reference_dict

self.referencehash = reference_dict["hash"]
self.snaks: OrderedDict = OrderedDict()
self.snaks = OrderedDict() # type: OrderedDict
for property_id in reference_dict["snaks-order"]:
self.snaks[property_id] = [
WikidataSnak(snak_dict) for snak_dict in reference_dict["snaks"][property_id]
Expand All @@ -55,12 +55,13 @@ def _validate_reference_dict(self, reference_dict: typedefs.ReferenceDict) -> No
for req_key in _REQUIRED_KEYS:
if req_key not in reference_dict:
raise ValueError(
f"required reference_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(reference_dict.keys())}"
"required reference_dict keys are {} but only found {}".format(
_REQUIRED_KEYS, list(reference_dict.keys())
)
)

def __str__(self) -> str:
return f"WikidataReference(hash={self.referencehash}, snaks={self.snaks})"
return "WikidataReference(hash={}, snaks={})".format(self.referencehash, self.snaks)

def __repr__(self) -> str:
return self.__str__()
Expand Down Expand Up @@ -109,12 +110,13 @@ def _validate_qualifier_dict(self, qualifier_dict: typedefs.QualifierDict) -> No
for req_key in _REQUIRED_KEYS:
if req_key not in qualifier_dict:
raise ValueError(
f"required qualifier_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(qualifier_dict.keys())}"
"required qualifier_dict keys are {} but only found {}".format(
_REQUIRED_KEYS, list(qualifier_dict.keys())
)
)

def __str__(self) -> str:
return f"WikidataQualifier(hash={self.qualifierhash}, snak={self.snak})"
return "WikidataQualifier(hash={}, snak={})".format(self.qualifierhash, self.snak)

def __repr__(self) -> str:
return self.__str__()
Expand Down Expand Up @@ -181,14 +183,16 @@ def __init__(self, claim_dict: typedefs.ClaimDict) -> None:
self._claim_dict = claim_dict
self.property_id = self.mainsnak.property_id

self.qualifiers: OrderedDict[typedefs.PropertyId, List[WikidataQualifier]] = OrderedDict()
self.qualifiers = (
OrderedDict()
) # type: OrderedDict[typedefs.PropertyId, List[WikidataQualifier]]
self.qualifiers_order = claim_dict.get("qualifiers-order", [])
if "qualifiers" in claim_dict:
for property_id in self.qualifiers_order:
qualifier_dicts = claim_dict["qualifiers"][property_id]
self.qualifiers[property_id] = [WikidataQualifier(qd) for qd in qualifier_dicts]

self.references: List[WikidataReference] = []
self.references = [] # type: List[WikidataReference]
if "references" in claim_dict:
for reference_dict in claim_dict["references"]:
self.references.append(WikidataReference(reference_dict))
Expand All @@ -199,16 +203,19 @@ def _validate_claim_dict(self, claim_dict: typedefs.ClaimDict) -> None:
for req_key in _REQUIRED_KEYS:
if req_key not in claim_dict:
raise ValueError(
f"required claim_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(claim_dict.keys())}"
"required claim_dict keys are {} but only found {}".format(
_REQUIRED_KEYS, list(claim_dict.keys())
)
)
self.claim_id = claim_dict["id"]
self.claim_type = claim_dict["type"]
self.rank = claim_dict["rank"]
self.mainsnak = WikidataSnak(claim_dict["mainsnak"])

def __str__(self) -> str:
return f"WikidataClaim(type={self.claim_type}, rank={self.rank}, mainsnak={self.mainsnak}, qualifiers={self.qualifiers})"
return "WikidataClaim(type={}, rank={}, mainsnak={}, qualifiers={})".format(
self.claim_type, self.rank, self.mainsnak, self.qualifiers
)

def __repr__(self) -> str:
return self.__str__()
Expand Down Expand Up @@ -243,21 +250,22 @@ def __init__(self, claim_list: typedefs.ClaimList) -> None:
self._claims = [WikidataClaim(claim_dict) for claim_dict in claim_list]

property_ids = set([claim.mainsnak.property_id for claim in self._claims])
self.property_id: Union[typedefs.PropertyId, None]
self.property_id = None # type: Union[typedefs.PropertyId, None]
if len(property_ids) == 1:
self.property_id = property_ids.pop()
elif len(property_ids) == 0:
self.property_id = None
else:
raise ValueError(
"claims in a claim list must all have the same property id "
f"but found multiple property ids {property_ids}"
"claims in a claim list must all have the same property id but found multiple property ids {}".format(
property_ids
)
)

def _validate_claim_list(self, claim_list: typedefs.ClaimList) -> None:
"""Raise excpetions if claim_list is not valid."""
if not isinstance(claim_list, list):
raise TypeError(f"claim_list must be a list but got {type(claim_list)}.")
raise TypeError("claim_list must be a list but got {}.".format(type(claim_list)))

@overload
def __getitem__(self, indx: int) -> WikidataClaim:
Expand All @@ -274,7 +282,9 @@ def __len__(self) -> int:
return len(self._claims)

def __str__(self) -> str:
return f"WikidataClaimGroup(property_id={self.property_id}, claims={self._claims})"
return "WikidataClaimGroup(property_id={}, claims={})".format(
self.property_id, self._claims
)

def __repr__(self) -> str:
return self.__str__()
17 changes: 10 additions & 7 deletions qwikidata/datavalue.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ def _validate_datavalue_dict(datavalue_dict: typedefs.DatavalueDict) -> None:
for req_key in _REQUIRED_KEYS:
if req_key not in datavalue_dict:
raise ValueError(
f"required datavalue_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(datavalue_dict.keys())}"
"required datavalue_dict keys are {} but only found {}".format(
_REQUIRED_KEYS, list(datavalue_dict.keys())
)
)

_VALID_TYPES = frozenset(
Expand All @@ -30,8 +31,9 @@ def _validate_datavalue_dict(datavalue_dict: typedefs.DatavalueDict) -> None:
)
if datavalue_dict["type"] not in _VALID_TYPES:
raise ValueError(
f"datavalue datatype={datavalue_dict['type']} not in "
f"valid datatypes {_VALID_TYPES}."
"datavalue datatype={} not in valid datatypes {}.".format(
datavalue_dict["type"], _VALID_TYPES
)
)


Expand Down Expand Up @@ -261,7 +263,7 @@ def get_parsed_datetime_dict(self) -> Dict[str, int]:
dict
a dictionary representing the timestring's year, month, and date
"""
datetime_dict: Dict[str, int] = {}
datetime_dict = {} # type: Dict[str, int]
timestring = self.value["time"]
match = self.STANDARD_DATE_REGEX.fullmatch(timestring)
if match:
Expand Down Expand Up @@ -360,6 +362,7 @@ def get_datavalue_from_snak_dict(snak_dict: typedefs.SnakDict) -> Union[Wikidata
return None
else:
raise ValueError(
f'snaktype must be one of ["value", "somevalue", "novalue"] '
f"but got {snak_dict['snaktype']}"
'snaktype must be one of ["value", "somevalue", "novalue"] but got {}.'.format(
snak_dict["snaktype"]
)
)
56 changes: 35 additions & 21 deletions qwikidata/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@ def _validate_entity_dict(entity_dict: typedefs.EntityDict) -> None:
"""Raise excpetions if entity_dict is not valid."""
_REQUIRED_KEYS = ["id", "type"]
if not isinstance(entity_dict, dict):
raise TypeError(f"entity_dict must be a dictionary but got {type(entity_dict)}.")
raise TypeError(
"entity_dict must be a dictionary but got {}.".format(type(entity_dict))
)
for req_key in _REQUIRED_KEYS:
if req_key not in entity_dict:
raise ValueError(
f"required entity_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(entity_dict.keys())}"
"required entity_dict keys are {} but only found {}.".format(
_REQUIRED_KEYS, list(entity_dict.keys())
)
)


Expand All @@ -42,7 +45,7 @@ class LabelDescriptionAliasMixin:
"""

_entity_dict: typedefs.EntityDict
_entity_dict = None # type: typedefs.EntityDict

@staticmethod
def _validate_label_desc_alias_dict(
Expand All @@ -53,8 +56,9 @@ def _validate_label_desc_alias_dict(
for req_key in _REQUIRED_KEYS:
if req_key not in label_desc_alias_dict:
raise ValueError(
f"required label_desc_alias_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(label_desc_alias_dict.keys())}"
"required label_desc_alias_dict keys are {} but only found {}.".format(
_REQUIRED_KEYS, list(label_desc_alias_dict.keys())
)
)

def get_label(self, lang: typedefs.LanguageCode = typedefs.LanguageCode("en")) -> str:
Expand Down Expand Up @@ -120,7 +124,7 @@ class ClaimsMixin:
* :py:class:`WikidataSense`
"""

_entity_dict: typedefs.EntityDict
_entity_dict = None # type: typedefs.EntityDict

@staticmethod
def _validate_claim_dict(claim_dict: typedefs.EntityDict) -> None:
Expand All @@ -129,8 +133,9 @@ def _validate_claim_dict(claim_dict: typedefs.EntityDict) -> None:
for req_key in _REQUIRED_KEYS:
if req_key not in claim_dict:
raise ValueError(
f"required claim_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(claim_dict.keys())}"
"required claim_dict keys are {}. but only found {}".format(
_REQUIRED_KEYS, list(claim_dict.keys())
)
)

def get_claim_groups(self) -> Dict[typedefs.PropertyId, WikidataClaimGroup]:
Expand Down Expand Up @@ -257,7 +262,9 @@ def _validate_item_dict(self, item_dict: typedefs.ItemDict) -> None:
"""Raise excpetions if item_dict is not valid."""
self._validate_entity_dict(item_dict)
if item_dict["type"] != "item":
raise ValueError(f"item_dict['type'] must be 'item' but found '{item_dict['type']}'")
raise ValueError(
"item_dict['type'] must be 'item' but found '{}'".format(item_dict["type"])
)
self._validate_label_desc_alias_dict(item_dict)
self._validate_claim_dict(item_dict)

Expand Down Expand Up @@ -341,7 +348,9 @@ def _validate_property_dict(self, property_dict: typedefs.PropertyDict) -> None:
self._validate_entity_dict(property_dict)
if property_dict["type"] != "property":
raise ValueError(
f"property_dict['type'] must be 'property' but found '{property_dict['type']}'"
"property_dict['type'] must be 'property' but found '{}'".format(
property_dict["type"]
)
)
self._validate_label_desc_alias_dict(property_dict)
self._validate_claim_dict(property_dict)
Expand Down Expand Up @@ -395,8 +404,9 @@ def _validate_form_dict(self, form_dict: typedefs.FormDict) -> None:
for req_key in _REQUIRED_KEYS:
if req_key not in form_dict:
raise ValueError(
f"required form_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(form_dict.keys())}"
"required form_dict keys are {} but only found {}".format(
_REQUIRED_KEYS, list(form_dict.keys())
)
)

def get_representation(self, lang: typedefs.LanguageCode = typedefs.LanguageCode("en")) -> str:
Expand All @@ -418,7 +428,9 @@ def get_representation(self, lang: typedefs.LanguageCode = typedefs.LanguageCode
return ""

def __str__(self) -> str:
return f"WikidataForm(form_id={self.form_id}, representation={self.get_representation()}, grammatical_features={self.grammatical_features})"
return "WikidataForm(form_id={}, representation={}, grammatical_features={})".format(
self.form_id, self.get_representation(), self.grammatical_features
)

def __repr__(self) -> str:
return self.__str__()
Expand Down Expand Up @@ -461,8 +473,9 @@ def _validate_sense_dict(self, sense_dict: typedefs.SenseDict) -> None:
for req_key in _REQUIRED_KEYS:
if req_key not in sense_dict:
raise ValueError(
f"required sense_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(sense_dict.keys())}"
"required sense_dict keys are {} but only found {}".format(
_REQUIRED_KEYS, list(sense_dict.keys())
)
)

def get_gloss(self, lang: typedefs.LanguageCode = typedefs.LanguageCode("en")) -> str:
Expand All @@ -481,7 +494,7 @@ def get_gloss(self, lang: typedefs.LanguageCode = typedefs.LanguageCode("en")) -
return ""

def __str__(self) -> str:
return f"WikidataSense(sense_id={self.sense_id}, gloss={self.get_gloss()})"
return "WikidataSense(sense_id={}, gloss={})".format(self.sense_id, self.get_gloss())

def __repr__(self) -> str:
return self.__str__()
Expand Down Expand Up @@ -516,7 +529,7 @@ class WikidataLexeme(ClaimsMixin, EntityMixin):

def __init__(self, lexeme_dict: typedefs.LexemeDict) -> None:
self._validate_lexeme_dict(lexeme_dict)
self._entity_dict: typedefs.LexemeDict = lexeme_dict
self._entity_dict = lexeme_dict # type: typedefs.LexemeDict
self.entity_id = lexeme_dict["id"]
self.entity_type = lexeme_dict["type"]
self.language = lexeme_dict["language"]
Expand All @@ -527,7 +540,7 @@ def _validate_lexeme_dict(self, lexeme_dict: typedefs.LexemeDict) -> None:
self._validate_entity_dict(lexeme_dict)
if lexeme_dict["type"] != "lexeme":
raise ValueError(
f"lexeme_dict['type'] must be 'lexeme' but found '{lexeme_dict['type']}'"
"lexeme_dict['type'] must be 'lexeme' but found '{}'".format(lexeme_dict["type"])
)
self._validate_claim_dict(lexeme_dict)

Expand All @@ -536,8 +549,9 @@ def _validate_lexeme_dict(self, lexeme_dict: typedefs.LexemeDict) -> None:
for req_key in _REQUIRED_KEYS:
if req_key not in lexeme_dict:
raise ValueError(
f"required lexeme_dict keys are {_REQUIRED_KEYS}. "
f"only found {list(lexeme_dict.keys())}"
"required lexeme_dict keys are {} but only found {}".format(
_REQUIRED_KEYS, list(lexeme_dict.keys())
)
)

def get_lemma(self, lang: typedefs.LanguageCode = typedefs.LanguageCode("en")) -> str:
Expand Down
12 changes: 6 additions & 6 deletions qwikidata/json_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def _write_chunk(
self, out_fbase: str, ichunk: int, out_lines: List[str]
) -> Tuple[List[str], int, str]:
"""Write a single chunk to disk."""
out_fname = f"{out_fbase}-ichunk_{ichunk}.json"
self.logger.debug(f"writing {out_fname}")
out_fname = "{}-ichunk_{}.json".format(out_fbase, ichunk)
self.logger.debug("writing {}".format(out_fname))
out_lines = [out_line.rstrip(",\n") for out_line in out_lines]
with open(out_fname, "w") as fp:
fp.write("[\n")
Expand All @@ -91,11 +91,11 @@ def _write_chunk(
if self.compression == "bz2":
args = ["bzip2", out_fname]
subprocess.check_output(args)
out_fname = f"{out_fname}.bz2"
out_fname = "{}.bz2".format(out_fname)
elif self.compression == "gz":
args = ["gzip", out_fname]
subprocess.check_output(args)
out_fname = f"{out_fname}.gz"
out_fname = "{}.gz".format(out_fname)

out_lines = []
ichunk += 1
Expand Down Expand Up @@ -123,8 +123,8 @@ def create_chunks(
out_fbase = self.basename

ichunk = 0
out_lines: List[str] = []
out_fnames: List[str] = []
out_lines = [] # type: List[str]
out_fnames = [] # type: List[str]

for iline, line in enumerate(wd_dump.iter_lines()):
if line.strip() in ["[", "]"]:
Expand Down
Loading

0 comments on commit 8438c88

Please sign in to comment.