-
Notifications
You must be signed in to change notification settings - Fork 0
/
e3_validation.py
130 lines (116 loc) · 6.72 KB
/
e3_validation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
'''
Created on Nov 22, 2016
@author: Thomas
'''
from autologging import logged
from pinject import copy_args_to_public_fields
import networkx as nx
@logged
class ValidRelation(object):
    """Pair a relation regex with the node counts on each side of its keyword.

    Instances are read elsewhere in this module through the attributes
    ``regex``, ``leftCount`` and ``rightCount``.
    """

    @copy_args_to_public_fields
    def __init__(self, regex, leftCount, rightCount):
        # No explicit assignments here: the decorator is relied upon to copy
        # each constructor argument to a public attribute of the same name.
        pass

    def __str__(self):
        """Return the regular-expression source of this relation."""
        return self.regex
# Regex fragment capturing one qualified node: two non-whitespace runs joined
# by a period.  Raw strings keep the backslashes for the regex engine instead
# of relying on Python passing unknown escape sequences through unchanged.
node = r"(\S+\.\S+)"
# Non-capturing alternation over the five basic relation keywords.
anyRCC5 = "(?:equals|includes|is_included_in|overlaps|disjoint)"
# One or more of the basic keywords enclosed in braces.
combinedRCC5s = r"{\s*(?:" + anyRCC5 + r"\s*)+\s*}"


def _make_valid_relation(keyword, leftCount, rightCount):
    """Build the ValidRelation for ``[<left nodes> <keyword> <right nodes>]``.

    ``keyword`` is a regex fragment; it is wrapped in a capturing group so the
    match yields ``leftCount`` node groups, then the keyword group, then
    ``rightCount`` node groups.
    """
    separator = r"\s+"
    left = separator.join([node] * leftCount)
    right = separator.join([node] * rightCount)
    regex = r"\[\s*" + left + r"\s+(" + keyword + r")\s+" + right + r"\s*\]"
    return ValidRelation(regex, leftCount, rightCount)


# Order matters: callers use the first relation whose regex matches.
validRelations = [
    _make_valid_relation("lsum", 2, 1),
    _make_valid_relation("l3sum", 3, 1),
    _make_valid_relation("l4sum", 4, 1),
    _make_valid_relation("rsum", 1, 2),
    _make_valid_relation("r3sum", 1, 3),
    _make_valid_relation("r4sum", 1, 4),
    _make_valid_relation("ldiff", 2, 1),
    _make_valid_relation("rdiff", 1, 2),
    _make_valid_relation("e4sum", 2, 2),
    _make_valid_relation("i4sum", 2, 2),
    _make_valid_relation("equals", 1, 1),
    _make_valid_relation("includes", 1, 1),
    _make_valid_relation("is_included_in", 1, 1),
    _make_valid_relation("overlaps", 1, 1),
    _make_valid_relation("disjoint", 1, 1),
    _make_valid_relation(combinedRCC5s, 1, 1),
]
@logged
class ModelValidator(object):
    """Checks on articulations and on the graph held by a taxonomy."""

    def is_valid_new_articulation(self, newArticulation, tap):
        """Return False if ``tap`` already holds an identical articulation.

        Two articulations count as identical when their ``leftNodes``,
        ``rightNodes`` and ``relation`` all compare equal.
        """
        duplicate_found = any(
            existing.leftNodes == newArticulation.leftNodes
            and existing.rightNodes == newArticulation.rightNodes
            and existing.relation == newArticulation.relation
            for existing in tap.articulations
        )
        return not duplicate_found

    def is_dag(self, taxonomy):
        """Return True only if ``taxonomy.g`` is a non-empty directed acyclic graph."""
        return (taxonomy.g.number_of_nodes() != 0
                and nx.is_directed_acyclic_graph(taxonomy.g))

    def is_tree(self, taxonomy):
        """Return True only if ``taxonomy.g`` is a non-empty tree."""
        # The emptiness test comes first because nx raises on an empty graph.
        return (taxonomy.g.number_of_nodes() != 0
                and nx.is_tree(taxonomy.g))
@logged
class CleantaxValidator(object):
    """Syntax-level validation of cleantax taxonomies and articulations."""

    def validate_cleantax(self, cleantax):
        """Validate every taxonomy and articulation read from ``cleantax``.

        Raises:
            ValidationException: if a taxonomy or an articulation is malformed.
        """
        # Function-level import kept from the original
        # (presumably avoids an import cycle with e3_io -- TODO confirm).
        import e3_io

        cleantaxReader = e3_io.CleantaxReader()
        cleantax = cleantaxReader.get_normalized_cleantax(cleantax)

        cleantaxTaxonomyLines = cleantaxReader.get_cleantax_taxonomy_lines(cleantax)
        for taxonomy in cleantaxTaxonomyLines:
            self.validate_cleantax_taxonomy(taxonomy)

        cleantaxArticulationLines = cleantaxReader.get_cleantax_articulation_lines(cleantax)
        validated_articulations = []
        # The first entry is skipped (presumably the articulation section
        # header -- verify against CleantaxReader).
        for articulation in cleantaxArticulationLines[1:]:
            self.validate_cleantax_articulation(
                articulation, cleantaxTaxonomyLines, validated_articulations)
            validated_articulations.append(articulation)

    def validate_cleantax_taxonomy(self, taxonomy):
        """Check the head line and the parenthesised lines of one taxonomy.

        ``taxonomy`` is a sequence of lines whose first element is the head.

        Raises:
            ValidationException: if the head does not have exactly three
                whitespace-separated parts, or if any following line (after
                stripping) does not start with '(' and end with ')'.  Empty
                taxonomies and blank lines are rejected the same way instead
                of failing with IndexError.
        """
        if not taxonomy or len(taxonomy[0].split()) != 3:
            raise ValidationException("Taxonomy head must consist of the three parts: taxonomy <id> <name>")

        # What are the validation requirements for a taxonomy in the euler
        # context? One or multiple roots possible?
        # Utilize a graph library before going ahead with this.
        # Only validate syntax for now.
        for line in taxonomy[1:]:
            line = line.strip()
            # startswith/endswith are safe on an empty string, unlike indexing.
            if not (line.startswith('(') and line.endswith(')')):
                raise ValidationException("Taxonomy line has to start with '(' and end with ')'")

    def validate_cleantax_articulation(self, articulation, taxonomies, articulations):
        """Validate one articulation line against the known relations and nodes.

        A warning is logged (validation still proceeds) when ``articulation``
        is already contained in ``articulations``.  The first entry of
        ``validRelations`` whose regex matches decides how the line is split
        into nodes, and each node is then checked against ``taxonomies``.

        Raises:
            ValidationException: if no relation matches, or if a referenced
                node is not declared in its taxonomy.
        """
        import re

        if articulation in articulations:
            self.__log.warning("This articulation already exists: " + articulation)

        # Map each taxonomy id (second token of the head line) to every token
        # found inside its parenthesised lines.
        taxonomyIdToNodes = {}
        for taxonomy in taxonomies:
            taxonomyId = taxonomy[0].split()[1]
            declaredNodes = []
            taxonomyIdToNodes[taxonomyId] = declaredNodes
            for line in taxonomy[1:]:
                declaredNodes.extend(line.strip()[1:-1].split())

        for validRelation in validRelations:
            match = re.match(validRelation.regex, articulation)
            if not match:
                continue
            groups = match.groups()
            # Groups are laid out as: left nodes, relation keyword, right nodes.
            leftNodes = groups[:validRelation.leftCount]
            rightNodes = groups[validRelation.leftCount + 1:]
            for qualifiedNode in leftNodes + rightNodes:
                self.validate_cleantax_node(qualifiedNode, taxonomyIdToNodes)
            return

        # Only needed for the error message, so imported on the failure path.
        import e3_model
        raise ValidationException("No valid relation found for articulation {articulation}. The set of supported relations is: {validRelations}.".format(
            articulation = articulation,
            validRelations = ', '.join(e3_model.relations)))

    def validate_cleantax_node(self, node, taxonomyIdToNodes):
        """Check that ``node`` reads ``<taxonomyId>.<nodeName>`` and is declared.

        Raises:
            ValidationException: if ``node`` does not contain exactly one
                period, if its taxonomy id is not a key of
                ``taxonomyIdToNodes``, or if its name is not listed under
                that taxonomy id.
        """
        taxonomyIdNotFoundText = "{taxonomyId} of {node} not found in the list of taxonomies ({taxonomyIds})"
        nodeNotFoundText = "{nodeName} of {node} not found in the nodes of taxonomy {taxonomyId}"

        if '.' not in node:
            raise ValidationException(node + " has an invalid node syntax. The period is missing.")
        parts = node.split('.')
        if len(parts) != 2:
            raise ValidationException(node + " has an invalid node syntax. More than one period contained.")

        nodeTaxonomyId, nodeName = parts
        if nodeTaxonomyId not in taxonomyIdToNodes:
            taxonomyIds = ', '.join(taxonomyIdToNodes.keys())
            raise ValidationException(taxonomyIdNotFoundText.format(taxonomyId = nodeTaxonomyId, node = node, taxonomyIds = taxonomyIds))
        if nodeName not in taxonomyIdToNodes[nodeTaxonomyId]:
            raise ValidationException(nodeNotFoundText.format(nodeName = nodeName, node = node, taxonomyId = nodeTaxonomyId))
class ValidationException(Exception):
    """Raised when a cleantax taxonomy, articulation or node fails validation."""