Commit

Fix a test case in metrics_plots_and_validations_evaluator_test.py.
PiperOrigin-RevId: 686706928
zhouhao138 authored and tfx-copybara committed Oct 17, 2024
1 parent b030a5a commit f8fef85
Showing 1 changed file with 1 addition and 337 deletions.
@@ -166,330 +166,6 @@ def ci_derived_fn():
self.assertLen(derived, 1)
self.assertLen(ci_derived, 1)

def testEvaluateWithKerasAndValidateMetrics(self):
model_dir, baseline_dir = self._getExportDir(), self._getBaselineDir()
eval_shared_model = self._build_keras_model('candidate', model_dir, mul=0)
baseline_eval_shared_model = self._build_keras_model(
'baseline', baseline_dir, mul=1
)

schema = text_format.Parse(
"""
tensor_representation_group {
key: ""
value {
tensor_representation {
key: "input_1"
value {
dense_tensor {
column_name: "input_1"
shape { dim { size: 1 } }
}
}
}
}
}
feature {
name: "input_1"
type: FLOAT
}
feature {
name: "label"
type: FLOAT
}
feature {
name: "example_weight"
type: FLOAT
}
feature {
name: "extra_feature"
type: BYTES
}
""",
schema_pb2.Schema(),
)
tfx_io = test_util.InMemoryTFExampleRecord(
schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN
)
tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
arrow_schema=tfx_io.ArrowSchema(),
tensor_representations=tfx_io.TensorRepresentations(),
)

examples = [
self._makeExample(
input_1=0.0,
label=1.0,
example_weight=1.0,
extra_feature='non_model_feature',
),
self._makeExample(
input_1=1.0,
label=0.0,
example_weight=0.5,
extra_feature='non_model_feature',
),
]

eval_config = config_pb2.EvalConfig(
model_specs=[
config_pb2.ModelSpec(
name='candidate',
label_key='label',
example_weight_key='example_weight',
),
config_pb2.ModelSpec(
name='baseline',
label_key='label',
example_weight_key='example_weight',
is_baseline=True,
),
],
slicing_specs=[config_pb2.SlicingSpec()],
metrics_specs=[
config_pb2.MetricsSpec(
metrics=[
config_pb2.MetricConfig(
class_name='ExampleCount',
# 2 > 10, NOT OK.
threshold=config_pb2.MetricThreshold(
value_threshold=config_pb2.GenericValueThreshold(
lower_bound={'value': 10}
)
),
),
],
model_names=['candidate', 'baseline'],
example_weights=config_pb2.ExampleWeightOptions(
unweighted=True
),
),
config_pb2.MetricsSpec(
metrics=[
config_pb2.MetricConfig(
class_name='WeightedExampleCount',
# 1.5 < 1, NOT OK.
threshold=config_pb2.MetricThreshold(
value_threshold=config_pb2.GenericValueThreshold(
upper_bound={'value': 1}
)
),
),
],
model_names=['candidate', 'baseline'],
example_weights=config_pb2.ExampleWeightOptions(weighted=True),
),
config_pb2.MetricsSpec(
metrics=[
config_pb2.MetricConfig(
class_name='MeanLabel',
# 0 > 1 and 0 > 1?: NOT OK.
threshold=config_pb2.MetricThreshold(
change_threshold=config_pb2.GenericChangeThreshold(
direction=config_pb2.MetricDirection.HIGHER_IS_BETTER,
relative={'value': 1},
absolute={'value': 1},
)
),
),
config_pb2.MetricConfig(
# MeanPrediction = (0+0)/(1+0.5) = 0
class_name='MeanPrediction',
# -.01 < 0 < .01, OK.
# Diff% = -.333/.333 = -100% < -99%, OK.
# Diff = 0 - .333 = -.333 < 0, OK.
threshold=config_pb2.MetricThreshold(
value_threshold=config_pb2.GenericValueThreshold(
upper_bound={'value': 0.01},
lower_bound={'value': -0.01},
),
change_threshold=config_pb2.GenericChangeThreshold(
direction=config_pb2.MetricDirection.LOWER_IS_BETTER,
relative={'value': -0.99},
absolute={'value': 0},
),
),
),
],
thresholds={
'loss': config_pb2.MetricThreshold(
value_threshold=config_pb2.GenericValueThreshold(
upper_bound={'value': 0}
)
)
},
model_names=['candidate', 'baseline'],
),
],
)
eval_shared_models = [eval_shared_model, baseline_eval_shared_model]
extractors = [
features_extractor.FeaturesExtractor(
eval_config=eval_config,
tensor_representations=tensor_adapter_config.tensor_representations,
),
labels_extractor.LabelsExtractor(eval_config),
example_weights_extractor.ExampleWeightsExtractor(eval_config),
predictions_extractor.PredictionsExtractor(
eval_shared_model=eval_shared_models, eval_config=eval_config
),
unbatch_extractor.UnbatchExtractor(),
slice_key_extractor.SliceKeyExtractor(eval_config=eval_config),
]
evaluators = [
metrics_plots_and_validations_evaluator.MetricsPlotsAndValidationsEvaluator(
eval_config=eval_config, eval_shared_model=eval_shared_models
)
]

with beam.Pipeline() as pipeline:
# pylint: disable=no-value-for-parameter
evaluations = (
pipeline
| 'Create' >> beam.Create([e.SerializeToString() for e in examples])
| 'BatchExamples' >> tfx_io.BeamSource()
| 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
| 'ExtractEvaluate'
>> model_eval_lib.ExtractAndEvaluate(
extractors=extractors, evaluators=evaluators
)
)

# pylint: enable=no-value-for-parameter

def check_validations(got):
try:
self.assertLen(got, 1)
got = got[0]
expected_metric_validations_per_slice = [
text_format.Parse(
"""
metric_key {
name: "loss"
model_name: "candidate"
}
metric_threshold {
value_threshold {
upper_bound {
value: 0.0
}
}
}
metric_value {
double_value {
value: 7.712474346160889
}
}
""",
validation_result_pb2.ValidationFailure(),
),
text_format.Parse(
"""
metric_key {
name: "example_count"
model_name: "candidate"
example_weighted { }
}
metric_threshold {
value_threshold {
lower_bound {
value: 10.0
}
}
}
metric_value {
double_value {
value: 2.0
}
}
""",
validation_result_pb2.ValidationFailure(),
),
text_format.Parse(
"""
metric_key {
name: "weighted_example_count"
model_name: "candidate"
example_weighted { value: true }
}
metric_threshold {
value_threshold {
upper_bound {
value: 1.0
}
}
}
metric_value {
double_value {
value: 1.5
}
}
""",
validation_result_pb2.ValidationFailure(),
),
text_format.Parse(
"""
metric_key {
name: "mean_label"
model_name: "candidate"
is_diff: true
example_weighted { value: true }
}
metric_threshold {
change_threshold {
absolute {
value: 1.0
}
relative {
value: 1.0
}
direction: HIGHER_IS_BETTER
}
}
metric_value {
double_value {
value: 0.0
}
}
""",
validation_result_pb2.ValidationFailure(),
),
]
# Loss not supported in TFv1
if _TF_MAJOR_VERSION < 2:
expected_metric_validations_per_slice[0].ClearField('metric_value')
expected_metric_validations_per_slice[0].message = (
'Metric not found.'
)
self.assertFalse(got.validation_ok)
self.assertLen(got.metric_validations_per_slice, 1)
self.assertLen(
got.metric_validations_per_slice[0].failures,
len(expected_metric_validations_per_slice),
)
self.assertCountEqual(
got.metric_validations_per_slice[0].failures,
expected_metric_validations_per_slice,
)

except AssertionError as err:
raise util.BeamAssertException(err)

util.assert_that(
evaluations[constants.VALIDATIONS_KEY], check_validations
)

metric_filter = beam.metrics.metric.MetricsFilter().with_name(
'metric_computed_ExampleCount_v2_' + constants.TF_KERAS
)
actual_metrics_count = (
pipeline.run()
.metrics()
.query(filter=metric_filter)['counters'][0]
.committed
)
self.assertEqual(actual_metrics_count, 1)

def testEvaluateWithKerasAndDiffMetrics(self):
model_dir, baseline_dir = self._getExportDir(), self._getBaselineDir()
eval_shared_model = self._build_keras_model('candidate', model_dir, mul=0)
@@ -1080,31 +756,19 @@ def check_metrics(got):
name='mean_label', example_weighted=False
)
binary_accuracy_key = metric_types.MetricKey(
-        name='binary_accuracy', example_weighted=None
+        name='binary_accuracy', example_weighted=False
)
self.assertIn(binary_accuracy_key, got_metrics)
binary_accuracy_unweighted_key = metric_types.MetricKey(
name='binary_accuracy', example_weighted=False
)
self.assertIn(binary_accuracy_unweighted_key, got_metrics)
# Loss not supported in TFv1
if _TF_MAJOR_VERSION > 1:
loss_key = metric_types.MetricKey(
name='loss', example_weighted=None
)
self.assertIn(loss_key, got_metrics)
expected_values = {
example_count_key: 2,
weighted_example_count_key: 1.0 + 0.5,
label_key: (1.0 * 1.0 + 0.0 * 0.5) / (1.0 + 0.5),
label_unweighted_key: (1.0 + 0.0) / (1.0 + 1.0),
}
if add_custom_metrics:
custom_key = metric_types.MetricKey(
name='custom', example_weighted=None
)
self.assertIn(custom_key, got_metrics)
expected_values[custom_key] = 0.0 + 1.0 + 0.0 + 1.0
self.assertDictElementsAlmostEqual(got_metrics, expected_values)

except AssertionError as err:

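For reference, the surviving hunk's one-line change swaps example_weighted=None for example_weighted=False in the MetricKey built for binary_accuracy. Below is a minimal sketch of why that field matters for the test's assertIn lookups over got_metrics; it assumes tensorflow_model_analysis is installed, that metric_types.MetricKey compares by value (as the one-line fix implies), and the got_metrics dict shown here is hypothetical.

# Minimal sketch -- assumes tensorflow_model_analysis is installed and that
# MetricKey compares by value, as the commit's one-line fix implies.
from tensorflow_model_analysis.metrics import metric_types

# Two keys for the same metric that differ only in the example_weighted field.
key_unset = metric_types.MetricKey(name='binary_accuracy', example_weighted=None)
key_unweighted = metric_types.MetricKey(name='binary_accuracy', example_weighted=False)

# Hypothetical evaluator output, keyed the way got_metrics is keyed in the test.
got_metrics = {key_unweighted: 0.875}

assert key_unweighted in got_metrics   # the lookup the fixed test now performs
assert key_unset not in got_metrics    # the mismatch the old key produced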