From 3db282fc2d559f54f618940e05d7bef7f94cc61f Mon Sep 17 00:00:00 2001
From: David Goffredo <david.goffredo@datadoghq.com>
Date: Fri, 15 Jul 2022 11:48:08 -0400
Subject: [PATCH] span sampling rules (#231)

* undo non-test related removals from #218

* fix bug

* add additional "sample" test

* clang-format

* glob.{h,cpp}

* class SpanSampler, without using or testing it

* small (premature?) optimization

* TODO: AMEND

* checkpoint: added span rules to Tracer, but not elsewhere

* checkpoint: add span rules to SpanBuffer, but don't use them

* checkpoint: written but untested

* test rule parsing

* obnoxious compiler

* test rule matching

* test span sampling

* clang-format

* test environment variables

* repo-level documentation

* attempt at fixing doc links

* describe glob in the repo-level docs

* unit tests catch bugs!

* a little const correctness

* code comment typo

* temporary files are temporary

* review: trim some docs and add an example

* review: describe globs more concretely

* review: make a future diff smaller
---
 BUILD.bazel                   |   2 +
 doc/configuration.md          |  25 +-
 doc/sampling.md               |  50 ++++
 include/datadog/opentracing.h |  74 +++++-
 src/glob.cpp                  |  59 +++++
 src/glob.h                    |  30 +++
 src/pending_trace.cpp         |  33 ++-
 src/pending_trace.h           |   7 +-
 src/sample.cpp                | 146 +++++++++++
 src/sample.h                  |  83 +++++++
 src/sampling_mechanism.h      |   7 +-
 src/span_buffer.cpp           |  14 +-
 src/span_buffer.h             |  18 +-
 src/tracer.cpp                |  19 +-
 src/tracer_factory.cpp        |   3 +
 src/tracer_options.cpp        |  57 +++++
 test/CMakeLists.txt           |   1 +
 test/glob_test.cpp            |  52 ++++
 test/mocks.h                  |   7 +-
 test/propagation_test.cpp     |   7 +-
 test/sample_test.cpp          | 443 +++++++++++++++++++++++++++++++++-
 test/span_buffer_test.cpp     |   3 +-
 test/tracer_factory_test.cpp  | 151 ++++++++++++
 23 files changed, 1253 insertions(+), 38 deletions(-)
 create mode 100644 src/glob.cpp
 create mode 100644 src/glob.h
 create mode 100644 test/glob_test.cpp

diff --git a/BUILD.bazel b/BUILD.bazel
index bfd8f338..f29fc556 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -6,6 +6,8 @@ cc_library(
         "src/clock.h",
         "src/encoder.cpp",
         "src/encoder.h",
+        "src/glob.cpp",
+        "src/glob.h",
         "src/limiter.cpp",
         "src/limiter.h",
         "src/logger.cpp",
diff --git a/doc/configuration.md b/doc/configuration.md
index 942bb02f..d6dca1c7 100644
--- a/doc/configuration.md
+++ b/doc/configuration.md
@@ -112,7 +112,7 @@ specified as a JSON array of objects.
 For more information about the configuration of trace sampling, see
 [sampling.md][6].
 
-- **TracerOptions member**: `std::string sampling_rules`
+- **TracerOptions member**: `std::string sampling_rules` _(JSON)_
 - **JSON property**: `"sampling_rules"` _(array of objects)_
 - **Environment variable**: `DD_TRACE_SAMPLING_RULES` _(JSON)_
 - **Default value**: `[]`
@@ -253,6 +253,28 @@ made, is propagated between services along the trace in the form of the
 order to prevent rejection by peers or other HTTP header policies.  This
 configuration option is that limit, in bytes.
 
+### Span Sampling Rules
+Span sampling rules allow spans to be sent to Datadog that otherwise would be
+dropped due to trace sampling.
+
+For more information about the configuration of span sampling, see the [Span
+Sampling][11] section of [sampling.md][6].
+
+- **TracerOptions member**: `std::string span_sampling_rules` _(JSON)_
+- **JSON property**: `"span_sampling_rules"` _(array of objects)_
+- **Environment variable**: `DD_SPAN_SAMPLING_RULES` _(JSON)_
+- **Default value**: `[]`
+
+### Span Sampling Rules File
+Span sampling rules (see above) can be specified in their own file.  The value
+of the `DD_SPAN_SAMPLING_RULES_FILE` environment variable is the path to a file
+whose contents are the span sampling rules JSON array.
+
+- **Environment variable**: `DD_SPAN_SAMPLING_RULES_FILE`
+
+Note that `DD_SPAN_SAMPLING_RULES_FILE` is ignored when
+`DD_SPAN_SAMPLING_RULES` is also in the environment.
+
 - **TracerOptions member**: `uint64_t tags_header_size`
 - **JSON property**: `tags_header_size` _(number)_
 - **Environment variable**: `DD_TRACE_TAGS_PROPAGATION_MAX_LENGTH`
@@ -267,3 +289,4 @@ configuration option is that limit, in bytes.
 [7]: https://github.com/openzipkin/b3-propagation
 [8]: https://pubs.opengroup.org/onlinepubs/9699919799/
 [9]: https://docs.datadoghq.com/getting_started/tagging/unified_service_tagging
+[11]: sampling.md#span-sampling
diff --git a/doc/sampling.md b/doc/sampling.md
index 9e44352a..67d3af72 100644
--- a/doc/sampling.md
+++ b/doc/sampling.md
@@ -120,5 +120,55 @@ This configuration option has the same meaning as the `DD_TRACE_RATE_LIMIT`
 environment variable.  Note that the environment variable overrides the
 `TracerOptions` field if both are specified.
 
+Span Sampling
+-------------
+Span sampling is used to select spans to keep even when the enclosing
+trace is dropped.
+
+Similar to _trace_ sampling rules, _span_ sampling rules are configured as a
+JSON array of object, where each object may contain the following properties:
+```
+[{
+    "service": <matches the span's service name, or any if absent>,
+    "name": <matches the span's operation name, or any if absent>,
+    "sample_rate": <the probability of sampling matching spans, or 1.0 if absent>,
+    "max_per_second": <limit in spans sampled by this rule each second, or unlimited if absent>
+}, ...]
+```
+
+The `service` and `name` are glob patterns, where "glob" here means:
+- `*` matches any substring, including the empty string,
+- `?` matches exactly one of any character, and
+- any other character matches exactly one of itself.
+
+Span sampling rules are examined only when the enclosing trace is to be
+dropped.
+
+The first span sampling rule that matches a span is used to make a span
+sampling decision for that span.  If the decision is "keep," then the span is
+sent to Datadog despite the enclosing trace having been dropped.
+
+Span sampling rules can be configured [directly][3] or [in a file][4].
+
+For example, consider the following span sampling rules:
+```shell
+export DD_SPAN_SAMPLING_RULES='[
+  {"service": "router", "name": "rack.request", "max_per_second": 2000},
+  {"service": "classic-mysql", "name": "mysql2.*"},
+  {"service": "authn?", "sample_rate": 0.5}
+]'
+```
+These rules state:
+
+- When a trace is dropped, keep spans whose service name is `router` and whose
+  operation name is `rack.request`, but keep at most 2000 such spans per
+  second.
+- When a trace is dropped, keep spans whose service name is `classic-mysql` and
+  whose operation name begins with `mysql2.`.
+- When a trace is dropped, keep 50% of spans whose service name is `authn`
+  followed by another character, e.g. `authny`, `authnj`. 
+
 [1]: https://docs.datadoghq.com/tracing/trace_ingestion/mechanisms/?tab=environmentvariables#in-the-agent
 [2]: https://docs.datadoghq.com/tracing/setup_overview/proxy_setup/?tab=nginx
+[3]: configuration.md#span-sampling-rules
+[4]: configuration.md#span-sampling-rules-file
diff --git a/include/datadog/opentracing.h b/include/datadog/opentracing.h
index 3283f2f5..08d3661d 100644
--- a/include/datadog/opentracing.h
+++ b/include/datadog/opentracing.h
@@ -76,19 +76,19 @@ struct TracerOptions {
   double sample_rate = std::nan("");
   // This option is deprecated, and may be removed in future releases.
   bool priority_sampling = true;
-  // Rules sampling is applied when initiating traces to determine the sampling
-  // rate.  Configuration is specified as a JSON array of objects. Each object
-  // must have a "sample_rate", while the "name" and "service" fields are
-  // optional. The "sample_rate" value must be between 0.0 and 1.0 (inclusive).
-  // Rules are checked in order, so a more specific rule should be specified
-  // before a less specific rule.  Note that if the `sample_rate` field of this
-  // `TracerOptions` has a non-NaN value, then there is an implicit rule at the
-  // end of the list that matches any trace unmatched by other rules, and
-  // applies a sampling rate of `sample_rate`.  If no rule matches a trace,
-  // then "priority sampling" is applied instead, where the sample rate is
-  // determined by the Datadog trace agent.  If any rules are invalid, they are
-  // ignored. This option is also configurable as the environment variable
-  // DD_TRACE_SAMPLING_RULES.
+  // Rule-based trace sampling is applied when initiating traces to determine
+  // the sampling rate.  Configuration is specified as a JSON array of objects.
+  // Each object must have a "sample_rate", while the "name" and "service"
+  // fields are optional. The "sample_rate" value must be between 0.0 and 1.0
+  // (inclusive).  Rules are checked in order, so a more specific rule should
+  // be specified before a less specific rule.  Note that if the `sample_rate`
+  // field of this `TracerOptions` has a non-NaN value, then there is an
+  // implicit rule at the end of the list that matches any trace unmatched by
+  // other rules, and applies a sampling rate of `sample_rate`.  If no rule
+  // matches a trace, then "priority sampling" is applied instead, where the
+  // sample rate is determined by the Datadog trace agent.  If any rules are
+  // invalid, they are ignored. This option is also configurable as the
+  // environment variable DD_TRACE_SAMPLING_RULES.
   std::string sampling_rules = "[]";
   // Max amount of time to wait between sending traces to agent, in ms. Agent discards traces older
   // than 10s, so that is the upper bound.
@@ -156,6 +156,54 @@ struct TracerOptions {
   // serialized tags allowed.  Trace-wide tags whose serialized length exceeds
   // this limit are not propagated.
   uint64_t tags_header_size = 512;
+  // Rule-based span sampling, which is distinct from rule-based trace
+  // sampling, is used to determine which spans to keep, if any, when trace
+  // sampling decides to drop the trace.
+  // When the trace is to be dropped, each span is matched against the
+  // `span_sampling_rules`.  For each span, the first rule to match, if any,
+  // applies to the span and a span-specific sampling decision is made.  If the
+  // decision for the span is to keep, then the span is sent to Datadog even
+  // though the enclosing trace is not.
+  // `span_sampling_rules` is a JSON array of objects, where each object has
+  // the following shape:
+  //
+  //     {
+  //       "service": <pattern>,
+  //       "name": <pattern>,
+  //       "sample_rate": <number between 0.0 and 1.0>,
+  //       "max_per_second": <positive number>
+  //     }
+  //
+  // The properties mean the following:
+  //
+  // - "service" is a glob pattern that must match a span's service name in
+  //   order for the rule to match.  If "service" is not specified, then its
+  //   default value is "*".  Glob patterns are described below.
+  // - "name" is a glob pattern that must match a span's operation name in
+  //   order for the rule to match.  If "name" is not specified, then its default
+  //   value is "*".  Glob patterns are described below.
+  // - "sample_rate" is the probability that a span matching the rule will be
+  //   kept.  If "sample_rate" is not specified, then its default value is 1.0.
+  // - "max_per_second" is the maximum number of spans that will be kept on
+  //   account of this rule each second.  Spans that would cause the limit to
+  //   be exceeded are dropped.  If "max_per_second" is not specified, then
+  //   there is no limit.
+  //
+  // Glob patterns are a simplified form of regular expressions.  Certain
+  // characters in a glob pattern have special meaning:
+  //
+  // - "*" matches any substring, including the empty string.
+  // - "?" matches exactly one instance of any character.
+  // - Other characters match exactly one instance of themselves.
+  //
+  // For example:
+  //
+  // - The glob pattern "foobar" is matched by "foobar" only.
+  // - The glob pattern "foo*" is matched by "foobar", "foo", and "fooop", but
+  //   not by "fond".
+  // - The glob pattern "a?b*e*" is matched by "amble" and "albedo", but not by
+  //   "albino".
+  std::string span_sampling_rules = "[]";
 };
 
 // TraceEncoder exposes the data required to encode and submit traces to the
diff --git a/src/glob.cpp b/src/glob.cpp
new file mode 100644
index 00000000..46bb61d4
--- /dev/null
+++ b/src/glob.cpp
@@ -0,0 +1,59 @@
+#include "glob.h"
+
+#include <cstdint>
+
+namespace datadog {
+namespace opentracing {
+
+bool glob_match(ot::string_view pattern, ot::string_view subject) {
+  // This is a backtracking implementation of the glob matching algorithm.
+  // The glob pattern language supports `*` and `?`, but no escape sequences.
+  //
+  // Based off of a Go example in <https://research.swtch.com/glob> accessed
+  // February 3, 2022.
+
+  using Index = std::size_t;
+  Index p = 0;       // [p]attern index
+  Index s = 0;       // [s]ubject index
+  Index next_p = 0;  // next [p]attern index
+  Index next_s = 0;  // next [s]ubject index
+
+  while (p < pattern.size() || s < subject.size()) {
+    if (p < pattern.size()) {
+      const char pattern_char = pattern[p];
+      switch (pattern_char) {
+        case '*':
+          // Try to match at `s`.  If that doesn't work out, restart at
+          // `s + 1` next.
+          next_p = p;
+          next_s = s + 1;
+          ++p;
+          continue;
+        case '?':
+          if (s < subject.size()) {
+            ++p;
+            ++s;
+            continue;
+          }
+          break;
+        default:
+          if (s < subject.size() && subject[s] == pattern_char) {
+            ++p;
+            ++s;
+            continue;
+          }
+      }
+    }
+    // Mismatch.  Maybe restart.
+    if (0 < next_s && next_s <= subject.size()) {
+      p = next_p;
+      s = next_s;
+      continue;
+    }
+    return false;
+  }
+  return true;
+}
+
+}  // namespace opentracing
+}  // namespace datadog
diff --git a/src/glob.h b/src/glob.h
new file mode 100644
index 00000000..46ff5cbd
--- /dev/null
+++ b/src/glob.h
@@ -0,0 +1,30 @@
+#ifndef DD_OPENTRACING_GLOB_H
+#define DD_OPENTRACING_GLOB_H
+
+// This component provides a string matching function, `glob_match`, that
+// returns whether a specified string matches a specified pattern, where the
+// pattern language is the following:
+//
+// - "*" matches any contiguous substring, including the empty string.
+// - "?" matches exactly one instance of any character.
+// - Other characters match exactly one instance of themselves.
+//
+// The patterns are here called "glob patterns," though they are different from
+// the patterns used in Unix shells.
+
+#include <opentracing/string_view.h>
+
+namespace ot = opentracing;
+
+namespace datadog {
+namespace opentracing {
+
+// Return whether the specified `subject` matches the specified glob `pattern`,
+// i.e.  whether `subject` is a member of the set of strings represented by the
+// glob `pattern`.
+bool glob_match(ot::string_view pattern, ot::string_view subject);
+
+}  // namespace opentracing
+}  // namespace datadog
+
+#endif
diff --git a/src/pending_trace.cpp b/src/pending_trace.cpp
index a7197554..6aa52941 100644
--- a/src/pending_trace.cpp
+++ b/src/pending_trace.cpp
@@ -17,6 +17,9 @@ const std::string event_sample_rate_metric = "_dd1.sr.eausr";
 const std::string rules_sampler_applied_rate = "_dd.rule_psr";
 const std::string rules_sampler_limiter_rate = "_dd.limit_psr";
 const std::string priority_sampler_applied_rate = "_dd.agent_psr";
+const std::string span_sampling_mechanism = "_dd.span_sampling.mechanism";
+const std::string span_sampling_rule_rate = "_dd.span_sampling.rule_rate";
+const std::string span_sampling_limit = "_dd.span_sampling.max_per_second";
 
 // Return whether the specified `span` is without a parent among the specified
 // `all_spans_in_trace`.
@@ -71,6 +74,25 @@ void finish_root_span(PendingTrace& trace, SpanData& span) {
   finish_span(trace, span);
 }
 
+// Determine whether the specified `span` matches a rule in the specified
+// `span_sampler` and the sampling decision of that rule is to keep the `span`.
+// If so, then add appropriate tags to `span`.
+void apply_span_sampling(SpanSampler& span_sampler, SpanData& span) {
+  SpanSampler::Rule* const rule = span_sampler.match(span);
+  if (!rule || !rule->sample(span)) {
+    return;
+  }
+
+  // The span matched a span rule, and the rule decided to keep the span.
+  // Add span-sampling-specific tags to the span.
+  span.metrics[span_sampling_mechanism] = int(SamplingMechanism::SpanRule);
+  span.metrics[span_sampling_rule_rate] = rule->config().sample_rate;
+  const double limit = rule->config().max_per_second;
+  if (!std::isnan(limit)) {
+    span.metrics[span_sampling_limit] = limit;
+  }
+}
+
 }  // namespace
 
 PendingTrace::PendingTrace(std::shared_ptr<const Logger> logger, uint64_t trace_id)
@@ -87,7 +109,7 @@ PendingTrace::PendingTrace(std::shared_ptr<const Logger> logger, uint64_t trace_
       all_spans(),
       sampling_priority(std::move(sampling_priority)) {}
 
-void PendingTrace::finish() {
+void PendingTrace::finish(SpanSampler* span_sampler) {
   // Apply changes to spans, in particular treating the root / local-root
   // span as special.
   for (const auto& span : *finished_spans) {
@@ -97,6 +119,15 @@ void PendingTrace::finish() {
       finish_span(*this, *span);
     }
   }
+
+  // If we have span sampling rules and are dropping the trace, see if any
+  // span sampling tags need to be added.
+  if (span_sampler && !span_sampler->rules().empty() && sampling_priority &&
+      int(*sampling_priority) <= 0) {
+    for (const auto& span : *finished_spans) {
+      apply_span_sampling(*span_sampler, *span);
+    }
+  }
 }
 
 void PendingTrace::applySamplingDecisionToTraceTags() {
diff --git a/src/pending_trace.h b/src/pending_trace.h
index 87754654..dccf9eea 100644
--- a/src/pending_trace.h
+++ b/src/pending_trace.h
@@ -25,7 +25,12 @@ struct PendingTrace {
   PendingTrace(std::shared_ptr<const Logger> logger, uint64_t trace_id,
                std::unique_ptr<SamplingPriority> sampling_priority);
 
-  void finish();
+  // Modify span tags in order to prepare this trace for serialization.  Use
+  // the optionally specified `span_sampler` to identify spans to keep should
+  // this trace be dropped.  If `span_sampler` is `nullptr`, then no span
+  // sampling is performed.
+  void finish(SpanSampler *span_sampler = nullptr);
+
   // If this tracer did not inherit a sampling decision from an upstream
   // service, but instead made a sampling decision, then record that decision
   // in the "_dd.p.dm" member of `trace_tags`.
diff --git a/src/sample.cpp b/src/sample.cpp
index caee98dc..01ab5d13 100644
--- a/src/sample.cpp
+++ b/src/sample.cpp
@@ -1,7 +1,15 @@
 #include "sample.h"
 
+#include <algorithm>
+#include <cmath>
+#include <limits>
 #include <sstream>
 
+#include "clock.h"
+#include "glob.h"
+#include "logger.h"
+#include "span.h"
+
 namespace datadog {
 namespace opentracing {
 
@@ -135,5 +143,143 @@ RuleResult RulesSampler::match(const std::string& service, const std::string& na
 
 void RulesSampler::updatePrioritySampler(json config) { priority_sampler_.configure(config); }
 
+SpanSampler::Rule::Config::Config()
+    : service_pattern("*"),
+      operation_name_pattern("*"),
+      sample_rate(1.0),
+      max_per_second(std::nan("")),
+      text() {}
+
+SpanSampler::Rule::Rule(const SpanSampler::Rule::Config& config, TimeProvider clock)
+    : config_(config) {
+  if (!std::isnan(config.max_per_second)) {
+    limiter_ = std::make_unique<Limiter>(clock, config.max_per_second);
+  }
+}
+
+bool SpanSampler::Rule::match(const SpanData& span) const {
+  const auto is_match = [](const std::string& pattern, const std::string& subject) {
+    // Since "*" is the default pattern, optimize for that case.
+    return pattern == "*" || glob_match(pattern, subject);
+  };
+
+  return is_match(config_.service_pattern, span.service) &&
+         is_match(config_.operation_name_pattern, span.name);
+}
+
+bool SpanSampler::Rule::sample(const SpanData& span) { return roll(span) && allow(); }
+
+bool SpanSampler::Rule::roll(const SpanData& span) const {
+  const uint64_t max_hash = maxIdFromSampleRate(config_.sample_rate);
+  // Use the span ID (not the trace ID), so that rolls can differ among spans
+  // within the same trace (given the same sample rate).
+  const uint64_t hashed_id = span.span_id * constant_rate_hash_factor;
+  return hashed_id < max_hash;
+}
+
+bool SpanSampler::Rule::allow() {
+  if (!limiter_) {
+    return true;
+  }
+
+  return limiter_->allow().allowed;
+}
+
+const SpanSampler::Rule::Config& SpanSampler::Rule::config() const { return config_; }
+
+void SpanSampler::configure(ot::string_view raw_json, const Logger& logger, TimeProvider clock) {
+  rules_.clear();
+
+  // `raw_json` is expected to be a JSON array of objects, where each object
+  // configures a `SpanSampler::Rule`.
+  try {
+    const auto log_invalid_json = [&](const std::string& description, const json& object) {
+      logger.Log(LogLevel::error, description + ": " + object.dump());
+    };
+
+    const json config_json = json::parse(raw_json);
+
+    for (const auto& item : config_json.items()) {
+      const auto rule_json = item.value();
+
+      if (!rule_json.is_object()) {
+        log_invalid_json("span sampler: unexpected element type in rules array", rule_json);
+        continue;
+      }
+
+      // Default values are enforced by the `Rule::Config` constructor.
+      Rule::Config config;
+
+      if (rule_json.contains("service")) {
+        if (!rule_json.at("service").is_string()) {
+          log_invalid_json("span sampler: invalid type for 'service' (expected string)",
+                           rule_json);
+          continue;
+        }
+        config.service_pattern = rule_json.at("service").get<std::string>();
+      }
+
+      if (rule_json.contains("name")) {
+        if (!rule_json.at("name").is_string()) {
+          log_invalid_json("span sampler: invalid type for 'name' (expected string)", rule_json);
+          continue;
+        }
+        config.operation_name_pattern = rule_json.at("name").get<std::string>();
+      }
+
+      if (rule_json.contains("sample_rate")) {
+        if (!rule_json.at("sample_rate").is_number()) {
+          log_invalid_json("span sampler: invalid type for 'sample_rate' (expected number)",
+                           rule_json);
+          continue;
+        }
+        const double sample_rate = rule_json.at("sample_rate").get<json::number_float_t>();
+        if (!(sample_rate >= 0.0 && sample_rate <= 1.0)) {
+          log_invalid_json(
+              "span sampler: invalid value for 'sample_rate' (expected value between 0.0 and 1.0)",
+              rule_json);
+          continue;
+        }
+        config.sample_rate = sample_rate;
+      }
+
+      if (rule_json.contains("max_per_second")) {
+        if (!rule_json.at("max_per_second").is_number()) {
+          log_invalid_json("span sampler: invalid type for 'max_per_second' (expected number)",
+                           rule_json);
+          continue;
+        }
+        const double max_per_second = rule_json.at("max_per_second").get<json::number_float_t>();
+        if (max_per_second <= 0) {
+          log_invalid_json(
+              "span sampler: invalid value for 'max_per_second' (expected positive value)",
+              rule_json);
+          continue;
+        }
+        config.max_per_second = max_per_second;
+      }
+
+      config.text = rule_json.dump();
+      rules_.emplace_back(config, clock);
+    }
+  } catch (const json::parse_error& error) {
+    std::string message;
+    message += "span sampler: unable to parse JSON config: ";
+    message += error.what();
+    logger.Log(LogLevel::error, message);
+  }
+}
+
+SpanSampler::Rule* SpanSampler::match(const SpanData& span) {
+  const auto found = std::find_if(rules_.begin(), rules_.end(),
+                                  [&](const Rule& rule) { return rule.match(span); });
+  if (found == rules_.end()) {
+    return nullptr;
+  }
+  return &*found;
+}
+
+const std::vector<SpanSampler::Rule>& SpanSampler::rules() const { return rules_; }
+
 }  // namespace opentracing
 }  // namespace datadog
diff --git a/src/sample.h b/src/sample.h
index 5bfeed35..0439c4c6 100644
--- a/src/sample.h
+++ b/src/sample.h
@@ -13,6 +13,7 @@
 #include <iostream>
 #include <limits>
 #include <map>
+#include <memory>
 #include <mutex>
 #include <nlohmann/json.hpp>
 
@@ -86,6 +87,88 @@ class RulesSampler {
   PrioritySampler priority_sampler_;
 };
 
+class Logger;
+struct SpanData;
+
+// `SpanSampler` is consulted for each span, but only after another sampler has
+// decided that the _trace_ will be dropped (i.e. sampling priority <= 0).
+// Span sampling might select individual spans to be kept anyway, based on
+// separately configured rules (`DD_SPAN_SAMPLING_RULES`).
+//
+// Configure `SpanSampling` by calling the `configure` member function.  Then,
+// see if a span matches one of the configured rules by calling the `match`
+// member function.  If the span matched a rule, then a pointer the rule is
+// returned.  The rule's `sample` member function then determines whether the
+// span is kept on account of the rule.
+class SpanSampler {
+ public:
+  // `Rule` contains the configuration parsed from a span sampling rule, as
+  // well as the associated rate limiter if so configured.
+  class Rule {
+   public:
+    // `Config` contains the parsed configuration for a span sampling rule, as
+    // well as the original text of the JSON configuration particular to the
+    // rule.
+    struct Config {
+      std::string service_pattern;         // glob pattern
+      std::string operation_name_pattern;  // glob pattern
+      double sample_rate;                  // never NaN
+      double max_per_second;               // NaN if there is no max
+      std::string text;                    // as the rule appeared in the JSON array
+
+      Config();
+    };
+
+   private:
+    Config config_;
+    std::unique_ptr<Limiter> limiter_;
+
+   public:
+    // Create a rule having the specified 'config'.  If `config` contains a
+    // non-NaN `max_per_second`, then configure the rule's limiter accordingly
+    // with the specified `clock`.
+    explicit Rule(const Config& config, TimeProvider clock);
+
+    // Return whether the specified `span` matches this rule.
+    bool match(const SpanData& span) const;
+
+    // Without checking whether the specified `span` matches this rule, return
+    // whether `span` is kept by the rule.
+    bool sample(const SpanData& span);
+
+    // Return this rule's configuration.
+    const Config& config() const;
+
+   private:
+    // Without checking whether the specified `span` matches this rule, and
+    // without consulting the limiter, return whether `span` is kept on account
+    // of this rule.
+    bool roll(const SpanData& span) const;
+
+    // Return whether another span is permitted past this rule's rate limiter.
+    // If there is no rate limiter associated with this rule, then this
+    // function always returns `true`.
+    bool allow();
+  };
+
+ private:
+  std::vector<Rule> rules_;
+
+ public:
+  // Overwrite this sampler's rules with those parsed from the specified
+  // `raw_json` configuration text.  Use the specified `clock` for rate
+  // limiting.  If an error occurs, skip the offending rule and emit a
+  // diagnostic using the specified `logger`.
+  void configure(ot::string_view raw_json, const Logger& logger, TimeProvider clock);
+
+  // Return a pointer to the first rule that the specified `span` matches, or
+  // return `nullptr` if `span` does not match any configured rule.
+  Rule* match(const SpanData& span);
+
+  // Return this sampler's rules.
+  const std::vector<Rule>& rules() const;
+};
+
 }  // namespace opentracing
 }  // namespace datadog
 
diff --git a/src/sampling_mechanism.h b/src/sampling_mechanism.h
index f0cfc293..eec501b5 100644
--- a/src/sampling_mechanism.h
+++ b/src/sampling_mechanism.h
@@ -59,7 +59,12 @@ enum class SamplingMechanism {
   // Reserved for future use.
   AppSec = 5,
   // Reserved for future use.
-  RemoteRateUserDefined = 6
+  RemoteRateUserDefined = 6,
+  // Reserved for future use.
+  RemoteRateEmergency = 7,
+  // Individual span kept by a matching span sampling rule when the enclosing
+  // trace was dropped.
+  SpanRule = 8,
 };
 
 // `OptionalSamplingMechanism` is either a `SamplingMechanism` or "empty,"
diff --git a/src/span_buffer.cpp b/src/span_buffer.cpp
index bd72ef5e..073e71b8 100644
--- a/src/span_buffer.cpp
+++ b/src/span_buffer.cpp
@@ -9,8 +9,13 @@ namespace datadog {
 namespace opentracing {
 
 SpanBuffer::SpanBuffer(std::shared_ptr<const Logger> logger, std::shared_ptr<Writer> writer,
-                       std::shared_ptr<RulesSampler> sampler, SpanBufferOptions options)
-    : logger_(logger), writer_(writer), sampler_(sampler), options_(options) {}
+                       std::shared_ptr<RulesSampler> trace_sampler,
+                       std::shared_ptr<SpanSampler> span_sampler, SpanBufferOptions options)
+    : logger_(logger),
+      writer_(writer),
+      trace_sampler_(trace_sampler),
+      span_sampler_(span_sampler),
+      options_(options) {}
 
 void SpanBuffer::registerSpan(const SpanContext& context) {
   std::lock_guard<std::mutex> lock_guard{mutex_};
@@ -52,7 +57,7 @@ void SpanBuffer::finishSpan(std::unique_ptr<SpanData> span) {
   trace.finished_spans->push_back(std::move(span));
   if (trace.finished_spans->size() == trace.all_spans.size()) {
     generateSamplingPriorityImpl(trace.finished_spans->back().get());
-    trace.finish();
+    trace.finish(span_sampler_.get());
     unbufferAndWriteTrace(trace_id);
   }
 }
@@ -171,7 +176,8 @@ OptionalSamplingPriority SpanBuffer::generateSamplingPriorityImpl(const SpanData
 
   // Consult the sampler for a decision, save the decision, and then return the
   // saved decision.
-  auto sampler_result = sampler_->sample(span->env(), span->service, span->name, span->trace_id);
+  auto sampler_result =
+      trace_sampler_->sample(span->env(), span->service, span->name, span->trace_id);
   setSamplerResult(span->trace_id, sampler_result);
   setSamplingPriorityFromSampler(span->trace_id, sampler_result);
   return getSamplingPriorityImpl(span->trace_id);
diff --git a/src/span_buffer.h b/src/span_buffer.h
index 3f0be003..bb942e08 100644
--- a/src/span_buffer.h
+++ b/src/span_buffer.h
@@ -18,6 +18,7 @@ namespace opentracing {
 
 class Writer;
 class SpanContext;
+class SpanSampler;
 
 struct SpanBufferOptions {
   bool enabled = true;
@@ -32,8 +33,20 @@ struct SpanBufferOptions {
 // traces to a Writer.
 class SpanBuffer {
  public:
+  // Create a span buffer that:
+  //
+  // - uses the specified `logger` to log diagnostics,
+  // - uses the specified `writer` to output completed trace segments,
+  // - uses the specified `trace_sampler` to make decisions about whether to keep traces,
+  // - uses the specified `span_sampler` to make decisions about whether to keep spans when a trace
+  //   is dropped,
+  // - is configured using the specified `options`.
+  //
+  // If `span_sampler` is `nullptr`, then span sampling is disabled (but
+  // `trace_sampler` is still consulted for trace sampling decisions).
   SpanBuffer(std::shared_ptr<const Logger> logger, std::shared_ptr<Writer> writer,
-             std::shared_ptr<RulesSampler> sampler, SpanBufferOptions options);
+             std::shared_ptr<RulesSampler> trace_sampler,
+             std::shared_ptr<SpanSampler> span_sampler, SpanBufferOptions options);
   virtual ~SpanBuffer() = default;
 
   void registerSpan(const SpanContext& context);
@@ -109,7 +122,8 @@ class SpanBuffer {
   std::shared_ptr<const Logger> logger_;
   std::shared_ptr<Writer> writer_;
   mutable std::mutex mutex_;
-  std::shared_ptr<RulesSampler> sampler_;
+  std::shared_ptr<RulesSampler> trace_sampler_;
+  std::shared_ptr<SpanSampler> span_sampler_;
 
  protected:
   // Exists to make it easy for a subclass (ie, our testing mock) to override on-trace-finish
diff --git a/src/tracer.cpp b/src/tracer.cpp
index f2527c8e..dfd81d6e 100644
--- a/src/tracer.cpp
+++ b/src/tracer.cpp
@@ -11,8 +11,8 @@
 #include <unistd.h>
 #endif
 
+#include <cassert>
 #include <cmath>
-#include <fstream>
 #include <random>
 #include <sstream>
 
@@ -137,7 +137,7 @@ uint64_t traceTagsPropagationMaxLength(const TracerOptions &options, const Logge
 void Tracer::configureRulesSampler(std::shared_ptr<RulesSampler> sampler) noexcept {
   try {
     auto log_invalid_json = [&](const std::string &description, json &object) {
-      logger_->Log(LogLevel::info, description + ": " + object.get<std::string>());
+      logger_->Log(LogLevel::error, description + ": " + object.dump());
     };
     json config = json::parse(opts_.sampling_rules);
     for (auto &item : config.items()) {
@@ -160,6 +160,7 @@ void Tracer::configureRulesSampler(std::shared_ptr<RulesSampler> sampler) noexce
         log_invalid_json(
             "rules sampler: invalid value for sample rate (expected value between 0.0 and 1.0)",
             rule);
+        continue;
       }
       // "service" and "name" are optional
       bool has_service = rule.contains("service") && rule.at("service").is_string();
@@ -223,16 +224,19 @@ Tracer::Tracer(TracerOptions options, std::shared_ptr<SpanBuffer> buffer, TimePr
       legacy_obfuscation_(legacyObfuscationEnabled()) {}
 
 Tracer::Tracer(TracerOptions options, std::shared_ptr<Writer> writer,
-               std::shared_ptr<RulesSampler> sampler, std::shared_ptr<const Logger> logger)
+               std::shared_ptr<RulesSampler> trace_sampler, std::shared_ptr<const Logger> logger)
     : logger_(logger),
       opts_(options),
       get_time_(getRealTime),
       get_id_(getId),
       legacy_obfuscation_(legacyObfuscationEnabled()) {
-  configureRulesSampler(sampler);
+  assert(logger_);
+  configureRulesSampler(trace_sampler);
+  auto span_sampler = std::make_shared<SpanSampler>();
+  span_sampler->configure(opts_.span_sampling_rules, *logger_, get_time_);
   startupLog(options);
   buffer_ = std::make_shared<SpanBuffer>(
-      logger_, writer, sampler,
+      logger_, writer, trace_sampler, span_sampler,
       SpanBufferOptions{isEnabled(), reportingHostname(options), analyticsRate(options),
                         options.service, traceTagsPropagationMaxLength(options, *logger_)});
 }
@@ -285,8 +289,9 @@ std::unique_ptr<ot::Span> Tracer::StartSpanWithOptions(ot::string_view operation
     span->SetTag(tag.first, tag.second);
   }
   return span;
-} catch (const std::bad_alloc &) {
-  // At least don't crash.
+} catch (const std::exception &error) {
+  logger_->Log(LogLevel::error,
+               std::string("Unexpected error in StartSpanWithOptions: ") + error.what());
   return nullptr;
 }
 
diff --git a/src/tracer_factory.cpp b/src/tracer_factory.cpp
index 5c899e0d..09f0122c 100644
--- a/src/tracer_factory.cpp
+++ b/src/tracer_factory.cpp
@@ -93,6 +93,9 @@ ot::expected<TracerOptions> optionsFromConfig(const char *configuration,
     if (config.find("tags_header_size") != config.end()) {
       config.at("tags_header_size").get_to(options.tags_header_size);
     }
+    if (config.find("span_sampling_rules") != config.end()) {
+      options.span_sampling_rules = config.at("span_sampling_rules").dump();
+    }
   } catch (const nlohmann::detail::type_error &) {
     error_message = "configuration has an argument with an incorrect type";
     return ot::make_unexpected(std::make_error_code(std::errc::invalid_argument));
diff --git a/src/tracer_options.cpp b/src/tracer_options.cpp
index da44940e..6ce0c615 100644
--- a/src/tracer_options.cpp
+++ b/src/tracer_options.cpp
@@ -4,6 +4,7 @@
 #include <datadog/version.h>
 #include <opentracing/ext/tags.h>
 
+#include <fstream>
 #include <iomanip>
 #include <limits>
 #include <nlohmann/json.hpp>
@@ -11,6 +12,7 @@
 #include <sstream>
 
 #include "bool.h"
+#include "logger.h"
 
 namespace ot = opentracing;
 
@@ -114,6 +116,59 @@ ot::expected<double, std::string> parseDouble(const std::string &text, double mi
   return ot::make_unexpected("not within the range of a double: " + text);
 }
 
+// Update `options.span_sampling_rules` if there are relevant environment
+// variables defined.  If an error occurs, log a diagnostic via
+// `options.log_func`.
+void applySpanSamplingRulesFromEnvironment(TracerOptions &options) {
+  // Prefer DD_SPAN_SAMPLING_RULES, if present.
+  // Next, prefer DD_SPAN_SAMPLING_RULES_FILE.
+  // If both are specified, log an error and use DD_SPAN_SAMPLING_RULES.
+  // If neither are specified, use `options.span_sampling_rules`.
+
+  const auto logger_ptr = makeLogger(options);
+  const auto &logger = *logger_ptr;
+
+  const char *const span_rules = std::getenv("DD_SPAN_SAMPLING_RULES");
+  const char *const span_rules_file = std::getenv("DD_SPAN_SAMPLING_RULES_FILE");
+  if (span_rules) {
+    options.span_sampling_rules = span_rules;
+    if (span_rules_file) {
+      logger.Log(LogLevel::error,
+                 "Both DD_SPAN_SAMPLING_RULES and DD_SPAN_SAMPLING_RULES_FILE have values in the "
+                 "environment.  DD_SPAN_SAMPLING_RULES will be used, and "
+                 "DD_SPAN_SAMPLING_RULES_FILE will be ignored.");
+    }
+    return;
+  }
+
+  if (span_rules_file) {
+    const auto log_file_error = [&](const char *operation) {
+      std::string message;
+      message += "Unable to ";
+      message += operation;
+      message += " file \"";
+      message += span_rules_file;
+      message += "\" specified as value of environment variable DD_SPAN_SAMPLING_RULES_FILE.";
+      logger.Log(LogLevel::error, message);
+    };
+
+    std::ifstream file(span_rules_file);
+    if (!file) {
+      log_file_error("open");
+      return;
+    }
+
+    std::stringstream span_rules;
+    span_rules << file.rdbuf();
+    if (!file) {
+      log_file_error("read");
+      return;
+    }
+
+    options.span_sampling_rules = span_rules.str();
+  }
+}
+
 }  // namespace
 
 ot::expected<std::set<PropagationStyle>> asPropagationStyle(
@@ -273,6 +328,8 @@ ot::expected<TracerOptions, std::string> applyTracerOptionsFromEnvironment(
     opts.sample_rate = maybe_value.value();
   }
 
+  applySpanSamplingRulesFromEnvironment(opts);
+
   return opts;
 }
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 42592164..67d0e22b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -24,3 +24,4 @@ _datadog_test(tracer_options_test tracer_options_test.cpp)
 _datadog_test(tracer_test tracer_test.cpp)
 _datadog_test(limiter_test limiter_test.cpp)
 _datadog_test(logger_test logger_test.cpp)
+_datadog_test(glob_test glob_test.cpp)
diff --git a/test/glob_test.cpp b/test/glob_test.cpp
new file mode 100644
index 00000000..973a9322
--- /dev/null
+++ b/test/glob_test.cpp
@@ -0,0 +1,52 @@
+// This test covers the glob-style string pattern matching function,
+// `glob_match`, defined in `glob.h`.
+
+#include "../src/glob.h"
+
+#include <catch2/catch.hpp>
+
+using namespace datadog::opentracing;
+
+TEST_CASE("glob") {
+  struct TestCase {
+    ot::string_view pattern;
+    ot::string_view subject;
+    bool expected;
+  };
+
+  auto test_case = GENERATE(values<TestCase>(
+      {// clang-format off
+    // from the reference implementation
+    // https://github.com/DataDog/tag-matching-sampling-rules/blob/master/glob.mjs
+    {"foo", "foo", true},
+    {"foo.*", "foo.you", true},
+    {"foo.*", "snafoo.", false},
+    {"hi*there", "hithere", true},
+    {"*stuff", "lots of stuff", true},
+    {"*stuff", "stuff to think about", false},
+    {"*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaax", false},
+    {"*a*a*a*a*a*a", "aaaaaaaarrrrrrraaaraaarararaarararaarararaaa", true},
+
+    // from deliberation with Zach Groves
+    {"aok*", "aok**", true},
+
+    // question marks
+    {"mysql??", "mysql01", true},
+    {"mysql??", "mysql1x", true},
+    {"n?-ingress-*", "ny-ingress-backup", true},
+    {"n?-ingress-*", "nj-ingress-leader", true},
+    {"n?-ingress-*", "nj-ingress", false},
+
+    // edge cases
+    {"", "", true},
+    {"", "a", false},
+    {"*", "", true},
+    {"?", "", false}
+  }));
+  // clang-format on
+
+  CAPTURE(test_case.pattern);
+  CAPTURE(test_case.subject);
+  CAPTURE(test_case.expected);
+  REQUIRE(glob_match(test_case.pattern, test_case.subject) == test_case.expected);
+}
diff --git a/test/mocks.h b/test/mocks.h
index fef2735f..ba3d5537 100644
--- a/test/mocks.h
+++ b/test/mocks.h
@@ -219,14 +219,15 @@ struct MockWriter : public Writer {
 struct MockBuffer : public SpanBuffer {
   MockBuffer()
       : SpanBuffer(std::make_shared<MockLogger>(), nullptr, std::make_shared<RulesSampler>(),
-                   SpanBufferOptions{}){};
+                   nullptr, SpanBufferOptions{}){};
   explicit MockBuffer(std::shared_ptr<RulesSampler> sampler)
-      : SpanBuffer(std::make_shared<MockLogger>(), nullptr, sampler, SpanBufferOptions{}){};
+      : SpanBuffer(std::make_shared<MockLogger>(), nullptr, sampler, nullptr,
+                   SpanBufferOptions{}){};
   // This constructor overload is provided for tests where the service name is
   // relevant.
   MockBuffer(std::shared_ptr<RulesSampler> sampler, std::string service,
              uint64_t tags_header_size = 512)
-      : SpanBuffer(std::make_shared<MockLogger>(), nullptr, sampler,
+      : SpanBuffer(std::make_shared<MockLogger>(), nullptr, sampler, nullptr,
                    SpanBufferOptions{true, "localhost", std::nan(""), service, tags_header_size}) {
   }
 
diff --git a/test/propagation_test.cpp b/test/propagation_test.cpp
index 9affd582..ebc51497 100644
--- a/test/propagation_test.cpp
+++ b/test/propagation_test.cpp
@@ -419,7 +419,8 @@ TEST_CASE("sampling behaviour") {
   auto logger = std::make_shared<MockLogger>();
   auto sampler = std::make_shared<MockRulesSampler>();
   auto writer = std::make_shared<MockWriter>(sampler);
-  auto buffer = std::make_shared<SpanBuffer>(logger, writer, sampler, SpanBufferOptions{});
+  auto buffer =
+      std::make_shared<SpanBuffer>(logger, writer, sampler, nullptr, SpanBufferOptions{});
   TracerOptions tracer_options{"", 0, "service_name", "web"};
   std::shared_ptr<Tracer> tracer{new Tracer{tracer_options, buffer, getRealTime, getId}};
   ot::Tracer::InitGlobal(tracer);
@@ -633,7 +634,8 @@ TEST_CASE("force tracing behaviour") {
   auto logger = std::make_shared<MockLogger>();
   auto sampler = std::make_shared<MockRulesSampler>();
   auto writer = std::make_shared<MockWriter>(sampler);
-  auto buffer = std::make_shared<SpanBuffer>(logger, writer, sampler, SpanBufferOptions{});
+  auto buffer =
+      std::make_shared<SpanBuffer>(logger, writer, sampler, nullptr, SpanBufferOptions{});
   TracerOptions tracer_options{"", 0, "service_name", "web"};
   std::shared_ptr<Tracer> tracer{new Tracer{tracer_options, buffer, getRealTime, getId}};
   ot::Tracer::InitGlobal(tracer);
@@ -788,7 +790,6 @@ TEST_CASE("propagated Datadog tags (x-datadog-tags)") {
         std::cerr << "Exception thrown in test: " << error.what() << '\n';
       }
     }
-
     SECTION("including a 'decision maker' for us, if we make the sampling decision") {
       const std::string serialized_tags = "_dd.p.hello=world";
       // Our sampler will make a decision.
diff --git a/test/sample_test.cpp b/test/sample_test.cpp
index 114f1956..53c93c03 100644
--- a/test/sample_test.cpp
+++ b/test/sample_test.cpp
@@ -1,13 +1,16 @@
 #include "../src/sample.h"
 
+#include <algorithm>
 #include <catch2/catch.hpp>
 #include <ctime>
+#include <nlohmann/json.hpp>
 
 #include "../src/agent_writer.h"
 #include "../src/span.h"
 #include "../src/tracer.h"
 #include "mocks.h"
 using namespace datadog::opentracing;
+using json = nlohmann::json;
 
 TEST_CASE("priority sampler unit test") {
   PrioritySampler sampler;
@@ -305,8 +308,446 @@ TEST_CASE("rules sampler") {
     const auto tag_found = span_data.meta.find("_dd.p.dm");
     REQUIRE(tag_found != span_data.meta.end());
     const std::string& decision_maker = tag_found->second;
-
     const std::string expected = "-" + std::to_string(int(SamplingMechanism::Rule));
     REQUIRE(decision_maker == expected);
   }
 }
+
+TEST_CASE("SpanSampler rule parsing") {
+  MockLogger logger;
+  const auto dummy_clock = []() { return TimePoint(); };
+  SpanSampler sampler;
+
+  SECTION("empty array means no rules") {
+    sampler.configure("[]", logger, dummy_clock);
+    REQUIRE(sampler.rules().size() == 0);
+    REQUIRE(logger.records.size() == 0);
+  }
+
+  SECTION("default values for rule properties") {
+    sampler.configure("[{}]", logger, dummy_clock);
+    REQUIRE(sampler.rules().size() == 1);
+    REQUIRE(logger.records.size() == 0);
+
+    const SpanSampler::Rule::Config& config = sampler.rules().front().config();
+    CAPTURE(config.max_per_second);
+    REQUIRE(std::isnan(config.max_per_second));
+    REQUIRE(config.operation_name_pattern == "*");
+    REQUIRE(config.sample_rate == 1.0);
+    REQUIRE(config.service_pattern == "*");
+    REQUIRE(config.text == "{}");
+  }
+
+  SECTION("valid values for rule properties") {
+    const std::string rule_json = R"json({
+      "service": "foosvc",
+      "name": "handle.thing",
+      "sample_rate": 0.1,
+      "max_per_second": 1000
+    })json";
+    sampler.configure("[" + rule_json + "]", logger, dummy_clock);
+    REQUIRE(sampler.rules().size() == 1);
+    REQUIRE(logger.records.size() == 0);
+
+    const SpanSampler::Rule::Config& config = sampler.rules().front().config();
+    REQUIRE(config.max_per_second == 1000);
+    REQUIRE(config.operation_name_pattern == "handle.thing");
+    REQUIRE(config.sample_rate == 0.1);
+    REQUIRE(config.service_pattern == "foosvc");
+    REQUIRE(json::parse(config.text) == json::parse(rule_json));
+  }
+
+  SECTION("invalid JSON yields no rules and logs error") {
+    auto bad_json = GENERATE(as<ot::string_view>{}, "this is not json", "[{'neither': 'is this'}]",
+                             "[{}, {4}, {}]");
+
+    CAPTURE(bad_json);
+    sampler.configure(bad_json, logger, dummy_clock);
+    REQUIRE(sampler.rules().size() == 0);
+    REQUIRE(logger.records.size() == 1);
+    const auto& log_record = logger.records.front();
+    REQUIRE(log_record.level == LogLevel::error);
+    CAPTURE(log_record.message);
+    REQUIRE(log_record.message.find("JSON") != std::string::npos);
+  }
+
+  SECTION("invalid rules are skipped and log error") {
+    struct TestCase {
+      ot::string_view rules_json;
+      unsigned expected_rule_count;
+      ot::string_view expected_error_excerpt;
+    };
+
+    auto test_case = GENERATE(values<TestCase>(
+        {// "sample_rate" has the wrong type
+         {R"json([{"sample_rate": "foo"}, {}, {}, {}])json", 3, "sample_rate"},
+         // "sample_rate" is out of range
+         {R"json([{"sample_rate": 1.2}, {}, {}, {}])json", 3, "sample_rate"},
+         // "max_per_second" has the wrong type
+         {R"json([{}, {"max_per_second": null}, {}, {}])json", 3, "max_per_second"},
+         // "max_per_second" is out of range
+         {R"json([{}, {"max_per_second": 0}, {}, {}])json", 3, "max_per_second"},
+         // "service" has the wrong type
+         {R"json([{}, {}, {"service": 10}, {}])json", 3, "service"},
+         // "name" has the wrong type
+         {R"json([{}, {}, {}, {"name": false}])json", 3, "name"}}));
+
+    CAPTURE(test_case.rules_json);
+    sampler.configure(test_case.rules_json, logger, dummy_clock);
+
+    REQUIRE(sampler.rules().size() == test_case.expected_rule_count);
+
+    REQUIRE(logger.records.size() == 1);
+    const auto& log_record = logger.records.front();
+    CAPTURE(log_record.message);
+    REQUIRE(log_record.level == LogLevel::error);
+
+    CAPTURE(test_case.expected_error_excerpt);
+    REQUIRE(log_record.message.find(test_case.expected_error_excerpt) != std::string::npos);
+  }
+}
+
+namespace {
+
+// `join({"x", "y", "z"}, " -> ") == "x -> y -> z"`
+std::string join(const std::vector<std::string>& pieces, ot::string_view separator) {
+  std::string result;
+  auto iter = pieces.begin();
+  const auto end = pieces.end();
+  if (iter != end) {
+    result += *iter;
+    for (++iter; iter != end; ++iter) {
+      result.append(separator.data(), separator.size());
+      result += *iter;
+    }
+  }
+  return result;
+}
+
+}  // namespace
+
+TEST_CASE("SpanSampler matching") {
+  MockLogger logger;
+  const auto dummy_clock = []() { return TimePoint(); };
+  SpanSampler sampler;
+
+  const std::vector<std::string> json_rules = {
+      R"json({"service": "mysql", "name": "exec.*", "sample_rate": 1.0})json",
+      R"json({"service": "mysql*", "sample_rate": 0.1})json",
+      R"json({"name": "super.auth", "sample_rate": 1.0})json",
+      R"json({"name": "super.auth??", "sample_rate": 1.0})json",
+  };
+
+  sampler.configure("[" + join(json_rules, ", ") + "]", logger, dummy_clock);
+  REQUIRE(logger.records.size() == 0);
+  REQUIRE(sampler.rules().size() == json_rules.size());
+
+  SECTION("span can match multiple rules, but the first matching rule is chosen") {
+    SpanData span;
+    span.service = "mysql";
+    span.name = "exec.query";
+    // `span` matches both `json_rules[0]` and `json_rules[1]`, but the earlier
+    // rule will be chosen (`json_rules[0]`).
+
+    // First check that two rules could match.
+    const unsigned match_count =
+        std::count_if(sampler.rules().begin(), sampler.rules().end(),
+                      [&](const SpanSampler::Rule& rule) { return rule.match(span); });
+    REQUIRE(match_count == 2);
+
+    // Then check that the first one is chosen.
+    SpanSampler::Rule* rule = sampler.match(span);
+    REQUIRE(rule == &sampler.rules()[0]);
+  }
+
+  SECTION("no match") {
+    SpanData span;
+    span.service = "table";
+    span.name = "check.please";
+    REQUIRE(sampler.match(span) == nullptr);
+  }
+
+  SECTION("match by service name") {
+    SpanData span;
+    span.service = "mysql123";
+    span.name = "cache.lookup";
+    // matches `json_rules[1]`
+    REQUIRE(sampler.match(span) == &sampler.rules()[1]);
+  }
+
+  SECTION("match by operation name") {
+    SpanData span;
+    span.service = "langley";
+    span.name = "super.auth";
+    // matches `json_rules[2]`
+    REQUIRE(sampler.match(span) == &sampler.rules()[2]);
+  }
+
+  SECTION("match by service name and operation name") {
+    SpanData span;
+    span.service = "mysql";
+    span.name = "exec.query";
+    // matches `json_rules[0]` (as before)
+    SpanSampler::Rule* rule = sampler.match(span);
+    REQUIRE(rule == &sampler.rules()[0]);
+  }
+
+  SECTION("match involving question marks") {
+    SpanData span;
+    span.service = "roswell";
+    span.name = "super.auth51";
+    // matches `json_rules[3]` (not `json_rules[2]`)
+    REQUIRE(sampler.match(span) == &sampler.rules()[3]);
+  }
+}
+
+TEST_CASE("SpanSampler sampling") {
+  // Starting calendar time 2022-07-01 00:00:00 local time
+  std::tm start{};
+  start.tm_year = 122;
+  start.tm_mon = 7;
+  start.tm_mday = 1;
+  TimePoint now{std::chrono::system_clock::from_time_t(std::mktime(&start)),
+                std::chrono::steady_clock::time_point{}};
+  // Note: Use `advanceTime(now, ...)` to advance the clock.
+  const auto clock = [&now]() { return now; };
+  std::uint64_t next_id = 1;
+  const auto make_id = [&next_id]() { return next_id++; };
+  const auto trace_sampler = std::make_shared<MockRulesSampler>();
+  const auto writer = std::make_shared<MockWriter>(trace_sampler);
+  const auto span_sampler = std::make_shared<SpanSampler>();
+  const auto logger = std::make_shared<MockLogger>();
+  const auto span_buffer = std::make_shared<SpanBuffer>(logger, writer, trace_sampler,
+                                                        span_sampler, SpanBufferOptions{});
+  TracerOptions tracer_options;
+  tracer_options.service = "foosvc";
+  const auto tracer =
+      std::make_shared<Tracer>(tracer_options, span_buffer, clock, make_id, logger);
+  const auto has_span_sampling_tag = [](const auto& span_ptr) {
+    const auto& numeric_tags = span_ptr->metrics;
+    return numeric_tags.count("_dd.span_sampling.mechanism") ||
+           numeric_tags.count("_dd.span_sampling.rule_rate") ||
+           numeric_tags.count("_dd.span_sampling.max_per_second");
+  };
+
+  SECTION("no span_sampling tags when there are no span sampling rules") {
+    // Make sure that the trace sampler is dropping the trace, otherwise we
+    // wouldn't expect the span sampling rules to matter.
+    trace_sampler->rule_rate = 0;
+    trace_sampler->sampling_mechanism = SamplingMechanism::Manual;
+    trace_sampler->sampling_priority =
+        std::make_unique<SamplingPriority>(SamplingPriority::UserDrop);
+
+    // We expect an empty array of rules to mean that span sampling won't
+    // happen.
+    span_sampler->configure("[]", *logger, clock);
+    {
+      const auto root = tracer->StartSpan("root");
+      REQUIRE(root);
+      advanceTime(now, std::chrono::milliseconds(8));
+      const auto child1 = tracer->StartSpan("child1", {ot::ChildOf(&root->context())});
+      REQUIRE(child1);
+      advanceTime(now, std::chrono::milliseconds(5));
+      const auto child2 = tracer->StartSpan("child2", {ot::ChildOf(&root->context())});
+      REQUIRE(child2);
+      advanceTime(now, std::chrono::milliseconds(31));
+      const auto grandchild = tracer->StartSpan("grandchild", {ot::ChildOf(&child1->context())});
+      REQUIRE(grandchild);
+      advanceTime(now, std::chrono::milliseconds(2));
+    }
+
+    REQUIRE(writer->traces.size() == 1);
+    const auto& trace = writer->traces.front();
+    REQUIRE(trace.size() == 4);
+
+    REQUIRE(std::none_of(trace.begin(), trace.end(), has_span_sampling_tag));
+  }
+
+  SECTION("no span_sampling tags when the trace is kept") {
+    // When the trace is kept, span sampling rules aren't consulted (even if
+    // they would match and keep spans).
+    trace_sampler->rule_rate = 1;
+    trace_sampler->sampling_mechanism = SamplingMechanism::Manual;
+    trace_sampler->sampling_priority =
+        std::make_unique<SamplingPriority>(SamplingPriority::UserKeep);
+
+    const auto rules_json = R"json([
+      {"service": "foosvc", "name": "grandchild"},
+      {"name": "child*"},
+      {"service": "foosvc", "max_per_second": 1000}
+    ])json";
+    span_sampler->configure(rules_json, *logger, clock);
+    {
+      const auto root = tracer->StartSpan("root");
+      REQUIRE(root);
+      advanceTime(now, std::chrono::milliseconds(8));
+      const auto child1 = tracer->StartSpan("child1", {ot::ChildOf(&root->context())});
+      REQUIRE(child1);
+      advanceTime(now, std::chrono::milliseconds(5));
+      const auto child2 = tracer->StartSpan("child2", {ot::ChildOf(&root->context())});
+      REQUIRE(child2);
+      advanceTime(now, std::chrono::milliseconds(31));
+      const auto grandchild = tracer->StartSpan("grandchild", {ot::ChildOf(&child1->context())});
+      REQUIRE(grandchild);
+      advanceTime(now, std::chrono::milliseconds(2));
+    }
+
+    REQUIRE(writer->traces.size() == 1);
+    const auto& trace = writer->traces.front();
+    REQUIRE(trace.size() == 4);
+
+    REQUIRE(std::none_of(trace.begin(), trace.end(), has_span_sampling_tag));
+  }
+
+  SECTION("expected span_sampling tags when the trace is dropped") {
+    // Make sure that the trace sampler is dropping the trace, otherwise we
+    // wouldn't expect the span sampling rules to matter.
+    trace_sampler->rule_rate = 0;
+    trace_sampler->sampling_mechanism = SamplingMechanism::Manual;
+    trace_sampler->sampling_priority =
+        std::make_unique<SamplingPriority>(SamplingPriority::UserDrop);
+
+    const auto rules_json = R"json([
+      {"service": "foosvc", "name": "grandchild", "max_per_second": 999},
+      {"name": "child*"},
+      {"service": "foosvc", "max_per_second": 1000}
+    ])json";
+    span_sampler->configure(rules_json, *logger, clock);
+    {
+      const auto root = tracer->StartSpan("root");
+      REQUIRE(root);
+      advanceTime(now, std::chrono::milliseconds(8));
+      const auto child1 = tracer->StartSpan("child1", {ot::ChildOf(&root->context())});
+      REQUIRE(child1);
+      advanceTime(now, std::chrono::milliseconds(5));
+      const auto child2 = tracer->StartSpan("child2", {ot::ChildOf(&root->context())});
+      REQUIRE(child2);
+      advanceTime(now, std::chrono::milliseconds(31));
+      const auto grandchild = tracer->StartSpan("grandchild", {ot::ChildOf(&child1->context())});
+      REQUIRE(grandchild);
+      advanceTime(now, std::chrono::milliseconds(2));
+    }
+
+    REQUIRE(writer->traces.size() == 1);
+    const auto& trace = writer->traces.front();
+    REQUIRE(trace.size() == 4);
+
+    for (const auto& span_ptr : trace) {
+      const auto& span = *span_ptr;
+      const auto& numeric_tags = span.metrics;
+      if (span.name == "root") {
+        // `root` matches the rule: {"service": "foosvc", "max_per_second": 1000}
+        REQUIRE(numeric_tags.count("_dd.span_sampling.mechanism") == 1);
+        REQUIRE(numeric_tags.at("_dd.span_sampling.mechanism") ==
+                int(SamplingMechanism::SpanRule));
+        REQUIRE(numeric_tags.count("_dd.span_sampling.rule_rate") == 1);
+        REQUIRE(numeric_tags.at("_dd.span_sampling.rule_rate") == 1.0);
+        REQUIRE(numeric_tags.count("_dd.span_sampling.max_per_second") == 1);
+        REQUIRE(numeric_tags.at("_dd.span_sampling.max_per_second") == 1000);
+      } else if (span.name == "child1" || span.name == "child2") {
+        // `child1` and `child2` match the rule: {"name": "child*"}
+        REQUIRE(numeric_tags.count("_dd.span_sampling.mechanism") == 1);
+        REQUIRE(numeric_tags.at("_dd.span_sampling.mechanism") ==
+                int(SamplingMechanism::SpanRule));
+        REQUIRE(numeric_tags.count("_dd.span_sampling.rule_rate") == 1);
+        REQUIRE(numeric_tags.at("_dd.span_sampling.rule_rate") == 1.0);
+        REQUIRE(numeric_tags.count("_dd.span_sampling.max_per_second") == 0);
+      } else {
+        REQUIRE(span.name == "grandchild");
+        // `grandchild` matches the rule: {"service": "foosvc", "name": "grandchild"}
+        REQUIRE(numeric_tags.count("_dd.span_sampling.mechanism") == 1);
+        REQUIRE(numeric_tags.at("_dd.span_sampling.mechanism") ==
+                int(SamplingMechanism::SpanRule));
+        REQUIRE(numeric_tags.count("_dd.span_sampling.rule_rate") == 1);
+        REQUIRE(numeric_tags.at("_dd.span_sampling.rule_rate") == 1.0);
+        REQUIRE(numeric_tags.count("_dd.span_sampling.max_per_second") == 1);
+        REQUIRE(numeric_tags.at("_dd.span_sampling.max_per_second") == 999);
+      }
+    }
+  }
+
+  SECTION("probabilistic sampling for span rules") {
+    // Make sure that the trace sampler is dropping the trace, otherwise we
+    // wouldn't expect the span sampling rules to matter.
+    trace_sampler->rule_rate = 0;
+    trace_sampler->sampling_mechanism = SamplingMechanism::Manual;
+    trace_sampler->sampling_priority =
+        std::make_unique<SamplingPriority>(SamplingPriority::UserDrop);
+
+    const auto rules_json = R"json([
+      {"name": "mysql.*", "sample_rate": 0.5}
+    ])json";
+    span_sampler->configure(rules_json, *logger, clock);
+    const int child_count = 10000;
+    {
+      const auto root = tracer->StartSpan("root");
+      REQUIRE(root);
+      advanceTime(now, std::chrono::milliseconds(8));
+      // Generate a lot of spans that match the rule, so that our 50%
+      // probability can be measured.
+      for (int i = 0; i < child_count; ++i) {
+        const auto child = tracer->StartSpan("mysql.query", {ot::ChildOf(&root->context())});
+        REQUIRE(child);
+        advanceTime(now, std::chrono::milliseconds(100));
+      }
+    }
+
+    REQUIRE(writer->traces.size() == 1);
+    const auto& trace = writer->traces.front();
+    REQUIRE(trace.size() == child_count + 1);
+
+    // `root` did not match any rule.
+    const auto root_iter = std::find_if(
+        trace.begin(), trace.end(), [](const auto& span_ptr) { return span_ptr->name == "root"; });
+    REQUIRE(root_iter != trace.end());
+    REQUIRE(!has_span_sampling_tag(*root_iter));
+
+    const int kept_children_count =
+        std::count_if(trace.begin(), trace.end(), has_span_sampling_tag);
+    // 50% of `child_count` would be 5000.  Let's say within 5% of that -> 5000 +/- 10.
+    REQUIRE(kept_children_count >= 5000 - 10);
+    REQUIRE(kept_children_count <= 5000 + 10);
+  }
+
+  SECTION("rate limiting for span rules") {
+    // Make sure that the trace sampler is dropping the trace, otherwise we
+    // wouldn't expect the span sampling rules to matter.
+    trace_sampler->rule_rate = 0;
+    trace_sampler->sampling_mechanism = SamplingMechanism::Manual;
+    trace_sampler->sampling_priority =
+        std::make_unique<SamplingPriority>(SamplingPriority::UserDrop);
+
+    const auto rules_json = R"json([
+      {"name": "mysql.*", "max_per_second": 10}
+    ])json";
+    span_sampler->configure(rules_json, *logger, clock);
+
+    // Each trace's 20 children will hit the limiter after 10.
+    // Then a second goes by, so the limiter recharges.
+    // We'd expect 10 kept per trace, or 1000 total.
+    const int num_traces = 100;
+    const int children_per_trace = 20;
+    const int milliseconds_between_traces = 1000;
+    {
+      for (int i = 0; i < num_traces; ++i) {
+        advanceTime(now, std::chrono::milliseconds(milliseconds_between_traces));
+        const auto root = tracer->StartSpan("root");
+        REQUIRE(root);
+        for (int j = 0; j < children_per_trace; ++j) {
+          const auto child = tracer->StartSpan("mysql.query", {ot::ChildOf(&root->context())});
+          REQUIRE(child);
+        }
+      }
+    }
+
+    REQUIRE(writer->traces.size() == num_traces);
+
+    int kept_spans_count = 0;
+    for (const auto& trace : writer->traces) {
+      kept_spans_count += std::count_if(trace.begin(), trace.end(), has_span_sampling_tag);
+    }
+
+    // 10 is the configured `max_per_second`.
+    REQUIRE(kept_spans_count == num_traces * 10);
+  }
+}
diff --git a/test/span_buffer_test.cpp b/test/span_buffer_test.cpp
index 61f5bf26..18a63138 100644
--- a/test/span_buffer_test.cpp
+++ b/test/span_buffer_test.cpp
@@ -10,7 +10,8 @@ TEST_CASE("span buffer") {
   auto logger = std::make_shared<MockLogger>();
   auto sampler = std::make_shared<RulesSampler>();
   auto writer = std::make_shared<MockWriter>(sampler);
-  auto buffer = std::make_shared<SpanBuffer>(logger, writer, sampler, SpanBufferOptions{});
+  auto buffer =
+      std::make_shared<SpanBuffer>(logger, writer, sampler, nullptr, SpanBufferOptions{});
 
   auto context_from_span = [](const TestSpanData& span) -> SpanContext {
     auto logger = std::make_shared<const MockLogger>();
diff --git a/test/tracer_factory_test.cpp b/test/tracer_factory_test.cpp
index 611e7c2a..9ebddc8a 100644
--- a/test/tracer_factory_test.cpp
+++ b/test/tracer_factory_test.cpp
@@ -3,12 +3,80 @@
 #include <datadog/tags.h>
 
 #include <catch2/catch.hpp>
+#include <cstdio>
+#include <fstream>
+#include <stdexcept>
 
 #include "../src/tracer.h"
 #include "mocks.h"
 
 using namespace datadog::opentracing;
 
+namespace {
+
+// `EnvGuard` sets an environment variable to a specified value for the scope
+// of the `EnvGuard`, restoring the environment variable's previous value
+// afterward.
+class EnvGuard {
+  std::string name_;
+  bool had_value_;
+  std::string old_value_;
+
+ public:
+  explicit EnvGuard(std::string name, const char *value) : name_(name) {
+    if (const char *const old_value = std::getenv(name.c_str())) {
+      had_value_ = true;
+      old_value_ = old_value;
+    } else {
+      had_value_ = false;
+    }
+
+    const bool overwrite = true;
+    ::setenv(name.c_str(), value, overwrite);
+  }
+
+  ~EnvGuard() {
+    if (had_value_) {
+      const bool overwrite = true;
+      ::setenv(name_.c_str(), old_value_.c_str(), overwrite);
+    } else {
+      ::unsetenv(name_.c_str());
+    }
+  }
+};
+
+// `TmpFile` creates a temporary file having the specified content, and deletes
+// the file at the end of the `TmpFile` scope.  A path to the temporary file is
+// accesible via the `name` member function.
+class TmpFile {
+  std::string name_;
+
+ public:
+  explicit TmpFile(ot::string_view content) {
+    char name_buffer[L_tmpnam];
+    if (std::tmpnam(name_buffer) == nullptr) {
+      throw std::runtime_error("Unable to create a temporary file name.");
+    }
+    name_ = name_buffer;
+
+    std::ofstream file(name_buffer);
+    if (!file) {
+      throw std::runtime_error("unable to open temporary file for writing: " + name_);
+    }
+
+    file.write(content.data(), content.size());
+    if (!file) {
+      throw std::runtime_error("unable to write to temporary file named: " + name_);
+    }
+  }
+
+  ~TmpFile() { std::remove(name_.c_str()); }
+
+  const std::string &name() const { return name_; }
+};
+
+}  // namespace
+
 TEST_CASE("tracer factory") {
   TracerFactory<MockTracer> factory;
 
@@ -529,5 +597,88 @@ TEST_CASE("tracer factory") {
       REQUIRE(!tracer->opts.analytics_enabled);
       REQUIRE(std::isnan(tracer->opts.analytics_rate));
     }
+    SECTION("DD_SPAN_SAMPLING_RULES") {
+      const auto value = "[{}]";
+      EnvGuard guard("DD_SPAN_SAMPLING_RULES", value);
+
+      SECTION("overrides default") {
+        const auto input = R"json({
+          "service": "my-service"
+        })json";
+        std::string error;
+        const auto result = factory.MakeTracer(input, error);
+
+        REQUIRE(error == "");
+        REQUIRE(result);
+        REQUIRE(*result);
+        auto &tracer = static_cast<MockTracer &>(**result);
+        REQUIRE(tracer.opts.span_sampling_rules == value);
+      }
+
+      SECTION("overrides config") {
+        const auto input = R"json({
+          "service": "my-service",
+          "span_sampling_rules": [{}, {}]
+        })json";
+        std::string error;
+        const auto result = factory.MakeTracer(input, error);
+
+        REQUIRE(error == "");
+        REQUIRE(result);
+        REQUIRE(*result);
+        auto &tracer = static_cast<MockTracer &>(**result);
+        REQUIRE(tracer.opts.span_sampling_rules == value);
+      }
+    }
+    SECTION("DD_SPAN_SAMPLING_RULES_FILE") {
+      const std::string value = "[{}]";
+      TmpFile file(value);
+      EnvGuard guard("DD_SPAN_SAMPLING_RULES_FILE", file.name().c_str());
+
+      SECTION("overrides default") {
+        const auto input = R"json({
+          "service": "my-service"
+        })json";
+        std::string error;
+        const auto result = factory.MakeTracer(input, error);
+
+        REQUIRE(error == "");
+        REQUIRE(result);
+        REQUIRE(*result);
+        auto &tracer = static_cast<MockTracer &>(**result);
+        REQUIRE(tracer.opts.span_sampling_rules == value);
+      }
+
+      SECTION("overrides config") {
+        const auto input = R"json({
+          "service": "my-service",
+          "span_sampling_rules": [{}, {}]
+        })json";
+        std::string error;
+        const auto result = factory.MakeTracer(input, error);
+
+        REQUIRE(error == "");
+        REQUIRE(result);
+        REQUIRE(*result);
+        auto &tracer = static_cast<MockTracer &>(**result);
+        REQUIRE(tracer.opts.span_sampling_rules == value);
+      }
+
+      SECTION("does not override DD_SPAN_SAMPLING_RULES") {
+        const auto override_value = "[{}, {}, {}]";
+        EnvGuard guard("DD_SPAN_SAMPLING_RULES", override_value);
+        const auto input = R"json({
+          "service": "my-service"
+        })json";
+        std::string error;
+        const auto result = factory.MakeTracer(input, error);
+
+        REQUIRE(error == "");
+        REQUIRE(result);
+        REQUIRE(*result);
+        auto &tracer = static_cast<MockTracer &>(**result);
+        REQUIRE(tracer.opts.span_sampling_rules == override_value);
+      }
+    }
   }
 }