Skip to content

Commit

Permalink
Improvements to the new privacy budget API in Privacy on Beam
Browse files Browse the repository at this point in the history
Privacy on Beam:
* Merge SelectPartitionsParams & PartitionSelectionParams. Both SelectPartitions and private partition selection of other aggregations now use PartitionSelectionParams. SelectPartitionsParams will be deprecated.
* Improve documentation of PublicPartitions & PartitionSelectionParams and place them next to each other: clients have to specify one or the other.
* Take as input PartitionSelectionDelta instead of PartitionSelectionParams in DistinctPrivacyID

PiperOrigin-RevId: 560076986
Change-Id: Id773a5ad7c7a0d3196f9afcd19d27b96aa387f8c
GitOrigin-RevId: c75304a91acad3377109c88124c95a32c98a5880
  • Loading branch information
Differential Privacy Team authored and MashaTelyatnikova committed Aug 28, 2023
1 parent 0e7ea88 commit 5e09d51
Show file tree
Hide file tree
Showing 18 changed files with 366 additions and 228 deletions.
14 changes: 12 additions & 2 deletions privacy-on-beam/pbeam/aggregations.go
Original file line number Diff line number Diff line change
Expand Up @@ -1009,11 +1009,21 @@ func checkPartitionSelectionDelta(delta float64, publicPartitions any) error {
return checks.CheckDeltaStrict(delta)
}

// checkMaxPartitionsContributed returns a maxPartitionsContributed parameter
// if it greater than zero, otherwise it fails.
// checkMaxPartitionsContributed validates the maxPartitionsContributed
// parameter of an aggregation. It returns nil when the value is strictly
// positive and a descriptive error when it is zero or negative.
func checkMaxPartitionsContributed(maxPartitionsContributed int64) error {
	// Strictly positive values are the only valid ones.
	if maxPartitionsContributed > 0 {
		return nil
	}
	return fmt.Errorf("MaxPartitionsContributed must be set to a positive value, was %d instead", maxPartitionsContributed)
}

// checkMaxPartitionsContributedPartitionSelection validates the
// maxPartitionsContributed parameter of a PartitionSelectionParams. It
// returns nil only when the value is 0 (i.e. unset) and an error for any
// other value, positive or negative.
func checkMaxPartitionsContributedPartitionSelection(maxPartitionsContributed int64) error {
	// Zero is the "unset" value and the only one accepted here.
	if maxPartitionsContributed == 0 {
		return nil
	}
	return fmt.Errorf("Separate contribution bounding for partition selection is not supported. "+
		"PartitionSelectionParams.MaxPartitionsContributed must be unset (i.e. 0), was %d instead", maxPartitionsContributed)
}
2 changes: 1 addition & 1 deletion privacy-on-beam/pbeam/aggregations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ func TestNewBoundedSumFnTemp(t *testing.T) {
SumParams{
AggregationEpsilon: tc.aggregationEpsilon,
AggregationDelta: tc.aggregationDelta,
PartitionSelectionParams: PartitionSelectionParams{tc.partitionSelectionEpsilon, tc.partitionSelectionDelta},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: tc.partitionSelectionEpsilon, Delta: tc.partitionSelectionDelta},
MaxPartitionsContributed: 17,
MinValue: tc.lower,
MaxValue: tc.upper,
Expand Down
54 changes: 32 additions & 22 deletions privacy-on-beam/pbeam/count.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,33 +46,18 @@ type CountParams struct {
// Uses the new privacy budget API.
AggregationEpsilon, AggregationDelta float64
// Differential privacy budget consumed by partition selection of this
// aggregation. If PublicPartitions are specified, this needs to be left unset.
// aggregation.
//
// If PublicPartitions are specified, this needs to be left unset.
//
// If there is only one aggregation, this can be left unset; in that case
// the entire budget reserved for partition selection in the PrivacySpec
// is consumed.
//
// Uses the new privacy budget API.
PartitionSelectionParams PartitionSelectionParams

This comment has been minimized.

// The maximum number of distinct values that a given privacy identifier
// can influence. If a privacy identifier is associated with more values,
// random values will be dropped. There is an inherent trade-off when
// choosing this parameter: a larger MaxPartitionsContributed leads to less
// data loss due to contribution bounding, but since the noise added in
// aggregations is scaled according to maxPartitionsContributed, it also
// means that more noise is added to each count.
//
// Required.
MaxPartitionsContributed int64
// The maximum number of times that a privacy identifier can contribute to
// a single count (or, equivalently, the maximum value that a privacy
// identifier can add to a single count in total). If MaxValue=10 and a
// privacy identifier is associated with the same value in 15 records, Count
// ignores 5 of these records and only adds 10 to the count for this value.
// There is an inherent trade-off when choosing MaxValue: a larger
// parameter means that fewer records are lost, but a larger noise is added.
//
// Required.
MaxValue int64
// Optional.
PartitionSelectionParams PartitionSelectionParams
// You can input the list of partitions present in the output if you know
// them in advance. When you specify partitions, partition selection /
// thresholding will be disabled and partitions will appear in the output
Expand All @@ -95,9 +80,30 @@ type CountParams struct {
// can fit into memory (e.g., up to a million). Prefer beam.PCollection
// otherwise.
//
// If PartitionSelectionParams are specified, this needs to be left unset.
//
// Optional.
// TODO: Move PublicPartitions to PartitionSelectionParams.
PublicPartitions any
// The maximum number of distinct values that a given privacy identifier
// can influence. If a privacy identifier is associated with more values,
// random values will be dropped. There is an inherent trade-off when
// choosing this parameter: a larger MaxPartitionsContributed leads to less
// data loss due to contribution bounding, but since the noise added in
// aggregations is scaled according to MaxPartitionsContributed, it also
// means that more noise is added to each count.
//
// Required.
MaxPartitionsContributed int64
// The maximum number of times that a privacy identifier can contribute to
// a single count (or, equivalently, the maximum value that a privacy
// identifier can add to a single count in total). If MaxValue=10 and a
// privacy identifier is associated with the same value in 15 records, Count
// ignores 5 of these records and only adds 10 to the count for this value.
// There is an inherent trade-off when choosing MaxValue: a larger
// parameter means that fewer records are lost, but a larger noise is added.
//
// Required.
MaxValue int64
}

// Count counts the number of times a value appears in a PrivatePCollection,
Expand Down Expand Up @@ -228,6 +234,10 @@ func checkCountParams(params CountParams, usesNewPrivacyBudgetAPI bool, noiseKin
if err != nil {
return err
}
err = checkMaxPartitionsContributedPartitionSelection(params.PartitionSelectionParams.MaxPartitionsContributed)
if err != nil {
return err
}
} else {
err = checks.CheckEpsilon(params.Epsilon)
if err != nil {
Expand Down
32 changes: 22 additions & 10 deletions privacy-on-beam/pbeam/count_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ func TestCheckCountParams(t *testing.T) {
desc: "new API, valid parameters w/o public partitions",
params: CountParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{1.0, 1e-5},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5},
MaxPartitionsContributed: 1,
MaxValue: 1,
},
Expand All @@ -656,12 +656,24 @@ func TestCheckCountParams(t *testing.T) {
partitionType: nil,
wantErr: false,
},
{
desc: "new API, PartitionSelectionParams.MaxPartitionsContributed set",
params: CountParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5, MaxPartitionsContributed: 1},
MaxPartitionsContributed: 1,
MaxValue: 1,
},
noiseKind: noise.LaplaceNoise,
partitionType: nil,
wantErr: true,
},
{
desc: "new API, valid parameters w/ Gaussian noise w/o public partitions",
params: CountParams{
AggregationEpsilon: 1.0,
AggregationDelta: 1e-5,
PartitionSelectionParams: PartitionSelectionParams{1.0, 1e-5},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5},
MaxPartitionsContributed: 1,
MaxValue: 1,
},
Expand All @@ -674,7 +686,7 @@ func TestCheckCountParams(t *testing.T) {
desc: "new API, zero aggregationDelta w/ Gaussian noise w/o public partitions",
params: CountParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{1.0, 1e-5},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5},
MaxPartitionsContributed: 1,
MaxValue: 1,
},
Expand All @@ -701,7 +713,7 @@ func TestCheckCountParams(t *testing.T) {
params: CountParams{
AggregationEpsilon: 1.0,
AggregationDelta: 1e-5,
PartitionSelectionParams: PartitionSelectionParams{1.0, 1e-5},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5},
MaxPartitionsContributed: 1,
MaxValue: 1,
},
Expand All @@ -714,7 +726,7 @@ func TestCheckCountParams(t *testing.T) {
desc: "new API, negative aggregationEpsilon",
params: CountParams{
AggregationEpsilon: -1.0,

This comment has been minimized.

Copy link
@mmaryambahmani12133
PartitionSelectionParams: PartitionSelectionParams{1.0, 1e-5},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5},
MaxPartitionsContributed: 1,
MaxValue: 1,
},
Expand All @@ -727,7 +739,7 @@ func TestCheckCountParams(t *testing.T) {
desc: "new API, negative partitionSelectionEpsilon",
params: CountParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{-1.0, 1e-5},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: -1.0, Delta: 1e-5},
MaxPartitionsContributed: 1,
MaxValue: 1,
},
Expand All @@ -740,7 +752,7 @@ func TestCheckCountParams(t *testing.T) {
desc: "new API, zero partitionSelectionDelta w/o public partitions",
params: CountParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{1.0, 0},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 0},
MaxPartitionsContributed: 1,
MaxValue: 1,
},
Expand All @@ -753,7 +765,7 @@ func TestCheckCountParams(t *testing.T) {
desc: "new API, zero partitionSelectionEpsilon w/o public partitions",
params: CountParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{0, 1e-5},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0, Delta: 1e-5},
MaxPartitionsContributed: 1,
MaxValue: 1,
},
Expand Down Expand Up @@ -818,7 +830,7 @@ func TestCheckCountParams(t *testing.T) {
desc: "new API, unset MaxPartitionsContributed",
params: CountParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{1.0, 1e-5},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5},
MaxValue: 1,
},
usesNewPrivacyBudgetAPI: true,
Expand All @@ -830,7 +842,7 @@ func TestCheckCountParams(t *testing.T) {
desc: "new API, negative max value",
params: CountParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{1.0, 1e-5},
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5},
MaxPartitionsContributed: 1,
MaxValue: -1,
},
Expand Down
35 changes: 20 additions & 15 deletions privacy-on-beam/pbeam/distinct_id.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,24 +56,18 @@ type DistinctPrivacyIDParams struct {
AggregationEpsilon, AggregationDelta float64
// Differential privacy budget consumed by partition selection of this
// aggregation. Note that DistinctPrivacyID doesn't consume epsilon for
// partition selection, so leave epsilon unset.
// partition selection, so you can only specify a delta.
//
// If PublicPartitions are specified, this needs to be left unset.
//
// If there is only one aggregation, this can be left unset; in that case
// the entire budget reserved for partition selection in the PrivacySpec
// is consumed.
//
// Uses the new privacy budget API.
PartitionSelectionParams PartitionSelectionParams
// The maximum number of distinct values that a given privacy identifier
// can influence. If a privacy identifier is associated with more values,
// random values will be dropped. There is an inherent trade-off when
// choosing this parameter: a larger MaxPartitionsContributed leads to less
// data loss due to contribution bounding, but since the noise added in
// aggregations is scaled according to maxPartitionsContributed, it also
// means that more noise is added to each count.
//
// Required.
MaxPartitionsContributed int64
// Optional.
PartitionSelectionDelta float64
// You can input the list of partitions present in the output if you know
// them in advance. When you specify partitions, partition selection /
// thresholding will be disabled and partitions will appear in the output
Expand All @@ -96,9 +90,20 @@ type DistinctPrivacyIDParams struct {
// can fit into memory (e.g., up to a million). Prefer beam.PCollection
// otherwise.
//
// If PartitionSelectionDelta is specified, this needs to be left unset.
//
// Optional.
// TODO: Move PublicPartitions to PartitionSelectionParams.
PublicPartitions any
// The maximum number of distinct values that a given privacy identifier
// can influence. If a privacy identifier is associated with more values,
// random values will be dropped. There is an inherent trade-off when
// choosing this parameter: a larger MaxPartitionsContributed leads to less
// data loss due to contribution bounding, but since the noise added in
// aggregations is scaled according to MaxPartitionsContributed, it also
// means that more noise is added to each count.
//
// Required.
MaxPartitionsContributed int64
}

// DistinctPrivacyID counts the number of distinct privacy identifiers
Expand Down Expand Up @@ -137,7 +142,7 @@ func DistinctPrivacyID(s beam.Scope, pcol PrivatePCollection, params DistinctPri
log.Fatalf("Couldn't consume aggregation budget for DistinctPrivacyID: %v", err)
}
if params.PublicPartitions == nil {
_, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.consume(0, params.PartitionSelectionParams.Delta)
_, params.PartitionSelectionDelta, err = spec.partitionSelectionBudget.consume(0, params.PartitionSelectionDelta)
if err != nil {
log.Fatalf("Couldn't consume partition selection budget for DistinctPrivacyID: %v", err)
}
Expand Down Expand Up @@ -237,7 +242,7 @@ func checkDistinctPrivacyIDParams(params DistinctPrivacyIDParams, usesNewPrivacy
if err != nil {
return err
}
err = checkPartitionSelectionDelta(params.PartitionSelectionParams.Delta, params.PublicPartitions)
err = checkPartitionSelectionDelta(params.PartitionSelectionDelta, params.PublicPartitions)
if err != nil {
return err
}
Expand Down Expand Up @@ -305,7 +310,7 @@ func newCountFnTemp(spec PrivacySpec, params DistinctPrivacyIDParams, noiseKind
return &countFn{
Epsilon: params.AggregationEpsilon,
NoiseDelta: params.AggregationDelta,
ThresholdDelta: params.PartitionSelectionParams.Delta,
ThresholdDelta: params.PartitionSelectionDelta,
PreThreshold: spec.preThreshold,
MaxPartitionsContributed: params.MaxPartitionsContributed,
NoiseKind: noiseKind,
Expand Down
16 changes: 8 additions & 8 deletions privacy-on-beam/pbeam/distinct_id_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ func TestNewCountFnTemp(t *testing.T) {
DistinctPrivacyIDParams{
AggregationEpsilon: tc.aggregationEpsilon,
AggregationDelta: tc.aggregationDelta,
PartitionSelectionParams: PartitionSelectionParams{0, tc.partitionSelectionDelta},
PartitionSelectionDelta: tc.partitionSelectionDelta,
MaxPartitionsContributed: 17,
}, tc.noiseKind, false)
if err != nil {
Expand Down Expand Up @@ -943,7 +943,7 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) {
desc: "new API, valid parameters w/o public partitions",
params: DistinctPrivacyIDParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{0, 1e-5},
PartitionSelectionDelta: 1e-5,
MaxPartitionsContributed: 1,
},
usesNewPrivacyBudgetAPI: true,
Expand All @@ -956,7 +956,7 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) {
params: DistinctPrivacyIDParams{
AggregationEpsilon: 1.0,
AggregationDelta: 1e-5,
PartitionSelectionParams: PartitionSelectionParams{0, 1e-5},
PartitionSelectionDelta: 1e-5,
MaxPartitionsContributed: 1,
},
usesNewPrivacyBudgetAPI: true,
Expand All @@ -968,7 +968,7 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) {
desc: "new API, zero aggregationDelta w/ gaussian noise w/o public partitions",
params: DistinctPrivacyIDParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{0, 1e-5},
PartitionSelectionDelta: 1e-5,
MaxPartitionsContributed: 1,
},
usesNewPrivacyBudgetAPI: true,
Expand All @@ -992,7 +992,7 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) {
desc: "new API, negative epsilon",
params: DistinctPrivacyIDParams{
AggregationEpsilon: -1.0,
PartitionSelectionParams: PartitionSelectionParams{0, 1e-5},
PartitionSelectionDelta: 1e-5,
MaxPartitionsContributed: 1,
},
usesNewPrivacyBudgetAPI: true,
Expand All @@ -1015,7 +1015,7 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) {
desc: "new API, non-zero partitionSelectionDelta w/ laplace noise",
params: DistinctPrivacyIDParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{0, 1e-5},
PartitionSelectionDelta: 1e-5,
MaxPartitionsContributed: 1,
PublicPartitions: []int{},
},
Expand All @@ -1027,8 +1027,8 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) {
{
desc: "new API, unset MaxPartitionsContributed",
params: DistinctPrivacyIDParams{
AggregationEpsilon: 1.0,
PartitionSelectionParams: PartitionSelectionParams{0, 1e-5},
AggregationEpsilon: 1.0,
PartitionSelectionDelta: 1e-5,
},
usesNewPrivacyBudgetAPI: true,
noiseKind: noise.LaplaceNoise,
Expand Down
Loading

0 comments on commit 5e09d51

Please sign in to comment.