From 63f922bd09971e9a54aa3efe9ae7c99d2a906295 Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Wed, 19 Jul 2023 17:34:18 -0700 Subject: [PATCH 01/10] CI: build base --- .github/workflows/perf.yml | 13 ++++++++++++- Cargo.toml | 5 ----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml index f2ee2a57..d73f8b0c 100644 --- a/.github/workflows/perf.yml +++ b/.github/workflows/perf.yml @@ -17,11 +17,17 @@ jobs: steps: - uses: actions/checkout@v3 - name: Checkout out gh-pages report - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'build_base') uses: actions/checkout@v3 with: ref: gh-pages path: main/_out + - name: Checkout out base branch + if: github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'build_base') + uses: actions/checkout@v3 + with: + ref: ${{ github.base_ref }} + path: main/ - uses: actions-rs/toolchain@v1 with: profile: minimal @@ -63,6 +69,11 @@ jobs: - name: Start dfx run: | dfx start --background + - name: Run perf for base branch + if: github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'build_base') + run: | + cd main + make - name: Run perf run: make - name: Generate table diff --git a/Cargo.toml b/Cargo.toml index 926d64b6..cb32a038 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,11 +14,6 @@ members = [ "pub-sub/rust/subscriber", ] -[profile.release] -panic = "abort" -lto = true -opt-level = 3 - [workspace.dependencies] ic-cdk = "0.10.0" ic-cdk-timers = "0.4.0" From 5d125d33d25379507ac02fc3eae24cf6e06bdbd6 Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Wed, 19 Jul 2023 18:27:54 -0700 Subject: [PATCH 02/10] fix --- .github/workflows/perf.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml index d73f8b0c..6b8d0050 100644 --- a/.github/workflows/perf.yml +++ b/.github/workflows/perf.yml @@ -74,6 +74,8 @@ jobs: run: | cd main make + dfx stop + dfx start --clean --background - name: Run perf run: make - name: Generate table From 5feaf9b9f2384067a20cc1ce32645e2f4ed6f67a Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Thu, 20 Jul 2023 15:55:56 -0700 Subject: [PATCH 03/10] change default to -O2 --- Cargo.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index cb32a038..00169588 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,11 @@ members = [ "pub-sub/rust/subscriber", ] +[profile.release] +panic = "abort" +lto = true +opt-level = 2 + [workspace.dependencies] ic-cdk = "0.10.0" ic-cdk-timers = "0.4.0" From df2db62a6ac784a104a94c1baa13750f33834fa1 Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Thu, 20 Jul 2023 21:14:33 -0700 Subject: [PATCH 04/10] add confidence interval --- .github/workflows/diff.py | 30 ++++++++++++++++++++++++++++++ .github/workflows/perf.yml | 7 +++++-- collections/perf.sh | 2 +- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/.github/workflows/diff.py b/.github/workflows/diff.py index c77d055a..6fa1742b 100644 --- a/.github/workflows/diff.py +++ b/.github/workflows/diff.py @@ -2,6 +2,7 @@ import pandas as pd import markdown import re +from scipy.stats import t if len(sys.argv) < 3: print("Usage: python diff.py [current.md] [main.md]") @@ -46,6 +47,19 @@ def read_tables(file): print(f"> **Warning**\n> Skip {file}. File not found.\n") sys.exit(0) +def stats(array, ignoreZeros=True): + if ignoreZeros: + array = [x for x in array if x != 0.] + df = pd.DataFrame(array) + mean = df.mean() + std = df.std() + conf_level = 0.9 + t_value = t.ppf(1 - (1 - conf_level) / 2, len(array) - 1) + interval = t_value * std / len(array)**0.5 + l, r = mean - interval, mean + interval + res = f"{mean[0]:.2f} [{l[0]:.2f}, {r[0]:.2f}]" + return res + current = read_tables(sys.argv[1]) main = read_tables(sys.argv[2]) @@ -53,6 +67,11 @@ def read_tables(file): print(f"> **Warning**\n> Skip {sys.argv[1]}, due to the number of tables mismatches from main branch.\n") sys.exit(0) +flaky_benchmarks = ["Heartbeat"] +binary_size = [] +max_mem = [] +cycles = [] + for i, ((header, current), (header2, main)) in enumerate(zip(current, main)): if header == header2 and current.shape == main.shape and all(current.columns == main.columns) and all(current.index == main.index): result = pd.DataFrame(index=current.index, columns=current.columns) @@ -72,8 +91,19 @@ def read_tables(file): result.loc[idx, col] = f"{x:_} ($\\textcolor{{red}}{{{d:.2f}\\\\%}}$)" else: result.loc[idx, col] = f"{x:_}" + if header in flaky_benchmarks: + continue + if col.endswith("binary_size"): + binary_size.append(d) + elif col.endswith("max mem"): + max_mem.append(d) + else: + cycles.append(d) print(result.to_markdown()) print(f"\n") else: print(f"> **Warning**\n> Skip table {i} {header} from {sys.argv[1]}, due to table shape mismatches from main branch.\n") +print(f"## Statistics\n\n") +print(f"binary size: {binary_size}\nmax_mem: {max_mem}\ncycles: {cycles}\n") +print(f"binary size: {stats(binary_size)}\nmax_mem: {stats(max_mem)}\ncycles: {stats(cycles)}\n") diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml index f2ee2a57..d75e6d6b 100644 --- a/.github/workflows/perf.yml +++ b/.github/workflows/perf.yml @@ -14,6 +14,7 @@ jobs: DFX_VERSION: 0.14.3 IC_REPL_VERSION: 0.4.1 MOC_VERSION: 0.9.7 + IC_WASM_VERSION: 0.4.0 steps: - uses: actions/checkout@v3 - name: Checkout out gh-pages report @@ -44,7 +45,7 @@ jobs: if: github.event_name == 'pull_request' run: | python -m pip install --upgrade pip - pip install pandas markdown lxml html5lib bs4 tabulate + pip install pandas markdown lxml html5lib bs4 tabulate scipy - uses: actions/setup-node@v3 with: node-version: 18 @@ -59,7 +60,9 @@ jobs: cd $(dfx cache show) wget https://github.com/dfinity/motoko/releases/download/$MOC_VERSION/motoko-linux64-$MOC_VERSION.tar.gz tar zxvf motoko-linux64-$MOC_VERSION.tar.gz - cargo install --git https://github.com/dfinity/ic-wasm.git + wget https://github.com/dfinity/ic-wasm/releases/download/$IC_WASM_VERSION/ic-wasm-linux64 + cp ./ic-wasm-linux64 /usr/local/bin/ic-wasm + chmod a+x /usr/local/bin/ic-wasm - name: Start dfx run: | dfx start --background diff --git a/collections/perf.sh b/collections/perf.sh index f6fe69f6..8690d717 100644 --- a/collections/perf.sh +++ b/collections/perf.sh @@ -56,7 +56,7 @@ perf(zhenya, "zhenya_hashmap", init_size); perf(btreemap_rs, "btreemap_rs", init_size); perf(hashmap_rs, "hashmap_rs", init_size); -output(file, "\n## Priority queue\n\n| |binary_size|heapify 50k|mem|pop_min 50|put 50|\n|--:|--:|--:|--:|--:|--:|\n"); +output(file, "\n## Priority queue\n\n| |binary_size|heapify 50k|max mem|pop_min 50|put 50|\n|--:|--:|--:|--:|--:|--:|\n"); perf(heap, "heap", init_size); perf(heap_rs, "heap_rs", init_size); From 118485782f0cb900917605439bd254d84e748b32 Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Thu, 20 Jul 2023 22:07:35 -0700 Subject: [PATCH 05/10] try --- .github/workflows/diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/diff.py b/.github/workflows/diff.py index 6fa1742b..34ed25ac 100644 --- a/.github/workflows/diff.py +++ b/.github/workflows/diff.py @@ -57,7 +57,7 @@ def stats(array, ignoreZeros=True): t_value = t.ppf(1 - (1 - conf_level) / 2, len(array) - 1) interval = t_value * std / len(array)**0.5 l, r = mean - interval, mean + interval - res = f"{mean[0]:.2f} [{l[0]:.2f}, {r[0]:.2f}]" + res = f"{mean:.2f} [{l:.2f}, {r:.2f}]" return res current = read_tables(sys.argv[1]) From 54beefcc9004fe7ee95ca4fadf9f401c889378d6 Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Fri, 21 Jul 2023 08:04:37 -0700 Subject: [PATCH 06/10] fix --- .github/workflows/diff.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/diff.py b/.github/workflows/diff.py index 34ed25ac..b7c84fe1 100644 --- a/.github/workflows/diff.py +++ b/.github/workflows/diff.py @@ -2,6 +2,7 @@ import pandas as pd import markdown import re +import statistics from scipy.stats import t if len(sys.argv) < 3: @@ -50,9 +51,12 @@ def read_tables(file): def stats(array, ignoreZeros=True): if ignoreZeros: array = [x for x in array if x != 0.] - df = pd.DataFrame(array) - mean = df.mean() - std = df.std() + if len(array) == 0: + return f"no change" + elif len(array) == 1: + return f"{array[0]}" + mean = statistics.mean(array) + std = statistics.stdev(array) conf_level = 0.9 t_value = t.ppf(1 - (1 - conf_level) / 2, len(array) - 1) interval = t_value * std / len(array)**0.5 From 348fe0e65276b7fb2617532d163ce528c3620851 Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Fri, 21 Jul 2023 08:31:01 -0700 Subject: [PATCH 07/10] fix --- .github/workflows/diff.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/diff.py b/.github/workflows/diff.py index b7c84fe1..1a29aa00 100644 --- a/.github/workflows/diff.py +++ b/.github/workflows/diff.py @@ -61,7 +61,7 @@ def stats(array, ignoreZeros=True): t_value = t.ppf(1 - (1 - conf_level) / 2, len(array) - 1) interval = t_value * std / len(array)**0.5 l, r = mean - interval, mean + interval - res = f"{mean:.2f} [{l:.2f}, {r:.2f}]" + res = f"{mean:.2f}% [{l:.2f}%, {r:.2f}%]" return res current = read_tables(sys.argv[1]) @@ -71,7 +71,7 @@ def stats(array, ignoreZeros=True): print(f"> **Warning**\n> Skip {sys.argv[1]}, due to the number of tables mismatches from main branch.\n") sys.exit(0) -flaky_benchmarks = ["Heartbeat"] +flaky_benchmarks = ["## Heartbeat"] binary_size = [] max_mem = [] cycles = [] @@ -109,5 +109,4 @@ def stats(array, ignoreZeros=True): print(f"> **Warning**\n> Skip table {i} {header} from {sys.argv[1]}, due to table shape mismatches from main branch.\n") print(f"## Statistics\n\n") -print(f"binary size: {binary_size}\nmax_mem: {max_mem}\ncycles: {cycles}\n") -print(f"binary size: {stats(binary_size)}\nmax_mem: {stats(max_mem)}\ncycles: {stats(cycles)}\n") +print(f"* binary size: {stats(binary_size)}\n* max_mem: {stats(max_mem)}\n* cycles: {stats(cycles)}\n") From 7119cd5df82468c684220631ca9ebd666b327e96 Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Fri, 21 Jul 2023 09:40:34 -0700 Subject: [PATCH 08/10] fix --- .github/workflows/diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/diff.py b/.github/workflows/diff.py index 1a29aa00..ef17a17a 100644 --- a/.github/workflows/diff.py +++ b/.github/workflows/diff.py @@ -54,7 +54,7 @@ def stats(array, ignoreZeros=True): if len(array) == 0: return f"no change" elif len(array) == 1: - return f"{array[0]}" + return f"{array[0]:.2f}%" mean = statistics.mean(array) std = statistics.stdev(array) conf_level = 0.9 From 9a10b3f91db8255d08ae41b314deefc78ff3b793 Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Fri, 21 Jul 2023 13:32:47 -0700 Subject: [PATCH 09/10] persist perf data --- .github/workflows/diff.py | 41 ++++++++++++++++---------------------- .github/workflows/perf.yml | 1 + .github/workflows/stats.py | 41 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/stats.py diff --git a/.github/workflows/diff.py b/.github/workflows/diff.py index ef17a17a..f00083f3 100644 --- a/.github/workflows/diff.py +++ b/.github/workflows/diff.py @@ -1,9 +1,17 @@ +import stats import sys import pandas as pd import markdown import re -import statistics -from scipy.stats import t + +print(sys.argv) + +if len(sys.argv) == 2 and sys.argv[1] == "final": + stats.load_stats() + print("# Overall Statistics\n") + print(f"{stats.data['binary_size']}") + stats.output_stats() + sys.exit(0) if len(sys.argv) < 3: print("Usage: python diff.py [current.md] [main.md]") @@ -48,21 +56,7 @@ def read_tables(file): print(f"> **Warning**\n> Skip {file}. File not found.\n") sys.exit(0) -def stats(array, ignoreZeros=True): - if ignoreZeros: - array = [x for x in array if x != 0.] - if len(array) == 0: - return f"no change" - elif len(array) == 1: - return f"{array[0]:.2f}%" - mean = statistics.mean(array) - std = statistics.stdev(array) - conf_level = 0.9 - t_value = t.ppf(1 - (1 - conf_level) / 2, len(array) - 1) - interval = t_value * std / len(array)**0.5 - l, r = mean - interval, mean + interval - res = f"{mean:.2f}% [{l:.2f}%, {r:.2f}%]" - return res + current = read_tables(sys.argv[1]) main = read_tables(sys.argv[2]) @@ -72,9 +66,6 @@ def stats(array, ignoreZeros=True): sys.exit(0) flaky_benchmarks = ["## Heartbeat"] -binary_size = [] -max_mem = [] -cycles = [] for i, ((header, current), (header2, main)) in enumerate(zip(current, main)): if header == header2 and current.shape == main.shape and all(current.columns == main.columns) and all(current.index == main.index): @@ -98,15 +89,17 @@ def stats(array, ignoreZeros=True): if header in flaky_benchmarks: continue if col.endswith("binary_size"): - binary_size.append(d) + stats.data["binary_size"].append(d) elif col.endswith("max mem"): - max_mem.append(d) + stats.data["max_mem"].append(d) else: - cycles.append(d) + stats.data["cycles"].append(d) print(result.to_markdown()) print(f"\n") else: print(f"> **Warning**\n> Skip table {i} {header} from {sys.argv[1]}, due to table shape mismatches from main branch.\n") print(f"## Statistics\n\n") -print(f"* binary size: {stats(binary_size)}\n* max_mem: {stats(max_mem)}\n* cycles: {stats(cycles)}\n") +stats.output_stats() + +stats.save_stats() diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml index 5c60f820..b2ddf3c4 100644 --- a/.github/workflows/perf.yml +++ b/.github/workflows/perf.yml @@ -119,6 +119,7 @@ jobs: fi fi done + python .github/workflows/diff.py final >> DIFF.md - name: Read table if: github.event_name == 'pull_request' id: perf diff --git a/.github/workflows/stats.py b/.github/workflows/stats.py new file mode 100644 index 00000000..3674da46 --- /dev/null +++ b/.github/workflows/stats.py @@ -0,0 +1,41 @@ +import statistics +import pickle +from scipy.stats import t + +data = {'binary_size': [], 'max_mem': [], 'cycles': []} +file = "stats.pk" + +def stats(array, ignoreZeros=True): + if ignoreZeros: + array = [x for x in array if x != 0.] + if len(array) == 0: + return f"no change" + elif len(array) == 1: + return f"{array[0]:.2f}%" + mean = statistics.mean(array) + std = statistics.stdev(array) + conf_level = 0.9 + t_value = t.ppf(1 - (1 - conf_level) / 2, len(array) - 1) + interval = t_value * std / len(array)**0.5 + l, r = mean - interval, mean + interval + res = f"{mean:.2f}% [{l:.2f}%, {r:.2f}%]" + return res + +def output_stats(): + for name, array in data.items(): + print(f"* {name}: {stats(array)}") + +def save_stats(): + with open(file, 'ab') as fp: + pickle.dump(data, fp) + +def load_stats(): + with open(file, 'rb') as fr: + try: + while True: + d = pickle.load(fr) + for name, array in d.items(): + data[name].extend(array) + except EOFError: + pass + From 3988b9f0300ab4c45a16086f5bb6cef42df134aa Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Fri, 21 Jul 2023 13:55:22 -0700 Subject: [PATCH 10/10] fix --- .github/workflows/diff.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/diff.py b/.github/workflows/diff.py index f00083f3..f021d577 100644 --- a/.github/workflows/diff.py +++ b/.github/workflows/diff.py @@ -4,12 +4,9 @@ import markdown import re -print(sys.argv) - if len(sys.argv) == 2 and sys.argv[1] == "final": stats.load_stats() print("# Overall Statistics\n") - print(f"{stats.data['binary_size']}") stats.output_stats() sys.exit(0)