lint: reformat benchmark sources (whitespace and line wrapping only, no functional change)
diff --git a/benchmarks/apple_arm_events.h b/benchmarks/apple_arm_events.h
index 3a94081..cd63176 100644
--- a/benchmarks/apple_arm_events.h
+++ b/benchmarks/apple_arm_events.h
@@ -101,8 +101,6 @@
                               a.instructions - b.instructions);
 }
 
-
-
 typedef float f32;
 typedef double f64;
 typedef int8_t i8;
@@ -616,9 +614,7 @@
 
 #define lib_nelems(x) (sizeof(x) / sizeof((x)[0]))
 #define lib_symbol_def(name)                                                   \
-  {                                                                            \
-#name, (void **)&name                                                      \
-  }
+  { #name, (void **)&name }
 
 static const lib_symbol lib_symbols_kperf[] = {
     lib_symbol_def(kpc_pmu_version),
@@ -933,7 +929,7 @@
 static const event_alias profile_events[] = {
     {"cycles",
      {
-         "FIXED_CYCLES", // Apple A7-A15//CORE_ACTIVE_CYCLE
+         "FIXED_CYCLES",            // Apple A7-A15//CORE_ACTIVE_CYCLE
          "CPU_CLK_UNHALTED.THREAD", // Intel Core 1th-10th
          "CPU_CLK_UNHALTED.CORE",   // Intel Yonah, Merom
      }},
@@ -976,7 +972,6 @@
 u64 counters_1[KPC_MAX_COUNTERS] = {0};
 const usize ev_count = sizeof(profile_events) / sizeof(profile_events[0]);
 
-
 bool setup_performance_counters() {
   static bool init = false;
   static bool worked = false;
@@ -995,7 +990,7 @@
   // check permission
   int force_ctrs = 0;
   if (kpc_force_all_ctrs_get(&force_ctrs)) {
-    //printf("Permission denied, xnu/kpc requires root privileges.\n");
+    // printf("Permission denied, xnu/kpc requires root privileges.\n");
     return (worked = false);
   }
   int ret;
@@ -1101,17 +1096,16 @@
     }
     return 1;
   }
- /*printf("counters value:\n");
-    for (usize i = 0; i < ev_count; i++) {
-        const event_alias *alias = profile_events + i;
-        usize idx = counter_map[i];
-        u64 val = counters_1[idx] - counters_0[idx];
-        printf("%14s: %llu\n", alias->alias, val);
-    }*/
+  /*printf("counters value:\n");
+     for (usize i = 0; i < ev_count; i++) {
+         const event_alias *alias = profile_events + i;
+         usize idx = counter_map[i];
+         u64 val = counters_1[idx] - counters_0[idx];
+         printf("%14s: %llu\n", alias->alias, val);
+     }*/
   return performance_counters{
       counters_0[counter_map[0]], counters_0[counter_map[2]],
-      counters_0[counter_map[3]],
-      counters_0[counter_map[1]]};
+      counters_0[counter_map[3]], counters_0[counter_map[1]]};
 }
 
 #endif
diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp
index c6b091f..993411e 100644
--- a/benchmarks/benchmark.cpp
+++ b/benchmarks/benchmark.cpp
@@ -1,4 +1,4 @@
-#if defined(__linux__) || (__APPLE__ &&  __aarch64__)
+#if defined(__linux__) || (__APPLE__ && __aarch64__)
 #define USING_COUNTERS
 #include "event_counter.h"
 #endif
@@ -22,7 +22,6 @@
 #include <vector>
 #include <locale.h>
 
-
 template <typename CharT>
 double findmax_fastfloat64(std::vector<std::basic_string<CharT>> &s) {
   double answer = 0;
@@ -55,8 +54,9 @@
 
 #ifdef USING_COUNTERS
 template <class T, class CharT>
-std::vector<event_count> time_it_ns(std::vector<std::basic_string<CharT>> &lines,
-                                     T const &function, size_t repeat) {
+std::vector<event_count>
+time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
+           size_t repeat) {
   std::vector<event_count> aggregate;
   bool printed_bug = false;
   for (size_t i = 0; i < repeat; i++) {
@@ -71,7 +71,8 @@
   return aggregate;
 }
 
-void pretty_print(double volume, size_t number_of_floats, std::string name, std::vector<event_count> events) {
+void pretty_print(double volume, size_t number_of_floats, std::string name,
+                  std::vector<event_count> events) {
   double volumeMB = volume / (1024. * 1024.);
   double average_ns{0};
   double min_ns{DBL_MAX};
@@ -83,7 +84,7 @@
   double branches_avg{0};
   double branch_misses_min{0};
   double branch_misses_avg{0};
-  for(event_count e : events) {
+  for (event_count e : events) {
     double ns = e.elapsed_ns();
     average_ns += ns;
     min_ns = min_ns < ns ? min_ns : ns;
@@ -94,7 +95,8 @@
 
     double instructions = e.instructions();
     instructions_avg += instructions;
-    instructions_min = instructions_min < instructions ? instructions_min : instructions;
+    instructions_min =
+        instructions_min < instructions ? instructions_min : instructions;
 
     double branches = e.branches();
     branches_avg += branches;
@@ -102,43 +104,37 @@
 
     double branch_misses = e.missed_branches();
     branch_misses_avg += branch_misses;
-    branch_misses_min = branch_misses_min < branch_misses ? branch_misses_min : branch_misses;
+    branch_misses_min =
+        branch_misses_min < branch_misses ? branch_misses_min : branch_misses;
   }
   cycles_avg /= events.size();
   instructions_avg /= events.size();
   average_ns /= events.size();
   branches_avg /= events.size();
   printf("%-40s: %8.2f MB/s (+/- %.1f %%) ", name.data(),
-           volumeMB * 1000000000 / min_ns,
-           (average_ns - min_ns) * 100.0 / average_ns);
-  printf("%8.2f Mfloat/s  ", 
-           number_of_floats * 1000 / min_ns);
-  if(instructions_min > 0) {
-    printf(" %8.2f i/B %8.2f i/f (+/- %.1f %%) ", 
-           instructions_min / volume,
-           instructions_min / number_of_floats, 
+         volumeMB * 1000000000 / min_ns,
+         (average_ns - min_ns) * 100.0 / average_ns);
+  printf("%8.2f Mfloat/s  ", number_of_floats * 1000 / min_ns);
+  if (instructions_min > 0) {
+    printf(" %8.2f i/B %8.2f i/f (+/- %.1f %%) ", instructions_min / volume,
+           instructions_min / number_of_floats,
            (instructions_avg - instructions_min) * 100.0 / instructions_avg);
 
-    printf(" %8.2f c/B %8.2f c/f (+/- %.1f %%) ", 
-           cycles_min / volume,
-           cycles_min / number_of_floats, 
+    printf(" %8.2f c/B %8.2f c/f (+/- %.1f %%) ", cycles_min / volume,
+           cycles_min / number_of_floats,
            (cycles_avg - cycles_min) * 100.0 / cycles_avg);
-    printf(" %8.2f i/c ", 
-           instructions_min /cycles_min);
-    printf(" %8.2f b/f ",
-           branches_avg /number_of_floats);
-    printf(" %8.2f bm/f ",
-           branch_misses_avg /number_of_floats);
-    printf(" %8.2f GHz ", 
-           cycles_min / min_ns);
+    printf(" %8.2f i/c ", instructions_min / cycles_min);
+    printf(" %8.2f b/f ", branches_avg / number_of_floats);
+    printf(" %8.2f bm/f ", branch_misses_avg / number_of_floats);
+    printf(" %8.2f GHz ", cycles_min / min_ns);
   }
   printf("\n");
-
 }
 #else
 template <class T, class CharT>
-std::pair<double, double> time_it_ns(std::vector<std::basic_string<CharT>> &lines,
-                                     T const &function, size_t repeat) {
+std::pair<double, double>
+time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
+           size_t repeat) {
   std::chrono::high_resolution_clock::time_point t1, t2;
   double average = 0;
   double min_value = DBL_MAX;
@@ -160,21 +156,16 @@
   return std::make_pair(min_value, average);
 }
 
-
-
-
-void pretty_print(double volume, size_t number_of_floats, std::string name, std::pair<double,double> result) {
+void pretty_print(double volume, size_t number_of_floats, std::string name,
+                  std::pair<double, double> result) {
   double volumeMB = volume / (1024. * 1024.);
   printf("%-40s: %8.2f MB/s (+/- %.1f %%) ", name.data(),
-           volumeMB * 1000000000 / result.first,
-           (result.second - result.first) * 100.0 / result.second);
-  printf("%8.2f Mfloat/s  ", 
-           number_of_floats * 1000 / result.first);
-  printf(" %8.2f ns/f \n", 
-           double(result.first) /number_of_floats );
+         volumeMB * 1000000000 / result.first,
+         (result.second - result.first) * 100.0 / result.second);
+  printf("%8.2f Mfloat/s  ", number_of_floats * 1000 / result.first);
+  printf(" %8.2f ns/f \n", double(result.first) / number_of_floats);
 }
-#endif 
-
+#endif
 
 // this is okay, all chars are ASCII
 inline std::u16string widen(std::string line) {
@@ -195,21 +186,23 @@
   return u16lines;
 }
 
-
 void process(std::vector<std::string> &lines, size_t volume) {
   size_t repeat = 100;
   double volumeMB = volume / (1024. * 1024.);
   std::cout << "ASCII volume = " << volumeMB << " MB " << std::endl;
-  pretty_print(volume, lines.size(), "fastfloat (64)", time_it_ns(lines, findmax_fastfloat64<char>, repeat));
-  pretty_print(volume, lines.size(), "fastfloat (32)", time_it_ns(lines, findmax_fastfloat32<char>, repeat));
+  pretty_print(volume, lines.size(), "fastfloat (64)",
+               time_it_ns(lines, findmax_fastfloat64<char>, repeat));
+  pretty_print(volume, lines.size(), "fastfloat (32)",
+               time_it_ns(lines, findmax_fastfloat32<char>, repeat));
 
   std::vector<std::u16string> lines16 = widen(lines);
   volume = 2 * volume;
   volumeMB = volume / (1024. * 1024.);
   std::cout << "UTF-16 volume = " << volumeMB << " MB " << std::endl;
-  pretty_print(volume, lines.size(), "fastfloat (64)", time_it_ns(lines16, findmax_fastfloat64<char16_t>, repeat));
-  pretty_print(volume, lines.size(), "fastfloat (32)", time_it_ns(lines16, findmax_fastfloat32<char16_t>, repeat));
-
+  pretty_print(volume, lines.size(), "fastfloat (64)",
+               time_it_ns(lines16, findmax_fastfloat64<char16_t>, repeat));
+  pretty_print(volume, lines.size(), "fastfloat (32)",
+               time_it_ns(lines16, findmax_fastfloat32<char16_t>, repeat));
 }
 
 void fileload(std::string filename) {
@@ -233,13 +226,14 @@
   process(lines, volume);
 }
 
-
 int main(int argc, char **argv) {
-  if(collector.has_events()) {
+  if (collector.has_events()) {
     std::cout << "# Using hardware counters" << std::endl;
   } else {
-#if defined(__linux__) || (__APPLE__ &&  __aarch64__)
-    std::cout << "# Hardware counters not available, try to run in privileged mode (e.g., sudo)." << std::endl;
+#if defined(__linux__) || (__APPLE__ && __aarch64__)
+    std::cout << "# Hardware counters not available, try to run in privileged "
+                 "mode (e.g., sudo)."
+              << std::endl;
 #endif
   }
   fileload(std::string(BENCHMARK_DATA_DIR) + "/canada.txt");
diff --git a/benchmarks/event_counter.h b/benchmarks/event_counter.h
index fb6db3a..3b7bb69 100644
--- a/benchmarks/event_counter.h
+++ b/benchmarks/event_counter.h
@@ -17,16 +17,19 @@
 #include <libgen.h>
 #endif
 
-#if __APPLE__ &&  __aarch64__
+#if __APPLE__ && __aarch64__
 #include "apple_arm_events.h"
 #endif
 
 struct event_count {
   std::chrono::duration<double> elapsed;
   std::vector<unsigned long long> event_counts;
-  event_count() : elapsed(0), event_counts{0,0,0,0,0} {}
-  event_count(const std::chrono::duration<double> _elapsed, const std::vector<unsigned long long> _event_counts) : elapsed(_elapsed), event_counts(_event_counts) {}
-  event_count(const event_count& other): elapsed(other.elapsed), event_counts(other.event_counts) { }
+  event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {}
+  event_count(const std::chrono::duration<double> _elapsed,
+              const std::vector<unsigned long long> _event_counts)
+      : elapsed(_elapsed), event_counts(_event_counts) {}
+  event_count(const event_count &other)
+      : elapsed(other.elapsed), event_counts(other.event_counts) {}
 
   // The types of counters (so we can read the getter more easily)
   enum event_counter_types {
@@ -36,31 +39,42 @@
     MISSED_BRANCHES = 3
   };
 
-  double elapsed_sec() const { return std::chrono::duration<double>(elapsed).count(); }
-  double elapsed_ns() const { return std::chrono::duration<double, std::nano>(elapsed).count(); }
-  double cycles() const { return static_cast<double>(event_counts[CPU_CYCLES]); }
-  double instructions() const { return static_cast<double>(event_counts[INSTRUCTIONS]); }
-  double branches() const { return static_cast<double>(event_counts[BRANCHES]); }
-  double missed_branches() const { return static_cast<double>(event_counts[MISSED_BRANCHES]); }
+  double elapsed_sec() const {
+    return std::chrono::duration<double>(elapsed).count();
+  }
+  double elapsed_ns() const {
+    return std::chrono::duration<double, std::nano>(elapsed).count();
+  }
+  double cycles() const {
+    return static_cast<double>(event_counts[CPU_CYCLES]);
+  }
+  double instructions() const {
+    return static_cast<double>(event_counts[INSTRUCTIONS]);
+  }
+  double branches() const {
+    return static_cast<double>(event_counts[BRANCHES]);
+  }
+  double missed_branches() const {
+    return static_cast<double>(event_counts[MISSED_BRANCHES]);
+  }
 
-  event_count& operator=(const event_count& other) {
+  event_count &operator=(const event_count &other) {
     this->elapsed = other.elapsed;
     this->event_counts = other.event_counts;
     return *this;
   }
-  event_count operator+(const event_count& other) const {
-    return event_count(elapsed+other.elapsed, {
-      event_counts[0]+other.event_counts[0],
-      event_counts[1]+other.event_counts[1],
-      event_counts[2]+other.event_counts[2],
-      event_counts[3]+other.event_counts[3],
-      event_counts[4]+other.event_counts[4],
-    });
+  event_count operator+(const event_count &other) const {
+    return event_count(elapsed + other.elapsed,
+                       {
+                           event_counts[0] + other.event_counts[0],
+                           event_counts[1] + other.event_counts[1],
+                           event_counts[2] + other.event_counts[2],
+                           event_counts[3] + other.event_counts[3],
+                           event_counts[4] + other.event_counts[4],
+                       });
   }
 
-  void operator+=(const event_count& other) {
-    *this = *this + other;
-  }
+  void operator+=(const event_count &other) { *this = *this + other; }
 };
 
 struct event_aggregate {
@@ -72,7 +86,7 @@
 
   event_aggregate() = default;
 
-  void operator<<(const event_count& other) {
+  void operator<<(const event_count &other) {
     if (iterations == 0 || other.elapsed < best.elapsed) {
       best = other;
     }
@@ -88,53 +102,48 @@
   double cycles() const { return total.cycles() / iterations; }
   double instructions() const { return total.instructions() / iterations; }
   double branches() const { return total.branches() / iterations; }
-  double missed_branches() const { return total.missed_branches() / iterations; }
+  double missed_branches() const {
+    return total.missed_branches() / iterations;
+  }
 };
 
 struct event_collector {
   event_count count{};
   std::chrono::time_point<std::chrono::steady_clock> start_clock{};
 
-#if defined(__linux__) 
+#if defined(__linux__)
   LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
-  event_collector() : linux_events(std::vector<int>{
-    PERF_COUNT_HW_CPU_CYCLES,
-    PERF_COUNT_HW_INSTRUCTIONS,
-    PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions
-    PERF_COUNT_HW_BRANCH_MISSES
-  }) {}
-  bool has_events() {
-    return linux_events.is_working();
-  }
-#elif __APPLE__ &&  __aarch64__
+  event_collector()
+      : linux_events(std::vector<int>{
+            PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS,
+            PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions
+            PERF_COUNT_HW_BRANCH_MISSES}) {}
+  bool has_events() { return linux_events.is_working(); }
+#elif __APPLE__ && __aarch64__
   performance_counters diff;
-  event_collector() : diff(0) {
-    setup_performance_counters();
-  }
-  bool has_events() {
-    return setup_performance_counters();
-  }
+  event_collector() : diff(0) { setup_performance_counters(); }
+  bool has_events() { return setup_performance_counters(); }
 #else
   event_collector() {}
-  bool has_events() {
-    return false;
-  }
+  bool has_events() { return false; }
 #endif
 
   inline void start() {
 #if defined(__linux)
     linux_events.start();
-#elif __APPLE__ &&  __aarch64__
-    if(has_events()) { diff = get_counters(); }
+#elif __APPLE__ && __aarch64__
+    if (has_events()) {
+      diff = get_counters();
+    }
 #endif
     start_clock = std::chrono::steady_clock::now();
   }
-  inline event_count& end() {
+  inline event_count &end() {
     const auto end_clock = std::chrono::steady_clock::now();
 #if defined(__linux)
     linux_events.end(count.event_counts);
-#elif __APPLE__ &&  __aarch64__
-    if(has_events()) {
+#elif __APPLE__ && __aarch64__
+    if (has_events()) {
       performance_counters end = get_counters();
       diff = end - diff;
     }
diff --git a/benchmarks/linux-perf-events.h b/benchmarks/linux-perf-events.h
index 73cfbaf..0a9e553 100644
--- a/benchmarks/linux-perf-events.h
+++ b/benchmarks/linux-perf-events.h
@@ -42,7 +42,8 @@
     uint32_t i = 0;
     for (auto config : config_vec) {
       attribs.config = config;
-      int _fd = static_cast<int>(syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags));
+      int _fd = static_cast<int>(
+          syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags));
       if (_fd == -1) {
         report_error("perf_event_open");
       }
@@ -56,7 +57,11 @@
     temp_result_vec.resize(num_events * 2 + 1);
   }
 
-  ~LinuxEvents() { if (fd != -1) { close(fd); } }
+  ~LinuxEvents() {
+    if (fd != -1) {
+      close(fd);
+    }
+  }
 
   inline void start() {
     if (fd != -1) {
@@ -85,19 +90,15 @@
       results[i / 2] = temp_result_vec[i];
     }
     for (uint32_t i = 2; i < temp_result_vec.size(); i += 2) {
-      if(ids[i/2-1] != temp_result_vec[i]) {
+      if (ids[i / 2 - 1] != temp_result_vec[i]) {
         report_error("event mismatch");
       }
     }
   }
 
-  bool is_working() {
-    return working;
-  }
+  bool is_working() { return working; }
 
 private:
-  void report_error(const std::string &) {
-    working = false;
-  }
+  void report_error(const std::string &) { working = false; }
 };
 #endif
\ No newline at end of file