Feature Extraction Benchmarks¶
This walkthrough benchmarks the feature extraction functions of functime
against those of tsfresh
. We begin by measuring the speed of feature extraction on time series of four different sizes: 10K, 100K, 1M, and 9M rows (the slowest features are only run on the smaller sizes). Next, we assess the speed in a groupby and aggregation context, comparing functime with polars against tsfresh with pandas.
%%capture
%pip install perfplot
%pip install pandas
%pip install tsfresh
%pip install functime
from typing import Callable
import pandas as pd
import perfplot
import polars as pl
from tsfresh.feature_extraction import feature_calculators as tsfresh
from functime import feature_extractors as fe
# Notebook display settings for the result tables shown further down.
# Print up to 100 rows per table instead of the truncated default.
pl.Config.set_tbl_rows(100)
# Let string cells show up to 60 characters.
pl.Config.set_fmt_str_lengths(60)
# Hide the data-type row under the column names.
pl.Config.set_tbl_hide_column_data_types(True)
polars.config.Config
1. Setup for the comparison¶
We are using the M4 dataset. We load it into both a pd.DataFrame
and a pl.DataFrame
, and we define a list of tuples with the following structure:
(
<functime_function>
,
<tsfresh_function>
,
<functime_parameters>
,
<tsfresh_parameters>
)
# Parquet file holding the M4 training series in wide format.
_M4_DATASET = "../../data/m4_1d_train.parquet"

# pandas input for tsfresh: melt the wide table into one long "value" column
# and drop the missing entries.
# NOTE(review): unlike the polars path, "V1" is not dropped here; presumably
# pandas restores it as the index when reading the file -- confirm.
DF_PANDAS = (
    pd.read_parquet(_M4_DATASET)
    .melt()
    .drop(columns="variable")
    .dropna()
    .reset_index(drop=True)
)

# polars input for functime: same long "value" column, with "V1" removed
# before melting.
DF_PL_EAGER = (
    pl.read_parquet(_M4_DATASET)
    .drop("V1")
    .melt()
    .drop("variable")
    .drop_nulls()
)
# Benchmark definitions. Each entry is a 4-tuple:
#   (functime function, tsfresh function, functime kwargs, tsfresh kwargs)
# The two kwargs dicts are meant to configure both implementations
# equivalently so that their timings are comparable. Commented-out entries
# are features that are currently excluded from the benchmark.
FUNC_PARAMS_BENCH = [
    (fe.absolute_energy, tsfresh.abs_energy, {}, {}),
    (fe.absolute_maximum, tsfresh.absolute_maximum, {}, {}),
    (fe.absolute_sum_of_changes, tsfresh.absolute_sum_of_changes, {}, {}),
    (
        fe.lempel_ziv_complexity,
        tsfresh.lempel_ziv_complexity,
        # Threshold at half of the value range, expressed as a polars expression.
        {"threshold": (pl.col("value").max() - pl.col("value").min()) / 2},
        {"bins": 2},
    ),
    (
        fe.approximate_entropy,
        tsfresh.approximate_entropy,
        {"run_length": 2, "filtering_level": 0.5},
        {"m": 2, "r": 0.5},
    ),
    # (fe.augmented_dickey_fuller, tsfresh.augmented_dickey_fuller, "param")
    (fe.autocorrelation, tsfresh.autocorrelation, {"n_lags": 4}, {"lag": 4}),
    (
        fe.autoregressive_coefficients,
        tsfresh.ar_coefficient,
        {"n_lags": 4},
        # One request per coefficient (0..4) of the order-4 model. The
        # comprehension must live inside the list: the previous form was a
        # dict comprehension on the constant key "param", which collapsed to
        # the single request {"coeff": 4, "k": 4}.
        {"param": [{"coeff": i, "k": 4} for i in range(5)]},
    ),
    (fe.benford_correlation, tsfresh.benford_correlation, {}, {}),
    (fe.binned_entropy, tsfresh.binned_entropy, {"bin_count": 10}, {"max_bins": 10}),
    (fe.c3, tsfresh.c3, {"n_lags": 10}, {"lag": 10}),
    (
        fe.change_quantiles,
        tsfresh.change_quantiles,
        {"q_low": 0.1, "q_high": 0.9, "is_abs": True},
        {"ql": 0.1, "qh": 0.9, "isabs": True, "f_agg": "mean"},
    ),
    (fe.cid_ce, tsfresh.cid_ce, {"normalize": True}, {"normalize": True}),
    (fe.count_above, tsfresh.count_above, {"threshold": 0.0}, {"t": 0.0}),
    (fe.count_above_mean, tsfresh.count_above_mean, {}, {}),
    (fe.count_below, tsfresh.count_below, {"threshold": 0.0}, {"t": 0.0}),
    (fe.count_below_mean, tsfresh.count_below_mean, {}, {}),
    # (fe.cwt_coefficients, tsfresh.cwt_coefficients, {"widths": (1, 2, 3), "n_coefficients": 2},{"param": {"widths": (1, 2, 3), "coeff": 2, "w": 1}}),
    (
        fe.energy_ratios,
        tsfresh.energy_ratio_by_chunks,
        {"n_chunks": 6},
        {"param": [{"num_segments": 6, "segment_focus": i} for i in range(6)]},
    ),
    (fe.first_location_of_maximum, tsfresh.first_location_of_maximum, {}, {}),
    (fe.first_location_of_minimum, tsfresh.first_location_of_minimum, {}, {}),
    # (fe.fourier_entropy, tsfresh.fourier_entropy, {"n_bins": 10}, {"bins": 10}),
    # (fe.friedrich_coefficients, tsfresh.friedrich_coefficients, {"polynomial_order": 3, "n_quantiles": 30}, {"params": [{"m": 3, "r": 30}]}),
    (fe.has_duplicate, tsfresh.has_duplicate, {}, {}),
    (fe.has_duplicate_max, tsfresh.has_duplicate_max, {}, {}),
    (fe.has_duplicate_min, tsfresh.has_duplicate_min, {}, {}),
    (
        fe.index_mass_quantile,
        tsfresh.index_mass_quantile,
        {"q": 0.5},
        {"param": [{"q": 0.5}]},
    ),
    (
        fe.large_standard_deviation,
        tsfresh.large_standard_deviation,
        {"ratio": 0.25},
        {"r": 0.25},
    ),
    (fe.last_location_of_maximum, tsfresh.last_location_of_maximum, {}, {}),
    (fe.last_location_of_minimum, tsfresh.last_location_of_minimum, {}, {}),
    # (fe.lempel_ziv_complexity, tsfresh.lempel_ziv_complexity, {"n_bins": 5}, {"bins": 5}),
    (
        fe.linear_trend,
        tsfresh.linear_trend,
        {},
        {
            "param": [
                {"attr": "pvalue"},
                {"attr": "rvalue"},
                {"attr": "intercept"},
                {"attr": "slope"},
                {"attr": "stderr"},
            ]
        },
    ),
    (fe.longest_streak_above_mean, tsfresh.longest_strike_above_mean, {}, {}),
    (fe.longest_streak_below_mean, tsfresh.longest_strike_below_mean, {}, {}),
    (fe.mean_abs_change, tsfresh.mean_abs_change, {}, {}),
    (fe.mean_change, tsfresh.mean_change, {}, {}),
    (
        fe.mean_n_absolute_max,
        tsfresh.mean_n_absolute_max,
        {"n_maxima": 20},
        {"number_of_maxima": 20},
    ),
    (
        fe.mean_second_derivative_central,
        tsfresh.mean_second_derivative_central,
        {},
        {},
    ),
    (
        fe.number_crossings,
        tsfresh.number_crossing_m,
        {"crossing_value": 0.0},
        {"m": 0.0},
    ),
    (fe.number_cwt_peaks, tsfresh.number_cwt_peaks, {"max_width": 5}, {"n": 5}),
    (fe.number_peaks, tsfresh.number_peaks, {"support": 5}, {"n": 5}),
    # (fe.partial_autocorrelation, tsfresh.partial_autocorrelation, "param"),
    (
        fe.percent_reoccurring_values,
        tsfresh.percentage_of_reoccurring_values_to_all_values,
        {},
        {},
    ),
    (
        fe.percent_reoccurring_points,
        tsfresh.percentage_of_reoccurring_datapoints_to_all_datapoints,
        {},
        {},
    ),
    (
        fe.permutation_entropy,
        tsfresh.permutation_entropy,
        {"tau": 1, "n_dims": 3},
        {"tau": 1, "dimension": 3},
    ),
    (
        fe.range_count,
        tsfresh.range_count,
        {"lower": 0, "upper": 9, "closed": "none"},
        {"min": 0, "max": 9},
    ),
    (fe.ratio_beyond_r_sigma, tsfresh.ratio_beyond_r_sigma, {"ratio": 2}, {"r": 2}),
    (
        fe.ratio_n_unique_to_length,
        tsfresh.ratio_value_number_to_time_series_length,
        {},
        {},
    ),
    (fe.root_mean_square, tsfresh.root_mean_square, {}, {}),
    (fe.sample_entropy, tsfresh.sample_entropy, {}, {}),
    (
        fe.spkt_welch_density,
        tsfresh.spkt_welch_density,
        {"n_coeffs": 10},
        {"param": [{"coeff": i} for i in range(10)]},
    ),
    (fe.sum_reoccurring_points, tsfresh.sum_of_reoccurring_data_points, {}, {}),
    (fe.sum_reoccurring_values, tsfresh.sum_of_reoccurring_values, {}, {}),
    (
        fe.symmetry_looking,
        tsfresh.symmetry_looking,
        {"ratio": 0.25},
        {"param": [{"r": 0.25}]},
    ),
    (
        fe.time_reversal_asymmetry_statistic,
        tsfresh.time_reversal_asymmetry_statistic,
        {"n_lags": 3},
        {"lag": 3},
    ),
    (fe.variation_coefficient, tsfresh.variation_coefficient, {}, {}),
    (fe.var_gt_std, tsfresh.variance_larger_than_standard_deviation, {}, {}),
]
2. Benchmark core functions¶
Benchmark each core function on time series of length 10_000, 100_000, 1_000_000 and 9_000_000. The slowest features are limited to smaller inputs: 10_000 only for approximate_entropy
, and 10_000/100_000 for number_cwt_peaks
, sample_entropy
and lempel_ziv_complexity. all_benchmarks()
iterates through the elements in the FUNC_PARAMS_BENCH
list and invokes benchmark()
for each function.
def benchmark(
    f_feat: Callable, ts_feat: Callable, f_params: dict, ts_params: dict, is_expr: bool
):
    """Time one functime feature against its tsfresh counterpart.

    Both kernels are run by ``perfplot.bench`` on the first ``n`` rows of the
    module-level ``DF_PL_EAGER`` / ``DF_PANDAS`` frames, for each ``n`` in the
    size range selected for the feature.

    Args:
        f_feat: functime feature function.
        ts_feat: tsfresh feature function.
        f_params: Keyword arguments passed to ``f_feat``.
        ts_params: Keyword arguments passed to ``ts_feat``.
        is_expr: If True, ``f_feat`` is called on ``pl.col("value")`` and
            evaluated through ``DataFrame.select``; otherwise it is called
            directly on the ``"value"`` column of the polars frame.

    Returns:
        The object returned by ``perfplot.bench``.
    """
    # Features that get a reduced set of input sizes, keyed by function name.
    capped_sizes = {
        "approximate_entropy": [10_000],
        "number_cwt_peaks": [10_000, 100_000],
        "sample_entropy": [10_000, 100_000],
        "lempel_ziv_complexity": [10_000, 100_000],
    }
    sizes = capped_sizes.get(
        f_feat.__name__, [10_000, 100_000, 1_000_000, 9_000_000]
    )

    # Each kernel receives both frames from ``setup`` and ignores the one
    # that belongs to the other library.
    if is_expr:

        def run_functime(pl_frame, _pd_frame):
            return pl_frame.select(f_feat(pl.col("value"), **f_params))

    else:

        def run_functime(pl_frame, _pd_frame):
            return f_feat(pl_frame["value"], **f_params)

    def run_tsfresh(_pl_frame, pd_frame):
        return ts_feat(pd_frame["value"], **ts_params)

    return perfplot.bench(
        setup=lambda n: (DF_PL_EAGER.head(n), DF_PANDAS.head(n)),
        kernels=[run_functime, run_tsfresh],
        n_range=sizes,
        # The two libraries are not required to return identical objects.
        equality_check=False,
        labels=["functime", "tsfresh"],
    )
def all_benchmarks(params: list[tuple], is_expr: bool) -> pl.DataFrame:
    """Run ``benchmark`` for every entry of ``params`` and tabulate the timings.

    Args:
        params: Benchmark definitions; the first four items of each entry are
            ``(functime_function, tsfresh_function, functime_kwargs,
            tsfresh_kwargs)``.
        is_expr: Forwarded to ``benchmark``; selects the ``pl.Expr`` (True) or
            ``pl.Series`` (False) code path of the functime feature.

    Returns:
        A ``pl.DataFrame`` with one row per (feature, input size) holding both
        timings in milliseconds, their absolute and relative difference, and
        the speedup (tsfresh time divided by functime time). The most recently
        benchmarked feature comes first. Features whose benchmark raises
        ``ValueError``, ``ImportError`` or ``TypeError`` are reported on stdout
        and left out of the table.
    """
    # Empty frame that pins the output schema, also when every feature fails.
    empty = pl.DataFrame(
        schema={
            "Feature name": pl.Utf8,
            "n": pl.Int64,
            "functime (ms)": pl.Float64,
            "tfresh (ms)": pl.Float64,
            "diff (ms)": pl.Float64,
            "diff %": pl.Float64,
            "speedup": pl.Float64,
        }
    )
    frames = []
    for entry in params:
        f_feat, ts_feat, f_params, ts_params = entry[:4]
        name = f_feat.__name__
        try:
            print(f"Feature: {name}")
            bench = benchmark(
                f_feat=f_feat,
                ts_feat=ts_feat,
                f_params=f_params,
                ts_params=ts_params,
                is_expr=is_expr,
            )
            # Row 0 holds the functime timings, row 1 the tsfresh timings
            # (same order as the kernels passed to perfplot).
            functime_s = bench.timings_s[0]
            tsfresh_s = bench.timings_s[1]
            frames.append(
                pl.DataFrame(
                    {
                        "Feature name": [name] * len(bench.n_range),
                        "n": bench.n_range,
                        "functime (ms)": functime_s * 1_000,
                        "tfresh (ms)": tsfresh_s * 1_000,
                        "diff (ms)": (functime_s - tsfresh_s) * 1_000,
                        "diff %": 100 * (functime_s - tsfresh_s) / tsfresh_s,
                        "speedup": tsfresh_s / functime_s,
                    }
                )
            )
        except ValueError:
            print(f"Failed to compute feature {name}")
        except ImportError:
            print(f"Failed to import feature {name}")
        except TypeError as exc:
            if is_expr:
                print(f"Feature {name} not implemented for pl.Expr")
            else:
                # In Series mode a TypeError is not about pl.Expr support;
                # report it as a plain failure instead of a misleading message.
                print(f"Failed to compute feature {name} (TypeError: {exc})")
    # Concatenate once, newest result first and the schema frame last.
    return pl.concat([*reversed(frames), empty])
3. Run benchmarks¶
# Code to prettify benchmark results
def table_prettifier(df: pl.DataFrame, n: int) -> pl.DataFrame:
    """Build the display table for a single input size.

    Keeps the rows of ``df`` whose ``"n"`` equals ``n``, drops the ``"n"``
    column, orders the rows by decreasing ``"speedup"``, rounds every column
    except ``"Feature name"`` with a precision that depends on magnitude, and
    finally renders ``"speedup"`` as a string prefixed with ``"x "``.

    Args:
        df: Benchmark results containing at least the columns ``"n"``,
            ``"Feature name"`` and ``"speedup"``.
        n: Input size to select.

    Returns:
        The formatted table.
    """
    values = pl.exclude("Feature name")
    magnitude = values.abs()
    # Smaller magnitudes keep more decimals. The "< 30" branch and the
    # fallback both round to one decimal.
    rounded = (
        pl.when(magnitude < 0.1)
        .then(values.round(4))
        .when(magnitude < 1)
        .then(values.round(2))
        .when(magnitude < 30)
        .then(values.round(1))
        .otherwise(values.round(1))
    )
    speedup_label = "x " + pl.col("speedup").cast(pl.Utf8)

    selected = df.filter(pl.col("n") == n).drop("n")
    ranked = selected.sort("speedup", descending=True)
    return ranked.with_columns(rounded).with_columns(speedup=speedup_label)
%%capture
# Run every benchmark twice: once through the pl.Expr code path and once
# through the pl.Series code path of functime.
bench_expr = all_benchmarks(params=FUNC_PARAMS_BENCH, is_expr=True)
bench_series = all_benchmarks(params=FUNC_PARAMS_BENCH, is_expr=False)

# pl.Expr results, one display table per input size.
df_expr_10k, df_expr_100k, df_expr_1m, df_expr_9m = (
    table_prettifier(bench_expr, n=rows)
    for rows in (10_000, 100_000, 1_000_000, 9_000_000)
)

# pl.Series results, one display table per input size.
df_series_10k, df_series_100k, df_series_1m, df_series_9m = (
    table_prettifier(bench_series, n=rows)
    for rows in (10_000, 100_000, 1_000_000, 9_000_000)
)
INFO:functime.feature_extractors:Expression version of approximate_entropy is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extractors:Expression version of autoregressive_coefficients is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extractors:Expression version of number_cwt_peaks is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extractors:Expression version of sample_entropy is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extractors:Expression version of spkt_welch_density is not yet implemented due to technical difficulty regarding Polars Expression Plugins.
4. Benchmark results¶
Display 8 tables:
- For
pl.Series
: 10k, 100k, 1M and 9M rows - For
pl.Expr
: 10k, 100k, 1M and 9M rows
Each table contains the execution time (ms) for tsfresh and functime, the difference, the difference in % and the speedup:
4.1 Results for pl.Expr
¶
10k expr¶
df_expr_10k
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"lempel_ziv_complexity" | 1.6 | 49.2 | -47.6 | -96.8 | "x 31.1" |
"benford_correlation" | 1.4 | 15.1 | -13.7 | -91.0 | "x 11.1" |
"energy_ratios" | 0.47 | 3.4 | -2.9 | -86.2 | "x 7.2" |
"mean_n_absolute_max" | 0.13 | 0.65 | -0.52 | -79.4 | "x 4.8" |
"longest_streak_below_mean" | 0.44 | 1.7 | -1.3 | -73.9 | "x 3.8" |
"range_count" | 0.0771 | 0.25 | -0.18 | -69.7 | "x 3.3" |
"change_quantiles" | 0.45 | 1.3 | -0.9 | -67.0 | "x 3.0" |
"longest_streak_above_mean" | 0.59 | 1.7 | -1.1 | -65.2 | "x 2.9" |
"number_peaks" | 0.65 | 1.7 | -1.1 | -62.0 | "x 2.6" |
"ratio_beyond_r_sigma" | 0.18 | 0.42 | -0.25 | -58.4 | "x 2.4" |
"count_below_mean" | 0.0783 | 0.16 | -0.0852 | -52.1 | "x 2.1" |
"count_above_mean" | 0.0803 | 0.16 | -0.0837 | -51.0 | "x 2.0" |
"ratio_n_unique_to_length" | 0.34 | 0.7 | -0.35 | -50.6 | "x 2.0" |
"percent_reoccurring_points" | 0.68 | 1.3 | -0.65 | -48.6 | "x 1.9" |
"large_standard_deviation" | 0.0815 | 0.14 | -0.0536 | -39.7 | "x 1.7" |
"has_duplicate_max" | 0.11 | 0.18 | -0.0656 | -37.3 | "x 1.6" |
"has_duplicate_min" | 0.11 | 0.17 | -0.065 | -37.2 | "x 1.6" |
"count_above" | 0.0781 | 0.12 | -0.0434 | -35.7 | "x 1.6" |
"count_below" | 0.0785 | 0.12 | -0.0428 | -35.3 | "x 1.5" |
"has_duplicate" | 0.47 | 0.69 | -0.23 | -32.7 | "x 1.5" |
"binned_entropy" | 0.32 | 0.46 | -0.15 | -31.6 | "x 1.5" |
"sum_reoccurring_points" | 0.6 | 0.81 | -0.22 | -26.5 | "x 1.4" |
"c3" | 0.23 | 0.27 | -0.0353 | -13.2 | "x 1.2" |
"root_mean_square" | 0.12 | 0.14 | -0.0159 | -11.8 | "x 1.1" |
"variation_coefficient" | 0.1 | 0.11 | -0.0023 | -2.2 | "x 1.0" |
"symmetry_looking" | 0.33 | 0.34 | -0.0065 | -1.9 | "x 1.0" |
"percent_reoccurring_values" | 0.8 | 0.77 | 0.0255 | 3.3 | "x 0.97" |
"mean_abs_change" | 0.0521 | 0.0494 | 0.0027 | 5.4 | "x 0.95" |
"var_gt_std" | 0.0573 | 0.0538 | 0.0034 | 6.4 | "x 0.94" |
"absolute_sum_of_changes" | 0.0521 | 0.0448 | 0.0073 | 16.3 | "x 0.86" |
"permutation_entropy" | 20.6 | 16.3 | 4.3 | 26.2 | "x 0.79" |
"absolute_maximum" | 0.13 | 0.1 | 0.0277 | 27.7 | "x 0.78" |
"cid_ce" | 0.51 | 0.36 | 0.15 | 39.7 | "x 0.72" |
"linear_trend" | 1.6 | 1.1 | 0.51 | 47.5 | "x 0.68" |
"number_crossings" | 0.16 | 0.0932 | 0.0646 | 69.3 | "x 0.59" |
"autocorrelation" | 0.28 | 0.16 | 0.12 | 75.8 | "x 0.57" |
"sum_reoccurring_values" | 1.6 | 0.85 | 0.79 | 92.6 | "x 0.52" |
"first_location_of_maximum" | 0.0868 | 0.0424 | 0.0445 | 105.0 | "x 0.49" |
"first_location_of_minimum" | 0.0876 | 0.0418 | 0.0458 | 109.7 | "x 0.48" |
"time_reversal_asymmetry_statistic" | 0.24 | 0.0953 | 0.15 | 156.7 | "x 0.39" |
"index_mass_quantile" | 0.31 | 0.12 | 0.19 | 165.9 | "x 0.38" |
"absolute_energy" | 0.0846 | 0.0307 | 0.0539 | 175.9 | "x 0.36" |
"last_location_of_minimum" | 0.13 | 0.0297 | 0.1 | 340.4 | "x 0.23" |
"last_location_of_maximum" | 0.13 | 0.0296 | 0.1 | 348.4 | "x 0.22" |
"mean_change" | 0.18 | 0.0086 | 0.17 | 2016.9 | "x 0.0472" |
"mean_second_derivative_central" | 0.26 | 0.0091 | 0.25 | 2731.7 | "x 0.0353" |
100k expr¶
df_expr_100k
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"lempel_ziv_complexity" | 37.2 | 2403.7 | -2366.5 | -98.5 | "x 64.6" |
"mean_n_absolute_max" | 0.55 | 7.3 | -6.7 | -92.5 | "x 13.3" |
"benford_correlation" | 11.6 | 144.6 | -133.0 | -92.0 | "x 12.5" |
"longest_streak_above_mean" | 2.5 | 15.4 | -12.9 | -84.1 | "x 6.3" |
"longest_streak_below_mean" | 2.4 | 15.3 | -12.9 | -84.0 | "x 6.3" |
"energy_ratios" | 1.0 | 5.0 | -3.9 | -78.9 | "x 4.7" |
"ratio_n_unique_to_length" | 2.8 | 8.2 | -5.4 | -66.3 | "x 3.0" |
"change_quantiles" | 2.3 | 6.4 | -4.1 | -64.6 | "x 2.8" |
"absolute_maximum" | 0.15 | 0.37 | -0.21 | -58.5 | "x 2.4" |
"count_above_mean" | 0.18 | 0.42 | -0.23 | -56.0 | "x 2.3" |
"count_below_mean" | 0.19 | 0.42 | -0.23 | -55.6 | "x 2.3" |
"large_standard_deviation" | 0.36 | 0.77 | -0.42 | -53.9 | "x 2.2" |
"variation_coefficient" | 0.22 | 0.46 | -0.24 | -51.4 | "x 2.1" |
"has_duplicate_max" | 0.24 | 0.49 | -0.25 | -50.3 | "x 2.0" |
"has_duplicate_min" | 0.24 | 0.49 | -0.24 | -49.8 | "x 2.0" |
"symmetry_looking" | 0.94 | 1.8 | -0.88 | -48.4 | "x 1.9" |
"ratio_beyond_r_sigma" | 0.7 | 1.2 | -0.53 | -43.0 | "x 1.8" |
"percent_reoccurring_points" | 5.5 | 9.4 | -3.9 | -41.8 | "x 1.7" |
"sum_reoccurring_points" | 5.2 | 8.8 | -3.6 | -41.3 | "x 1.7" |
"var_gt_std" | 0.17 | 0.29 | -0.12 | -41.2 | "x 1.7" |
"permutation_entropy" | 175.2 | 287.2 | -112.0 | -39.0 | "x 1.6" |
"linear_trend" | 6.5 | 9.9 | -3.4 | -34.3 | "x 1.5" |
"has_duplicate" | 5.1 | 7.8 | -2.6 | -33.9 | "x 1.5" |
"number_peaks" | 2.5 | 3.6 | -1.1 | -30.8 | "x 1.4" |
"root_mean_square" | 0.25 | 0.35 | -0.0981 | -28.1 | "x 1.4" |
"binned_entropy" | 1.7 | 2.4 | -0.64 | -26.7 | "x 1.4" |
"c3" | 0.7 | 0.85 | -0.15 | -17.5 | "x 1.2" |
"first_location_of_maximum" | 0.12 | 0.14 | -0.0215 | -15.4 | "x 1.2" |
"first_location_of_minimum" | 0.12 | 0.14 | -0.0185 | -13.3 | "x 1.2" |
"count_below" | 0.18 | 0.2 | -0.0244 | -12.0 | "x 1.1" |
"count_above" | 0.18 | 0.2 | -0.0239 | -11.8 | "x 1.1" |
"mean_abs_change" | 0.23 | 0.24 | -0.0101 | -4.2 | "x 1.0" |
"absolute_sum_of_changes" | 0.23 | 0.23 | 0.0042 | 1.8 | "x 0.98" |
"time_reversal_asymmetry_statistic" | 0.91 | 0.83 | 0.0751 | 9.0 | "x 0.92" |
"autocorrelation" | 0.77 | 0.67 | 0.0975 | 14.5 | "x 0.87" |
"percent_reoccurring_values" | 11.0 | 8.2 | 2.8 | 34.5 | "x 0.74" |
"last_location_of_minimum" | 0.24 | 0.15 | 0.0939 | 62.6 | "x 0.62" |
"last_location_of_maximum" | 0.24 | 0.15 | 0.0952 | 63.5 | "x 0.61" |
"index_mass_quantile" | 1.4 | 0.81 | 0.63 | 77.5 | "x 0.56" |
"number_crossings" | 0.27 | 0.14 | 0.13 | 90.3 | "x 0.53" |
"sum_reoccurring_values" | 17.3 | 9.0 | 8.3 | 92.1 | "x 0.52" |
"cid_ce" | 3.3 | 1.3 | 2.0 | 150.3 | "x 0.4" |
"absolute_energy" | 0.24 | 0.0443 | 0.2 | 447.0 | "x 0.18" |
"mean_change" | 0.16 | 0.0086 | 0.16 | 1811.6 | "x 0.0523" |
"mean_second_derivative_central" | 0.26 | 0.0091 | 0.25 | 2776.1 | "x 0.0348" |
"range_count" | 632.3 | 0.41 | 631.9 | 154619.9 | "x 0.0006" |
1M expr¶
df_expr_1m
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"benford_correlation" | 150.9 | 3102.5 | -2951.6 | -95.1 | "x 20.6" |
"mean_n_absolute_max" | 7.4 | 93.5 | -86.1 | -92.1 | "x 12.6" |
"longest_streak_below_mean" | 22.4 | 157.0 | -134.6 | -85.7 | "x 7.0" |
"longest_streak_above_mean" | 22.5 | 154.0 | -131.6 | -85.4 | "x 6.9" |
"absolute_maximum" | 0.82 | 4.9 | -4.1 | -83.3 | "x 6.0" |
"energy_ratios" | 8.3 | 36.4 | -28.1 | -77.3 | "x 4.4" |
"count_below_mean" | 1.3 | 4.7 | -3.3 | -71.8 | "x 3.5" |
"number_peaks" | 22.2 | 76.5 | -54.3 | -70.9 | "x 3.4" |
"ratio_n_unique_to_length" | 30.6 | 96.0 | -65.3 | -68.1 | "x 3.1" |
"has_duplicate_max" | 1.3 | 4.1 | -2.7 | -67.0 | "x 3.0" |
"symmetry_looking" | 9.2 | 27.6 | -18.4 | -66.6 | "x 3.0" |
"has_duplicate_min" | 1.3 | 3.9 | -2.6 | -66.5 | "x 3.0" |
"count_above_mean" | 1.3 | 3.9 | -2.6 | -66.0 | "x 2.9" |
"large_standard_deviation" | 3.7 | 7.7 | -4.0 | -52.3 | "x 2.1" |
"root_mean_square" | 2.1 | 4.5 | -2.3 | -51.9 | "x 2.1" |
"first_location_of_maximum" | 0.85 | 1.7 | -0.81 | -48.8 | "x 2.0" |
"change_quantiles" | 31.8 | 60.3 | -28.6 | -47.3 | "x 1.9" |
"first_location_of_minimum" | 0.85 | 1.6 | -0.73 | -46.1 | "x 1.9" |
"count_above" | 0.83 | 1.5 | -0.67 | -44.7 | "x 1.8" |
"variation_coefficient" | 2.7 | 4.8 | -2.1 | -43.2 | "x 1.8" |
"count_below" | 0.83 | 1.5 | -0.62 | -42.9 | "x 1.8" |
"mean_abs_change" | 3.5 | 5.7 | -2.2 | -38.3 | "x 1.6" |
"c3" | 7.6 | 12.0 | -4.4 | -36.9 | "x 1.6" |
"binned_entropy" | 14.6 | 22.9 | -8.2 | -36.0 | "x 1.6" |
"has_duplicate" | 59.3 | 90.6 | -31.3 | -34.6 | "x 1.5" |
"ratio_beyond_r_sigma" | 8.0 | 11.8 | -3.9 | -32.5 | "x 1.5" |
"sum_reoccurring_points" | 70.4 | 102.5 | -32.2 | -31.4 | "x 1.5" |
"absolute_sum_of_changes" | 3.7 | 5.0 | -1.3 | -26.6 | "x 1.4" |
"last_location_of_minimum" | 2.4 | 3.2 | -0.84 | -26.0 | "x 1.4" |
"last_location_of_maximum" | 2.4 | 3.2 | -0.82 | -25.6 | "x 1.3" |
"autocorrelation" | 9.5 | 12.3 | -2.7 | -22.4 | "x 1.3" |
"percent_reoccurring_points" | 69.7 | 88.8 | -19.1 | -21.5 | "x 1.3" |
"time_reversal_asymmetry_statistic" | 12.5 | 15.8 | -3.3 | -20.7 | "x 1.3" |
"linear_trend" | 90.2 | 110.4 | -20.2 | -18.3 | "x 1.2" |
"permutation_entropy" | 2354.9 | 2796.6 | -441.7 | -15.8 | "x 1.2" |
"var_gt_std" | 2.6 | 2.9 | -0.3 | -10.5 | "x 1.1" |
"index_mass_quantile" | 12.0 | 10.4 | 1.6 | 15.2 | "x 0.87" |
"percent_reoccurring_values" | 164.6 | 99.9 | 64.7 | 64.8 | "x 0.61" |
"number_crossings" | 2.0 | 1.2 | 0.82 | 70.5 | "x 0.59" |
"sum_reoccurring_values" | 193.3 | 98.8 | 94.5 | 95.6 | "x 0.51" |
"cid_ce" | 41.4 | 14.9 | 26.5 | 177.6 | "x 0.36" |
"absolute_energy" | 2.1 | 0.44 | 1.7 | 384.0 | "x 0.21" |
"mean_change" | 0.16 | 0.0086 | 0.16 | 1797.6 | "x 0.0527" |
"mean_second_derivative_central" | 0.24 | 0.0091 | 0.23 | 2583.7 | "x 0.0373" |
"range_count" | 687.1 | 3.2 | 683.9 | 21435.6 | "x 0.0046" |
9M expr¶
df_expr_9m
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"mean_n_absolute_max" | 84.1 | 1466.7 | -1382.6 | -94.3 | "x 17.4" |
"benford_correlation" | 1907.6 | 19368.9 | -17461.4 | -90.2 | "x 10.2" |
"longest_streak_above_mean" | 197.0 | 1437.9 | -1240.9 | -86.3 | "x 7.3" |
"longest_streak_below_mean" | 197.4 | 1403.2 | -1205.8 | -85.9 | "x 7.1" |
"energy_ratios" | 86.7 | 530.6 | -444.0 | -83.7 | "x 6.1" |
"absolute_maximum" | 8.0 | 48.0 | -40.0 | -83.3 | "x 6.0" |
"time_reversal_asymmetry_statistic" | 146.3 | 722.4 | -576.1 | -79.7 | "x 4.9" |
"change_quantiles" | 298.0 | 1146.8 | -848.8 | -74.0 | "x 3.8" |
"count_above_mean" | 10.3 | 39.4 | -29.1 | -73.8 | "x 3.8" |
"count_below_mean" | 10.0 | 38.0 | -28.1 | -73.8 | "x 3.8" |
"has_duplicate_min" | 9.2 | 34.3 | -25.1 | -73.2 | "x 3.7" |
"has_duplicate_max" | 9.5 | 34.8 | -25.3 | -72.8 | "x 3.7" |
"first_location_of_minimum" | 6.4 | 16.7 | -10.3 | -61.6 | "x 2.6" |
"ratio_beyond_r_sigma" | 68.9 | 168.9 | -99.9 | -59.2 | "x 2.4" |
"large_standard_deviation" | 30.9 | 72.1 | -41.1 | -57.1 | "x 2.3" |
"c3" | 66.0 | 152.6 | -86.5 | -56.7 | "x 2.3" |
"count_below" | 5.3 | 12.0 | -6.7 | -55.7 | "x 2.3" |
"count_above" | 5.4 | 11.9 | -6.5 | -54.8 | "x 2.2" |
"ratio_n_unique_to_length" | 497.9 | 1007.5 | -509.6 | -50.6 | "x 2.0" |
"first_location_of_maximum" | 8.4 | 15.9 | -7.5 | -47.1 | "x 1.9" |
"root_mean_square" | 21.3 | 39.6 | -18.3 | -46.2 | "x 1.9" |
"variation_coefficient" | 27.6 | 47.2 | -19.6 | -41.5 | "x 1.7" |
"number_peaks" | 282.4 | 480.9 | -198.5 | -41.3 | "x 1.7" |
"symmetry_looking" | 120.7 | 184.4 | -63.7 | -34.6 | "x 1.5" |
"mean_abs_change" | 31.6 | 45.2 | -13.6 | -30.0 | "x 1.4" |
"last_location_of_maximum" | 19.7 | 27.5 | -7.9 | -28.5 | "x 1.4" |
"last_location_of_minimum" | 20.2 | 28.0 | -7.8 | -28.0 | "x 1.4" |
"binned_entropy" | 153.6 | 211.5 | -57.9 | -27.4 | "x 1.4" |
"range_count" | 33.9 | 46.2 | -12.3 | -26.6 | "x 1.4" |
"linear_trend" | 922.1 | 1199.2 | -277.1 | -23.1 | "x 1.3" |
"permutation_entropy" | 23256.9 | 29691.4 | -6434.5 | -21.7 | "x 1.3" |
"absolute_sum_of_changes" | 40.1 | 50.1 | -10.1 | -20.1 | "x 1.3" |
"autocorrelation" | 103.5 | 125.7 | -22.2 | -17.7 | "x 1.2" |
"var_gt_std" | 23.2 | 27.8 | -4.6 | -16.6 | "x 1.2" |
"sum_reoccurring_points" | 1154.1 | 1194.4 | -40.2 | -3.4 | "x 1.0" |
"percent_reoccurring_points" | 1760.3 | 1801.2 | -40.9 | -2.3 | "x 1.0" |
"has_duplicate" | 1011.8 | 963.9 | 47.9 | 5.0 | "x 0.95" |
"number_crossings" | 14.1 | 11.7 | 2.4 | 20.5 | "x 0.83" |
"index_mass_quantile" | 150.9 | 89.1 | 61.8 | 69.3 | "x 0.59" |
"percent_reoccurring_values" | 2433.7 | 1248.3 | 1185.4 | 95.0 | "x 0.51" |
"sum_reoccurring_values" | 2884.4 | 1377.8 | 1506.7 | 109.4 | "x 0.48" |
"cid_ce" | 571.0 | 122.2 | 448.8 | 367.4 | "x 0.21" |
"absolute_energy" | 19.1 | 4.0 | 15.1 | 378.6 | "x 0.21" |
"mean_change" | 0.16 | 0.0086 | 0.16 | 1815.6 | "x 0.0522" |
"mean_second_derivative_central" | 0.25 | 0.0091 | 0.25 | 2681.4 | "x 0.036" |
4.2 Results for pl.Series
¶
10k series¶
df_series_10k
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"approximate_entropy" | 256.8 | 37776.4 | -37519.6 | -99.3 | "x 147.1" |
"lempel_ziv_complexity" | 1.6 | 46.2 | -44.6 | -96.6 | "x 29.3" |
"count_below_mean" | 0.0177 | 0.17 | -0.15 | -89.3 | "x 9.3" |
"count_above_mean" | 0.0178 | 0.16 | -0.14 | -89.0 | "x 9.1" |
"has_duplicate_max" | 0.0203 | 0.18 | -0.16 | -88.8 | "x 8.9" |
"has_duplicate_min" | 0.0204 | 0.18 | -0.16 | -88.6 | "x 8.8" |
"energy_ratios" | 0.41 | 3.3 | -2.9 | -87.8 | "x 8.2" |
"sample_entropy" | 1268.6 | 10169.0 | -8900.4 | -87.5 | "x 8.0" |
"count_above" | 0.0164 | 0.12 | -0.1 | -86.5 | "x 7.4" |
"benford_correlation" | 1.8 | 13.5 | -11.6 | -86.5 | "x 7.4" |
"count_below" | 0.0167 | 0.12 | -0.11 | -86.3 | "x 7.3" |
"first_location_of_minimum" | 0.0071 | 0.0413 | -0.0343 | -82.9 | "x 5.9" |
"first_location_of_maximum" | 0.0072 | 0.0419 | -0.0347 | -82.9 | "x 5.8" |
"symmetry_looking" | 0.0671 | 0.33 | -0.27 | -79.8 | "x 4.9" |
"absolute_maximum" | 0.0218 | 0.0961 | -0.0742 | -77.3 | "x 4.4" |
"longest_streak_below_mean" | 0.39 | 1.7 | -1.3 | -77.1 | "x 4.4" |
"longest_streak_above_mean" | 0.39 | 1.7 | -1.3 | -76.9 | "x 4.3" |
"autoregressive_coefficients" | 1.4 | 5.9 | -4.5 | -76.6 | "x 4.3" |
"mean_n_absolute_max" | 0.18 | 0.65 | -0.47 | -72.2 | "x 3.6" |
"range_count" | 0.0792 | 0.25 | -0.18 | -68.9 | "x 3.2" |
"change_quantiles" | 0.46 | 1.3 | -0.86 | -64.9 | "x 2.8" |
"linear_trend" | 0.4 | 1.1 | -0.67 | -62.7 | "x 2.7" |
"ratio_n_unique_to_length" | 0.26 | 0.69 | -0.43 | -62.2 | "x 2.6" |
"number_peaks" | 0.64 | 1.7 | -1.0 | -62.1 | "x 2.6" |
"percent_reoccurring_points" | 0.47 | 1.2 | -0.74 | -61.0 | "x 2.6" |
"absolute_energy" | 0.0119 | 0.0301 | -0.0181 | -60.3 | "x 2.5" |
"large_standard_deviation" | 0.0604 | 0.13 | -0.0742 | -55.1 | "x 2.2" |
"mean_change" | 0.0039 | 0.0086 | -0.0047 | -54.8 | "x 2.2" |
"mean_second_derivative_central" | 0.005 | 0.0091 | -0.0041 | -45.2 | "x 1.8" |
"cid_ce" | 0.2 | 0.36 | -0.16 | -45.1 | "x 1.8" |
"variation_coefficient" | 0.0616 | 0.11 | -0.0439 | -41.6 | "x 1.7" |
"root_mean_square" | 0.0897 | 0.14 | -0.0482 | -35.0 | "x 1.5" |
"ratio_beyond_r_sigma" | 0.28 | 0.43 | -0.15 | -34.9 | "x 1.5" |
"sum_reoccurring_points" | 0.57 | 0.81 | -0.24 | -29.6 | "x 1.4" |
"has_duplicate" | 0.51 | 0.69 | -0.18 | -26.5 | "x 1.4" |
"percent_reoccurring_values" | 0.58 | 0.77 | -0.19 | -24.5 | "x 1.3" |
"binned_entropy" | 0.37 | 0.46 | -0.0894 | -19.4 | "x 1.2" |
"number_cwt_peaks" | 537.4 | 636.3 | -99.0 | -15.6 | "x 1.2" |
"var_gt_std" | 0.0503 | 0.0534 | -0.0031 | -5.7 | "x 1.1" |
"spkt_welch_density" | 0.59 | 0.57 | 0.0124 | 2.2 | "x 0.98" |
"c3" | 0.28 | 0.27 | 0.0147 | 5.5 | "x 0.95" |
"sum_reoccurring_values" | 0.96 | 0.85 | 0.11 | 12.4 | "x 0.89" |
"permutation_entropy" | 13.6 | 11.1 | 2.6 | 23.1 | "x 0.81" |
"last_location_of_minimum" | 0.0477 | 0.0293 | 0.0184 | 62.8 | "x 0.61" |
"last_location_of_maximum" | 0.0482 | 0.0295 | 0.0187 | 63.6 | "x 0.61" |
"mean_abs_change" | 0.0886 | 0.0495 | 0.0391 | 79.0 | "x 0.56" |
"absolute_sum_of_changes" | 0.0905 | 0.0446 | 0.0459 | 103.1 | "x 0.49" |
"number_crossings" | 0.2 | 0.0931 | 0.11 | 113.5 | "x 0.47" |
"autocorrelation" | 0.38 | 0.16 | 0.22 | 137.3 | "x 0.42" |
"index_mass_quantile" | 0.28 | 0.12 | 0.16 | 139.7 | "x 0.42" |
"time_reversal_asymmetry_statistic" | 0.29 | 0.0938 | 0.19 | 206.8 | "x 0.33" |
100k series¶
df_series_100k
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"sample_entropy" | 5101.4 | 1385099.1 | -1.3800e6 | -99.6 | "x 271.5" |
"lempel_ziv_complexity" | 38.8 | 2457.4 | -2418.6 | -98.4 | "x 63.3" |
"mean_n_absolute_max" | 0.59 | 7.4 | -6.8 | -92.0 | "x 12.6" |
"benford_correlation" | 11.3 | 136.0 | -124.7 | -91.7 | "x 12.1" |
"energy_ratios" | 0.61 | 5.1 | -4.5 | -88.0 | "x 8.3" |
"longest_streak_below_mean" | 2.4 | 15.8 | -13.4 | -84.9 | "x 6.6" |
"longest_streak_above_mean" | 2.4 | 15.5 | -13.1 | -84.7 | "x 6.5" |
"has_duplicate_min" | 0.0815 | 0.5 | -0.42 | -83.6 | "x 6.1" |
"has_duplicate_max" | 0.0808 | 0.49 | -0.41 | -83.4 | "x 6.0" |
"count_below_mean" | 0.0725 | 0.43 | -0.36 | -83.1 | "x 5.9" |
"count_above_mean" | 0.0724 | 0.41 | -0.34 | -82.5 | "x 5.7" |
"autoregressive_coefficients" | 8.9 | 49.4 | -40.6 | -82.1 | "x 5.6" |
"linear_trend" | 1.8 | 9.1 | -7.3 | -79.9 | "x 5.0" |
"absolute_maximum" | 0.0792 | 0.37 | -0.29 | -78.6 | "x 4.7" |
"count_below" | 0.0487 | 0.2 | -0.16 | -76.1 | "x 4.2" |
"count_above" | 0.0484 | 0.2 | -0.15 | -76.1 | "x 4.2" |
"large_standard_deviation" | 0.23 | 0.78 | -0.55 | -70.6 | "x 3.4" |
"root_mean_square" | 0.1 | 0.33 | -0.22 | -68.4 | "x 3.2" |
"ratio_n_unique_to_length" | 2.6 | 7.7 | -5.1 | -66.8 | "x 3.0" |
"range_count" | 0.14 | 0.4 | -0.26 | -64.6 | "x 2.8" |
"first_location_of_maximum" | 0.0496 | 0.14 | -0.0895 | -64.3 | "x 2.8" |
"first_location_of_minimum" | 0.0496 | 0.14 | -0.0891 | -64.3 | "x 2.8" |
"symmetry_looking" | 0.67 | 1.8 | -1.1 | -63.0 | "x 2.7" |
"change_quantiles" | 2.2 | 5.5 | -3.3 | -59.2 | "x 2.5" |
"variation_coefficient" | 0.2 | 0.46 | -0.26 | -57.0 | "x 2.3" |
"mean_change" | 0.0039 | 0.0086 | -0.0048 | -55.3 | "x 2.2" |
"ratio_beyond_r_sigma" | 0.66 | 1.2 | -0.56 | -46.1 | "x 1.9" |
"mean_second_derivative_central" | 0.005 | 0.0091 | -0.0041 | -45.0 | "x 1.8" |
"var_gt_std" | 0.16 | 0.29 | -0.13 | -44.8 | "x 1.8" |
"percent_reoccurring_points" | 4.8 | 8.2 | -3.5 | -42.2 | "x 1.7" |
"sum_reoccurring_points" | 5.0 | 8.5 | -3.5 | -41.5 | "x 1.7" |
"has_duplicate" | 4.8 | 7.9 | -3.1 | -39.0 | "x 1.6" |
"number_peaks" | 2.5 | 3.6 | -1.0 | -29.4 | "x 1.4" |
"cid_ce" | 0.97 | 1.3 | -0.34 | -26.2 | "x 1.4" |
"c3" | 0.71 | 0.84 | -0.12 | -14.9 | "x 1.2" |
"percent_reoccurring_values" | 7.2 | 8.1 | -0.88 | -10.9 | "x 1.1" |
"binned_entropy" | 2.2 | 2.4 | -0.18 | -7.6 | "x 1.1" |
"permutation_entropy" | 139.3 | 147.1 | -7.8 | -5.3 | "x 1.1" |
"spkt_welch_density" | 3.7 | 3.7 | 0.0373 | 1.0 | "x 0.99" |
"last_location_of_minimum" | 0.15 | 0.15 | 0.0042 | 2.8 | "x 0.97" |
"last_location_of_maximum" | 0.15 | 0.15 | 0.005 | 3.4 | "x 0.97" |
"sum_reoccurring_values" | 9.6 | 9.1 | 0.52 | 5.7 | "x 0.95" |
"number_cwt_peaks" | 35319.7 | 32317.9 | 3001.8 | 9.3 | "x 0.92" |
"mean_abs_change" | 0.26 | 0.24 | 0.0287 | 12.2 | "x 0.89" |
"absolute_sum_of_changes" | 0.27 | 0.24 | 0.038 | 16.1 | "x 0.86" |
"time_reversal_asymmetry_statistic" | 1.1 | 0.79 | 0.32 | 40.6 | "x 0.71" |
"index_mass_quantile" | 1.2 | 0.81 | 0.37 | 45.4 | "x 0.69" |
"autocorrelation" | 1.0 | 0.67 | 0.34 | 51.0 | "x 0.66" |
"number_crossings" | 0.23 | 0.14 | 0.0909 | 64.6 | "x 0.61" |
"absolute_energy" | 0.0904 | 0.0438 | 0.0466 | 106.4 | "x 0.48" |
1M series¶
df_series_1m
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"benford_correlation" | 114.4 | 1435.3 | -1320.9 | -92.0 | "x 12.5" |
"mean_n_absolute_max" | 7.6 | 87.6 | -80.0 | -91.3 | "x 11.6" |
"energy_ratios" | 4.3 | 35.8 | -31.5 | -88.0 | "x 8.3" |
"root_mean_square" | 0.55 | 4.3 | -3.7 | -87.1 | "x 7.8" |
"longest_streak_below_mean" | 22.3 | 164.4 | -142.0 | -86.4 | "x 7.4" |
"autoregressive_coefficients" | 86.0 | 619.3 | -533.3 | -86.1 | "x 7.2" |
"longest_streak_above_mean" | 22.5 | 161.5 | -139.0 | -86.1 | "x 7.2" |
"linear_trend" | 20.5 | 104.6 | -84.1 | -80.4 | "x 5.1" |
"absolute_maximum" | 1.1 | 4.8 | -3.7 | -77.7 | "x 4.5" |
"count_below_mean" | 1.1 | 4.7 | -3.6 | -76.5 | "x 4.3" |
"ratio_n_unique_to_length" | 23.4 | 91.5 | -68.1 | -74.4 | "x 3.9" |
"has_duplicate_min" | 1.1 | 4.1 | -3.0 | -73.2 | "x 3.7" |
"has_duplicate_max" | 1.1 | 4.0 | -2.9 | -72.3 | "x 3.6" |
"count_above_mean" | 1.1 | 3.8 | -2.7 | -71.6 | "x 3.5" |
"first_location_of_minimum" | 0.58 | 1.6 | -1.0 | -63.4 | "x 2.7" |
"first_location_of_maximum" | 0.58 | 1.6 | -1.0 | -63.2 | "x 2.7" |
"count_below" | 0.58 | 1.5 | -0.88 | -60.3 | "x 2.5" |
"count_above" | 0.58 | 1.5 | -0.88 | -60.3 | "x 2.5" |
"symmetry_looking" | 9.0 | 21.8 | -12.8 | -58.8 | "x 2.4" |
"change_quantiles" | 23.3 | 53.6 | -30.3 | -56.5 | "x 2.3" |
"range_count" | 1.4 | 3.2 | -1.8 | -55.7 | "x 2.3" |
"mean_change" | 0.0039 | 0.0087 | -0.0048 | -55.3 | "x 2.2" |
"number_peaks" | 21.2 | 45.6 | -24.4 | -53.5 | "x 2.2" |
"large_standard_deviation" | 3.7 | 7.8 | -4.2 | -53.2 | "x 2.1" |
"mean_second_derivative_central" | 0.005 | 0.0091 | -0.0041 | -45.0 | "x 1.8" |
"c3" | 6.0 | 10.3 | -4.3 | -42.2 | "x 1.7" |
"sum_reoccurring_points" | 65.7 | 102.5 | -36.8 | -35.9 | "x 1.6" |
"variation_coefficient" | 3.2 | 4.8 | -1.6 | -34.2 | "x 1.5" |
"absolute_sum_of_changes" | 3.7 | 5.3 | -1.7 | -30.9 | "x 1.4" |
"ratio_beyond_r_sigma" | 8.0 | 11.6 | -3.5 | -30.5 | "x 1.4" |
"time_reversal_asymmetry_statistic" | 10.7 | 14.7 | -4.0 | -27.1 | "x 1.4" |
"last_location_of_minimum" | 2.3 | 3.0 | -0.77 | -25.2 | "x 1.3" |
"cid_ce" | 10.1 | 13.5 | -3.4 | -24.9 | "x 1.3" |
"has_duplicate" | 71.2 | 94.7 | -23.6 | -24.9 | "x 1.3" |
"percent_reoccurring_points" | 63.6 | 84.5 | -20.9 | -24.8 | "x 1.3" |
"last_location_of_maximum" | 2.3 | 3.0 | -0.74 | -24.6 | "x 1.3" |
"mean_abs_change" | 3.7 | 4.8 | -1.2 | -24.1 | "x 1.3" |
"permutation_entropy" | 1534.8 | 1968.6 | -433.9 | -22.0 | "x 1.3" |
"autocorrelation" | 9.6 | 11.8 | -2.2 | -18.8 | "x 1.2" |
"var_gt_std" | 2.6 | 2.9 | -0.31 | -10.6 | "x 1.1" |
"number_crossings" | 1.0 | 1.1 | -0.0473 | -4.3 | "x 1.0" |
"binned_entropy" | 20.4 | 21.1 | -0.76 | -3.6 | "x 1.0" |
"spkt_welch_density" | 43.3 | 43.7 | -0.41 | -0.93 | "x 1.0" |
"index_mass_quantile" | 10.3 | 10.1 | 0.17 | 1.7 | "x 0.98" |
"percent_reoccurring_values" | 108.1 | 93.7 | 14.4 | 15.4 | "x 0.87" |
"sum_reoccurring_values" | 127.9 | 100.2 | 27.8 | 27.7 | "x 0.78" |
"absolute_energy" | 0.59 | 0.45 | 0.14 | 30.6 | "x 0.77" |
9M series¶
df_series_9m
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"mean_n_absolute_max" | 47.4 | 1059.4 | -1012.0 | -95.5 | "x 22.4" |
"root_mean_square" | 3.8 | 37.7 | -33.9 | -89.9 | "x 9.9" |
"energy_ratios" | 39.9 | 374.9 | -335.0 | -89.4 | "x 9.4" |
"longest_streak_above_mean" | 188.5 | 1676.1 | -1487.6 | -88.8 | "x 8.9" |
"benford_correlation" | 1658.2 | 13073.8 | -11415.6 | -87.3 | "x 7.9" |
"longest_streak_below_mean" | 206.5 | 1459.7 | -1253.3 | -85.9 | "x 7.1" |
"linear_trend" | 185.2 | 1224.0 | -1038.8 | -84.9 | "x 6.6" |
"absolute_maximum" | 9.5 | 43.0 | -33.5 | -77.8 | "x 4.5" |
"autoregressive_coefficients" | 1259.4 | 5526.1 | -4266.7 | -77.2 | "x 4.4" |
"count_above_mean" | 9.6 | 34.7 | -25.1 | -72.3 | "x 3.6" |
"has_duplicate_max" | 10.1 | 34.4 | -24.3 | -70.6 | "x 3.4" |
"has_duplicate_min" | 10.8 | 36.7 | -25.9 | -70.5 | "x 3.4" |
"count_below_mean" | 12.2 | 40.1 | -27.9 | -69.7 | "x 3.3" |
"first_location_of_minimum" | 5.2 | 16.6 | -11.3 | -68.4 | "x 3.2" |
"first_location_of_maximum" | 5.1 | 15.6 | -10.4 | -67.1 | "x 3.0" |
"ratio_n_unique_to_length" | 330.2 | 894.1 | -563.9 | -63.1 | "x 2.7" |
"change_quantiles" | 232.3 | 600.2 | -367.8 | -61.3 | "x 2.6" |
"count_below" | 5.0 | 12.4 | -7.3 | -59.5 | "x 2.5" |
"count_above" | 5.0 | 11.6 | -6.6 | -57.1 | "x 2.3" |
"mean_change" | 0.0039 | 0.0086 | -0.0048 | -55.1 | "x 2.2" |
"large_standard_deviation" | 34.3 | 74.0 | -39.7 | -53.6 | "x 2.2" |
"number_peaks" | 204.6 | 416.1 | -211.5 | -50.8 | "x 2.0" |
"range_count" | 16.3 | 32.8 | -16.5 | -50.2 | "x 2.0" |
"symmetry_looking" | 86.8 | 166.5 | -79.7 | -47.9 | "x 1.9" |
"c3" | 50.5 | 91.8 | -41.3 | -44.9 | "x 1.8" |
"mean_second_derivative_central" | 0.005 | 0.009 | -0.0041 | -44.9 | "x 1.8" |
"variation_coefficient" | 29.0 | 46.2 | -17.1 | -37.1 | "x 1.6" |
"absolute_sum_of_changes" | 31.6 | 45.9 | -14.3 | -31.2 | "x 1.5" |
"time_reversal_asymmetry_statistic" | 97.8 | 137.4 | -39.6 | -28.8 | "x 1.4" |
"mean_abs_change" | 32.0 | 44.9 | -12.8 | -28.6 | "x 1.4" |
"number_crossings" | 7.1 | 9.8 | -2.7 | -27.7 | "x 1.4" |
"last_location_of_minimum" | 20.2 | 27.7 | -7.5 | -27.2 | "x 1.4" |
"last_location_of_maximum" | 20.1 | 27.1 | -7.1 | -26.1 | "x 1.4" |
"permutation_entropy" | 16945.7 | 22762.9 | -5817.2 | -25.6 | "x 1.3" |
"cid_ce" | 87.8 | 115.7 | -27.9 | -24.1 | "x 1.3" |
"ratio_beyond_r_sigma" | 80.4 | 102.1 | -21.7 | -21.3 | "x 1.3" |
"var_gt_std" | 23.1 | 27.7 | -4.6 | -16.5 | "x 1.2" |
"has_duplicate" | 1033.5 | 1172.6 | -139.0 | -11.9 | "x 1.1" |
"percent_reoccurring_points" | 862.4 | 953.4 | -90.9 | -9.5 | "x 1.1" |
"sum_reoccurring_points" | 1039.3 | 1145.3 | -105.9 | -9.2 | "x 1.1" |
"binned_entropy" | 182.8 | 193.7 | -10.9 | -5.6 | "x 1.1" |
"absolute_energy" | 3.8 | 3.7 | 0.13 | 3.4 | "x 0.97" |
"autocorrelation" | 552.8 | 526.4 | 26.4 | 5.0 | "x 0.95" |
"index_mass_quantile" | 110.9 | 89.8 | 21.0 | 23.4 | "x 0.81" |
"spkt_welch_density" | 1012.4 | 763.7 | 248.7 | 32.6 | "x 0.75" |
"sum_reoccurring_values" | 1687.5 | 1123.4 | 564.1 | 50.2 | "x 0.67" |
"percent_reoccurring_values" | 1629.8 | 953.5 | 676.3 | 70.9 | "x 0.59" |
5. Benchmark Group by / Aggregation
context¶
Benchmark combining functime's feature extraction and polars' Group by / Aggregation
context.
# Daily S&P500 prices in long format (columns: ticker, time, price).
_SP500_DATASET = "../../data/sp500.parquet"
# Load the same parquet file once per library so each benchmark kernel
# works on its native frame type.
SP500_PANDAS, SP500_PL_EAGER = (
    pd.read_parquet(_SP500_DATASET),
    pl.read_parquet(_SP500_DATASET),
)
SP500_PANDAS
ticker | time | price | |
---|---|---|---|
0 | A | 2022-06-01 | 122.278214 |
1 | A | 2022-06-02 | 128.248581 |
2 | A | 2022-06-03 | 127.642609 |
3 | A | 2022-06-06 | 126.788277 |
4 | A | 2022-06-07 | 128.049881 |
... | ... | ... | ... |
126248 | ZTS | 2023-05-24 | 169.139999 |
126249 | ZTS | 2023-05-25 | 165.240005 |
126250 | ZTS | 2023-05-26 | 164.740005 |
126251 | ZTS | 2023-05-30 | 160.940002 |
126252 | ZTS | 2023-05-31 | 163.009995 |
126253 rows × 3 columns
We want to compare tsfresh
using pandas' groupby
with functime
using polars' group_by
, for example:
%%timeit
SP500_PANDAS.groupby(
by = "ticker"
)["price"].agg(
tsfresh.number_peaks,
n = 5
)
908 ms ± 33.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%%timeit
SP500_PL_EAGER.group_by(
pl.col("ticker")
).agg(
pl.col("price").ts.number_peaks(support = 5)
)
52.8 ms ± 2.13 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
If we examine the previous benchmark, we can see that the number_peaks
operation is approximately 2.5 times faster when using functime
compared to tsfresh
.
In the groupby
context, the gap is much larger: functime is roughly 17 times faster (52.8 ms vs. 908 ms in the two cells above)!
def benchmark_groupby_context(
    f_feat: Callable, ts_feat: Callable, f_params: dict, ts_params: dict
):
    """Time one feature in a group-by/aggregation context on the S&P500 data.

    Two kernels are handed to ``perfplot.bench``: the functime feature used as
    a polars expression inside ``group_by("ticker").agg(...)`` and the tsfresh
    feature passed to pandas' ``groupby("ticker")["price"].agg(...)``.

    Args:
        f_feat: functime feature function, called with ``pl.col("price")``.
        ts_feat: tsfresh feature function, passed to pandas' ``agg``.
        f_params: Keyword arguments forwarded to ``f_feat``.
        ts_params: Keyword arguments forwarded to ``ts_feat`` through ``agg``.

    Returns:
        The object returned by ``perfplot.bench``.
    """
    # The threshold for lempel_ziv_complexity is an expression on the column
    # being aggregated, so it is rebuilt here against the "price" column.
    if f_feat.__name__ == "lempel_ziv_complexity":
        f_params = {"threshold": (pl.col("price").max() - pl.col("price").min()) / 2}

    def _functime_kernel(pl_frame, _pd_frame):
        # functime + polars groupby
        return pl_frame.group_by(pl.col("ticker")).agg(
            f_feat(pl.col("price"), **f_params)
        )

    def _tsfresh_kernel(_pl_frame, pd_frame):
        # tsfresh + pandas groupby
        return pd_frame.groupby("ticker")["price"].agg(ts_feat, **ts_params)

    # The data size is fixed, so the single n value is only a placeholder and
    # the setup ignores it.
    return perfplot.bench(
        setup=lambda _n: (SP500_PL_EAGER, SP500_PANDAS),
        kernels=[_functime_kernel, _tsfresh_kernel],
        n_range=[1],
        equality_check=False,
        labels=["functime", "tsfresh"],
    )
def all_benchmarks_groupby(params: list[tuple]) -> pl.DataFrame:
    """Run the group-by benchmark for every feature and collect the timings.

    Args:
        params: Tuples of the form ``(functime_function, tsfresh_function,
            functime_parameters, tsfresh_parameters)``.

    Returns:
        A ``pl.DataFrame`` with one row per benchmarked feature and input
        size, holding both timings in milliseconds, their absolute and
        relative difference, and the speedup ratio (tsfresh / functime).
        The most recently benchmarked feature is placed first. Features whose
        benchmark raises ``ValueError``, ``ImportError`` or ``TypeError`` are
        reported on stdout and left out of the result.
    """
    # NOTE(review): the "tfresh" spelling in the column names is kept as-is;
    # code outside this function may select these columns by name.
    bench_df = pl.DataFrame(
        schema={
            "Feature name": pl.Utf8,
            "n": pl.Int64,
            "functime + pl groupby (ms)": pl.Float64,
            "tfresh + pd groupby (ms)": pl.Float64,
            "diff (ms)": pl.Float64,
            "diff %": pl.Float64,
            "speedup": pl.Float64,
        }
    )
    for entry in params:
        f_feat, ts_feat, f_params, ts_params = entry[:4]
        try:
            print(f"Feature: {f_feat.__name__}")
            bench = benchmark_groupby_context(
                f_feat=f_feat, ts_feat=ts_feat, f_params=f_params, ts_params=ts_params
            )
            # Row 0 holds the functime kernel timings, row 1 the tsfresh ones
            # (same order as the kernels passed to perfplot), in seconds.
            functime_s, tsfresh_s = bench.timings_s[0], bench.timings_s[1]
            diff_s = functime_s - tsfresh_s
            feature_df = pl.DataFrame(
                {
                    "Feature name": [f_feat.__name__] * len(bench.n_range),
                    "n": bench.n_range,
                    "functime + pl groupby (ms)": functime_s * 1_000,
                    "tfresh + pd groupby (ms)": tsfresh_s * 1_000,
                    "diff (ms)": diff_s * 1_000,
                    "diff %": 100 * diff_s / tsfresh_s,
                    "speedup": tsfresh_s / functime_s,
                }
            )
            bench_df = pl.concat([feature_df, bench_df])
        except ValueError:
            print(f"Failed to compute feature {f_feat.__name__}")
        except ImportError:
            print(f"Failed to import feature {f_feat.__name__}")
        except TypeError:
            print(f"Feature {f_feat.__name__} not implemented for pl.Expr")
    return bench_df
%%capture
# Run the group-by benchmark for every (functime, tsfresh) pair in
# FUNC_PARAMS_BENCH; all_benchmarks_groupby prints one progress line per
# feature, which is why the cell output is captured.
bench_groupby = all_benchmarks_groupby(params=FUNC_PARAMS_BENCH)
# table_prettifier is defined outside this section; n=1 matches the single
# n_range value used by benchmark_groupby_context.
df_groupby = table_prettifier(df=bench_groupby, n=1)
INFO:functime.feature_extractors:Expression version of approximate_entropy is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extractors:Expression version of autoregressive_coefficients is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extractors:Expression version of number_cwt_peaks is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extractors:Expression version of sample_entropy is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extractors:Expression version of spkt_welch_density is not yet implemented due to technical difficulty regarding Polars Expression Plugins.
S&P500 groupby¶
df_groupby
Feature name | functime + pl groupby (ms) | tfresh + pd groupby (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"energy_ratios" | 9.1 | 2024.5 | -2015.4 | -99.5 | "x 222.1" |
"index_mass_quantile" | 7.2 | 544.3 | -537.1 | -98.7 | "x 75.8" |
"range_count" | 2.8 | 154.3 | -151.6 | -98.2 | "x 56.0" |
"symmetry_looking" | 3.1 | 114.4 | -111.4 | -97.3 | "x 37.3" |
"percent_reoccurring_points" | 6.8 | 246.0 | -239.2 | -97.2 | "x 36.2" |
"ratio_beyond_r_sigma" | 6.0 | 215.1 | -209.1 | -97.2 | "x 35.8" |
"root_mean_square" | 2.4 | 83.8 | -81.4 | -97.1 | "x 34.3" |
"count_above" | 2.7 | 80.0 | -77.4 | -96.6 | "x 29.7" |
"count_below" | 2.7 | 78.5 | -75.8 | -96.5 | "x 28.9" |
"lempel_ziv_complexity" | 10.5 | 293.2 | -282.8 | -96.4 | "x 28.0" |
"change_quantiles" | 21.0 | 560.7 | -539.8 | -96.3 | "x 26.7" |
"variation_coefficient" | 2.5 | 56.7 | -54.2 | -95.7 | "x 23.1" |
"absolute_maximum" | 2.5 | 56.9 | -54.4 | -95.6 | "x 22.9" |
"binned_entropy" | 8.8 | 190.6 | -181.8 | -95.4 | "x 21.6" |
"c3" | 7.1 | 150.2 | -143.0 | -95.2 | "x 21.0" |
"large_standard_deviation" | 2.8 | 57.6 | -54.8 | -95.1 | "x 20.6" |
"has_duplicate_max" | 4.9 | 99.2 | -94.3 | -95.1 | "x 20.4" |
"has_duplicate_min" | 4.9 | 99.7 | -94.7 | -95.1 | "x 20.2" |
"benford_correlation" | 24.5 | 478.3 | -453.8 | -94.9 | "x 19.5" |
"count_below_mean" | 4.9 | 93.2 | -88.3 | -94.8 | "x 19.1" |
"autocorrelation" | 4.7 | 89.8 | -85.1 | -94.7 | "x 18.9" |
"count_above_mean" | 4.9 | 92.2 | -87.3 | -94.7 | "x 18.9" |
"number_peaks" | 54.1 | 909.6 | -855.6 | -94.1 | "x 16.8" |
"cid_ce" | 12.1 | 182.6 | -170.5 | -93.4 | "x 15.1" |
"var_gt_std" | 2.5 | 36.4 | -33.9 | -93.0 | "x 14.4" |
"absolute_energy" | 2.5 | 34.4 | -31.9 | -92.6 | "x 13.6" |
"mean_n_absolute_max" | 4.6 | 60.7 | -56.1 | -92.5 | "x 13.3" |
"first_location_of_maximum" | 2.9 | 38.6 | -35.7 | -92.5 | "x 13.2" |
"first_location_of_minimum" | 2.9 | 36.1 | -33.2 | -92.0 | "x 12.5" |
"mean_abs_change" | 3.7 | 36.5 | -32.9 | -90.0 | "x 10.0" |
"ratio_n_unique_to_length" | 3.9 | 38.8 | -34.8 | -89.9 | "x 9.9" |
"longest_streak_below_mean" | 12.6 | 122.8 | -110.2 | -89.7 | "x 9.7" |
"absolute_sum_of_changes" | 3.7 | 34.6 | -30.8 | -89.2 | "x 9.3" |
"mean_change" | 2.3 | 21.4 | -19.1 | -89.2 | "x 9.3" |
"longest_streak_above_mean" | 13.7 | 124.1 | -110.4 | -89.0 | "x 9.1" |
"sum_reoccurring_points" | 7.4 | 61.0 | -53.6 | -87.9 | "x 8.3" |
"last_location_of_maximum" | 3.7 | 26.4 | -22.7 | -86.0 | "x 7.2" |
"percent_reoccurring_values" | 8.6 | 60.5 | -51.9 | -85.8 | "x 7.1" |
"last_location_of_minimum" | 3.8 | 26.1 | -22.2 | -85.3 | "x 6.8" |
"mean_second_derivative_central" | 3.7 | 21.9 | -18.2 | -83.2 | "x 6.0" |
"has_duplicate" | 6.6 | 38.6 | -32.0 | -82.9 | "x 5.8" |
"number_crossings" | 11.7 | 66.6 | -54.9 | -82.4 | "x 5.7" |
"time_reversal_asymmetry_statistic" | 8.4 | 43.4 | -35.0 | -80.7 | "x 5.2" |
"linear_trend" | 34.1 | 140.3 | -106.1 | -75.7 | "x 4.1" |
"sum_reoccurring_values" | 23.7 | 63.1 | -39.4 | -62.4 | "x 2.7" |
"permutation_entropy" | 195.3 | 219.8 | -24.6 | -11.2 | "x 1.1" |