forked from williamhogman/feature-selection
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmetafeatures.py
More file actions
61 lines (38 loc) · 1.1 KB
/
metafeatures.py
File metadata and controls
61 lines (38 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import numpy as np
def variance(ts):
    """Return the variance of ``ts`` as reported by its ``var()`` method.

    ``ts`` is presumably a pandas Series (the module imports pandas);
    any object exposing ``var()`` works.
    """
    spread = ts.var()
    return spread
def mean(ts):
    """Return the arithmetic mean of ``ts`` as reported by its ``mean()`` method."""
    average = ts.mean()
    return average
def median(ts):
    """Return the median of ``ts`` as reported by its ``median()`` method."""
    middle = ts.median()
    return middle
def count(ts):
    """Return the average number of non-missing values per unstacked row.

    ``ts`` is unstacked into a table, the non-missing cells of every row
    are counted, and the mean of those per-row counts is returned.
    """
    wide = ts.unstack()
    observed_per_row = wide.count(axis=1)
    return observed_per_row.mean()
def existence(ts):
    """Return the fraction of unstacked rows that hold at least one value.

    ``ts`` is unstacked into a table; a row counts as present when it
    contains one or more non-missing cells.
    """
    wide = ts.unstack()
    observed_per_row = wide.count(axis=1)
    rows_with_data = (observed_per_row > 0).sum()
    total_rows = wide.shape[0]
    return rows_with_data / total_rows
def ftest(ts):
    """Return the overall variance of ``ts`` divided by its within-row spread.

    ``ts`` is unstacked into a matrix. The within-row spread is the sum,
    over every cell, of the squared deviation from that cell's row mean
    (row means ignore NaN). Cells whose squared deviation is NaN are
    counted as zero.
    """
    wide = np.array(ts.unstack())
    per_row_mean = np.nanmean(wide, axis=1)
    # Transposing puts the row axis last, so the 1-D vector of row means
    # broadcasts against it.
    deviations = wide.T - per_row_mean
    within_ss = np.nan_to_num(deviations ** 2).sum()
    total_variance = ts.var()
    return total_variance / within_ss
# Registry mapping each meta-feature name to the function that computes it.
# extract_meta_features() iterates this dict, so every entry is evaluated
# for each time series passed to it.
META_FEATURES = {
    "variance": variance,
    "ftest": ftest,
    "mean": mean,
    "median": median,
    "count": count,
    "existence": existence,
}
def extract_meta_features(timeseries):
    """Evaluate every registered meta-feature on ``timeseries``.

    Returns a dict keyed by the names in ``META_FEATURES``, each mapped
    to the result of calling the corresponding function on ``timeseries``.
    """
    return {
        feature_name: compute(timeseries)
        for feature_name, compute in META_FEATURES.items()
    }
def extract_meta_features_as_arr(timeseries):
    """Return the meta-feature values of ``timeseries`` as a list.

    Values are ordered by sorted feature name, so the position of each
    feature is stable from call to call.
    """
    features = extract_meta_features(timeseries)
    by_name = sorted(features.items(), key=lambda item: item[0])
    return [value for _, value in by_name]
def seq_of_timeseries_variable(df, cols):
    """Lazily yield one meta-feature dict per entry of ``cols``.

    Each yielded dict is the result of ``extract_meta_features(df[col])``
    with an extra ``"name"`` key set to that entry of ``cols``.
    """
    for column_name in cols:
        record = extract_meta_features(df[column_name])
        record["name"] = column_name
        yield record
def compute_for(df, cols):
    """Build a DataFrame of meta-features, one row per entry of ``cols``.

    Rows come from ``seq_of_timeseries_variable(df, cols)``, so each one
    carries the meta-feature values plus a ``"name"`` column.
    """
    records = seq_of_timeseries_variable(df, cols)
    return pd.DataFrame(records)