-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_model.py
More file actions
142 lines (113 loc) · 4.79 KB
/
data_model.py
File metadata and controls
142 lines (113 loc) · 4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""
Data model for managing datasets with undo/redo support.
"""
import pandas as pd
import copy
class DataState:
    """Snapshot of a dataset paired with a label, used as an undo/redo entry."""

    def __init__(self, df, description=""):
        """Store an independent copy of *df* together with *description*.

        Args:
            df: Object exposing ``.copy()`` (the rest of the module passes
                pandas DataFrames), or ``None`` for an empty snapshot.
            description: Free-form label for the snapshot.
        """
        if df is None:
            self.df = None
        else:
            # Copy so later changes to the caller's object do not leak into
            # the stored snapshot.
            self.df = df.copy()
        self.description = description
class DataModel:
    """Hold the working DataFrame together with a bounded undo/redo history."""

    def __init__(self):
        """Create an empty model: no data loaded and no history."""
        # Working DataFrame; stays None until load_data() succeeds.
        self.current_df = None
        # The filename argument of the most recent successful load_data().
        self.filename = None
        # DataState snapshots, oldest first; the last entry is the next undo.
        self.undo_stack = []
        # DataState snapshots captured by undo(); the last entry is the next redo.
        self.redo_stack = []
        # Upper bound on len(undo_stack), enforced whenever a state is pushed.
        self.max_history = 50

    @staticmethod
    def _file_kind(filename):
        """Classify *filename* as ``'csv'`` or ``'excel'`` by its extension.

        The comparison is case-insensitive and goes through ``str()`` so that
        path-like objects are accepted as well as plain strings.

        Raises:
            ValueError: If the extension is not .csv, .xlsx or .xls.
        """
        lowered = str(filename).lower()
        if lowered.endswith('.csv'):
            return 'csv'
        if lowered.endswith(('.xlsx', '.xls')):
            return 'excel'
        raise ValueError("Unsupported file format. Use CSV or Excel files.")

    def _push_undo(self, state):
        """Append *state* to the undo stack, discarding the oldest overflow."""
        self.undo_stack.append(state)
        overflow = len(self.undo_stack) - self.max_history
        if overflow > 0:
            del self.undo_stack[:overflow]

    def load_data(self, filename):
        """Load a CSV or Excel file and make it the current dataset.

        On success the undo/redo history is reset. If the extension is
        rejected or reading raises, the model is left unchanged.

        Args:
            filename: Path ending in .csv, .xlsx or .xls (any letter case).

        Returns:
            The loaded DataFrame (the same object stored as ``current_df``).

        Raises:
            ValueError: If the extension is not supported.
        """
        if self._file_kind(filename) == 'csv':
            df = pd.read_csv(filename)
        else:
            df = pd.read_excel(filename)
        self.filename = filename
        self.current_df = df
        self.undo_stack = []
        self.redo_stack = []
        return df

    def save_state(self, description=""):
        """Push a copy of the current data onto the undo stack.

        Does nothing when no data is loaded. Recording a new state clears the
        redo stack, because the redo entries no longer follow from the
        current data.

        Args:
            description: Label stored with the snapshot.
        """
        if self.current_df is None:
            return
        self._push_undo(DataState(self.current_df, description))
        self.redo_stack = []

    def apply_transformation(self, transform_func, description=""):
        """Replace the current data with ``transform_func(current_df)``.

        The pre-transformation data is recorded for undo only if the call
        succeeds. If *transform_func* raises, the data is restored from a
        snapshot taken beforehand and the undo/redo stacks are left exactly
        as they were, so a failed operation never shows up in the history.

        Args:
            transform_func: Callable that receives the current DataFrame and
                returns the new one.
            description: Label stored with the undo entry.

        Returns:
            The new current DataFrame, or None when no data is loaded.

        Raises:
            Exception: Whatever *transform_func* raises, re-raised unchanged.
        """
        if self.current_df is None:
            return None
        # Snapshot before calling: transform_func may mutate the frame in
        # place before it fails.
        before = self.current_df.copy()
        try:
            result = transform_func(self.current_df)
        except Exception:
            self.current_df = before
            raise
        # `before` is already a private copy; attach it directly so DataState
        # does not copy it a second time.
        state = DataState(None, description)
        state.df = before
        self._push_undo(state)
        self.redo_stack = []
        self.current_df = result
        return self.current_df

    def undo(self):
        """Restore the most recently saved state.

        The data being replaced is pushed onto the redo stack under the
        description of the operation being undone, so redo() can put it back
        with its label intact.

        Returns:
            The restored DataFrame, or None when there is nothing to undo.
        """
        if not self.undo_stack:
            return None
        previous = self.undo_stack.pop()
        self.redo_stack.append(DataState(self.current_df, previous.description))
        # The popped snapshot is no longer referenced by either stack, so it
        # is adopted without another copy (this also tolerates a None frame).
        self.current_df = previous.df
        return self.current_df

    def redo(self):
        """Re-apply the most recently undone operation.

        Returns:
            The restored DataFrame, or None when there is nothing to redo.
        """
        if not self.redo_stack:
            return None
        undone = self.redo_stack.pop()
        # Keep the operation's description so a later undo() still names it.
        self._push_undo(DataState(self.current_df, undone.description))
        self.current_df = undone.df
        return self.current_df

    def can_undo(self):
        """Return True when at least one state can be undone."""
        return bool(self.undo_stack)

    def can_redo(self):
        """Return True when at least one state can be redone."""
        return bool(self.redo_stack)

    def get_profile(self):
        """Summarise every column of the current data.

        Returns:
            None when no data is loaded; otherwise a dict mapping each column
            label to a dict with keys ``dtype``, ``count``, ``missing``,
            ``missing_pct`` and ``unique``. Numeric columns additionally get
            ``mean``, ``std``, ``min`` and ``max``, each None when the column
            has no non-missing value.
        """
        if self.current_df is None:
            return None
        total_rows = len(self.current_df)
        profile = {}
        for col in self.current_df.columns:
            series = self.current_df[col]
            missing = int(series.isna().sum())
            # A zero-row frame has nothing missing; report 0.0 instead of
            # dividing by zero.
            missing_pct = float(missing / total_rows * 100) if total_rows else 0.0
            stats = {
                'dtype': str(series.dtype),
                'count': int(series.count()),
                'missing': missing,
                'missing_pct': missing_pct,
                'unique': int(series.nunique()),
            }
            if pd.api.types.is_numeric_dtype(series):
                has_values = not series.isna().all()
                stats.update({
                    'mean': float(series.mean()) if has_values else None,
                    'std': float(series.std()) if has_values else None,
                    'min': float(series.min()) if has_values else None,
                    'max': float(series.max()) if has_values else None,
                })
            profile[col] = stats
        return profile

    def export_data(self, filename):
        """Write the current data to a CSV or Excel file, without the index.

        Args:
            filename: Destination ending in .csv, .xlsx or .xls (any letter
                case).

        Raises:
            ValueError: If no data is loaded or the extension is unsupported.
        """
        if self.current_df is None:
            raise ValueError("No data to export")
        if self._file_kind(filename) == 'csv':
            self.current_df.to_csv(filename, index=False)
        else:
            self.current_df.to_excel(filename, index=False)