-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdates_fix.py
More file actions
105 lines (97 loc) · 3.29 KB
/
dates_fix.py
File metadata and controls
105 lines (97 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import csv
import re
import pandas as pd
import streamlit as st
from io import StringIO
# Helper function to reformat dates
def clean_date(date_str):
# 01/02/2012 or 01-02-2012
m = re.match(r"(\d{2})[/-](\d{2})[/-](\d{4})", date_str)
if m:
return f"{m.group(3)}-{m.group(1)}-{m.group(2)}"
# 10/5/2012 or 10-5-2012
m = re.match(r"(\d{2})[/-](\d{1})[/-](\d{4})", date_str)
if m:
return f"{m.group(3)}-{m.group(1)}-0{m.group(2)}"
# 5/10/2012 or 5-10-2012
m = re.match(r"(\d{1})[/-](\d{2})[/-](\d{4})", date_str)
if m:
return f"{m.group(3)}-0{m.group(1)}-{m.group(2)}"
# 5/1/2012 or 5-1-2012
m = re.match(r"(\d{1})[/-](\d{1})[/-](\d{4})", date_str)
if m:
return f"{m.group(3)}-0{m.group(1)}-0{m.group(2)}"
# 01/02/12 or 01-02-12
m = re.match(r"(\d{2})[/-](\d{2})[/-](\d{2})", date_str)
if m:
return f"20{m.group(3)}-{m.group(1)}-{m.group(2)}"
# 10/5/12 or 10-5-12
m = re.match(r"(\d{2})[/-](\d{1})[/-](\d{2})", date_str)
if m:
return f"20{m.group(3)}-{m.group(1)}-0{m.group(2)}"
# 5/10/12 or 5-10-12
m = re.match(r"(\d{1})[/-](\d{2})[/-](\d{2})", date_str)
if m:
return f"20{m.group(3)}-0{m.group(1)}-{m.group(2)}"
# 5/1/12 or 5-1-12
m = re.match(r"(\d{1})[/-](\d{1})[/-](\d{2})", date_str)
if m:
return f"20{m.group(3)}-0{m.group(1)}-0{m.group(2)}"
# 10-Apr-12 or 10/Apr/12
m = re.match(r"(\d{2})[/-]([A-Za-z]{3})[/-](\d{2})", date_str)
if m:
months = {
"jan": "01",
"feb": "02",
"mar": "03",
"apr": "04",
"may": "05",
"jun": "06",
"jul": "07",
"aug": "08",
"sep": "09",
"oct": "10",
"nov": "11",
"dec": "12",
}
mm = months.get(m.group(2).lower(), m.group(2))
return f"20{m.group(3)}-{mm}-{m.group(1)}"
# 1-Apr-12 or 1/Apr/12
m = re.match(r"(\d{1})[/-]([A-Za-z]{3})[/-](\d{2})", date_str)
if m:
months = {
"jan": "01",
"feb": "02",
"mar": "03",
"apr": "04",
"may": "05",
"jun": "06",
"jul": "07",
"aug": "08",
"sep": "09",
"oct": "10",
"nov": "11",
"dec": "12",
}
mm = months.get(m.group(2).lower(), m.group(2))
return f"20{m.group(3)}-{mm}-0{m.group(1)}"
# 2020/07/11 or 2020-07-11
m = re.match(r"(\d{4})[/-](\d{2})[/-](\d{2})", date_str)
if m:
return f"{m.group(1)}-{m.group(2)}-{m.group(3)}"
# Not matched
return date_str
st.title("Date Cleaner: Standardize Dates in CSV")
uploaded_file = st.file_uploader("Drop your CSV file here", type=["csv", "txt"])
if uploaded_file:
stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
data_in = csv.reader(stringio)
data = []
cleaned_data = [[clean_date(item) for item in row] for row in data_in]
df = pd.DataFrame(cleaned_data)
# Set first row as column headers, then drop it from the DataFrame
df.columns = df.iloc[0].tolist()
df = df[1:].reset_index(drop=True)
st.dataframe(df)
csv = df.to_csv(index=False, header=True)
st.download_button("Download cleaned file", csv, "dates_fixed.csv", "text/csv")