-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_exploration.py
More file actions
33 lines (29 loc) · 1.08 KB
/
data_exploration.py
File metadata and controls
33 lines (29 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd
# Read the four competition CSV files into DataFrames up front, so every
# report section below works on already-loaded data.
train_features = pd.read_csv('dataset and all/training_set_features.csv')
train_labels = pd.read_csv('dataset and all/training_set_labels.csv')
test_features = pd.read_csv('dataset and all/test_set_features.csv')
submission_format = pd.read_csv('dataset and all/submission_format.csv')

# Section 1: preview the leading rows of every loaded frame.
# Headings carry their own leading newline (all but the first) so that
# consecutive sections are separated by a blank line in the output.
for _heading, _frame in (
    ("Training Features:", train_features),
    ("\nTraining Labels:", train_labels),
    ("\nTest Features:", test_features),
    ("\nSubmission Format:", submission_format),
):
    print(_heading)
    print(_frame.head())

# Section 2: per-column count of null entries.
# The submission format frame is intentionally not part of this report.
for _heading, _frame in (
    ("\nMissing Values in Training Features:", train_features),
    ("\nMissing Values in Training Labels:", train_labels),
    ("\nMissing Values in Test Features:", test_features),
):
    print(_heading)
    print(_frame.isnull().sum())

# Section 3: per-column dtypes, for the same three frames as section 2.
for _heading, _frame in (
    ("\nData Types in Training Features:", train_features),
    ("\nData Types in Training Labels:", train_labels),
    ("\nData Types in Test Features:", test_features),
):
    print(_heading)
    print(_frame.dtypes)