-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheda.py
More file actions
67 lines (52 loc) · 2.24 KB
/
eda.py
File metadata and controls
67 lines (52 loc) · 2.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
# coding: utf-8
# ## Import dependencies
# In[1]:
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ast
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
# from nltk.stem.snowball import SnowballStemmer
# from nltk.stem.wordnet import WordNetLemmatizer
# from nltk.corpus import wordnet
# from surprise import Reader, Dataset, SVD, evaluate
import warnings; warnings.simplefilter('ignore')
# Partition each raw table into train/test subsets using its 'Split' column.
# NOTE(review): `apps`, `user_history` and `users` are not defined anywhere in
# the visible file -- they must be loaded (e.g. in an earlier notebook cell)
# before this point; confirm where they come from.
apps_training = apps.loc[apps['Split'] == 'Train']
apps_testing = apps.loc[apps['Split'] == 'Test']

user_history_training = user_history.loc[user_history['Split'] == 'Train']
user_history_testing = user_history.loc[user_history['Split'] == 'Test']

users_training = users.loc[users['Split'] == 'Train']
users_testing = users.loc[users['Split'] == 'Test']

# Quick inspection of every partition. These bare expressions only render
# output when run cell-by-cell in a notebook; in a plain script they are
# evaluated and discarded.
apps_training.shape
apps_training.head()
apps_testing.shape
apps_testing.head()

user_history_training.shape
user_history_training.head()
user_history_testing.shape
user_history_testing.head()

users_training.shape
users_training.head()
users_testing.shape
users_testing.head()

# Transposed preview: one column per user row, which is easier to read when
# the frame has many columns.
users_training.head(5).transpose()
# Preview the first rows of the jobs table.
# NOTE(review): `jobs` is not defined in the visible file -- confirm where it
# is loaded.
jobs.head()

# Row count for each distinct (City, State, Country) combination.
location_keys = ['City', 'State', 'Country']
jobs.groupby(location_keys).size().reset_index(name='Locationwise')

# Row count per country, ordered from the most to the fewest rows. The full
# table is kept as `Country_wise_job`; only its top rows are previewed.
per_country_counts = jobs.groupby(['Country']).size()
Country_wise_job = (
    per_country_counts
    .reset_index(name='Locationwise')
    .sort_values('Locationwise', ascending=False)
)
Country_wise_job.head()