forked from railbotan/sql
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdb_tasks.py
More file actions
139 lines (116 loc) · 4.97 KB
/
db_tasks.py
File metadata and controls
139 lines (116 loc) · 4.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import sqlite3
import pandas as pd
import re
def get_clean_field(field):
    """Return *field* as text with HTML tags removed.

    Every span that starts with ``<`` and runs up to the next ``>``
    (inclusive) is deleted. Values that are not strings are converted with
    ``str()`` before the tags are stripped.

    Missing values -- ``None`` or a float NaN -- are returned unchanged
    instead of being converted to the literal text ``'None'`` / ``'nan'``,
    so callers can still recognise them as missing.

    Args:
        field: The raw cell value.

    Returns:
        The cleaned string, or *field* itself when it is a missing value.
    """
    # NaN is the only float value that does not compare equal to itself.
    if field is None or (isinstance(field, float) and field != field):
        return field
    return re.sub(r'\<[^>]*\>', '', str(field))
# Create the database and the works table
con = sqlite3.connect('works.sqlite')
cursor = con.cursor()

# Remove a works table left over from an earlier run before creating it again.
cursor.execute('drop table if exists works')

# Column definitions of the works table, in declaration order.
_works_columns = (
    'ID INTEGER PRIMARY KEY AUTOINCREMENT',
    'salary INTEGER',
    'educationType TEXT',
    'jobTitle TEXT',
    'qualification TEXT',
    'gender TEXT',
    'dateModify TEXT',
    'skills TEXT',
    'otherInfo TEXT',
)
cursor.execute('create table works (' + ','.join(_works_columns) + ')')
con.commit()
# Load the source data
df = pd.read_csv("works.csv")

# Homework: skills and otherInfo
# Strip HTML tags from the skills column and then from the otherInfo column
for _html_column in ('skills', 'otherInfo'):
    df[_html_column] = df[_html_column].apply(get_clean_field)

# Append the rows to the works table; the DataFrame index is not written.
df.to_sql("works", con, if_exists='append', index=False)
con.commit()
# Homework
# Build a lookup table for the gender field
cursor.execute('drop table if exists genders')
cursor.execute('create table genders(id INTEGER PRIMARY KEY AUTOINCREMENT, gender_val TEXT)')
con.commit()

# The remaining steps run in this order, each committed on its own:
#   1. fill the lookup table with the distinct non-NULL gender values;
#   2. add a gender_id column to works that references genders(id);
#   3. set gender_id by matching works.gender against genders.gender_val;
#   4. drop the textual gender column from works.
# NOTE: ALTER TABLE ... DROP COLUMN requires SQLite 3.35.0 or newer.
for _gender_step in (
    'INSERT INTO genders(gender_val) SELECT DISTINCT gender FROM works WHERE gender IS NOT NULL',
    'ALTER TABLE works ADD COLUMN gender_id INTEGER REFERENCES genders(id)',
    'UPDATE works SET gender_id = (SELECT id FROM genders WHERE gender_val = works.gender)',
    'ALTER TABLE works DROP COLUMN gender',
):
    cursor.execute(_gender_step)
    con.commit()
# # contents of the genders lookup table
# cursor.execute('SELECT * FROM genders')
# print(cursor.fetchall())
# # sanity check: print the gender value for the rows of the works table
# cursor.execute('SELECT gender_val FROM genders,works WHERE genders.id = works.gender_id')
# print(cursor.fetchall())
# Build the lookup table for the education type
cursor.execute('drop table if exists education')
cursor.execute('create table education(id INTEGER PRIMARY KEY AUTOINCREMENT, edu_val TEXT)')
con.commit()

# The remaining steps run in this order, each committed on its own:
#   1. fill the lookup table with the distinct non-NULL educationType values;
#   2. add an educationType_id column to works that references education(id);
#   3. set educationType_id by matching works.educationType against
#      education.edu_val;
#   4. drop the textual educationType column from works.
# NOTE: ALTER TABLE ... DROP COLUMN requires SQLite 3.35.0 or newer.
for _education_step in (
    'INSERT INTO education(edu_val) SELECT DISTINCT educationType FROM works WHERE educationType IS NOT NULL',
    'ALTER TABLE works ADD COLUMN educationType_id INTEGER REFERENCES education(id)',
    'UPDATE works SET educationType_id = (SELECT id FROM education WHERE edu_val = works.educationType)',
    'ALTER TABLE works DROP COLUMN educationType',
):
    cursor.execute(_education_step)
    con.commit()
# # contents of the education lookup table
# cursor.execute('SELECT * FROM education')
# print(cursor.fetchall())
# # sanity check: print the education value for the rows of the works table
# cursor.execute('SELECT edu_val FROM education,works WHERE education.id = works.educationType_id')
# print(cursor.fetchall())
# IN-CLASS WORK
# NOTE: the queries below that filter or group on works.gender only work
# before the gender column is dropped from works earlier in this script.
# cursor.execute('create index salary_index on works (salary)')
# con.commit()
# # total number of records
# cursor.execute('SELECT COUNT(*) FROM works')
# print(cursor.fetchall()[0][0])
# # men
# cursor.execute('SELECT COUNT(*) FROM works where gender = "Мужской"')
# print(cursor.fetchall()[0][0])
# # women
# cursor.execute('SELECT COUNT(*) FROM works where gender = "Женский"')
# print(cursor.fetchall()[0][0])
# # another way
# cursor.execute('SELECT gender, COUNT(*) FROM works group by gender')
# print(cursor.fetchall())
# # How many records have skills filled in?
# cursor.execute('SELECT COUNT(*) FROM works where skills not null')
# print(cursor.fetchall()[0][0])
# # # Get the filled-in skills.
# # cursor.execute('SELECT skills FROM works where skills not null')
# # print(cursor.fetchall())
# # Print the salary only for records whose skills mention Python
# cursor.execute('SELECT salary FROM works where skills LIKE "%Python%"')
# print(cursor.fetchall())
# Plot the salary percentiles and spread for men and women.
# men
# cursor.execute('SELECT salary FROM works where gender = "Мужской"')
# m_salary = [t[0] for t in cursor.fetchall()]
# # print(m_salary)
#
# # women
# cursor.execute('SELECT salary FROM works where gender = "Женский"')
# w_salary = [t[0] for t in cursor.fetchall()]
# # print(w_salary)
#
# plt.plot()
# m_salary = np.quantile(m_salary, np.linspace(0.1, 1, 10))
# w_salary = np.quantile(w_salary, np.linspace(0.1, 1, 10))
#
# plt.hist(m_salary, bins=100, color='blue')
# plt.show()
# plt.hist(w_salary, bins=100, color='red')
# plt.show()
#
# # another way
# plt.plot(np.linspace(0.1, 1, 10), m_salary)
# plt.plot(np.linspace(0.1, 1, 10), w_salary)
# plt.xlabel("Перцентили")
# plt.ylabel("Зарплата")
#
# plt.show()