env/psql-notes.sql at main · defndaines/env · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
-- connect to a db.
\c db_name

-- run queries from file.
\i /tmp/scratch.sql

-- Turn off paging
\pset pager off

-- See all settings
SHOW ALL;

-- Identify index use
SELECT * FROM pg_stat_user_indexes WHERE relname='orders';

-- Long Running Queries (greater than 30 seconds)
  SELECT now() - query_start AS runtime, usename, datname, state, query
    FROM pg_stat_activity
   WHERE now() - query_start > '30 seconds'::INTERVAL
ORDER BY runtime DESC;

-- cache hit rates (should not be less than 0.99)
SELECT sum(heap_blks_read) AS heap_read,
       sum(heap_blks_hit) AS heap_hit,
       (sum(heap_blks_hit) - sum(heap_blks_read)) / sum(heap_blks_hit) AS ratio
  FROM pg_statio_user_tables;

-- table index usage rates (should not be less than 0.99)
  SELECT relname,
         100 * idx_scan / (seq_scan + idx_scan) AS percent_of_times_index_used,
         n_live_tup AS rows_in_table
    FROM pg_stat_user_tables
ORDER BY n_live_tup DESC;

-- show running queries
  SELECT pid, age(clock_timestamp(), query_start), usename, query
    FROM pg_stat_activity
   WHERE query != '<IDLE>' AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start DESC;

-- table index usage rates (should not be less than 0.99)
  SELECT relname,
         CASE
           WHEN (seq_scan + idx_scan) != 0
           THEN 100.0 * idx_scan / (seq_scan + idx_scan)
           ELSE 0
         END AS percent_of_times_index_used,
         n_live_tup AS rows_in_table
    FROM pg_stat_user_tables
ORDER BY n_live_tup DESC;

-- Unused indexes (not perfect) ... relative to last reset of statistics
  SELECT idstat.relname AS table_name,
         indexrelname AS index_name,
         idstat.idx_scan AS index_scans_count,
         pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
         tabstat.idx_scan AS table_reads_index_count,
         tabstat.seq_scan AS table_reads_seq_count,
         tabstat.seq_scan + tabstat.idx_scan AS table_reads_count,
         n_tup_upd + n_tup_ins + n_tup_del AS table_writes_count,
         pg_size_pretty(pg_relation_size(idstat.relid)) AS table_size
    FROM pg_stat_user_indexes AS idstat
    JOIN pg_indexes ON indexrelname = indexname AND idstat.schemaname = pg_indexes.schemaname
    JOIN pg_stat_user_tables AS tabstat ON idstat.relid = tabstat.relid
   WHERE indexdef !~* 'unique'
ORDER BY idstat.idx_scan DESC, pg_relation_size(indexrelid) DESC;

  SELECT relname,
         indexrelname,
         idx_scan,
         idx_tup_read,
         idx_tup_fetch,
         pg_size_pretty(pg_relation_size(indexrelname::REGCLASS)) AS size
    FROM pg_stat_all_indexes
   WHERE schemaname = 'public'
     AND indexrelname NOT LIKE 'pg_toast_%'
     AND idx_scan = 0
     AND idx_tup_read = 0
     AND idx_tup_fetch = 0
ORDER BY pg_relation_size(indexrelname::REGCLASS) DESC;

-- Reset statistics
SELECT oid FROM pg_class WHERE relname = 'table_name';

SELECT pg_stat_reset_single_table_counters( <that oid> );

-- Find invalid indices (can happen when running concurrently on release)
SELECT relname
  FROM pg_class, pg_index
 WHERE pg_index.indisvalid = false AND pg_index.indexrelid = pg_class.oid;

-- Table bloat
https://github.com/ioguix/pgsql-bloat-estimation/blob/master/table/table_bloat.sql

-- Index bloat
https://github.com/ioguix/pgsql-bloat-estimation/blob/master/btree/btree_bloat.sql

-- Reindex
REINDEX INDEX CONCURRENTLY index_name;

-- DB size
SELECT pg_size_pretty(pg_database_size(current_database()));

-- Export to CSV
\copy reactions TO '/tmp/reactions.csv' WITH (FORMAT CSV, HEADER);

-- Copy in files (pipe-delimited)
\copy rxnorm_concepts_temp FROM 'rrf/RXNCONSO.RRF' DELIMITER '|' CSV QUOTE AS e'\x1f';
\copy rxnorm_attributes_temp FROM 'rrf/RXNSAT.RRF' DELIMITER '|' CSV QUOTE AS e'\x1f';
\copy rxnorm_relationships_temp FROM 'rrf/RXNREL.RRF' DELIMITER '|' CSV QUOTE AS e'\x1f';
\copy rxnorm_archived_atoms_temp FROM 'rrf/RXNATOMARCHIVE.RRF' DELIMITER '|' CSV QUOTE AS e'\x1f';

-- From Shell: Import a dump
pg_restore --jobs 4 --clean --if-exists --no-privileges --no-owner \
  --host localhost \
  --dbname your_db_name \
  your_db-2021-06-15.dump

--- JSON Queries
SELECT id,
       name,
       key_values_json->'service_user' AS service_user
  FROM your_settings
 WHERE id = 2;

-- Get distinct keys from JSON in a JSON field
SELECT DISTINCT ARRAY (SELECT json_object_keys(key_values_json)) AS keys
  FROM table_with_json;

--- JSONB Queries
 -- Check that field is present
SELECT count(meta) FROM job_states WHERE (meta->>'task') IS NOT NULL;

 -- See distinct top-level keys in object.
SELECT DISTINCT ARRAY (SELECT jsonb_object_keys(meta)) AS keys FROM job_states;

 -- See values based upon non-top-level key value.
SELECT count(meta) FROM job_states WHERE (meta->'task'->>'id') IS NOT NULL;

 -- See distinct keys within a nested object.
SELECT DISTINCT ARRAY (SELECT jsonb_object_keys(meta->'task')) AS keys
  FROM job_states
 WHERE (meta->>'task') IS NOT NULL;

 -- Update a nested field in JSON.
UPDATE events
   SET payload = jsonb_set(payload, '{"companies"}', '[]')
 WHERE id = 821252;

-- CTEs
  WITH x AS (SELECT * FROM y), z AS (SELECT * FROM w WHERE id > 666)
SELECT *
  FROM v
 WHERE u IN (SELECT t FROM w);

-- See what the next ID for a table is
SELECT last_value FROM transactions_id_seq;

-- How many records per month.
  SELECT count(t.id), date_trunc('month', t.created_at)
    FROM date_stamped_table AS t
GROUP BY date_trunc('month', t.created_at)
ORDER BY date_trunc('month', t.created_at);

-- Using a CASE statement to make enumerations explicit
  SELECT category,
         CASE
					 WHEN category = 0 THEN 'labs'
					 WHEN category = 1 THEN 'vitals'
					 WHEN category = 2 THEN 'procedures'
					 WHEN category = 3 THEN 'diagnosis'
					 WHEN category = 4 THEN 'assessments'
					 ELSE 'unknown'
         END AS type,
         count(*)
    FROM your_table
GROUP BY category
ORDER BY category;

-- Update sequence values in the DB.
SELECT pg_catalog.setval(pg_get_serial_sequence('table_name', 'id'), max(id))
  FROM table_name;

-- Largest tables in DB
  SELECT nspname || '.' || relname AS relation,
         pg_size_pretty(pg_total_relation_size(c.oid)) AS total_size
    FROM pg_class AS c LEFT JOIN pg_namespace AS n ON n.oid = c.relnamespace
   WHERE nspname NOT IN ('pg_catalog', 'information_schema')
         AND c.relkind != 'i'
         AND nspname !~ '^pg_toast'
ORDER BY pg_total_relation_size(c.oid) DESC;

-- Find tables with a certain field name ('organization_id')
  SELECT c.column_name, t.table_name
    FROM information_schema.tables AS t
         INNER JOIN information_schema.columns AS c
                 ON c.table_name = t.table_name
                    AND c.table_schema = t.table_schema
   WHERE c.column_name LIKE '%_count'
         AND t.table_schema NOT IN ('information_schema', 'pg_catalog')
         AND t.table_type = 'BASE TABLE'
ORDER BY t.table_schema;

-- Find records from the last fifteen minutes
WHERE created_at > (NOW() - INTERVAL '15 minutes')

-- Selecting records where a field appears more than once:
    WITH rs AS (
              SELECT *, count(*) OVER (PARTITION BY rule_id) AS cnt FROM rules
            )
  SELECT rule_id, check_period
    FROM rs
   WHERE cnt > 1
ORDER BY rule_id;

-- New records chunked in 10 minute intervals.
  SELECT count(*) AS cnt,
         timezone(
          'UTC',
          to_timestamp(floor(extract('epoch', created_at) / 600) * 600)
         ) AS interval_alias
    FROM snapshots
GROUP BY interval_alias
ORDER BY 2 DESC;

-- Export data as JSON (NOTE: still needs to be formatted, like adding "," between records).
COPY (
  SELECT ROW_TO_JSON(r)
  FROM (SELECT id, name FROM rules ORDER BY name) r
) TO '/tmp/rules.json';

-- Index and table hit rates for the current database
SELECT current_database() AS database,
       (
        SELECT sum(idx_blks_hit) / sum(idx_blks_hit + idx_blks_read)
          FROM pg_statio_user_indexes
       ) AS index_hit_rate,
       (
        SELECT sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read))
          FROM pg_statio_user_tables
       ) AS table_hit_rate;

-- See how large tables are:
  SELECT table_name, pg_relation_size(quote_ident(table_name))
    FROM information_schema.tables
   WHERE table_schema = 'public'
ORDER BY pg_relation_size DESC;

-- To see size of DBs on disk:
\l+

-- Query for Locking
  SELECT pg_stat_activity.pid,
         pg_locks.transactionid,
         pg_locks.granted,
         substr(pg_stat_activity.query, 1, 30) AS query_snippet,
         age(now(), pg_stat_activity.query_start) AS age
    FROM pg_stat_activity,
         pg_locks
         LEFT JOIN pg_class ON pg_locks.relation = pg_class.oid
   WHERE pg_stat_activity.query != '<insufficient privilege>'
         AND pg_locks.pid = pg_stat_activity.pid
         AND pg_locks.mode = 'ExclusiveLock'
         AND pg_locks.transactionid IS NOT NULL
ORDER BY query_start;

-- Queries for Stats Statements
  SELECT total_time / 1000 / 60 AS total_minutes,
         total_time / calls AS average_time,
         query
    FROM pg_stat_statements
ORDER BY 1 DESC
   LIMIT 10;

-- Order by total_time from stats
  SELECT substring(query, 1, 50) AS short_query,
         round(total_time::NUMERIC, 2) AS total_time,
         calls,
         round(mean_time::NUMERIC, 2) AS mean,
         round(
          (100 * total_time / sum(total_time::NUMERIC) OVER ())::NUMERIC,
          2
         ) AS percentage_cpu,
         max_time,
         stddev_time
    FROM pg_stat_statements
ORDER BY total_time DESC
   LIMIT 20;

-- to test out the autovacuum and analyze scale factor changes
  SELECT relname,
         last_vacuum,
         last_autovacuum,
         last_analyze,
         last_autoanalyze,
         n_dead_tup AS deadrows
    FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC
   LIMIT 10;

-- When was vacuum last run?
SELECT last_vacuum, vacuum_count FROM pg_stat_all_tables WHERE relname = 'mytable';

-- Just see what's going on in the DB at the moment:
SELECT pid, usename, application_name, client_addr, backend_start, query_start, wait_event_type, state
  FROM pg_stat_activity
 WHERE datname = 'your_db_name';

-- As above, showing the query:
SELECT pid, usename, application_name, client_addr, backend_start, query_start, wait_event_type, state, query
  FROM pg_stat_activity
 WHERE datname = 'your_db_name';

-- See what the value of a sequence is:
SELECT last_value FROM correlations_id_seq;
SELECT CURRVAL('correlations_id_seq');

-- Update a sequence
BEGIN;
-- protect against concurrent inserts while you update the counter
LOCK TABLE your_table IN EXCLUSIVE MODE;
-- Update the sequence
SELECT setval('your_table_id_seq', COALESCE((SELECT MAX(id)+1 FROM your_table), 1), false);
COMMIT;

-- count all tables
CREATE OR REPLACE FUNCTION
count_rows(schema TEXT, tablename TEXT) RETURNS INTEGER
AS
$body$
DECLARE
  result INTEGER;
  query VARCHAR;
BEGIN
  query := 'SELECT count(1) FROM ' || schema || '.' || tablename;
  EXECUTE query INTO result;
  RETURN result;
END;
$body$
LANGUAGE plpgsql;

  SELECT table_schema, table_name, count_rows(table_schema, table_name)
    FROM information_schema.tables
   WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
         AND table_type = 'BASE TABLE'
ORDER BY count_rows DESC;

-- Query Configuration
SELECT name, setting FROM pg_settings WHERE name IN ('max_replication_slots','wal_level','max_wal_senders');

-- Instance Uptime
SELECT current_timestamp - pg_postmaster_start_time() AS uptime;

-- Median
SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY field) AS median FROM table;