diff --git a/README.md b/README.md
index 55bed1c..b6d9da0 100644
--- a/README.md
+++ b/README.md
@@ -30,3 +30,45 @@ After the file is loaded into the postgis database, you can connect to it locall
 
 
 You can update the database by re-running the osm2pgsql docker container, it will use `osm2pgsql-replication` to update the existing database.
+
+## Post-Import Hook
+
+The importer supports an optional post-processing hook that runs after each successful import or update cycle. This is useful for creating statistics views, refreshing materialized views, or running custom SQL.
+
+### Environment Variables
+
+| Variable | Required | Default | Description |
+|----------|----------|---------|-------------|
+| `POST_IMPORT_SQL` | No | _(empty)_ | Path to SQL file executed after successful import/update |
+| `POST_IMPORT_SCRIPT` | No | _(empty)_ | Path to shell script (takes precedence over SQL) |
+| `POST_IMPORT_FAIL_HARD` | No | `true` | If `true`, a post-import failure fails the entire import/update run |
+
+### Execution Behavior
+
+- **Script precedence**: If `POST_IMPORT_SCRIPT` is set and the file exists, it runs instead of `POST_IMPORT_SQL`
+- **Skipping and missing files**: If neither variable is set, the hook is skipped without error. If a configured file does not exist, a warning is logged; with `POST_IMPORT_FAIL_HARD=true` (the default) the run then fails, otherwise it continues
+- **Connection reuse**: SQL is executed with `psql` using the same database connection settings (`PG_*` environment variables) as the importer
+- **Logging**: Hook status messages are prefixed with `[post-import]` to distinguish them from import logs
+
+### Example Configuration
+
+```yaml
+services:
+  osm2pgsql:
+    environment:
+      # ... other vars ...
+ POST_IMPORT_SQL: /osm2pgsql-configs/post-import.sql + POST_IMPORT_FAIL_HARD: "true" + volumes: + - ./osm2pgsql-configs:/osm2pgsql-configs:ro +``` + +### Writing Idempotent SQL + +Post-import SQL should be idempotent (safe to run on both create and update cycles): + +```sql +-- Drop and recreate pattern (recommended for stats views) +DROP MATERIALIZED VIEW IF EXISTS my_stats; +CREATE MATERIALIZED VIEW my_stats AS SELECT ...; +``` diff --git a/docker-compose.yml b/docker-compose.yml index 17d016b..a900554 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,10 +14,19 @@ services: PG_ENV_POSTGRES_USER: docker PG_ENV_POSTGRES_PASSWORD: docker REGION: north-america/us/delaware + # Output mode: pgsql (default) or flex + OSM2PGSQL_MODE: flex + # Custom Lua config path (optional, only used when OSM2PGSQL_MODE=flex) + OSM2PGSQL_LUA_CONFIG: /osm2pgsql-configs/config.lua + # Performance tuning (auto-detected if not set) + # OSM2PGSQL_CACHE: 8000 # Cache size in MB (use 0 with flat-nodes) + # OSM2PGSQL_FLAT_NODES: /osm/data/flat-nodes.bin # Use flat nodes for PBF > 2GB + # OSM2PGSQL_PROCS: 8 # Parallel threads (auto: min of CPU cores, 8) restart: on-failure:2 volumes: # Specify local directory with .pbf file - ./data:/osm/data + - ./osm2pgsql:/osm2pgsql-configs postgres: image: postgis/postgis diff --git a/osm2pgsql/Dockerfile b/osm2pgsql/Dockerfile index 5f91727..e0bedc5 100644 --- a/osm2pgsql/Dockerfile +++ b/osm2pgsql/Dockerfile @@ -11,5 +11,7 @@ RUN apt-get clean \ && rm -rf /var/lib/apt/lists/* ADD ./osm-importer.sh /usr/local/bin/osm-importer.sh -ADD ./custom.style /user/local/bin/custom.style +ADD ./custom.style /usr/local/bin/custom.style +ADD ./flex-config.lua /usr/local/bin/flex-config.lua +ADD ./compatible.lua /usr/local/bin/compatible.lua WORKDIR /osm \ No newline at end of file diff --git a/osm2pgsql/compatible.lua b/osm2pgsql/compatible.lua new file mode 100644 index 0000000..0a0bec4 --- /dev/null +++ b/osm2pgsql/compatible.lua @@ -0,0 +1,772 @@ +-- 
Source: https://github.com/osm2pgsql-dev/osm2pgsql/blob/master/flex-config/compatible.lua
+-- Commit: 02f0ed6d34adc34b4941347de789f7beec564649 (2023-07-30)
+-- License: Public Domain
+-- This file is a copy of osm2pgsql's compatible.lua flex configuration, included
+-- here because the Debian package does not ship flex configs. Local change: each
+-- setting below defaults to a same-named global if one was set before loading.
+
+-- This config example file is released into the Public Domain.
+
+-- This configuration for the flex output tries to be compatible with the
+-- original pgsql C transform output. There might be some corner cases but
+-- it should do exactly the same in almost all cases.
+
+-- The output projection used (3857, web mercator is the default). Set this
+-- to 4326 if you were using the -l|--latlong option or to the EPSG
+-- code you were using on the -E|-proj option.
+local srid = srid or 3857 -- right-hand side is the global: a local's scope starts after its declaration
+
+-- Set this to true if you were using option -K|--keep-coastlines.
+local keep_coastlines = keep_coastlines or false
+
+-- Set this to the table name prefix (what used to be option -p|--prefix).
+local prefix = prefix or 'planet_osm'
+
+-- Set this to true if multipolygons should be written as multipolygons into
+-- db (what used to be option -G|--multi-geometry).
+local multi_geometry = multi_geometry or false
+
+-- Set this to true if you want an hstore column (what used to be option
+-- -k|--hstore). Can not be true if "hstore_all" is true.
+local hstore = hstore or false
+
+-- Set this to true if you want all tags in an hstore column (what used to
+-- be option -j|--hstore-all). Can not be true if "hstore" is true.
+local hstore_all = hstore_all or false
+
+-- Only keep objects that have a value in one of the non-hstore columns
+-- (normal action with --hstore is to keep all objects). Equivalent to
+-- what used to be set through option --hstore-match-only.
+local hstore_match_only = hstore_match_only or false
+
+-- Set this to add an additional hstore (key/value) column containing all tags
+-- that start with the specified string, eg "name:". Will produce an extra
+-- hstore column that contains all "name:xx" tags.
Equivalent to what used to
+-- be set through option -z|--hstore-column. Unlike the -z option which can
+-- be specified multiple times, this does only support a single additional
+-- hstore column.
+local hstore_column = nil
+
+-- If this is set, area calculations are always in Mercator coordinates units
+-- irrespective of the srid setting.
+-- Set this to true if you used --reproject-area before.
+local reproject_area = false
+
+-- There is some very old specialized handling of route relations in osm2pgsql,
+-- which you probably don't need. This is disabled here, but you can enable
+-- it by setting this to true. If you don't understand this, leave it alone.
+local enable_legacy_route_processing = false
+
+-- ---------------------------------------------------------------------------
+
+if hstore and hstore_all then
+    error("hstore and hstore_all can't be both true")
+end
+
+-- Used for splitting up long linestrings (see add_line)
+local max_length = 1
+if srid == 3857 then
+    max_length = 100000
+end
+
+-- Ways with any of the following keys will be treated as polygon
+local polygon_keys = {
+    'aeroway',
+    'amenity',
+    'building',
+    'harbour',
+    'historic',
+    'landuse',
+    'leisure',
+    'man_made',
+    'military',
+    'natural',
+    'office',
+    'place',
+    'power',
+    'public_transport',
+    'shop',
+    'sport',
+    'tourism',
+    'water',
+    'waterway',
+    'wetland',
+    'abandoned:aeroway',
+    'abandoned:amenity',
+    'abandoned:building',
+    'abandoned:landuse',
+    'abandoned:power',
+    'area:highway'
+}
+
+-- Objects without any of the following keys will be deleted
+local generic_keys = {
+    'access',
+    'addr:housename',
+    'addr:housenumber',
+    'addr:interpolation',
+    'admin_level',
+    'aerialway',
+    'aeroway',
+    'amenity',
+    'area',
+    'barrier',
+    'bicycle',
+    'boundary',
+    'brand',
+    'bridge',
+    'building',
+    'capital',
+    'construction',
+    'covered',
+    'culvert',
+    'cutting',
+    'denomination',
+    'disused',
+    'ele',
+    'embankment',
+    'foot',
+    'generation:source', -- NOTE(review): the column lists below use 'generator:source'; confirm against upstream
+    'harbour',
+    'highway',
+    'historic',
+    'hours', -- NOTE(review): the column lists below have 'horse' at this position; confirm against upstream
+    'intermittent',
+    'junction',
+    'landuse',
+    'layer',
+    'leisure',
+    'lock',
+    'man_made',
+    'military',
+    'motorcar',
+    'name',
+    'natural',
+    'office',
+    'oneway',
+    'operator',
+    'place',
+    'population',
+    'power',
+    'power_source',
+    'public_transport',
+    'railway',
+    'ref',
+    'religion',
+    'route',
+    'service',
+    'shop',
+    'sport',
+    'surface',
+    'toll',
+    'tourism',
+    'tower:type',
+    'tracktype',
+    'tunnel',
+    'water',
+    'waterway',
+    'wetland',
+    'width',
+    'wood',
+    'abandoned:aeroway',
+    'abandoned:amenity',
+    'abandoned:building',
+    'abandoned:landuse',
+    'abandoned:power',
+    'area:highway'
+}
+
+-- The following keys will be deleted
+local delete_keys = {
+    'attribution',
+    'comment',
+    'created_by',
+    'fixme',
+    'note',
+    'note:*',
+    'odbl',
+    'odbl:note',
+    'source',
+    'source:*',
+    'source_ref',
+    'way',
+    'way_area',
+    'z_order',
+}
+
+local point_columns = { -- text columns of the point table
+    'access',
+    'addr:housename',
+    'addr:housenumber',
+    'addr:interpolation',
+    'admin_level',
+    'aerialway',
+    'aeroway',
+    'amenity',
+    'area',
+    'barrier',
+    'bicycle',
+    'brand',
+    'bridge',
+    'boundary',
+    'building',
+    'capital',
+    'construction',
+    'covered',
+    'culvert',
+    'cutting',
+    'denomination',
+    'disused',
+    'ele',
+    'embankment',
+    'foot',
+    'generator:source',
+    'harbour',
+    'highway',
+    'historic',
+    'horse',
+    'intermittent',
+    'junction',
+    'landuse',
+    'layer',
+    'leisure',
+    'lock',
+    'man_made',
+    'military',
+    'motorcar',
+    'name',
+    'natural',
+    'office',
+    'oneway',
+    'operator',
+    'place',
+    'population',
+    'power',
+    'power_source',
+    'public_transport',
+    'railway',
+    'ref',
+    'religion',
+    'route',
+    'service',
+    'shop',
+    'sport',
+    'surface',
+    'toll',
+    'tourism',
+    'tower:type',
+    'tunnel',
+    'water',
+    'waterway',
+    'wetland',
+    'width',
+    'wood',
+}
+
+local non_point_columns = { -- text columns of the line, polygon and roads tables
+    'access',
+    'addr:housename',
+    'addr:housenumber',
+    'addr:interpolation',
+    'admin_level',
+    'aerialway',
+    'aeroway',
+    'amenity',
+    'area',
+    'barrier',
+    'bicycle',
+    'brand',
+    'bridge',
+    'boundary',
+    'building',
+    'construction',
+    'covered',
+    'culvert',
+    'cutting',
+    'denomination',
+    'disused',
+    'embankment',
+    'foot',
+    'generator:source',
+    'harbour',
+    'highway',
+    'historic',
+    'horse',
+    'intermittent',
+    'junction',
+    'landuse',
+    'layer',
+    'leisure',
+    'lock',
+    'man_made',
+    'military',
+    'motorcar',
+    'name',
+    'natural',
+    'office',
+    'oneway',
+    'operator',
+    'place',
+    'population',
+    'power',
+    'power_source',
+    'public_transport',
+    'railway',
+    'ref',
+    'religion',
+    'route',
+    'service',
+    'shop',
+    'sport',
+    'surface',
+    'toll',
+    'tourism',
+    'tower:type',
+    'tracktype',
+    'tunnel',
+    'water',
+    'waterway',
+    'wetland',
+    'width',
+    'wood',
+}
+
+local function gen_columns(text_columns, with_hstore, area, geometry_type) -- column list for one table: text columns, z_order, optional way_area/hstore columns, geometry 'way' last
+    local columns = {}
+
+    local add_column = function (name, type)
+        columns[#columns + 1] = { column = name, type = type }
+    end
+
+    for _, c in ipairs(text_columns) do
+        add_column(c, 'text')
+    end
+
+    add_column('z_order', 'int')
+
+    if area then
+        add_column('way_area', 'real')
+    end
+
+    if hstore_column then
+        add_column(hstore_column, 'hstore')
+    end
+
+    if with_hstore then
+        add_column('tags', 'hstore')
+    end
+
+    add_column('way', geometry_type)
+    columns[#columns].projection = srid -- the entry just appended is the 'way' geometry column
+    columns[#columns].not_null = true
+
+    return columns
+end
+
+local tables = {}
+
+tables.point = osm2pgsql.define_table{
+    name = prefix .. '_point',
+    ids = { type = 'node', id_column = 'osm_id' },
+    columns = gen_columns(point_columns, hstore or hstore_all, false, 'point')
+}
+
+tables.line = osm2pgsql.define_table{
+    name = prefix .. '_line',
+    ids = { type = 'way', id_column = 'osm_id' },
+    columns = gen_columns(non_point_columns, hstore or hstore_all, true, 'linestring')
+}
+
+tables.polygon = osm2pgsql.define_table{
+    name = prefix .. '_polygon',
+    ids = { type = 'area', id_column = 'osm_id' },
+    columns = gen_columns(non_point_columns, hstore or hstore_all, true, 'geometry')
+}
+
+tables.roads = osm2pgsql.define_table{
+    name = prefix .. '_roads',
+    ids = { type = 'way', id_column = 'osm_id' },
+    columns = gen_columns(non_point_columns, hstore or hstore_all, true, 'linestring')
+}
+
+local z_order_lookup = { -- highway value -> { z_order increment, also write to the roads table }
+    proposed = {1, false},
+    construction = {2, false},
+    steps = {10, false},
+    cycleway = {10, false},
+    bridleway = {10, false},
+    footway = {10, false},
+    path = {10, false},
+    track = {11, false},
+    service = {15, false},
+
+    tertiary_link = {24, false},
+    secondary_link = {25, true},
+    primary_link = {27, true},
+    trunk_link = {28, true},
+    motorway_link = {29, true},
+
+    raceway = {30, false},
+    pedestrian = {31, false},
+    living_street = {32, false},
+    road = {33, false},
+    unclassified = {33, false},
+    residential = {33, false},
+    tertiary = {34, false},
+    secondary = {36, true},
+    primary = {37, true},
+    trunk = {38, true},
+    motorway = {39, true}
+}
+
+local function as_bool(value) -- true only for the strings 'yes', 'true' and '1'
+    return value == 'yes' or value == 'true' or value == '1'
+end
+
+local function get_z_order(tags) -- returns z_order and a flag telling whether the object also goes into the roads table
+    local z_order = 100 * math.floor(tonumber(tags.layer or '0') or 0)
+    local roads = false
+
+    local highway = tags['highway']
+    if highway then
+        local r = z_order_lookup[highway] or {0, false}
+        z_order = z_order + r[1]
+        roads = r[2]
+    end
+
+    if tags.railway then
+        z_order = z_order + 35
+        roads = true
+    end
+
+    if tags.boundary and tags.boundary == 'administrative' then
+        roads = true
+    end
+
+    if as_bool(tags.bridge) then
+        z_order = z_order + 100
+    end
+
+    if as_bool(tags.tunnel) then
+        z_order = z_order - 100
+    end
+
+    return z_order, roads
+end
+
+local function make_check_in_list_func(list) -- returns a predicate: does a tag table contain any key from list?
+    local h = {}
+    for _, k in ipairs(list) do
+        h[k] = true
+    end
+    return function(tags)
+        for k, _ in pairs(tags) do
+            if h[k] then
+                return true
+            end
+        end
+        return false
+    end
+end
+
+local is_polygon = make_check_in_list_func(polygon_keys)
+local clean_tags = osm2pgsql.make_clean_tags_func(delete_keys)
+
+local function make_column_hash(columns) -- list of names -> set (name -> true)
+    local h = {}
+
+    for _, k in ipairs(columns) do
+        h[k] = true
+    end
+
+    return h
+end
+
+local function make_get_output(columns) -- returns function(tags) -> column values, hstore entries
+    local h = make_column_hash(columns)
+    if hstore_all then
+        return function(tags) -- hstore_all: every tag goes into the hstore, column tags into both
+            local output = {}
+            local hstore_entries = {}
+
+            for k, _ in pairs(tags) do
+                if h[k] then
+                    output[k] = tags[k]
+                end
+                hstore_entries[k] = tags[k]
+            end
+
+            return output, hstore_entries
+        end
+    else
+        return function(tags) -- otherwise only tags without a column of their own go into the hstore
+            local output = {}
+            local hstore_entries = {}
+
+            for k, _ in pairs(tags) do
+                if h[k] then
+                    output[k] = tags[k]
+                else
+                    hstore_entries[k] = tags[k]
+                end
+            end
+
+            return output, hstore_entries
+        end
+    end
+end
+
+local has_generic_tag = make_check_in_list_func(generic_keys)
+
+local get_point_output = make_get_output(point_columns)
+local get_non_point_output = make_get_output(non_point_columns)
+
+local function get_hstore_column(tags) -- tags whose key starts with hstore_column, keyed by the rest of the key; nil if none
+    local len = #hstore_column
+    local h = {}
+    for k, v in pairs(tags) do
+        if k:sub(1, len) == hstore_column then
+            h[k:sub(len + 1)] = v
+        end
+    end
+
+    if next(h) then
+        return h
+    end
+    return nil
+end
+
+function osm2pgsql.process_node(object) -- node handler: inserts into the point table unless filtered out
+    if clean_tags(object.tags) then
+        return
+    end
+
+    local output
+    local output_hstore = {}
+    if hstore or hstore_all then
+        output, output_hstore = get_point_output(object.tags)
+        if not next(output) and not next(output_hstore) then
+            return
+        end
+        if hstore_match_only and not has_generic_tag(object.tags) then
+            return
+        end
+    else
+        output = object.tags -- no copy: the assignments below write into object.tags itself
+        if not has_generic_tag(object.tags) then
+            return
+        end
+    end
+
+    output.tags = output_hstore
+
+    if hstore_column then
+        output[hstore_column] = get_hstore_column(object.tags)
+    end
+
+    output.way = object:as_point()
+    tables.point:insert(output)
+end
+
+local function add_line(output, geom, roads) -- inserts each segment of geom (split at max_length) into line, and into roads when flagged
+    for sgeom in geom:segmentize(max_length):geometries() do
+        output.way = sgeom
+        tables.line:insert(output)
+        if roads then
+            tables.roads:insert(output)
+        end
+    end
+end
+
+local function compute_geom_and_area(geom) -- returns geom transformed to srid, and its area (taken in 3857 when reproject_area is set)
+    local area, projected_geom
+
+    projected_geom = geom:transform(srid)
+
+    if reproject_area and srid ~= 3857 then
+        area = geom:transform(3857):area()
+    else
+        area = projected_geom:area()
+    end
+
+    return projected_geom, area
+end
+
+function osm2pgsql.process_way(object) -- way handler: closed polygon-like ways go to polygon, everything else to line/roads
+    if clean_tags(object.tags) then
+        return
+    end
+
+    local add_area = false
+    if object.tags.natural == 'coastline' then
+        add_area = true
+        if not keep_coastlines then
+            object.tags.natural = nil
+        end
+    end
+
+    local output
+    local output_hstore = {}
+    if hstore or hstore_all then
+        output, output_hstore = get_non_point_output(object.tags)
+        if not next(output) and not next(output_hstore) then
+            return
+        end
+        if hstore_match_only and not has_generic_tag(object.tags) then
+            return
+        end
+        if add_area and hstore_all then
+            output_hstore.area = 'yes'
+        end
+    else
+        output = object.tags
+        if not has_generic_tag(object.tags) then
+            return
+        end
+    end
+
+    local polygon
+    local area_tag = object.tags.area -- an explicit area=* tag overrides the polygon_keys heuristic
+    if area_tag == 'yes' or area_tag == '1' or area_tag == 'true' then
+        polygon = true
+    elseif area_tag == 'no' or area_tag == '0' or area_tag == 'false' then
+        polygon = false
+    else
+        polygon = is_polygon(object.tags)
+    end
+
+    if add_area then
+        output.area = 'yes'
+        polygon = true
+    end
+
+    local z_order, roads = get_z_order(object.tags)
+    output.z_order = z_order
+
+    output.tags = output_hstore
+
+    if hstore_column then
+        output[hstore_column] = get_hstore_column(object.tags)
+    end
+
+    if polygon and object.is_closed then
+        local pgeom, area = compute_geom_and_area(object:as_polygon())
+        output.way = pgeom
+        output.way_area = area
+        tables.polygon:insert(output)
+    else
+        add_line(output, object:as_linestring(), roads)
+    end
+end
+
+function osm2pgsql.process_relation(object) -- relation handler: only type=route, multipolygon and boundary are processed
+    if clean_tags(object.tags) then
+        return
+    end
+
+    local rtype = object:grab_tag('type')
+    if (rtype ~= 'route') and (rtype ~= 'multipolygon') and (rtype ~= 'boundary') then
+        return
+    end
+
+    local output
+    local output_hstore = {}
+    if hstore or hstore_all then
+        output, output_hstore = get_non_point_output(object.tags)
+        if not next(output) and not next(output_hstore) then
+            return
+        end
+        if hstore_match_only and not has_generic_tag(object.tags) then
+            return
+        end
+    else
+        output = object.tags
+        if not has_generic_tag(object.tags) then
+            return
+        end
+    end
+
+    if not next(output) and not next(output_hstore) then -- NOTE(review): repeats the emptiness check already made in the hstore branch above
+        return
+    end
+
+    if enable_legacy_route_processing and (hstore or hstore_all) and rtype == 'route' then
+        if not object.tags.route_name then
+            output_hstore.route_name = object.tags.name
+        end
+
+        local state = object.tags.state
+        if state ~= 'alternate' and state ~= 'connection' then
+            state = 'yes'
+        end
+
+        local network = object.tags.network
+        if network == 'lcn' then
+            output_hstore.lcn = output_hstore.lcn or state
+            output_hstore.lcn_ref = output_hstore.lcn_ref or object.tags.ref
+        elseif network == 'rcn' then
+            output_hstore.rcn = output_hstore.rcn or state
+            output_hstore.rcn_ref = output_hstore.rcn_ref or object.tags.ref
+        elseif network == 'ncn' then
+            output_hstore.ncn = output_hstore.ncn or state
+            output_hstore.ncn_ref = output_hstore.ncn_ref or object.tags.ref
+        elseif network == 'lwn' then
+            output_hstore.lwn = output_hstore.lwn or state
+            output_hstore.lwn_ref = output_hstore.lwn_ref or object.tags.ref
+        elseif network == 'rwn' then
+            output_hstore.rwn = output_hstore.rwn or state
+            output_hstore.rwn_ref = output_hstore.rwn_ref or object.tags.ref
+        elseif network == 'nwn' then
+            output_hstore.nwn = output_hstore.nwn or state
+            output_hstore.nwn_ref = output_hstore.nwn_ref or object.tags.ref
+        end
+
+        local pc = object.tags.preferred_color
+        if pc == '0' or pc == '1' or pc == '2' or pc == '3' or pc == '4' then
+            output_hstore.route_pref_color = pc
+        else
+            output_hstore.route_pref_color = '0'
+        end
+    end
+
+    local make_boundary = false
+    local make_polygon = false
+    if rtype == 'boundary' then
+        make_boundary = true
+    elseif rtype == 'multipolygon' and object.tags.boundary then
+        make_boundary = true
+    elseif rtype == 'multipolygon' then
+        make_polygon = true
+    end
+
+    local z_order, roads = get_z_order(object.tags)
+    output.z_order = z_order
+
+    output.tags = output_hstore
+
+    if hstore_column then
+        output[hstore_column] = get_hstore_column(object.tags)
+    end
+
+    if not make_polygon then -- routes and boundaries are written as lines; boundaries additionally as polygons below
+        add_line(output, object:as_multilinestring(), roads)
+    end
+
+    if make_boundary or make_polygon then
+        local geom = object:as_multipolygon()
+
+        if multi_geometry then
+            local pgeom, area = compute_geom_and_area(geom)
+            output.way = pgeom
+            output.way_area = area
+            tables.polygon:insert(output)
+        else
+            for sgeom in geom:geometries() do -- one polygon row per member geometry
+                local pgeom, area = compute_geom_and_area(sgeom)
+                output.way = pgeom
+                output.way_area = area
+                tables.polygon:insert(output)
+            end
+        end
+    end
+end
diff --git a/osm2pgsql/flex-config.lua b/osm2pgsql/flex-config.lua
new file mode 100644
index 0000000..e5a0108
--- /dev/null
+++ b/osm2pgsql/flex-config.lua
@@ -0,0 +1,40 @@
+-- Flex configuration that wraps osm2pgsql's bundled compatible.lua
+--
+-- This file loads the copy of compatible.lua that is bundled in this image.
+-- Custom tags (wikidata, wikipedia, etc.) are stored in the hstore column.
+--
+-- Tables created: planet_osm_point, planet_osm_line, planet_osm_polygon, planet_osm_roads
+
+-- Configuration globals. NOTE(review): these only apply if compatible.lua reads its settings from same-named globals; a plain `local hstore = false` there ignores them.
+-- See: https://github.com/osm2pgsql-dev/osm2pgsql/blob/master/flex-config/compatible.lua
+
+-- Enable hstore column for additional tags (equivalent to -k flag)
+hstore = true
+hstore_all = false
+hstore_match_only = false
+
+-- Table name prefix
+prefix = 'planet_osm'
+
+-- Output projection (3857 = Web Mercator, 4326 = WGS84)
+srid = 3857
+
+-- Keep coastlines (equivalent to -K flag)
+keep_coastlines = false
+
+-- Write multipolygons as multipolygons (equivalent to -G flag)
+multi_geometry = false
+
+-- Path to compatible.lua (bundled in this image)
+local compatible_path = '/usr/local/bin/compatible.lua'
+
+-- Probe for the file first so that a missing copy fails with a readable message
+local f = io.open(compatible_path, 'r')
+if f then
+    f:close()
+    dofile(compatible_path)
+    print('osm2pgsql flex: Loaded ' .. compatible_path)
+else
+    error('Could not find compatible.lua at: ' .. compatible_path ..
+        '\nEnsure osm2pgsql flex-config files are installed.')
+end
diff --git a/osm2pgsql/osm-importer.sh b/osm2pgsql/osm-importer.sh
index bc95f10..f7e25fa 100755
--- a/osm2pgsql/osm-importer.sh
+++ b/osm2pgsql/osm-importer.sh
@@ -7,16 +7,180 @@ echo DATADIR=${DATADIR:="/osm/data"}
 echo PBF=${PBF:=$DATADIR/$(echo $REGION | grep -o '[^/]*$')-latest.osm.pbf}
 HOST=download.geofabrik.de
 
-if psql --no-password -h $PG_PORT_5432_TCP_ADDR -U $PG_ENV_POSTGRES_USER $PG_ENV_POSTGRES_DB -c "select * from osm2pgsql_properties;"; then
+# Output mode: pgsql (default) or flex
+OSM2PGSQL_MODE=${OSM2PGSQL_MODE:-pgsql}
+echo "OSM2PGSQL_MODE=$OSM2PGSQL_MODE"
+
+# Post-import configuration
+# POST_IMPORT_SQL: Path to SQL file to run after import (optional)
+# POST_IMPORT_SCRIPT: Path to shell script to run after import (optional, takes precedence)
+# POST_IMPORT_FAIL_HARD: If "true" (default), post-import failure fails the import
+POST_IMPORT_FAIL_HARD=${POST_IMPORT_FAIL_HARD:-true}
+
+# run_post_import: run the optional post-import hook after a successful import or update.
+# Tries POST_IMPORT_SCRIPT first, then POST_IMPORT_SQL; at most one of them is executed.
+# Returns 0 on success or when nothing is configured. Returns non-zero only when
+# POST_IMPORT_FAIL_HARD is "true" and the hook failed or a configured file is missing.
+run_post_import() {
+    echo ""
+    echo "========================================"
+    echo "[post-import] Starting post-import phase"
+    echo "========================================"
+
+    # Check for script first (takes precedence)
+    if [[ -n "$POST_IMPORT_SCRIPT" ]]; then
+        if [[ -f "$POST_IMPORT_SCRIPT" ]]; then
+            echo "[post-import] Running script: $POST_IMPORT_SCRIPT"
+            bash "$POST_IMPORT_SCRIPT"
+            local exit_code=$?
+            if [[ $exit_code -ne 0 ]]; then
+                echo "[post-import] ERROR: Script exited with code $exit_code"
+                if [[ "$POST_IMPORT_FAIL_HARD" == "true" ]]; then
+                    return $exit_code
+                fi
+                echo "[post-import] Continuing despite error (POST_IMPORT_FAIL_HARD=false)"
+            else
+                echo "[post-import] Script completed successfully"
+            fi
+            return 0
+        else
+            echo "[post-import] WARNING: Script not found: $POST_IMPORT_SCRIPT"
+            if [[ "$POST_IMPORT_FAIL_HARD" == "true" ]]; then
+                return 1
+            fi
+        fi
+    fi
+
+    # Check for SQL file
+    if [[ -n "$POST_IMPORT_SQL" ]]; then
+        if [[ -f "$POST_IMPORT_SQL" ]]; then
+            echo "[post-import] Running SQL: $POST_IMPORT_SQL"
+            # ON_ERROR_STOP makes psql stop and exit non-zero at the first failing statement;
+            # without it "psql -f" exits 0 even when statements fail, so FAIL_HARD never fires.
+            psql --no-password \
+                -v ON_ERROR_STOP=1 \
+                -h "$PG_PORT_5432_TCP_ADDR" \
+                -p "${PG_PORT_5432_TCP_PORT:-5432}" \
+                -U "$PG_ENV_POSTGRES_USER" \
+                -d "$PG_ENV_POSTGRES_DB" \
+                -f "$POST_IMPORT_SQL"
+            local exit_code=$?
+            if [[ $exit_code -ne 0 ]]; then
+                echo "[post-import] ERROR: SQL exited with code $exit_code"
+                if [[ "$POST_IMPORT_FAIL_HARD" == "true" ]]; then
+                    return $exit_code
+                fi
+                echo "[post-import] Continuing despite error (POST_IMPORT_FAIL_HARD=false)"
+            else
+                echo "[post-import] SQL completed successfully"
+            fi
+            return 0
+        else
+            echo "[post-import] WARNING: SQL file not found: $POST_IMPORT_SQL"
+            if [[ "$POST_IMPORT_FAIL_HARD" == "true" ]]; then
+                return 1
+            fi
+        fi
+    fi
+
+    # Neither configured - nothing to run, just log it
+    if [[ -z "$POST_IMPORT_SCRIPT" && -z "$POST_IMPORT_SQL" ]]; then
+        echo "[post-import] No post-import configured, skipping"
+    fi
+
+    return 0
+}
+
+# detect_memory_settings: choose osm2pgsql cache size, flat-nodes file and thread count.
+# Reads $PBF (its size drives the choice), so call it only once the PBF exists locally.
+# Sets OSM2PGSQL_CACHE, OSM2PGSQL_FLAT_NODES and OSM2PGSQL_PROCS; values already
+# provided through the environment are kept.
+detect_memory_settings() {
+    # Get available RAM in MB
+    local AVAILABLE_RAM_MB=$(free -m | awk '/^Mem:/{print $7}')
+    local TOTAL_RAM_MB=$(free -m | awk '/^Mem:/{print $2}')
+
+    # Get PBF file size in MB (0 if file doesn't exist yet)
+    local PBF_SIZE_MB=0
+    if [[ -f "$PBF" ]]; then
+        PBF_SIZE_MB=$(($(stat -c%s "$PBF") / 1024 / 1024))
+    fi
+
+    echo "Detected: Total RAM=${TOTAL_RAM_MB}MB, Available RAM=${AVAILABLE_RAM_MB}MB, PBF size=${PBF_SIZE_MB}MB"
+
+    # Auto-detect number of processes (cap at 8, or CPU cores) if not set
+    if [[ -z "$OSM2PGSQL_PROCS" ]]; then
+        local CPU_CORES=$(nproc)
+        OSM2PGSQL_PROCS=$(( CPU_CORES > 8 ? 8 : CPU_CORES ))
+    fi
+
+    # If flat nodes path explicitly set, use it
+    if [[ -n "$OSM2PGSQL_FLAT_NODES" ]]; then
+        OSM2PGSQL_CACHE=${OSM2PGSQL_CACHE:-0}
+        echo "Auto-config: Using flat nodes at $OSM2PGSQL_FLAT_NODES, cache=${OSM2PGSQL_CACHE}MB"
+        return
+    fi
+
+    # Decision: Use flat nodes for large imports (PBF > 2GB)
+    if [[ $PBF_SIZE_MB -gt 2000 ]]; then
+        # Large import: use flat nodes, no cache needed
+        OSM2PGSQL_FLAT_NODES="$DATADIR/flat-nodes.bin"
+        OSM2PGSQL_CACHE=${OSM2PGSQL_CACHE:-0}
+        echo "Auto-config: Using flat nodes (PBF > 2GB), cache=${OSM2PGSQL_CACHE}MB"
+    else
+        # Small import: use cache, no flat nodes
+        OSM2PGSQL_FLAT_NODES=""
+        # Cache = min(PBF size, 75% of (available RAM - 4GB kept for PostgreSQL)), with an 800MB floor
+        if [[ -z "$OSM2PGSQL_CACHE" ]]; then
+            local MAX_CACHE=$(( (AVAILABLE_RAM_MB - 4000) * 75 / 100 ))
+            [[ $MAX_CACHE -lt 800 ]] && MAX_CACHE=800
+            if [[ $PBF_SIZE_MB -gt 0 && $PBF_SIZE_MB -lt $MAX_CACHE ]]; then
+                OSM2PGSQL_CACHE=$PBF_SIZE_MB
+            else
+                OSM2PGSQL_CACHE=$MAX_CACHE
+            fi
+        fi
+        echo "Auto-config: No flat nodes (small import), cache=${OSM2PGSQL_CACHE}MB"
+    fi
+}
+
+# Mode-specific osm2pgsql flags. Cache, flat-nodes and thread options are appended
+# in the import branch, after detect_memory_settings has run: on a first run the
+# PBF is expected to be fetched only there, so detecting here would size it as 0MB
+# and a large import would never get flat nodes.
+if [[ "$OSM2PGSQL_MODE" == "flex" ]]; then
+    LUA_CONFIG=${OSM2PGSQL_LUA_CONFIG:-/usr/local/bin/flex-config.lua}
+    echo "Using Flex output with config: $LUA_CONFIG"
+    STYLE_ARGS="--output flex --style $LUA_CONFIG"
+    # Flex mode: -k is handled in Lua config, no --extra-attributes
+    IMPORT_ARGS="-v --create --slim"
+    UPDATE_ARGS="-v"
+else
+    echo "Using pgsql output with custom.style"
+    STYLE_ARGS="--style /usr/local/bin/custom.style"
+    IMPORT_ARGS="-v -k --create --slim --extra-attributes"
+    UPDATE_ARGS="-v -k --extra-attributes"
+fi
+
+if psql --no-password -h "$PG_PORT_5432_TCP_ADDR" -p "${PG_PORT_5432_TCP_PORT:-5432}" -U "$PG_ENV_POSTGRES_USER" "$PG_ENV_POSTGRES_DB" -c "select * from osm2pgsql_properties;" 2>/dev/null; then
     echo "Updating."
     osm2pgsql-replication update \
-        -v \
         --host $PG_PORT_5432_TCP_ADDR \
         --database $PG_ENV_POSTGRES_DB \
         --username $PG_ENV_POSTGRES_USER \
-        -- -k --style /user/local/bin/custom.style --extra-attributes
+        -- $UPDATE_ARGS $STYLE_ARGS
+    UPDATE_EXIT_CODE=$?
+
+    if [[ $UPDATE_EXIT_CODE -eq 0 ]]; then
+        echo "Update completed successfully."
+        run_post_import
+        exit $?
+    else
+        echo "ERROR: Update failed with exit code $UPDATE_EXIT_CODE"
+        exit $UPDATE_EXIT_CODE
+    fi
 else
-    echo "Database not ready, need to intialize."
+    echo "Database not ready, need to initialize."
     if [[ -f "$PBF" ]]; then
         echo "Using local file at $PBF"
     else
@@ -27,24 +191,65 @@ else
     psql --no-password \
         -h $PG_PORT_5432_TCP_ADDR -p $PG_PORT_5432_TCP_PORT \
         -U $PG_ENV_POSTGRES_USER $PG_ENV_POSTGRES_DB \
-        -c "CREATE EXTENSION hstore"
-
-    osm2pgsql -v \
-        -k \
-        --create \
-        --slim \
-        --cache 4000 \
-        --extra-attributes \
-        --style /user/local/bin/custom.style \
+        -c "CREATE EXTENSION IF NOT EXISTS hstore"
+
+    # Tune cache/flat-nodes/threads only now: detect_memory_settings sizes $PBF,
+    # which on a first run is expected to have been fetched by the step above.
+    detect_memory_settings
+    echo "Final config: Cache=${OSM2PGSQL_CACHE}MB, Flat nodes=${OSM2PGSQL_FLAT_NODES:-disabled}, Processes=$OSM2PGSQL_PROCS"
+
+    # Build flat-nodes arg only if set
+    if [[ -n "$OSM2PGSQL_FLAT_NODES" ]]; then
+        FLAT_NODES_ARG="--flat-nodes $OSM2PGSQL_FLAT_NODES"
+    else
+        FLAT_NODES_ARG=""
+    fi
+
+    osm2pgsql $IMPORT_ARGS \
+        --cache "$OSM2PGSQL_CACHE" $FLAT_NODES_ARG --number-processes "$OSM2PGSQL_PROCS" \
+        $STYLE_ARGS \
         --host $PG_PORT_5432_TCP_ADDR \
         --database $PG_ENV_POSTGRES_DB \
         --user $PG_ENV_POSTGRES_USER \
         --port $PG_PORT_5432_TCP_PORT \
         $PBF
+    IMPORT_EXIT_CODE=$?
+
+    if [[ $IMPORT_EXIT_CODE -ne 0 ]]; then
+        echo "ERROR: Import failed with exit code $IMPORT_EXIT_CODE"
+        exit $IMPORT_EXIT_CODE
+    fi
+
+    echo "Import completed successfully."
+
+    # Run post-import hook after successful import (before replication init)
+    run_post_import
+    POST_IMPORT_EXIT_CODE=$?
+    if [[ $POST_IMPORT_EXIT_CODE -ne 0 ]]; then
+        exit $POST_IMPORT_EXIT_CODE
+    fi
+
+    # Use --server if REPLICATION_SERVER is set, otherwise use --osm-file to extract URL from PBF
+    if [[ -n "${REPLICATION_SERVER:-}" ]]; then
+        REPL_SOURCE="--server $REPLICATION_SERVER"
+    else
+        REPL_SOURCE="--osm-file $PBF"
+    fi
 
     osm2pgsql-replication init \
         --host $PG_PORT_5432_TCP_ADDR \
         --database $PG_ENV_POSTGRES_DB \
         --user $PG_ENV_POSTGRES_USER \
-        --port $PG_PORT_5432_TCP_PORT --osm-file $PBF
+        --port $PG_PORT_5432_TCP_PORT \
+        $REPL_SOURCE
+    REPL_INIT_EXIT_CODE=$?
+
+    if [[ $REPL_INIT_EXIT_CODE -ne 0 ]]; then
+        echo "WARNING: Replication init failed with exit code $REPL_INIT_EXIT_CODE"
+        echo "Import and post-processing completed, but replication is not configured."
+        exit $REPL_INIT_EXIT_CODE
+    fi
+
+    echo "Import, post-processing, and replication init completed successfully."
+    exit 0
 fi
\ No newline at end of file