-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_sample_data.sh
More file actions
executable file
·59 lines (50 loc) · 2.03 KB
/
load_sample_data.sh
File metadata and controls
executable file
·59 lines (50 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
###############################################################################
# Load Sample Data into Iceberg
#
# This script loads sample business data into Iceberg tables for lab exercises.
#
# Usage:
# ./scripts/load_sample_data.sh
###############################################################################
# Strict mode: abort on command failure, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# Resolve paths relative to this script so it works from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
readonly SCRIPT_DIR PROJECT_DIR

# Colors (real escape bytes, so plain printf '%s' can emit them)
readonly GREEN=$'\033[0;32m'
readonly BLUE=$'\033[0;34m'
readonly NC=$'\033[0m'

# Connection settings. Each one can be overridden from the environment; the
# defaults are the values this script has always used.
#   ICEBERG_CATALOG_URI  URI passed as spark.sql.catalog.iceberg.uri
#   MINIO_ENDPOINT       value passed as spark.hadoop.fs.s3a.endpoint
#   MINIO_ACCESS_KEY     value passed as spark.hadoop.fs.s3a.access.key
#   MINIO_SECRET_KEY     value passed as spark.hadoop.fs.s3a.secret.key
ICEBERG_CATALOG_URI="${ICEBERG_CATALOG_URI:-http://localhost:8181/api/catalog}"
MINIO_ENDPOINT="${MINIO_ENDPOINT:-http://localhost:9000}"
MINIO_ACCESS_KEY="${MINIO_ACCESS_KEY:-minioadmin}"
MINIO_SECRET_KEY="${MINIO_SECRET_KEY:-minioadmin}"

#######################################
# Print an error message to stderr and terminate the script.
# Arguments: $* - message text (printed after an "Error: " prefix)
# Outputs:   writes the message to stderr
# Returns:   never returns; exits with status 1
#######################################
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Print a three-line banner in the given color.
# Arguments: $1 - color escape sequence
#            $2 - banner title
# Outputs:   writes the banner to stdout
#######################################
banner() {
  local color=$1
  local title=$2
  printf '%s\n' "${color}========================================${NC}"
  printf '%s\n' "${color}${title}${NC}"
  printf '%s\n' "${color}========================================${NC}"
}

#######################################
# Check prerequisites, generate sample data when the data directory is
# missing, then run the Scala loader through spark-shell.
# Globals:   SCRIPT_DIR, PROJECT_DIR, ICEBERG_CATALOG_URI, MINIO_ENDPOINT,
#            MINIO_ACCESS_KEY, MINIO_SECRET_KEY (all read)
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success; exits non-zero on any failure
#######################################
main() {
  local sample_dir="$PROJECT_DIR/data/sample"
  local generator="$SCRIPT_DIR/generate_sample_data.py"
  local loader="$SCRIPT_DIR/load_sample_data.scala"

  banner "$BLUE" "Loading Sample Data into Iceberg"

  # Check if Spark is installed
  command -v spark-shell >/dev/null 2>&1 \
    || die "spark-shell not found. Please install Apache Spark."

  # Fail before the slow Spark start-up if the loader script is missing.
  [[ -f "$loader" ]] || die "loader script not found: $loader"

  # Check if sample data exists; run the generator when it does not.
  if [[ ! -d "$sample_dir" ]]; then
    echo "Sample data not found. Generating sample data first..."
    command -v python3 >/dev/null 2>&1 \
      || die "python3 not found. It is required to generate sample data."
    [[ -f "$generator" ]] || die "generator script not found: $generator"
    python3 "$generator"
  fi

  # Load data into Iceberg
  printf '\n%s\n' "${BLUE}Loading sample data into Iceberg tables...${NC}"

  # Build the command line as an array so every argument stays one word.
  # NOTE(review): the access/secret keys are passed on argv and are therefore
  # visible in process listings; acceptable for local defaults, reconsider for
  # real credentials.
  local -a spark_args=(
    --packages org.apache.iceberg:iceberg-spark-runtime-3.5:1.5.0
    --conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog
    --conf spark.sql.catalog.iceberg.type=rest
    --conf "spark.sql.catalog.iceberg.uri=${ICEBERG_CATALOG_URI}"
    --conf "spark.hadoop.fs.s3a.endpoint=${MINIO_ENDPOINT}"
    --conf "spark.hadoop.fs.s3a.access.key=${MINIO_ACCESS_KEY}"
    --conf "spark.hadoop.fs.s3a.secret.key=${MINIO_SECRET_KEY}"
    --conf spark.hadoop.fs.s3a.path.style.access=true
    -i "$loader"
  )
  # NOTE(review): presumably load_sample_data.scala exits the REPL itself when
  # done; if it does not, spark-shell stays at its prompt - confirm.
  spark-shell "${spark_args[@]}"

  printf '\n'
  banner "$GREEN" "Sample data loading complete!"
  printf '\n%s\n' "Sample tables are now available:"
  echo " - iceberg.sample_customers"
  echo " - iceberg.sample_products"
  echo " - iceberg.sample_orders"
  echo " - iceberg.sample_transactions"
  echo " - iceberg.sample_events"
}

main "$@"