From 97083acfd0f703182c5b5bd9f08782666f07ad04 Mon Sep 17 00:00:00 2001 From: Nathan Heskew Date: Mon, 12 May 2025 08:42:17 -0700 Subject: [PATCH 1/3] wip --- config.yaml | 5 +++- data/README.md | 70 +++++++++++++++++++++++++++++++++++++++++++ data/categories.json | 34 +++++++++++++++++++++ data/products.json | 41 +++++++++++++++++++++++++ data/sample-data.yaml | 63 ++++++++++++++++++++++++++++++++++++++ data/users.json | 23 ++++++++++++++ schema.graphql | 33 ++++++++++++++++++-- 7 files changed, 266 insertions(+), 3 deletions(-) create mode 100644 data/README.md create mode 100644 data/categories.json create mode 100644 data/products.json create mode 100644 data/sample-data.yaml create mode 100644 data/users.json diff --git a/config.yaml b/config.yaml index 04bebfd..29d728c 100644 --- a/config.yaml +++ b/config.yaml @@ -12,4 +12,7 @@ static: # This allows static files to be directly accessible root: web files: web/** roles: # This can define the roles that are used in the application - files: roles.yaml \ No newline at end of file + files: roles.yaml +dataLoader: # This loads data into user tables from YAML or JSON files + files: data/*.{json,yaml,yml} + # forceLoad: true # Uncomment to force loading data even if tables already have records \ No newline at end of file diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..bf27f10 --- /dev/null +++ b/data/README.md @@ -0,0 +1,70 @@ +# HarperDB Data Loader + +This directory contains data files that are automatically loaded into your HarperDB database tables when your application starts. The Data Loader component reads YAML or JSON files and populates your database tables with the specified data. + +## How It Works + +1. Data files must be placed in this directory with `.yaml`, `.yml`, or `.json` extensions +2. Data files are processed when HarperDB starts +3. 
Records are loaded based on timestamp comparison: + - New records are added + - Existing records are updated if the new data has a newer `__updatedtime__` + - Existing records are kept if they have a newer `__updatedtime__` than the data file + +## File Format + +### YAML Example + +```yaml +# Format: { database, table, records[] } +database: dev +table: products +records: + - id: 1 + name: "Laptop" + price: 999.99 + __createdtime__: 1682752800000 + __updatedtime__: 1682752800000 + - id: 2 + name: "Smartphone" + price: 699.99 + __createdtime__: 1682752801000 + __updatedtime__: 1682752801000 +``` + +### JSON Example + +```json +{ + "database": "dev", + "table": "products", + "records": [ + { + "id": 1, + "name": "Laptop", + "price": 999.99, + "__createdtime__": 1682752800000, + "__updatedtime__": 1682752800000 + }, + { + "id": 2, + "name": "Smartphone", + "price": 699.99, + "__createdtime__": 1682752801000, + "__updatedtime__": 1682752801000 + } + ] +} +``` + +## Important Notes + +- Tables are automatically created if they don't exist +- Primary keys are preserved from your data +- Use `__updatedtime__` and `__createdtime__` timestamps to control updates + - Records with newer `__updatedtime__` values will overwrite older ones + - Records with older `__updatedtime__` values will be skipped (not overwritten) + - If no timestamp is provided, the current time is used as `__updatedtime__` +- One table per file: Each file should have one database/table combination +- You can have multiple data files, and they'll all be processed +- The `database` field is optional; if not provided, the default database is used \ No newline at end of file diff --git a/data/categories.json b/data/categories.json new file mode 100644 index 0000000..5e8b697 --- /dev/null +++ b/data/categories.json @@ -0,0 +1,34 @@ +{ + "table": "Category", + "records": [ + { + "id": "electronics", + "name": "Electronics", + "description": "Electronic devices and gadgets", + "__createdtime__": 1682752803000, 
+ "__updatedtime__": 1682752803000 + }, + { + "id": "furniture", + "name": "Furniture", + "description": "Home and office furniture", + "__createdtime__": 1682752804000, + "__updatedtime__": 1682752804000 + }, + { + "id": "clothing", + "name": "Clothing", + "description": "Apparel and accessories", + "__createdtime__": 1682752805000, + "__updatedtime__": 1682752805000 + }, + { + "id": "pants", + "name": "Pants", + "description": "Apparel and accessories", + "parent": "clothing", + "__createdtime__": 1682752805000, + "__updatedtime__": 1682752805000 + } + ] +} \ No newline at end of file diff --git a/data/products.json b/data/products.json new file mode 100644 index 0000000..3df948b --- /dev/null +++ b/data/products.json @@ -0,0 +1,41 @@ +{ + "table": "Product", + "records": [ + { + "id": 1, + "name": "Laptop", + "price": 999.99, + "category": "electronics", + "inStock": true, + "__createdtime__": 1682752800000, + "__updatedtime__": 1682752800000 + }, + { + "id": 2, + "name": "Smartphone", + "price": 699.99, + "category": "electronics", + "inStock": false, + "__createdtime__": 1682752801000, + "__updatedtime__": 1682752801000 + }, + { + "id": 3, + "name": "Desk Chair", + "price": 199.99, + "category": "furniture", + "inStock": true, + "__createdtime__": 1682752802000, + "__updatedtime__": 1682752802000 + }, + { + "id": 4, + "name": "Jeans", + "price": 98.99, + "category": "clothing", + "inStock": true, + "__createdtime__": 1682752802000, + "__updatedtime__": 1682752802000 + } + ] +} \ No newline at end of file diff --git a/data/sample-data.yaml b/data/sample-data.yaml new file mode 100644 index 0000000..5a8f03c --- /dev/null +++ b/data/sample-data.yaml @@ -0,0 +1,63 @@ +# This is a sample YAML data file for HarperDB Data Loader +# Format: { database, table, records[] } +# +# This example shows inserting tables and records into a +# specific database, instead of the default 'data' database. 
+# +# Example: Products for an e-commerce application +# ```yaml +# database: dev +# table: products +# records: +# - id: 1 +# name: "Laptop" +# price: 999.99 +# category: "Electronics" +# inStock: true +# __createdtime__: 1682752800000 +# __updatedtime__: 1682752800000 +# - id: 2 +# name: "Smartphone" +# price: 699.99 +# category: "Electronics" +# inStock: false +# __createdtime__: 1682752801000 +# __updatedtime__: 1682752801000 +# - id: 3 +# name: "Desk Chair" +# price: 199.99 +# category: "Furniture" +# inStock: true +# __createdtime__: 1682752802000 +# __updatedtime__: 1682752802000 +# ``` +# +# If you need to load data into multiple tables, use separate files +# for each table. For example: +# +# categories.yaml: +# ```yaml +# database: dev +# table: categories +# records: +# - id: "electronics" +# name: "Electronics" +# description: "Electronic devices and gadgets" +# __createdtime__: 1682752803000 +# __updatedtime__: 1682752803000 +# ``` +# +# users.yaml: +# ```yaml +# database: dev +# table: users +# records: +# - id: 1 +# username: "john_doe" +# email: "john@example.com" +# firstName: "John" +# lastName: "Doe" +# active: true +# __createdtime__: 1682752806000 +# __updatedtime__: 1682752806000 +# ``` \ No newline at end of file diff --git a/data/users.json b/data/users.json new file mode 100644 index 0000000..6d83a97 --- /dev/null +++ b/data/users.json @@ -0,0 +1,23 @@ +{ + "table": "User", + "records": [ + { + "id": 1, + "username": "john_doe", + "email": "john@example.com", + "firstName": "John", + "lastName": "Doe", + "__createdtime__": 1682752806000, + "__updatedtime__": 1682752806000 + }, + { + "id": 2, + "username": "jane_smith", + "email": "jane@example.com", + "firstName": "Jane", + "lastName": "Smith", + "__createdtime__": 1682752807000, + "__updatedtime__": 1682752807000 + } + ] +} \ No newline at end of file diff --git a/schema.graphql b/schema.graphql index 5ad5d48..0b59218 100644 --- a/schema.graphql +++ b/schema.graphql @@ -1,7 +1,36 @@ -## 
Here we can define any tables in our database. This example shows how we define a type as a table using -## the type name as the table name and specifying it is an "export" available in the REST and other external protocols. +## This schema defines the data model for our application. +## Each type with the @table directive becomes a table in the database. +## The @export directive makes it available in the REST API and other external protocols. + +## Example table for reference type TableName @table @export { id: ID @primaryKey # Here we define primary key (must be one) name: String # we can define any other attributes here tag: String @indexed # we can specify any attributes that should be indexed } + +## Example tables for data loader example +type Product @table @export { + id: ID @primaryKey + name: String @indexed + price: Float + category: Category @relationship(from: categoryId) + inStock: Boolean +} + +type Category @table @export { + id: ID @primaryKey + name: String @indexed + description: String + parent: Category @relationship(from: categoryId) + products: [Product] @relationship(to: categoryId) + children: [Category] @relationship(to: categoryId) +} + +type User @table @export { + id: ID @primaryKey + username: String @indexed + email: String @indexed + firstName: String + lastName: String +} \ No newline at end of file From 9a801547e0b524bef98babc942bd4014afd195c3 Mon Sep 17 00:00:00 2001 From: Nathan Heskew Date: Mon, 12 May 2025 22:05:21 -0700 Subject: [PATCH 2/3] more example data for the dataLoader --- data/products.json | 16 ++++++++++------ data/users-myco.json | 24 ++++++++++++++++++++++++ data/users.json | 4 ++-- schema.graphql | 9 +++++++++ 4 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 data/users-myco.json diff --git a/data/products.json b/data/products.json index 3df948b..45b2eee 100644 --- a/data/products.json +++ b/data/products.json @@ -2,7 +2,7 @@ "table": "Product", "records": [ { - "id": 1, + "id": "1", "name": 
"Laptop", "price": 999.99, "category": "electronics", @@ -11,7 +11,7 @@ "__updatedtime__": 1682752800000 }, { - "id": 2, + "id": "2", "name": "Smartphone", "price": 699.99, "category": "electronics", @@ -20,19 +20,23 @@ "__updatedtime__": 1682752801000 }, { - "id": 3, + "id": "3", "name": "Desk Chair", "price": 199.99, "category": "furniture", + "details": { + "weight": 1.5, + "color": "black" + }, "inStock": true, "__createdtime__": 1682752802000, - "__updatedtime__": 1682752802000 + "__updatedtime__": 1682752803000 }, { - "id": 4, + "id": "4", "name": "Jeans", "price": 98.99, - "category": "clothing", + "category": "pants", "inStock": true, "__createdtime__": 1682752802000, "__updatedtime__": 1682752802000 diff --git a/data/users-myco.json b/data/users-myco.json new file mode 100644 index 0000000..2d01cfe --- /dev/null +++ b/data/users-myco.json @@ -0,0 +1,24 @@ +{ + "database": "myco", + "table": "User", + "records": [ + { + "id": "1", + "username": "jane_doe", + "email": "jane@example.com", + "firstName": "Jane", + "lastName": "Doe", + "__createdtime__": 1682752806000, + "__updatedtime__": 1682752806000 + }, + { + "id": "2", + "username": "Jon_smith", + "email": "jon@example.com", + "firstName": "Jon", + "lastName": "Smith", + "__createdtime__": 1682752807000, + "__updatedtime__": 1682752807000 + } + ] +} \ No newline at end of file diff --git a/data/users.json b/data/users.json index 6d83a97..c13dbae 100644 --- a/data/users.json +++ b/data/users.json @@ -2,7 +2,7 @@ "table": "User", "records": [ { - "id": 1, + "id": "1", "username": "john_doe", "email": "john@example.com", "firstName": "John", @@ -11,7 +11,7 @@ "__updatedtime__": 1682752806000 }, { - "id": 2, + "id": "2", "username": "jane_smith", "email": "jane@example.com", "firstName": "Jane", diff --git a/schema.graphql b/schema.graphql index 0b59218..fc63f65 100644 --- a/schema.graphql +++ b/schema.graphql @@ -15,6 +15,7 @@ type Product @table @export { name: String @indexed price: Float category: 
Category @relationship(from: categoryId) + details: Any inStock: Boolean } @@ -33,4 +34,12 @@ type User @table @export { email: String @indexed firstName: String lastName: String +} + +type MycoUser @table(database: "myco", table: "User") @export { + id: ID @primaryKey + username: String @indexed + email: String @indexed + firstName: String + lastName: String } \ No newline at end of file From 4dbaa432429edd38a2fa603c2537e1a9b0ecde18 Mon Sep 17 00:00:00 2001 From: Nathan Heskew Date: Thu, 9 Oct 2025 07:03:21 -0700 Subject: [PATCH 3/3] updating readme and examples --- data/README.md | 103 ++++++++++++++++++++++--------------------- data/categories.json | 16 ++----- data/products.json | 24 ++++------ data/users-myco.json | 8 +--- data/users.json | 8 +--- schema.graphql | 8 ++-- 6 files changed, 73 insertions(+), 94 deletions(-) diff --git a/data/README.md b/data/README.md index bf27f10..b7f8ef3 100644 --- a/data/README.md +++ b/data/README.md @@ -1,70 +1,73 @@ # HarperDB Data Loader -This directory contains data files that are automatically loaded into your HarperDB database tables when your application starts. The Data Loader component reads YAML or JSON files and populates your database tables with the specified data. +This directory contains YAML or JSON files that are automatically loaded into your HarperDB database tables when your application starts. ## How It Works -1. Data files must be placed in this directory with `.yaml`, `.yml`, or `.json` extensions -2. Data files are processed when HarperDB starts -3. Records are loaded based on timestamp comparison: - - New records are added - - Existing records are updated if the new data has a newer `__updatedtime__` - - Existing records are kept if they have a newer `__updatedtime__` than the data file +1. Place data files in this directory with `.yaml`, `.yml`, or `.json` extensions +2. Files are processed when HarperDB starts +3. 
Records are inserted/updated based on file modification time: + - New records are always added + - Existing records are only updated if the file's modification time is newer than the record's stored timestamp + - Records with timestamps newer than the file's modification time are preserved unchanged ## File Format -### YAML Example - -```yaml -# Format: { database, table, records[] } -database: dev -table: products -records: - - id: 1 - name: "Laptop" - price: 999.99 - __createdtime__: 1682752800000 - __updatedtime__: 1682752800000 - - id: 2 - name: "Smartphone" - price: 699.99 - __createdtime__: 1682752801000 - __updatedtime__: 1682752801000 -``` +Data files must contain `table` and `records` fields. The `database` field is optional. The `//` annotations in the JSON example below are explanatory only; strict JSON does not allow comments, so leave them out of real `.json` files. ### JSON Example ```json { - "database": "dev", - "table": "products", - "records": [ + "database": "dev", // Optional - uses default database if omitted + "table": "Product", // Required - name of the table + "records": [ // Required - array of records { - "id": 1, + "id": "1", // Primary key field "name": "Laptop", - "price": 999.99, - "__createdtime__": 1682752800000, - "__updatedtime__": 1682752800000 - }, - { - "id": 2, - "name": "Smartphone", - "price": 699.99, - "__createdtime__": 1682752801000, - "__updatedtime__": 1682752801000 + "price": 999.99 } ] } ``` -## Important Notes - -- Tables are automatically created if they don't exist -- Primary keys are preserved from your data -- Use `__updatedtime__` and `__createdtime__` timestamps to control updates - - Records with newer `__updatedtime__` values will overwrite older ones - - Records with older `__updatedtime__` values will be skipped (not overwritten) - - If no timestamp is provided, the current time is used as `__updatedtime__` -- One table per file: Each file should have one database/table combination -- You can have multiple data files, and they'll all be processed -- The `database` field is optional; if not provided, the default database is used \ No newline at end of
file +### YAML Example + +```yaml +database: dev # Optional - uses default database if omitted +table: Product # Required - name of the table +records: # Required - array of records + - id: "1" # Primary key field + name: "Laptop" + price: 999.99 +``` + +## Key Features + +- **Automatic Table Creation**: Tables are created if they don't exist +- **Primary Key Detection**: The `id` field is automatically detected as the primary key +- **File Modification Time**: + - The data loader uses the file's modification time (`mtime`) to determine if records should be updated + - "Touching" a file (updating its modification time) will force a reload of its data + - This allows for simpler data files without timestamp properties in the records +- **Multi-file Support**: You can have multiple data files for different tables +- **Complex Data Types**: Supports nested objects, arrays, and various data types +- **One Table Per File**: Each file should define one table + +## Tips for Managing Data + +- To force a reload of data, simply update the file's modification time: + ```bash + # Update the file's timestamp using touch + touch data/products.json + ``` +- If you need to restore to previous data, you can replace the file and update its timestamp +- The system automatically handles the comparison between file modification time and record timestamps + +## Sample Files + +- `categories.json`: Category data with parent/child relationships +- `products.json`: Product data with references to categories +- `users.json` and `users-myco.json`: User account data for the default database and for the `myco` database + +These sample files demonstrate common data patterns and relationships.
\ No newline at end of file diff --git a/data/categories.json b/data/categories.json index 5e8b697..3c387fb 100644 --- a/data/categories.json +++ b/data/categories.json @@ -4,31 +4,23 @@ { "id": "electronics", "name": "Electronics", - "description": "Electronic devices and gadgets", - "__createdtime__": 1682752803000, - "__updatedtime__": 1682752803000 + "description": "Electronic devices and gadgets" }, { "id": "furniture", "name": "Furniture", - "description": "Home and office furniture", - "__createdtime__": 1682752804000, - "__updatedtime__": 1682752804000 + "description": "Home and office furniture" }, { "id": "clothing", "name": "Clothing", - "description": "Apparel and accessories", - "__createdtime__": 1682752805000, - "__updatedtime__": 1682752805000 + "description": "Apparel and accessories" }, { "id": "pants", "name": "Pants", "description": "Apparel and accessories", - "parent": "clothing", - "__createdtime__": 1682752805000, - "__updatedtime__": 1682752805000 + "categoryId": "clothing" } ] } \ No newline at end of file diff --git a/data/products.json b/data/products.json index 45b2eee..cbe9416 100644 --- a/data/products.json +++ b/data/products.json @@ -5,41 +5,33 @@ "id": "1", "name": "Laptop", "price": 999.99, - "category": "electronics", - "inStock": true, - "__createdtime__": 1682752800000, - "__updatedtime__": 1682752800000 + "categoryId": "electronics", + "inStock": true }, { "id": "2", "name": "Smartphone", "price": 699.99, - "category": "electronics", - "inStock": false, - "__createdtime__": 1682752801000, - "__updatedtime__": 1682752801000 + "categoryId": "electronics", + "inStock": false }, { "id": "3", "name": "Desk Chair", "price": 199.99, - "category": "furniture", + "categoryId": "furniture", "details": { "weight": 1.5, "color": "black" }, - "inStock": true, - "__createdtime__": 1682752802000, - "__updatedtime__": 1682752803000 + "inStock": true }, { "id": "4", "name": "Jeans", "price": 98.99, - "category": "pants", - "inStock":
true, - "__createdtime__": 1682752802000, - "__updatedtime__": 1682752802000 + "categoryId": "pants", + "inStock": true } ] } \ No newline at end of file diff --git a/data/users-myco.json b/data/users-myco.json index 2d01cfe..c824379 100644 --- a/data/users-myco.json +++ b/data/users-myco.json @@ -7,18 +7,14 @@ "username": "jane_doe", "email": "jane@example.com", "firstName": "Jane", - "lastName": "Doe", - "__createdtime__": 1682752806000, - "__updatedtime__": 1682752806000 + "lastName": "Doe" }, { "id": "2", "username": "Jon_smith", "email": "jon@example.com", "firstName": "Jon", - "lastName": "Smith", - "__createdtime__": 1682752807000, - "__updatedtime__": 1682752807000 + "lastName": "Smith" } ] } \ No newline at end of file diff --git a/data/users.json b/data/users.json index c13dbae..613b012 100644 --- a/data/users.json +++ b/data/users.json @@ -6,18 +6,14 @@ "username": "john_doe", "email": "john@example.com", "firstName": "John", - "lastName": "Doe", - "__createdtime__": 1682752806000, - "__updatedtime__": 1682752806000 + "lastName": "Doe" }, { "id": "2", "username": "jane_smith", "email": "jane@example.com", "firstName": "Jane", - "lastName": "Smith", - "__createdtime__": 1682752807000, - "__updatedtime__": 1682752807000 + "lastName": "Smith" } ] } \ No newline at end of file diff --git a/schema.graphql b/schema.graphql index fc63f65..b346d16 100644 --- a/schema.graphql +++ b/schema.graphql @@ -14,7 +14,7 @@ type Product @table @export { id: ID @primaryKey name: String @indexed price: Float - category: Category @relationship(from: categoryId) + category: Category @relationship(from: "categoryId") details: Any inStock: Boolean } @@ -23,9 +23,9 @@ type Category @table @export { id: ID @primaryKey name: String @indexed description: String - parent: Category @relationship(from: categoryId) - products: [Product] @relationship(to: categoryId) - children: [Category] @relationship(to: categoryId) + parent: Category @relationship(from: "categoryId") + products: 
[Product] @relationship(to: "categoryId") + children: [Category] @relationship(to: "categoryId") } type User @table @export {