Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion inc/Core/Steps/AI/AIStep.php
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ protected function executeStep(): array {
if ( ! empty( $this->dataPackets ) ) {
$messages[] = array(
'role' => 'user',
'content' => wp_json_encode( array( 'data_packets' => $this->dataPackets ), JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE ),
'content' => wp_json_encode( array( 'data_packets' => self::sanitizeDataPacketsForAi( $this->dataPackets ) ), JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE ),
);
}

Expand Down Expand Up @@ -258,6 +258,61 @@ protected function executeStep(): array {
return self::processLoopResults( $loop_result, $this->dataPackets, $payload, $available_tools );
}

/**
 * Build an AI-safe copy of the data packets with local file paths stripped.
 *
 * Packets can carry a file_info.file_path entry inside their 'data' array so
 * that later runtime steps are able to locate the file on disk. That path is
 * internal and must not appear in the JSON sent to the model, which could
 * otherwise echo it into generated content. Only the returned copy is
 * redacted; the array passed in is not modified.
 *
 * Entries that are not arrays, or whose 'data' value is not an array, are
 * copied through untouched. The result is always a sequentially indexed list.
 *
 * @param array $data_packets Original data packets.
 * @return array Sanitized copy safe for AI serialization.
 */
public static function sanitizeDataPacketsForAi( array $data_packets ): array {
	// The callback receives each packet by value, so edits stay local to the copy.
	$redact_packet = static function ( $packet ) {
		if ( is_array( $packet ) && isset( $packet['data'] ) && is_array( $packet['data'] ) ) {
			$packet['data'] = self::sanitizePacketDataForAi( $packet['data'] );
		}

		return $packet;
	};

	// array_values() discards the incoming keys, yielding a 0-based list.
	return array_values( array_map( $redact_packet, $data_packets ) );
}

/**
 * Strip the internal file path from a packet's data array.
 *
 * Removes the 'file_path' key from the 'file_info' sub-array. When that
 * leaves 'file_info' empty, the 'file_info' key itself is dropped. Data
 * without an array-valued 'file_info' entry is returned as received.
 *
 * @param array $packet_data Packet data array.
 * @return array Sanitized packet data.
 */
private static function sanitizePacketDataForAi( array $packet_data ): array {
	$file_info = $packet_data['file_info'] ?? null;

	if ( is_array( $file_info ) ) {
		unset( $file_info['file_path'] );

		if ( array() === $file_info ) {
			// Nothing besides the path was present; omit the empty container.
			unset( $packet_data['file_info'] );
		} else {
			$packet_data['file_info'] = $file_info;
		}
	}

	return $packet_data;
}

/**
* Process AI conversation loop results into data packets.
*
Expand Down
79 changes: 79 additions & 0 deletions tests/Unit/Core/Steps/AI/AIStepTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?php
/**
* Tests for AIStep AI payload sanitization.
*
* @package DataMachine\Tests\Unit\Core\Steps\AI
*/

namespace DataMachine\Tests\Unit\Core\Steps\AI;

use DataMachine\Core\Steps\AI\AIStep;
use PHPUnit\Framework\TestCase;

class AIStepTest extends TestCase {

	/**
	 * The local path is stripped from file_info, the remaining file metadata
	 * survives, and the input array keeps its original path.
	 */
	public function test_sanitize_data_packets_for_ai_removes_file_path_but_keeps_other_file_info(): void {
		$local_path = '/var/www/extrachill.com/wp-content/uploads/dm-files/test.jpg';

		$packets = array(
			array(
				'type'     => 'fetch',
				'data'     => array(
					'title'     => 'Test post',
					'body'      => 'Body',
					'file_info' => array(
						'file_path' => $local_path,
						'file_name' => 'test.jpg',
						'mime_type' => 'image/jpeg',
						'file_size' => 12345,
					),
				),
				'metadata' => array(),
			),
		);

		$result = AIStep::sanitizeDataPacketsForAi( $packets );

		$this->assertArrayNotHasKey( 'file_path', $result[0]['data']['file_info'] );
		$this->assertSame( 'test.jpg', $result[0]['data']['file_info']['file_name'] );
		$this->assertSame( 'image/jpeg', $result[0]['data']['file_info']['mime_type'] );
		$this->assertSame( 12345, $result[0]['data']['file_info']['file_size'] );

		// The caller's array must still carry the path for runtime use.
		$this->assertSame( $local_path, $packets[0]['data']['file_info']['file_path'] );
	}

	/**
	 * A file_info array that held only the path is removed entirely.
	 */
	public function test_sanitize_data_packets_for_ai_drops_empty_file_info_after_redaction(): void {
		$path_only_packet = array(
			'type'     => 'fetch',
			'data'     => array(
				'file_info' => array(
					'file_path' => '/tmp/only-path.png',
				),
			),
			'metadata' => array(),
		);

		$result = AIStep::sanitizeDataPacketsForAi( array( $path_only_packet ) );

		$this->assertArrayNotHasKey( 'file_info', $result[0]['data'] );
	}

	/**
	 * Packets with no file_info come back identical to the input.
	 */
	public function test_sanitize_data_packets_for_ai_leaves_packets_without_file_info_unchanged(): void {
		$packets = array(
			array(
				'type'     => 'fetch',
				'data'     => array(
					'title' => 'No file info',
					'body'  => 'Still here',
				),
				'metadata' => array( 'source_type' => 'rss' ),
			),
		);

		$result = AIStep::sanitizeDataPacketsForAi( $packets );

		$this->assertSame( $packets, $result );
	}
}
Loading