From 668af16c03b24037dd1bab3cb9dafee19badb081 Mon Sep 17 00:00:00 2001 From: Nikola Stojic Date: Fri, 6 Feb 2026 12:08:36 +0100 Subject: [PATCH 01/11] Fix for lerobot API change --- alignit/record.py | 8 +++++++- alignit/robots/xarm.py | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/alignit/record.py b/alignit/record.py index 00e897b..7277126 100644 --- a/alignit/record.py +++ b/alignit/record.py @@ -15,11 +15,17 @@ ) from alignit.robots.xarmsim import XarmSim -from alignit.robots.xarm import Xarm + from alignit.utils.zhou import se3_sixd import draccus from alignit.config import RecordConfig +Xarm = None +try: + from alignit.robots.xarm import Xarm +except ImportError: + pass + def generate_spiral_trajectory(start_pose, cfg): """Generate spiral trajectory using configuration parameters.""" diff --git a/alignit/robots/xarm.py b/alignit/robots/xarm.py index 39c69fd..e750d91 100644 --- a/alignit/robots/xarm.py +++ b/alignit/robots/xarm.py @@ -2,7 +2,8 @@ import numpy as np import transforms3d as t3d -from lerobot.cameras.realsense import RealSenseCamera, RealSenseCameraConfig +from lerobot.cameras.realsense.camera_realsense import RealSenseCamera +from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig from lerobot_xarm.xarm import Xarm as LeXarm from lerobot_xarm.config import XarmConfig From 449954604f58ab467c287810374635e5d24ec9ed Mon Sep 17 00:00:00 2001 From: Nikola Stojic Date: Fri, 6 Feb 2026 14:08:35 +0100 Subject: [PATCH 02/11] Implement depth loading based on config --- alignit/infere.py | 58 +++++++++++++++++++++++++++-------------------- alignit/record.py | 41 ++++++++++++++++++--------------- 2 files changed, 56 insertions(+), 43 deletions(-) diff --git a/alignit/infere.py b/alignit/infere.py index e533162..21a42d6 100644 --- a/alignit/infere.py +++ b/alignit/infere.py @@ -1,5 +1,4 @@ import time - import torch import transforms3d as t3d import numpy as np @@ -10,7 +9,12 @@ from alignit.utils.zhou import sixd_se3 from alignit.utils.tfs import print_pose, are_tfs_close from alignit.robots.xarmsim import XarmSim -from alignit.robots.xarm import Xarm + +Xarm = None +try: + from alignit.robots.xarm import Xarm +except ImportError: + pass @draccus.wrap() @@ -42,41 +46,45 @@ def main(cfg: InferConfig): iteration = 0 iterations_within_tolerance = 0 ang_tol_rad = np.deg2rad(cfg.ang_tolerance) + try: while True: observation = robot.get_observation() rgb_image = observation["rgb"].astype(np.float32) / 255.0 - depth_image = observation["depth"].astype(np.float32) - print( - "Min/Max depth,mean (raw):", - observation["depth"].min(), - observation["depth"].max(), - observation["depth"].mean(), - ) - print( - "Min/Max depth,mean (scaled):", - depth_image.min(), - depth_image.max(), - depth_image.mean(), - ) + rgb_image_tensor = ( torch.from_numpy(np.array(rgb_image)) .permute(2, 0, 1) # (H, W, C) -> (C, H, W) .unsqueeze(0) .to(device) ) - - depth_image_tensor = ( - torch.from_numpy(np.array(depth_image)) - .unsqueeze(0) # Add channel dimension: (1, H, W) - .unsqueeze(0) # Add batch dimension: (1, 1, H, W) - .to(device) - ) rgb_images_batch = rgb_image_tensor.unsqueeze(1) - depth_images_batch = depth_image_tensor.unsqueeze(1) + + depth_images_batch = None + if cfg.model.use_depth_input: + depth_image = observation["depth"].astype(np.float32) + + print( + "Min/Max depth,mean (raw):", + observation["depth"].min(), + observation["depth"].max(), + observation["depth"].mean(), + ) + + depth_image_tensor = ( + torch.from_numpy(np.array(depth_image)) + .unsqueeze(0) # Add channel dimension: (1, H, W) + .unsqueeze(0) # Add batch dimension: (1, 1, H, W) + .to(device) + ) + depth_images_batch = depth_image_tensor.unsqueeze(1) with torch.no_grad(): - relative_action = net(rgb_images_batch, depth_images=depth_images_batch) + if cfg.model.use_depth_input: + relative_action = net(rgb_images_batch, depth_images=depth_images_batch) + else: + relative_action = net(rgb_images_batch) + relative_action = relative_action.squeeze(0).cpu().numpy() relative_action = sixd_se3(relative_action) @@ -128,4 +136,4 @@ def main(cfg: InferConfig): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/alignit/record.py b/alignit/record.py index 7277126..02c9841 100644 --- a/alignit/record.py +++ b/alignit/record.py @@ -15,7 +15,6 @@ ) from alignit.robots.xarmsim import XarmSim - from alignit.utils.zhou import se3_sixd import draccus from alignit.config import RecordConfig @@ -82,23 +81,26 @@ def generate_spiral_trajectory(start_pose, cfg): return trajectory - @draccus.wrap() def main(cfg: RecordConfig): """Record alignment dataset using configuration parameters.""" robot = XarmSim() - features = Features( - { - "images": Sequence(Image()), - "action": Sequence(Value("float32")), - "depth": Sequence(Image()), - } - ) + + save_depth = getattr(cfg, "save_depth", True) + + feature_dict = { + "images": Sequence(Image()), + "action": Sequence(Value("float32")), + } + + if save_depth: + feature_dict["depth"] = Sequence(Image()) + + features = Features(feature_dict) for episode in range(cfg.episodes): pose_start, pose_alignment_target = robot.reset() trajectory = generate_spiral_trajectory(pose_start, cfg.trajectory) - pose = robot.pose() frames = [] for pose in trajectory: robot.servo_to_pose( @@ -110,36 +112,39 @@ def main(cfg: RecordConfig): action_sixd = se3_sixd(action_pose) observation = robot.get_observation() - print(observation.keys()) frame = { "images": [observation["rgb"].copy()], "action": action_sixd, - "depth": [observation["depth"].copy()], } + + if save_depth: + frame["depth"] = [observation["depth"].copy()] + frames.append(frame) print(f"Episode {episode+1} completed with {len(frames)} frames.") episode_dataset = Dataset.from_list(frames, features=features) - # 2. Load existing dataset if available if os.path.exists(cfg.dataset.path): existing_dataset = load_from_disk(cfg.dataset.path) + + if "depth" in existing_dataset.features and not save_depth: + print("Warning: Existing dataset has depth, but current run does not.") + existing_dataset = existing_dataset.cast(features) combined_dataset = concatenate_datasets([existing_dataset, episode_dataset]) else: combined_dataset = episode_dataset - # 3. Save to TEMPORARY location first (avoid self-overwrite) temp_path = f"{cfg.dataset.path}_temp" combined_dataset.save_to_disk(temp_path) - # 4. Atomic replacement (only after successful save) if os.path.exists(cfg.dataset.path): - shutil.rmtree(cfg.dataset.path) # Remove old version - shutil.move(temp_path, cfg.dataset.path) # Move new version into place + shutil.rmtree(cfg.dataset.path) + shutil.move(temp_path, cfg.dataset.path) robot.disconnect() if __name__ == "__main__": - main() + main() \ No newline at end of file From f87aee5b5b45721c1a9603910ed544c6194c55a4 Mon Sep 17 00:00:00 2001 From: Nikola Stojic Date: Fri, 6 Feb 2026 14:28:57 +0100 Subject: [PATCH 03/11] Added option to choose between simulation and real robot. Set depth default to False. --- alignit/config.py | 6 +++++- alignit/record.py | 14 +++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/alignit/config.py b/alignit/config.py index 15b8ff2..2af3529 100644 --- a/alignit/config.py +++ b/alignit/config.py @@ -48,7 +48,7 @@ class ModelConfig: metadata={"help": "Path to save/load trained model"}, ) use_depth_input: bool = field( - default=True, metadata={"help": "Whether to use depth input for the model"} + default=False, metadata={"help": "Whether to use depth input for the model"} ) depth_hidden_dim: int = field( default=128, metadata={"help": "Output dimension of depth CNN"} @@ -92,6 +92,10 @@ class RecordConfig: dataset: DatasetConfig = field(default_factory=DatasetConfig) trajectory: TrajectoryConfig = field(default_factory=TrajectoryConfig) + robot_type: str = field( + default="sim", + metadata={"help": "Robot type: 'sim' for simulation or 'real' for real xArm robot"}, + ) episodes: int = field(default=10, metadata={"help": "Number of episodes to record"}) lin_tol_alignment: float = field( default=0.015, metadata={"help": "Linear tolerance for alignment servo"} diff --git a/alignit/record.py b/alignit/record.py index 02c9841..1c1cc36 100644 --- a/alignit/record.py +++ b/alignit/record.py @@ -84,7 +84,19 @@ def generate_spiral_trajectory(start_pose, cfg): @draccus.wrap() def main(cfg: RecordConfig): """Record alignment dataset using configuration parameters.""" - robot = XarmSim() + if cfg.robot_type == "sim": + print("Using MuJoCo simulation (xArm Lite6 simulator)") + robot = XarmSim() + elif cfg.robot_type == "real": + print("Connecting to real xArm robot...") + if Xarm is None: + print("Robot not available.") + return + robot = Xarm() + else: + raise ValueError( + f"Unknown robot_type '{cfg.robot_type}'. Use 'sim' or 'real'." + ) save_depth = getattr(cfg, "save_depth", True) From 55399a2ae9e567cb4314e005afdb13418d8470a3 Mon Sep 17 00:00:00 2001 From: stojicnnn Date: Fri, 6 Feb 2026 17:39:01 +0100 Subject: [PATCH 04/11] Fixed import issues caused by new lerobot release. --- .gitignore | 1 + .python-version | 1 + alignit/record.py | 23 ++++------------------- alignit/robots/xarm.py | 4 ++-- 4 files changed, 8 insertions(+), 21 deletions(-) create mode 100644 .python-version diff --git a/.gitignore b/.gitignore index d1e521b..2a532e8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ data alignnet_model.pth +.venv/ diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..1445aee --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.10.14 diff --git a/alignit/record.py b/alignit/record.py index 1c1cc36..d31365d 100644 --- a/alignit/record.py +++ b/alignit/record.py @@ -15,15 +15,12 @@ ) from alignit.robots.xarmsim import XarmSim +from alignit.robots.xarm import Xarm from alignit.utils.zhou import se3_sixd import draccus from alignit.config import RecordConfig -Xarm = None -try: - from alignit.robots.xarm import Xarm -except ImportError: - pass + def generate_spiral_trajectory(start_pose, cfg): @@ -84,19 +81,7 @@ def generate_spiral_trajectory(start_pose, cfg): @draccus.wrap() def main(cfg: RecordConfig): """Record alignment dataset using configuration parameters.""" - if cfg.robot_type == "sim": - print("Using MuJoCo simulation (xArm Lite6 simulator)") - robot = XarmSim() - elif cfg.robot_type == "real": - print("Connecting to real xArm robot...") - if Xarm is None: - print("Robot not available.") - return - robot = Xarm() - else: - raise ValueError( - f"Unknown robot_type '{cfg.robot_type}'. Use 'sim' or 'real'." - ) + robot = Xarm() save_depth = getattr(cfg, "save_depth", True) @@ -111,7 +96,7 @@ def main(cfg: RecordConfig): features = Features(feature_dict) for episode in range(cfg.episodes): - pose_start, pose_alignment_target = robot.reset() + pose_start, pose_alignment_target = robot.reset(cfg) trajectory = generate_spiral_trajectory(pose_start, cfg.trajectory) frames = [] for pose in trajectory: diff --git a/alignit/robots/xarm.py b/alignit/robots/xarm.py index e750d91..301b40f 100644 --- a/alignit/robots/xarm.py +++ b/alignit/robots/xarm.py @@ -4,8 +4,8 @@ import transforms3d as t3d from lerobot.cameras.realsense.camera_realsense import RealSenseCamera from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig -from lerobot_xarm.xarm import Xarm as LeXarm -from lerobot_xarm.config import XarmConfig +from lerobot_robot_xarm.xarm import Xarm as LeXarm +from lerobot_robot_xarm.config_xarm import XarmConfig from alignit.robots.robot import Robot from alignit.utils.tfs import are_tfs_close From 83ba37252eced767b87e03bf85232360c92a1740 Mon Sep 17 00:00:00 2001 From: stojicnnn Date: Fri, 6 Feb 2026 18:03:19 +0100 Subject: [PATCH 05/11] RObot moves terribly. --- alignit/infere.py | 48 ++++++++++++++++++------------------------ alignit/robots/xarm.py | 6 +++++- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/alignit/infere.py b/alignit/infere.py index 21a42d6..40a93c5 100644 --- a/alignit/infere.py +++ b/alignit/infere.py @@ -37,7 +37,7 @@ def main(cfg: InferConfig): net.to(device) net.eval() - robot = XarmSim() + robot = Xarm() start_pose = t3d.affines.compose( [0.23, 0, 0.25], t3d.euler.euler2mat(np.pi, 0, 0), [1, 1, 1] @@ -50,34 +50,28 @@ def main(cfg: InferConfig): try: while True: observation = robot.get_observation() - rgb_image = observation["rgb"].astype(np.float32) / 255.0 - - rgb_image_tensor = ( - torch.from_numpy(np.array(rgb_image)) - .permute(2, 0, 1) # (H, W, C) -> (C, H, W) - .unsqueeze(0) + # 1. Capture the raw numpy array from LeRobot's async_read + rgb_np = observation["rgb"].astype(np.float32) / 255.0 + + # 2. Safety check: Ensure it has 3 dimensions (H, W, C) + # If the camera returned (480, 640), turn it into (480, 640, 1) + if rgb_np.ndim == 2: + rgb_np = np.expand_dims(rgb_np, axis=-1) + + # 3. Backbone requirement: EfficientNet needs 3 channels + # If it's grayscale (1 channel), broadcast it to 3 channels + if rgb_np.shape[-1] == 1: + rgb_np = np.repeat(rgb_np, 3, axis=-1) + + # 4. Transform to (Batch, Sequence, Channel, Height, Width) + # This converts (480, 640, 3) -> (3, 480, 640) -> (1, 1, 3, 480, 640) + rgb_images_batch = ( + torch.from_numpy(rgb_np) + .permute(2, 0, 1) # Move channels to front + .unsqueeze(0) # Add Batch dimension + .unsqueeze(0) # Add Sequence dimension .to(device) ) - rgb_images_batch = rgb_image_tensor.unsqueeze(1) - - depth_images_batch = None - if cfg.model.use_depth_input: - depth_image = observation["depth"].astype(np.float32) - - print( - "Min/Max depth,mean (raw):", - observation["depth"].min(), - observation["depth"].max(), - observation["depth"].mean(), - ) - - depth_image_tensor = ( - torch.from_numpy(np.array(depth_image)) - .unsqueeze(0) # Add channel dimension: (1, H, W) - .unsqueeze(0) # Add batch dimension: (1, 1, H, W) - .to(device) - ) - depth_images_batch = depth_image_tensor.unsqueeze(1) with torch.no_grad(): if cfg.model.use_depth_input: diff --git a/alignit/robots/xarm.py b/alignit/robots/xarm.py index 301b40f..d428a9c 100644 --- a/alignit/robots/xarm.py +++ b/alignit/robots/xarm.py @@ -38,7 +38,11 @@ def get_intrinsics(self): return self.camera.get_intrinsics() def get_observation(self): - rgb_image, depth_image, acquisition_time = self.camera.async_read() + frame = self.camera.async_read() + + rgb_image = frame[0] + depth_image = frame[1] + depth_array_clipped = np.clip(np.array(depth_image), a_min=0, a_max=1000) depth_image = np.array(depth_array_clipped) / 1000.0 From 9d7a6779c7d0a74541302dfcc0429da92a6216fd Mon Sep 17 00:00:00 2001 From: stojicnnn Date: Fri, 13 Feb 2026 16:06:12 +0100 Subject: [PATCH 06/11] Removed depth image, added benchmarks --- alignit/infere.py | 197 +++++++++++++--------- alignit/record.py | 29 +--- alignit/robots/xarm.py | 9 +- alignit/robots/xarmsim/__init__.py | 36 +++- alignit/train.py | 29 +--- alignit/visualize.py | 4 +- benchmark/__init__.py | 1 + benchmark/metrics.py | 116 +++++++++++++ benchmark/run_benchmark.py | 261 +++++++++++++++++++++++++++++ 9 files changed, 537 insertions(+), 145 deletions(-) create mode 100644 benchmark/__init__.py create mode 100644 benchmark/metrics.py create mode 100644 benchmark/run_benchmark.py diff --git a/alignit/infere.py b/alignit/infere.py index 40a93c5..cab3ac3 100644 --- a/alignit/infere.py +++ b/alignit/infere.py @@ -16,13 +16,13 @@ except ImportError: pass - @draccus.wrap() def main(cfg: InferConfig): """Run inference/alignment using configuration parameters.""" device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"Using device: {device}") + # Initialize Model net = AlignNet( backbone_name=cfg.model.backbone, backbone_weights=cfg.model.backbone_weights, @@ -37,97 +37,136 @@ def main(cfg: InferConfig): net.to(device) net.eval() - robot = Xarm() - - start_pose = t3d.affines.compose( - [0.23, 0, 0.25], t3d.euler.euler2mat(np.pi, 0, 0), [1, 1, 1] - ) - robot.servo_to_pose(start_pose, lin_tol=1e-2) - iteration = 0 - iterations_within_tolerance = 0 + # Initialize Robot + robot = XarmSim() + + num_alignments = getattr(cfg, 'num_alignments', 5) ang_tol_rad = np.deg2rad(cfg.ang_tolerance) + alignment_results = [] - try: - while True: - observation = robot.get_observation() - # 1. Capture the raw numpy array from LeRobot's async_read - rgb_np = observation["rgb"].astype(np.float32) / 255.0 - - # 2. Safety check: Ensure it has 3 dimensions (H, W, C) - # If the camera returned (480, 640), turn it into (480, 640, 1) - if rgb_np.ndim == 2: - rgb_np = np.expand_dims(rgb_np, axis=-1) - - # 3. Backbone requirement: EfficientNet needs 3 channels - # If it's grayscale (1 channel), broadcast it to 3 channels - if rgb_np.shape[-1] == 1: - rgb_np = np.repeat(rgb_np, 3, axis=-1) - - # 4. Transform to (Batch, Sequence, Channel, Height, Width) - # This converts (480, 640, 3) -> (3, 480, 640) -> (1, 1, 3, 480, 640) - rgb_images_batch = ( - torch.from_numpy(rgb_np) - .permute(2, 0, 1) # Move channels to front - .unsqueeze(0) # Add Batch dimension - .unsqueeze(0) # Add Sequence dimension - .to(device) - ) + # Safety limit: Total attempts allowed before declaring a trial "Failed" + MAX_TOTAL_STEPS = 10000 + + print(f"\nRunning {num_alignments} alignment trials...\n") + + for alignment_trial in range(num_alignments): + print(f"\n{'='*60}") + print(f"Alignment Trial {alignment_trial + 1}/{num_alignments}") + print(f"{'='*60}") + + # 1. Randomize Start Pose + start_pose = t3d.affines.compose( + [np.random.uniform(0.15, 0.30), + np.random.uniform(-0.15, 0.15), + np.random.uniform(0.20, 0.35)], + t3d.euler.euler2mat(np.pi + np.random.uniform(-0.3, 0.3), + np.random.uniform(-0.3, 0.3), + np.random.uniform(-np.pi, np.pi)), + [1, 1, 1] + ) + robot.servo_to_pose(start_pose, lin_tol=1e-2, ang_tol=0.1) + + iteration = 0 + iterations_within_tolerance = 0 + trial_data = [] + + try: + while True: + # 2. Get Observation and Preprocess + observation = robot.get_observation() + rgb_np = observation["rgb"].astype(np.float32) / 255.0 + + if rgb_np.ndim == 2: + rgb_np = np.expand_dims(rgb_np, axis=-1) + if rgb_np.shape[-1] == 1: + rgb_np = np.repeat(rgb_np, 3, axis=-1) + + rgb_images_batch = ( + torch.from_numpy(rgb_np) + .permute(2, 0, 1) + .unsqueeze(0).unsqueeze(0) + .to(device) + ) - with torch.no_grad(): - if cfg.model.use_depth_input: - relative_action = net(rgb_images_batch, depth_images=depth_images_batch) - else: + # 3. Model Inference + with torch.no_grad(): relative_action = net(rgb_images_batch) - relative_action = relative_action.squeeze(0).cpu().numpy() - relative_action = sixd_se3(relative_action) - - if cfg.debug_output: - print_pose(relative_action) + relative_action = relative_action.squeeze(0).cpu().numpy() + relative_action = sixd_se3(relative_action) - relative_action[:3, :3] = np.linalg.matrix_power( - relative_action[:3, :3], cfg.rotation_matrix_multiplier - ) - if are_tfs_close( - relative_action, lin_tol=cfg.lin_tolerance, ang_tol=ang_tol_rad - ): - iterations_within_tolerance += 1 - else: - iterations_within_tolerance = 0 + # Apply rotation matrix power scaling + relative_action[:3, :3] = np.linalg.matrix_power( + relative_action[:3, :3], cfg.rotation_matrix_multiplier + ) + + # 4. Logic Fix: Check Alignment Tolerance + # iterations_within_tolerance tracks consecutive successful alignments + if are_tfs_close( + relative_action, lin_tol=cfg.lin_tolerance, ang_tol=ang_tol_rad + ): + iterations_within_tolerance += 1 + print(f"Step {iteration}: Within Tol ({iterations_within_tolerance}/{cfg.max_iterations})") + else: + # Reset if we move outside the tolerance zone + iterations_within_tolerance = 0 + print(f"Step {iteration}: Adjusting...") - print(relative_action) - target_pose = robot.pose() @ relative_action - iteration += 1 - action = { - "pose": target_pose, - "gripper.pos": 1.0, - } - robot.send_action(action) - if iterations_within_tolerance >= cfg.max_iterations: - print(f"Reached maximum iterations ({cfg.max_iterations}) - stopping.") - print("Moving robot to final pose.") + # 5. Move Robot current_pose = robot.pose() - gripper_z_offset = np.array( - [ + target_pose = current_pose @ relative_action + iteration += 1 + + robot.send_action({"pose": target_pose, "gripper.pos": 1.0}) + + # 6. Exit Conditions + + # SUCCESS: Remained close for the required number of iterations + if iterations_within_tolerance >= cfg.max_iterations: + print(f"✓ Converged after {iteration} total steps.") + + # Finalize: Move to height offset and close gripper + gripper_z_offset = np.array([ [1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, cfg.manual_height], [0, 0, 0, 1], - ] - ) - offset_pose = current_pose @ gripper_z_offset - robot.servo_to_pose(pose=offset_pose) - robot.close_gripper() - robot.gripper_off() - - break - - time.sleep(10.0) - except KeyboardInterrupt: - print("\nExiting...") - + ]) + robot.servo_to_pose(pose=robot.pose() @ gripper_z_offset) + robot.close_gripper() + robot.gripper_off() + + alignment_results.append({ + "trial": alignment_trial + 1, + "success": True, + "iterations": iteration, + }) + break + + # FAILURE: Safety timeout reached + if iteration >= MAX_TOTAL_STEPS: + print(f"✗ Failed: Timeout reached ({MAX_TOTAL_STEPS} steps).") + alignment_results.append({ + "trial": alignment_trial + 1, + "success": False, + "iterations": iteration, + }) + break + + except KeyboardInterrupt: + print("\nTrial interrupted by user.") + break + + # Summary Statistics + print(f"\n{'='*60}") + print(f"INFERENCE SUMMARY") + print(f"{'='*60}") + successful = sum(1 for r in alignment_results if r["success"]) + print(f"Success Rate: {successful}/{len(alignment_results)} ({successful*100//max(1, len(alignment_results))}%)") + if alignment_results: + print(f"Avg Steps to Converge: {np.mean([r['iterations'] for r in alignment_results]):.1f}") + robot.disconnect() - if __name__ == "__main__": main() \ No newline at end of file diff --git a/alignit/record.py b/alignit/record.py index d31365d..d1ae283 100644 --- a/alignit/record.py +++ b/alignit/record.py @@ -81,22 +81,17 @@ def generate_spiral_trajectory(start_pose, cfg): @draccus.wrap() def main(cfg: RecordConfig): """Record alignment dataset using configuration parameters.""" - robot = Xarm() - - save_depth = getattr(cfg, "save_depth", True) + robot = XarmSim() - feature_dict = { - "images": Sequence(Image()), - "action": Sequence(Value("float32")), - } - - if save_depth: - feature_dict["depth"] = Sequence(Image()) - - features = Features(feature_dict) + features = Features( + { + "images": Sequence(Image()), + "action": Sequence(Value("float32")), + } + ) for episode in range(cfg.episodes): - pose_start, pose_alignment_target = robot.reset(cfg) + pose_start, pose_alignment_target = robot.reset() trajectory = generate_spiral_trajectory(pose_start, cfg.trajectory) frames = [] for pose in trajectory: @@ -113,10 +108,6 @@ def main(cfg: RecordConfig): "images": [observation["rgb"].copy()], "action": action_sixd, } - - if save_depth: - frame["depth"] = [observation["depth"].copy()] - frames.append(frame) print(f"Episode {episode+1} completed with {len(frames)} frames.") @@ -124,10 +115,6 @@ def main(cfg: RecordConfig): if os.path.exists(cfg.dataset.path): existing_dataset = load_from_disk(cfg.dataset.path) - - if "depth" in existing_dataset.features and not save_depth: - print("Warning: Existing dataset has depth, but current run does not.") - existing_dataset = existing_dataset.cast(features) combined_dataset = concatenate_datasets([existing_dataset, episode_dataset]) else: diff --git a/alignit/robots/xarm.py b/alignit/robots/xarm.py index d428a9c..80f078c 100644 --- a/alignit/robots/xarm.py +++ b/alignit/robots/xarm.py @@ -38,17 +38,10 @@ def get_intrinsics(self): return self.camera.get_intrinsics() def get_observation(self): - frame = self.camera.async_read() - - rgb_image = frame[0] - depth_image = frame[1] - - depth_array_clipped = np.clip(np.array(depth_image), a_min=0, a_max=1000) - depth_image = np.array(depth_array_clipped) / 1000.0 + rgb_image = self.camera.async_read() return { "rgb": rgb_image, - "depth": depth_image, } def disconnect(self): diff --git a/alignit/robots/xarmsim/__init__.py b/alignit/robots/xarmsim/__init__.py index 7b58a59..3dcce63 100644 --- a/alignit/robots/xarmsim/__init__.py +++ b/alignit/robots/xarmsim/__init__.py @@ -98,6 +98,12 @@ def reset(self): random_pos, t3d.euler.euler2mat(roll, pitch, yaw), [1, 1, 1] ) self._set_object_pose("pickup_object", pose) + # Disable collisions for the pickup object so it doesn't get pushed + # by the robot during alignment trials. + try: + self._disable_object_collisions("pickup_object") + except Exception: + pass pose1 = self._get_object_pose() pose_start = pose1 @ t3d.affines.compose( [0, 0, -0.1], t3d.euler.euler2mat(0, 0, 0), [1, 1, 1] @@ -125,6 +131,30 @@ def _set_object_pose(self, object_name, pose_matrix): self.data.qvel[qvel_adr : qvel_adr + 6] = 0 mj.mj_forward(self.model, self.data) + def _disable_object_collisions(self, object_name: str): + """Disable collisions for all geoms belonging to a body. + + This prevents the object from being pushed or reacting to contacts + when the robot touches it during benchmarking. + """ + try: + body_id = self.model.body(object_name).id + except Exception: + return + + # body_geomadr is the start index of geoms for this body + # body_geomnum is the number of geoms + start = int(self.model.body_geomadr[body_id]) + count = int(self.model.body_geomnum[body_id]) + for i in range(start, start + count): + try: + # Set contact type and affinity to 0 to disable contacts + self.model.geom_contype[i] = 0 + self.model.geom_conaffinity[i] = 0 + except Exception: + # Some mujoco bindings may not allow assignment; ignore failures + pass + def close_gripper(self): self._set_gripper_position(self.gripper_close_pos) @@ -191,15 +221,9 @@ def get_observation(self): name = mujoco.mj_id2name(self.model, mujoco.mjtObj.mjOBJ_CAMERA, i) self.renderer.update_scene(self.data, camera=name) image = self.renderer.render() - self.renderer.enable_depth_rendering() - self.renderer.update_scene(self.data, camera=name) - image_depth = self.renderer.render() - self.renderer.disable_depth_rendering() # TODO: Handle multiple cameras obs["rgb"] = image[:, :, ::-1] - obs["depth"] = image_depth - obs["depth"] = np.clip(obs["depth"], 0, 1) return obs diff --git a/alignit/train.py b/alignit/train.py index f1c2266..1bc60a1 100644 --- a/alignit/train.py +++ b/alignit/train.py @@ -14,11 +14,9 @@ def collate_fn(batch): images = [item["images"] for item in batch] - depth_images = [item.get("depth", None) for item in batch] actions = [item["action"] for item in batch] return { "images": images, - "depth_images": depth_images, "action": torch.tensor(actions, dtype=torch.float32), } @@ -57,7 +55,6 @@ def main(cfg: TrainConfig): total_loss = 0 for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"): images = batch["images"] - depth_images_pil = batch["depth_images"] actions = batch["action"].to(device) batch_rgb_tensors = [] @@ -76,32 +73,8 @@ def main(cfg: TrainConfig): batch_rgb_tensors = torch.stack(batch_rgb_tensors, dim=0).to(device) - batch_depth_tensors = None - if cfg.model.use_depth_input: - batch_depth_tensors = [] - for depth_sequence in depth_images_pil: - if depth_sequence is None: - raise ValueError( - "Depth images expected but not found when use_depth_input=True" - ) - - depth_sequence_processed = [] - for d_img in depth_sequence: - depth_array = np.array(d_img) - depth_tensor = torch.from_numpy(depth_array).float() - depth_tensor = depth_tensor.unsqueeze(0) - depth_sequence_processed.append(depth_tensor) - - stacked_depth = torch.stack(depth_sequence_processed, dim=0) - batch_depth_tensors.append(stacked_depth) - - batch_depth_tensors = torch.stack(batch_depth_tensors, dim=0).to(device) - optimizer.zero_grad() - if cfg.model.use_depth_input: - outputs = net(batch_rgb_tensors, depth_images=batch_depth_tensors) - else: - outputs = net(batch_rgb_tensors) + outputs = net(batch_rgb_tensors) loss = criterion(outputs, actions) loss.backward() diff --git a/alignit/visualize.py b/alignit/visualize.py index 60d83b8..70b23f8 100644 --- a/alignit/visualize.py +++ b/alignit/visualize.py @@ -13,18 +13,16 @@ def visualize(cfg: VisualizeConfig): def get_data(index): item = dataset[index] image = item["images"][0] - depth = item["depth"][0] action_sixd = item["action"] action = sixd_se3(action_sixd) label = get_pose_str(action, degrees=True) - return image, depth, label + return image, label gr.Interface( fn=get_data, inputs=gr.Slider(0, len(dataset) - 1, step=1, label="Index", interactive=True), outputs=[ gr.Image(type="pil", label="Image"), - gr.Image(type="pil", label="Depth Image"), gr.Text(label="Label"), ], title="Dataset Image Viewer", diff --git a/benchmark/__init__.py b/benchmark/__init__.py new file mode 100644 index 0000000..46dd880 --- /dev/null +++ b/benchmark/__init__.py @@ -0,0 +1 @@ +"""Benchmark module for alignment evaluation.""" diff --git a/benchmark/metrics.py b/benchmark/metrics.py new file mode 100644 index 0000000..998ceee --- /dev/null +++ b/benchmark/metrics.py @@ -0,0 +1,116 @@ +"""Metrics utilities for benchmark evaluation.""" + +import numpy as np + + +def compute_translation_error(pose_actual, pose_target): + """ + Compute translation error in mm. + + Args: + pose_actual: 4x4 transformation matrix (actual pose) + pose_target: 4x4 transformation matrix (target pose) + + Returns: + Translation error in mm + """ + t_actual = pose_actual[:3, 3] + t_target = pose_target[:3, 3] + error_m = np.linalg.norm(t_actual - t_target) + return error_m * 1000 # Convert to mm + + +def compute_rotation_error(pose_actual, pose_target): + """ + Compute rotation error in degrees using angle-axis representation. + + Args: + pose_actual: 4x4 transformation matrix (actual pose) + pose_target: 4x4 transformation matrix (target pose) + + Returns: + Rotation error in degrees + """ + R_actual = pose_actual[:3, :3] + R_target = pose_target[:3, :3] + + # Compute relative rotation + R_rel = R_actual @ R_target.T + + # Convert to angle-axis and extract angle + rot = R.from_matrix(R_rel) + angle_rad = np.arccos(np.clip((np.trace(R_rel) - 1) / 2, -1, 1)) + angle_deg = np.degrees(angle_rad) + + return angle_deg + + +def compute_pose_error(pose_actual, pose_target, trans_weight=1.0, rot_weight=1.0): + """ + Compute combined pose error. + + Args: + pose_actual: 4x4 transformation matrix + pose_target: 4x4 transformation matrix + trans_weight: Weight for translation error (mm) + rot_weight: Weight for rotation error (degrees) + + Returns: + dict with 'translation', 'rotation', and 'combined' errors + """ + trans_err = compute_translation_error(pose_actual, pose_target) + rot_err = compute_rotation_error(pose_actual, pose_target) + + # Normalize and combine + combined = trans_weight * trans_err + rot_weight * rot_err + + return { + "translation_mm": trans_err, + "rotation_deg": rot_err, + "combined": combined, + } + + +def check_convergence(error, trans_tol_mm=5.0, rot_tol_deg=5.0): + """ + Check if pose error is within acceptable tolerance. + + Args: + error: dict from compute_pose_error() + trans_tol_mm: Translation tolerance in mm + rot_tol_deg: Rotation tolerance in degrees + + Returns: + bool: True if converged + """ + return (error["translation_mm"] <= trans_tol_mm and + error["rotation_deg"] <= rot_tol_deg) + + +def compute_statistics(errors): + """ + Compute statistics over a list of errors. + + Args: + errors: list of error dicts from compute_pose_error() + + Returns: + dict with mean, std, min, max for each error type + """ + trans_errors = [e["translation_mm"] for e in errors] + rot_errors = [e["rotation_deg"] for e in errors] + + return { + "translation": { + "mean_mm": np.mean(trans_errors), + "std_mm": np.std(trans_errors), + "min_mm": np.min(trans_errors), + "max_mm": np.max(trans_errors), + }, + "rotation": { + "mean_deg": np.mean(rot_errors), + "std_deg": np.std(rot_errors), + "min_deg": np.min(rot_errors), + "max_deg": np.max(rot_errors), + }, + } diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py new file mode 100644 index 0000000..4bffb1d --- /dev/null +++ b/benchmark/run_benchmark.py @@ -0,0 +1,261 @@ +"""Benchmark runner for alignment in simulation.""" + +import json +import time +from datetime import datetime +from pathlib import Path + +import numpy as np +import torch +import transforms3d as t3d +import draccus + +from alignit.config import InferConfig +from alignit.models.alignnet import AlignNet +from alignit.utils.zhou import sixd_se3 +from alignit.utils.tfs import are_tfs_close +from alignit.robots.xarmsim import XarmSim +from benchmark.metrics import ( + compute_pose_error, + check_convergence, + compute_statistics, +) + + +@draccus.wrap() +def run_benchmark(cfg: InferConfig, num_trials: int = 5, max_iterations: int = 50): + """ + Run alignment benchmark in simulation. + + Args: + cfg: InferConfig with model settings + num_trials: Number of random alignment trials to run + max_iterations: Max iterations per trial + + Returns: + dict with benchmark results + """ + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + + # Load model + net = AlignNet( + backbone_name=cfg.model.backbone, + backbone_weights=cfg.model.backbone_weights, + use_vector_input=cfg.model.use_vector_input, + fc_layers=cfg.model.fc_layers, + vector_hidden_dim=cfg.model.vector_hidden_dim, + output_dim=cfg.model.output_dim, + feature_agg=cfg.model.feature_agg, + use_depth_input=cfg.model.use_depth_input, + ) + net.load_state_dict(torch.load(cfg.model.path, map_location=device)) + net.to(device) + net.eval() + + robot = XarmSim() + + # Initialize results + results = { + "timestamp": datetime.now().isoformat(), + "model_path": cfg.model.path, + "num_trials": num_trials, + "max_iterations": max_iterations, + "trials": [], + "summary": {}, + } + + # Run benchmark trials + print(f"\n{'='*60}") + print(f"Running {num_trials} alignment trials...") + print(f"{'='*60}\n") + + all_final_errors = [] + convergence_count = 0 + + for trial_idx in range(num_trials): + print(f"\n[Trial {trial_idx + 1}/{num_trials}]") + + # Random start pose + start_pose = t3d.affines.compose( + [np.random.uniform(0.15, 0.30), + np.random.uniform(-0.15, 0.15), + np.random.uniform(0.20, 0.35)], + t3d.euler.euler2mat(np.pi + np.random.uniform(-0.3, 0.3), + np.random.uniform(-0.3, 0.3), + np.random.uniform(-np.pi, np.pi)), + [1, 1, 1] + ) + + # Random target pose (within reasonable bounds) + target_pose = t3d.affines.compose( + [np.random.uniform(0.15, 0.30), + np.random.uniform(-0.15, 0.15), + np.random.uniform(0.20, 0.35)], + t3d.euler.euler2mat(np.pi + np.random.uniform(-0.3, 0.3), + np.random.uniform(-0.3, 0.3), + np.random.uniform(-np.pi, np.pi)), + [1, 1, 1] + ) + + robot.servo_to_pose(start_pose, lin_tol=1e-2, ang_tol=0.1) + + trial_data = { + "start_pose": start_pose.tolist(), + "target_pose": target_pose.tolist(), + "iterations": [], + "converged": False, + } + + iteration = 0 + converged = False + + try: + while iteration < max_iterations: + observation = robot.get_observation() + rgb_np = observation["rgb"].astype(np.float32) / 255.0 + + # Ensure 3 channels + if rgb_np.ndim == 2: + rgb_np = np.expand_dims(rgb_np, axis=-1) + if rgb_np.shape[-1] == 1: + rgb_np = np.repeat(rgb_np, 3, axis=-1) + + rgb_images_batch = ( + torch.from_numpy(rgb_np) + .permute(2, 0, 1) + .unsqueeze(0) + .unsqueeze(0) + .to(device) + ) + + with torch.no_grad(): + relative_action = net(rgb_images_batch) + + relative_action = relative_action.squeeze(0).cpu().numpy() + relative_action = sixd_se3(relative_action) + + # Apply rotation multiplier for more stable convergence + relative_action[:3, :3] = np.linalg.matrix_power( + relative_action[:3, :3], cfg.rotation_matrix_multiplier + ) + + # Compute error before this step + current_pose = robot.pose() + error = compute_pose_error(current_pose, target_pose) + + # Check convergence + is_converged = check_convergence( + error, + trans_tol_mm=cfg.lin_tolerance * 1000, + rot_tol_deg=np.degrees(cfg.ang_tolerance) + ) + + trial_data["iterations"].append({ + "iteration": iteration, + "translation_error_mm": error["translation_mm"], + "rotation_error_deg": error["rotation_deg"], + "converged": is_converged, + }) + + print(f" Iter {iteration + 1:2d}: " + f"Trans={error['translation_mm']:6.2f}mm, " + f"Rot={error['rotation_deg']:6.2f}°, " + f"Converged={is_converged}") + + if is_converged: + converged = True + trial_data["converged"] = True + convergence_count += 1 + break + + # Execute action + target_pose_iter = robot.pose() @ relative_action + robot.servo_to_pose(pose=target_pose_iter, lin_tol=1e-3, ang_tol=1e-2) + iteration += 1 + + except Exception as e: + print(f" Error during trial: {e}") + + # Get final error + final_pose = robot.pose() + final_error = compute_pose_error(final_pose, target_pose) + trial_data["final_error"] = { + "translation_mm": final_error["translation_mm"], + "rotation_deg": final_error["rotation_deg"], + } + trial_data["num_iterations"] = iteration + 1 + + all_final_errors.append(final_error) + results["trials"].append(trial_data) + + print(f" Final: Trans={final_error['translation_mm']:.2f}mm, " + f"Rot={final_error['rotation_deg']:.2f}°, " + f"Iters={trial_data['num_iterations']}") + + # Compute summary statistics + stats = compute_statistics(all_final_errors) + results["summary"] = { + "convergence_rate": convergence_count / num_trials, + "convergence_count": convergence_count, + "translation": stats["translation"], + "rotation": stats["rotation"], + "avg_iterations": np.mean([t["num_iterations"] for t in results["trials"]]), + } + + robot.disconnect() + + return results + + +def save_results(results, output_dir: str = "./benchmark/results"): + """Save benchmark results to JSON.""" + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + result_file = output_path / f"benchmark_{timestamp}.json" + + with open(result_file, "w") as f: + json.dump(results, f, indent=2) + + return result_file + + +def print_summary(results): + """Print benchmark summary to console.""" + print(f"\n{'='*60}") + print(f"BENCHMARK SUMMARY") + print(f"{'='*60}\n") + + summary = results["summary"] + + print(f"Convergence Rate: {summary['convergence_rate']*100:.1f}% " + f"({summary['convergence_count']}/{results['num_trials']})") + print(f"Average Iterations: {summary['avg_iterations']:.1f}") + + print(f"\nTranslation Error (mm):") + print(f" Mean: {summary['translation']['mean_mm']:.2f} ± {summary['translation']['std_mm']:.2f}") + print(f" Range: [{summary['translation']['min_mm']:.2f}, {summary['translation']['max_mm']:.2f}]") + + print(f"\nRotation Error (degrees):") + print(f" Mean: {summary['rotation']['mean_deg']:.2f} ± {summary['rotation']['std_deg']:.2f}") + print(f" Range: [{summary['rotation']['min_deg']:.2f}, {summary['rotation']['max_deg']:.2f}]") + + print(f"\n{'='*60}\n") + + +if __name__ == "__main__": + import sys + + # Parse arguments + num_trials = int(sys.argv[1]) if len(sys.argv) > 1 else 5 + + # Run benchmark + results = run_benchmark(num_trials=num_trials, max_iterations=50) + + # Save and print results + result_file = save_results(results) + print(f"Results saved to: {result_file}") + + print_summary(results) From dbaddfc9456bd70ef79323f253d6c55942dc9cf6 Mon Sep 17 00:00:00 2001 From: stojicnnn Date: Fri, 13 Feb 2026 18:21:51 +0100 Subject: [PATCH 07/11] TEsting new loss fn --- alignit/config.py | 2 +- alignit/infere.py | 38 ++++-- alignit/losses.py | 198 +++++++++++++++++++++++++++++ alignit/robots/xarmsim/__init__.py | 45 ++++++- alignit/train.py | 4 +- 5 files changed, 265 insertions(+), 22 deletions(-) create mode 100644 alignit/losses.py diff --git a/alignit/config.py b/alignit/config.py index 2af3529..c564460 100644 --- a/alignit/config.py +++ b/alignit/config.py @@ -168,7 +168,7 @@ class InferConfig: metadata={"help": "Number of iterations within tolerance before stopping"}, ) rotation_matrix_multiplier: int = field( - default=3, + default=2.0, metadata={ "help": "Number of times to multiply the rotation matrix of relative action in order to speed up convergence" }, diff --git a/alignit/infere.py b/alignit/infere.py index cab3ac3..369ed06 100644 --- a/alignit/infere.py +++ b/alignit/infere.py @@ -45,7 +45,7 @@ def main(cfg: InferConfig): alignment_results = [] # Safety limit: Total attempts allowed before declaring a trial "Failed" - MAX_TOTAL_STEPS = 10000 + MAX_TOTAL_STEPS = 1000 print(f"\nRunning {num_alignments} alignment trials...\n") @@ -95,31 +95,41 @@ def main(cfg: InferConfig): relative_action = relative_action.squeeze(0).cpu().numpy() relative_action = sixd_se3(relative_action) - # Apply rotation matrix power scaling - relative_action[:3, :3] = np.linalg.matrix_power( - relative_action[:3, :3], cfg.rotation_matrix_multiplier - ) + # 4. Calculate error magnitude from ORIGINAL unscaled action + # This represents the actual residual error from the network + error_magnitude = np.linalg.norm(relative_action[:3, 3]) - # 4. Logic Fix: Check Alignment Tolerance - # iterations_within_tolerance tracks consecutive successful alignments + # 5. Check alignment based on ORIGINAL unscaled action + # This is the true convergence check - not affected by scaling if are_tfs_close( relative_action, lin_tol=cfg.lin_tolerance, ang_tol=ang_tol_rad ): iterations_within_tolerance += 1 - print(f"Step {iteration}: Within Tol ({iterations_within_tolerance}/{cfg.max_iterations})") + print(f"Step {iteration}: Within Tol ({iterations_within_tolerance}/{cfg.debouncing_count}) [error: {error_magnitude:.6f}]") else: # Reset if we move outside the tolerance zone iterations_within_tolerance = 0 - print(f"Step {iteration}: Adjusting...") + print(f"Step {iteration}: Adjusting... [error: {error_magnitude:.6f}]") - # 5. Move Robot + # 6. Scale action for robot movement (separate from convergence check) + # Error-magnitude based scaling: bigger errors → bigger movements, small errors → small movements + min_scale = 1.0 + error_scale = max(error_magnitude, min_scale) + + scaled_action = relative_action.copy() + scaled_action[:3, 3] *= error_scale # Scale translation + scaled_action[:3, :3] = np.linalg.matrix_power( + scaled_action[:3, :3], int(cfg.rotation_matrix_multiplier) + ) # Apply rotation power scaling + + # 7. Move Robot current_pose = robot.pose() - target_pose = current_pose @ relative_action + target_pose = current_pose @ scaled_action iteration += 1 robot.send_action({"pose": target_pose, "gripper.pos": 1.0}) - # 6. Exit Conditions + # 8. Exit Conditions # SUCCESS: Remained close for the required number of iterations if iterations_within_tolerance >= cfg.max_iterations: @@ -133,8 +143,8 @@ def main(cfg: InferConfig): [0, 0, 0, 1], ]) robot.servo_to_pose(pose=robot.pose() @ gripper_z_offset) - robot.close_gripper() - robot.gripper_off() + #robot.close_gripper() + #robot.gripper_off() alignment_results.append({ "trial": alignment_trial + 1, diff --git a/alignit/losses.py b/alignit/losses.py new file mode 100644 index 0000000..c529e7c --- /dev/null +++ b/alignit/losses.py @@ -0,0 +1,198 @@ +"""Custom loss functions for alignment training.""" + +import torch +import torch.nn as nn + + +class InverseWeightedPositionLoss(nn.Module): + """ + Inverse-distance weighted loss for position, MSE for orientation. + + Position loss: uses 1/distance weighting so small errors have higher gradient impact. + Orientation loss: standard MSE. + + For predicted output (B, 9): [pos_x, pos_y, pos_z, rot_6d_0...rot_6d_5] + + Example: + - Position error 0.1 m → loss contribution ≈ 100 * error² (high penalty) + - Position error 1.0 m → loss contribution ≈ 1 * error² (low penalty) + - Orientation: always MSE regardless of magnitude + """ + + def __init__(self, pos_weight: float = 1.0, rot_weight: float = 1.0, epsilon: float = 0.01): + """ + Args: + pos_weight: Weight multiplier for position loss + rot_weight: Weight multiplier for rotation loss + epsilon: Small value to prevent division by zero (default: 0.01) + """ + super().__init__() + self.pos_weight = pos_weight + self.rot_weight = rot_weight + self.epsilon = epsilon + + def forward(self, pred, target): + """ + Args: + pred: Predicted values (B, 9) where first 3 are position, last 6 are rotation + target: Target values (B, 9) + + Returns: + Scalar loss value + """ + # Split position and rotation + pred_pos = pred[:, :3] + pred_rot = pred[:, 3:] + target_pos = target[:, :3] + target_rot = target[:, 3:] + + # Position loss: inverse-weighted MSE + # L_pos = (1 / ||target_pos||) * MSE_pos + pos_distance = torch.norm(target_pos, dim=1, keepdim=True) + self.epsilon + pos_mse = (pred_pos - target_pos) ** 2 + pos_loss = (pos_mse / pos_distance).mean() + + # Rotation loss: standard MSE + rot_mse = (pred_rot - target_rot) ** 2 + rot_loss = rot_mse.mean() + + # Combine with weights + total_loss = (self.pos_weight * pos_loss + self.rot_weight * rot_loss) / (self.pos_weight + self.rot_weight) + return total_loss + + +class LogarithmicMSELoss(nn.Module): + """ + Inverse-based MSE loss that penalizes smaller errors more heavily. + + Instead of: L = MSE(pred, target) + We compute: L = MSE / (1 + MSE) + + This amplifies the importance of small errors in the loss landscape. + As error approaches 0, gradients remain significant, encouraging precision. + + The key property: + - When MSE → 0: gradient → base (large relative impact) + - When MSE → ∞: loss → 1 (bounded, handles outliers) + + Example gradients (relative to error magnitude): + - MSE error 0.1 → loss ≈ 0.0909, gradient ≈ 0.826 + - MSE error 1.0 → loss = 0.5, gradient ≈ 0.25 + - MSE error 10.0 → loss ≈ 0.909, gradient ≈ 0.0083 + + Compare to standard MSE: + - MSE error 0.1 → loss = 0.1, gradient = 1.0 + - MSE error 1.0 → loss = 1.0, gradient = 1.0 + - MSE error 10.0 → loss = 10.0, gradient = 1.0 + + The inverse-based version increases the gradient for small errors, + making the optimizer focus more on achieving high precision. + """ + + def __init__(self, base: float = 1.0): + """ + Args: + base: Scaling factor for the MSE in numerator. + Higher values make loss increase faster with error. + (default: 1.0) + """ + super().__init__() + self.base = base + + def forward(self, pred, target): + """ + Args: + pred: Predicted values (B, output_dim) + target: Target values (B, output_dim) + + Returns: + Scalar loss value + """ + mse = torch.mean((pred - target) ** 2) + # MSE / (1 + MSE) ensures bounded loss and high penalty for small errors + inverse_loss = (self.base * mse) / (1.0 + self.base * mse) + return inverse_loss + + +class WeightedLogMSELoss(nn.Module): + """ + Weighted logarithmic MSE loss that emphasizes different components. + + Useful if translation and rotation errors should be weighted differently. + + For SE(3) outputs (3 translation + 6 rotation components): + - Translation: typically 0-0.1 (meters) + - Rotation: typically 0-6 (6D representation) + + You might want to weight them differently. + """ + + def __init__(self, trans_weight: float = 1.0, rot_weight: float = 1.0, base: float = 1.0): + """ + Args: + trans_weight: Weight for first 3 components (translation) + rot_weight: Weight for last 6 components (rotation) + base: Scaling factor for log compression + """ + super().__init__() + self.trans_weight = trans_weight + self.rot_weight = rot_weight + self.base = base + + def forward(self, pred, target): + """ + Args: + pred: Predicted values (B, 9) where first 3 are translation, last 6 are rotation + target: Target values (B, 9) + + Returns: + Scalar loss value + """ + # Split into translation and rotation + pred_trans = pred[:, :3] + pred_rot = pred[:, 3:] + target_trans = target[:, :3] + target_rot = target[:, 3:] + + # Compute MSE for each component + mse_trans = torch.mean((pred_trans - target_trans) ** 2) + mse_rot = torch.mean((pred_rot - target_rot) ** 2) + + # Apply weighting and logarithmic compression + weighted_mse = (self.trans_weight * mse_trans + + self.rot_weight * mse_rot) / (self.trans_weight + self.rot_weight) + + log_loss = torch.log(1.0 + self.base * weighted_mse) + return log_loss + + +class SmoothL1LogLoss(nn.Module): + """ + Hybrid loss combining Smooth L1 (Huber) with logarithmic compression. + + Smooth L1 is robust to outliers and combines best of L1 and L2. + Adding log makes small errors have bigger impact. + """ + + def __init__(self, beta: float = 1.0, base: float = 1.0): + """ + Args: + beta: Threshold for Smooth L1 (default: 1.0) + base: Log compression scaling + """ + super().__init__() + self.smooth_l1 = nn.SmoothL1Loss(beta=beta) + self.base = base + + def forward(self, pred, target): + """ + Args: + pred: Predicted values + target: Target values + + Returns: + Scalar loss value + """ + smooth_l1_loss = self.smooth_l1(pred, target) + log_loss = torch.log(1.0 + self.base * smooth_l1_loss) + return log_loss diff --git a/alignit/robots/xarmsim/__init__.py b/alignit/robots/xarmsim/__init__.py index 3dcce63..dbe72d8 100644 --- a/alignit/robots/xarmsim/__init__.py +++ b/alignit/robots/xarmsim/__init__.py @@ -98,6 +98,8 @@ def reset(self): random_pos, t3d.euler.euler2mat(roll, pitch, yaw), [1, 1, 1] ) self._set_object_pose("pickup_object", pose) + # Lock the object joint to prevent it from falling/moving + self._lock_object_joint("pickup_object") # Disable collisions for the pickup object so it doesn't get pushed # by the robot during alignment trials. try: @@ -131,8 +133,29 @@ def _set_object_pose(self, object_name, pose_matrix): self.data.qvel[qvel_adr : qvel_adr + 6] = 0 mj.mj_forward(self.model, self.data) + def _lock_object_joint(self, object_name: str): + """Lock object joint to prevent any movement (translation or rotation).""" + try: + body_id = self.model.body(object_name).id + joint_id = self.model.body_jntadr[body_id] + + if joint_id >= 0: + qvel_adr = self.model.jnt_dofadr[joint_id] + # Lock all 6 DOFs (3 translation + 3 rotation for free joint) + self.data.qvel[qvel_adr : qvel_adr + 6] = 0 + + # Set very high damping on the joint to resist any motion + try: + dof_adr = self.model.jnt_dofadr[joint_id] + for i in range(6): + self.model.dof_damping[dof_adr + i] = 100.0 # Very high damping + except Exception: + pass + except Exception: + pass + def _disable_object_collisions(self, object_name: str): - """Disable collisions for all geoms belonging to a body. + """Disable collisions AND gravity for object to keep it stationary. This prevents the object from being pushed or reacting to contacts when the robot touches it during benchmarking. @@ -142,19 +165,28 @@ def _disable_object_collisions(self, object_name: str): except Exception: return - # body_geomadr is the start index of geoms for this body - # body_geomnum is the number of geoms + # 1. Disable collisions: set contact type and affinity to 0 start = int(self.model.body_geomadr[body_id]) count = int(self.model.body_geomnum[body_id]) for i in range(start, start + count): try: - # Set contact type and affinity to 0 to disable contacts self.model.geom_contype[i] = 0 self.model.geom_conaffinity[i] = 0 except Exception: - # Some mujoco bindings may not allow assignment; ignore failures pass + # 2. Disable gravity on object by setting mass to near-zero + try: + self.model.body_mass[body_id] = 0.001 + except Exception: + pass + + # 3. Set inertia to near-zero to prevent rotation from contacts + try: + self.model.body_inertia[body_id] = [0.0001, 0.0001, 0.0001] + except Exception: + pass + def close_gripper(self): self._set_gripper_position(self.gripper_close_pos) @@ -189,6 +221,9 @@ def send_action(self, action): self.data.ctrl[self.mujoco_actuator_ids] = target_joint_qpos_for_mujoco mj.mj_step(self.model, self.data) + + # Lock object joint every frame to prevent any motion + self._lock_object_joint("pickup_object") self.viewer.sync() return True diff --git a/alignit/train.py b/alignit/train.py index 1bc60a1..b8eeea4 100644 --- a/alignit/train.py +++ b/alignit/train.py @@ -1,7 +1,6 @@ import torch from torch.utils.data import DataLoader from torch.optim import Adam -from torch.nn import MSELoss from tqdm import tqdm from datasets import load_from_disk from torchvision import transforms @@ -10,6 +9,7 @@ from alignit.config import TrainConfig from alignit.models.alignnet import AlignNet +from alignit.losses import InverseWeightedPositionLoss def collate_fn(batch): @@ -48,7 +48,7 @@ def main(cfg: TrainConfig): ) optimizer = Adam(net.parameters(), lr=cfg.learning_rate) - criterion = MSELoss() + criterion = InverseWeightedPositionLoss(pos_weight=1.0, rot_weight=1.0, epsilon=0.01) net.train() for epoch in range(cfg.epochs): From fc1f84116282e8ace22180761d869f621ed9d34c Mon Sep 17 00:00:00 2001 From: stojicnnn Date: Sat, 21 Feb 2026 10:19:50 +0100 Subject: [PATCH 08/11] Sim working fine --- alignit/losses.py | 193 ++-------------------------------------------- alignit/train.py | 4 +- 2 files changed, 10 insertions(+), 187 deletions(-) diff --git a/alignit/losses.py b/alignit/losses.py index c529e7c..0264667 100644 --- a/alignit/losses.py +++ b/alignit/losses.py @@ -1,198 +1,21 @@ -"""Custom loss functions for alignment training.""" - import torch import torch.nn as nn - -class InverseWeightedPositionLoss(nn.Module): - """ - Inverse-distance weighted loss for position, MSE for orientation. - - Position loss: uses 1/distance weighting so small errors have higher gradient impact. - Orientation loss: standard MSE. - - For predicted output (B, 9): [pos_x, pos_y, pos_z, rot_6d_0...rot_6d_5] - - Example: - - Position error 0.1 m → loss contribution ≈ 100 * error² (high penalty) - - Position error 1.0 m → loss contribution ≈ 1 * error² (low penalty) - - Orientation: always MSE regardless of magnitude - """ - - def __init__(self, pos_weight: float = 1.0, rot_weight: float = 1.0, epsilon: float = 0.01): - """ - Args: - pos_weight: Weight multiplier for position loss - rot_weight: Weight multiplier for rotation loss - epsilon: Small value to prevent division by zero (default: 0.01) - """ +class InversePredictionWeightedLoss(nn.Module): + def __init__(self, epsilon: float = 1e-6): super().__init__() - self.pos_weight = pos_weight - self.rot_weight = rot_weight self.epsilon = epsilon - + def forward(self, pred, target): - """ - Args: - pred: Predicted values (B, 9) where first 3 are position, last 6 are rotation - target: Target values (B, 9) - - Returns: - Scalar loss value - """ - # Split position and rotation pred_pos = pred[:, :3] pred_rot = pred[:, 3:] target_pos = target[:, :3] target_rot = target[:, 3:] - - # Position loss: inverse-weighted MSE - # L_pos = (1 / ||target_pos||) * MSE_pos - pos_distance = torch.norm(target_pos, dim=1, keepdim=True) + self.epsilon - pos_mse = (pred_pos - target_pos) ** 2 - pos_loss = (pos_mse / pos_distance).mean() - - # Rotation loss: standard MSE - rot_mse = (pred_rot - target_rot) ** 2 - rot_loss = rot_mse.mean() - - # Combine with weights - total_loss = (self.pos_weight * pos_loss + self.rot_weight * rot_loss) / (self.pos_weight + self.rot_weight) - return total_loss - - -class LogarithmicMSELoss(nn.Module): - """ - Inverse-based MSE loss that penalizes smaller errors more heavily. - - Instead of: L = MSE(pred, target) - We compute: L = MSE / (1 + MSE) - - This amplifies the importance of small errors in the loss landscape. - As error approaches 0, gradients remain significant, encouraging precision. - - The key property: - - When MSE → 0: gradient → base (large relative impact) - - When MSE → ∞: loss → 1 (bounded, handles outliers) - - Example gradients (relative to error magnitude): - - MSE error 0.1 → loss ≈ 0.0909, gradient ≈ 0.826 - - MSE error 1.0 → loss = 0.5, gradient ≈ 0.25 - - MSE error 10.0 → loss ≈ 0.909, gradient ≈ 0.0083 - - Compare to standard MSE: - - MSE error 0.1 → loss = 0.1, gradient = 1.0 - - MSE error 1.0 → loss = 1.0, gradient = 1.0 - - MSE error 10.0 → loss = 10.0, gradient = 1.0 - - The inverse-based version increases the gradient for small errors, - making the optimizer focus more on achieving high precision. - """ - - def __init__(self, base: float = 1.0): - """ - Args: - base: Scaling factor for the MSE in numerator. - Higher values make loss increase faster with error. - (default: 1.0) - """ - super().__init__() - self.base = base - - def forward(self, pred, target): - """ - Args: - pred: Predicted values (B, output_dim) - target: Target values (B, output_dim) - - Returns: - Scalar loss value - """ - mse = torch.mean((pred - target) ** 2) - # MSE / (1 + MSE) ensures bounded loss and high penalty for small errors - inverse_loss = (self.base * mse) / (1.0 + self.base * mse) - return inverse_loss + weights_pos = 1.0 / (torch.abs(pred_pos) + self.epsilon) + pos_loss = (weights_pos * (pred_pos - target_pos) ** 2).mean() -class WeightedLogMSELoss(nn.Module): - """ - Weighted logarithmic MSE loss that emphasizes different components. - - Useful if translation and rotation errors should be weighted differently. - - For SE(3) outputs (3 translation + 6 rotation components): - - Translation: typically 0-0.1 (meters) - - Rotation: typically 0-6 (6D representation) - - You might want to weight them differently. - """ - - def __init__(self, trans_weight: float = 1.0, rot_weight: float = 1.0, base: float = 1.0): - """ - Args: - trans_weight: Weight for first 3 components (translation) - rot_weight: Weight for last 6 components (rotation) - base: Scaling factor for log compression - """ - super().__init__() - self.trans_weight = trans_weight - self.rot_weight = rot_weight - self.base = base - - def forward(self, pred, target): - """ - Args: - pred: Predicted values (B, 9) where first 3 are translation, last 6 are rotation - target: Target values (B, 9) - - Returns: - Scalar loss value - """ - # Split into translation and rotation - pred_trans = pred[:, :3] - pred_rot = pred[:, 3:] - target_trans = target[:, :3] - target_rot = target[:, 3:] - - # Compute MSE for each component - mse_trans = torch.mean((pred_trans - target_trans) ** 2) - mse_rot = torch.mean((pred_rot - target_rot) ** 2) - - # Apply weighting and logarithmic compression - weighted_mse = (self.trans_weight * mse_trans + - self.rot_weight * mse_rot) / (self.trans_weight + self.rot_weight) - - log_loss = torch.log(1.0 + self.base * weighted_mse) - return log_loss + rot_loss = torch.mean((pred_rot - target_rot) ** 2) - -class SmoothL1LogLoss(nn.Module): - """ - Hybrid loss combining Smooth L1 (Huber) with logarithmic compression. - - Smooth L1 is robust to outliers and combines best of L1 and L2. - Adding log makes small errors have bigger impact. - """ - - def __init__(self, beta: float = 1.0, base: float = 1.0): - """ - Args: - beta: Threshold for Smooth L1 (default: 1.0) - base: Log compression scaling - """ - super().__init__() - self.smooth_l1 = nn.SmoothL1Loss(beta=beta) - self.base = base - - def forward(self, pred, target): - """ - Args: - pred: Predicted values - target: Target values - - Returns: - Scalar loss value - """ - smooth_l1_loss = self.smooth_l1(pred, target) - log_loss = torch.log(1.0 + self.base * smooth_l1_loss) - return log_loss + loss = pos_loss + rot_loss + return loss \ No newline at end of file diff --git a/alignit/train.py b/alignit/train.py index b8eeea4..b59e817 100644 --- a/alignit/train.py +++ b/alignit/train.py @@ -9,7 +9,7 @@ from alignit.config import TrainConfig from alignit.models.alignnet import AlignNet -from alignit.losses import InverseWeightedPositionLoss +from alignit.losses import InversePredictionWeightedLoss def collate_fn(batch): @@ -48,7 +48,7 @@ def main(cfg: TrainConfig): ) optimizer = Adam(net.parameters(), lr=cfg.learning_rate) - criterion = InverseWeightedPositionLoss(pos_weight=1.0, rot_weight=1.0, epsilon=0.01) + criterion = InversePredictionWeightedLoss(epsilon=0.01) net.train() for epoch in range(cfg.epochs): From 012b2269098c455e868b702407424834a8f384ce Mon Sep 17 00:00:00 2001 From: stojicnnn Date: Fri, 6 Mar 2026 16:49:43 +0100 Subject: [PATCH 09/11] Sim working fine, issues with real robot, compared values, in sim the robot converges and minimizes the error while in the real robot, it does not --- alignit/config.py | 10 +++++----- alignit/infere.py | 40 ++++++++++++++++++++++++++++------------ alignit/robots/xarm.py | 2 +- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/alignit/config.py b/alignit/config.py index c564460..d0de32b 100644 --- a/alignit/config.py +++ b/alignit/config.py @@ -151,24 +151,24 @@ class InferConfig: metadata={"help": "Starting pose RPY angles"}, ) lin_tolerance: float = field( - default=5e-3, metadata={"help": "Linear tolerance for convergence (meters)"} + default=1e-3, metadata={"help": "Linear tolerance for convergence (meters)"} ) ang_tolerance: float = field( - default=5, metadata={"help": "Angular tolerance for convergence (degrees)"} + default=2, metadata={"help": "Angular tolerance for convergence (degrees)"} ) max_iterations: Optional[int] = field( - default=20, + default=5, metadata={"help": "Maximum iterations before stopping (None = infinite)"}, ) debug_output: bool = field( default=True, metadata={"help": "Print debug information during inference"} ) debouncing_count: int = field( - default=20, + default=5, metadata={"help": "Number of iterations within tolerance before stopping"}, ) rotation_matrix_multiplier: int = field( - default=2.0, + default=3, metadata={ "help": "Number of times to multiply the rotation matrix of relative action in order to speed up convergence" }, diff --git a/alignit/infere.py b/alignit/infere.py index 369ed06..a317d67 100644 --- a/alignit/infere.py +++ b/alignit/infere.py @@ -56,12 +56,8 @@ def main(cfg: InferConfig): # 1. Randomize Start Pose start_pose = t3d.affines.compose( - [np.random.uniform(0.15, 0.30), - np.random.uniform(-0.15, 0.15), - np.random.uniform(0.20, 0.35)], - t3d.euler.euler2mat(np.pi + np.random.uniform(-0.3, 0.3), - np.random.uniform(-0.3, 0.3), - np.random.uniform(-np.pi, np.pi)), + [0.225, 0.0, 0.275], + t3d.euler.euler2mat(np.pi, 0.0, 0.0), [1, 1, 1] ) robot.servo_to_pose(start_pose, lin_tol=1e-2, ang_tol=0.1) @@ -73,6 +69,7 @@ def main(cfg: InferConfig): try: while True: # 2. Get Observation and Preprocess + observation = robot.get_observation() rgb_np = observation["rgb"].astype(np.float32) / 255.0 @@ -90,13 +87,31 @@ def main(cfg: InferConfig): # 3. Model Inference with torch.no_grad(): - relative_action = net(rgb_images_batch) + # Capture the raw tensor from the network + raw_model_output = net(rgb_images_batch) + + # Convert to numpy and transform to 4x4 SE3 matrix + # Note: 'raw_model_output' is what we just got from the net + relative_action_np = raw_model_output.squeeze(0).cpu().numpy() + relative_action = sixd_se3(relative_action_np) + + # --- DEBUG PRINTING --- + # 1. Handle the rotation (with NumPy 2.0 safety) + rot_mat = np.array(relative_action[:3, :3], dtype=np.float64) + try: + euler_rad = t3d.euler.mat2euler(rot_mat) + euler_deg = np.degrees(euler_rad) + except Exception: + euler_deg = [0.0, 0.0, 0.0] + + # 2. Handle the translation + trans = relative_action[:3, 3] - relative_action = relative_action.squeeze(0).cpu().numpy() - relative_action = sixd_se3(relative_action) + print(f"\n--- Model Prediction (Relative to EE) ---") + print(f"Translation (m): X: {trans[0]:.4f}, Y: {trans[1]:.4f}, Z: {trans[2]:.4f}") + print(f"Rotation (deg): R: {euler_deg[0]:.2f}, P: {euler_deg[1]:.2f}, Y: {euler_deg[2]:.2f}") + print(f"-----------------------------------------\n") - # 4. Calculate error magnitude from ORIGINAL unscaled action - # This represents the actual residual error from the network error_magnitude = np.linalg.norm(relative_action[:3, 3]) # 5. Check alignment based on ORIGINAL unscaled action @@ -113,7 +128,7 @@ def main(cfg: InferConfig): # 6. Scale action for robot movement (separate from convergence check) # Error-magnitude based scaling: bigger errors → bigger movements, small errors → small movements - min_scale = 1.0 + min_scale = 0.0 error_scale = max(error_magnitude, min_scale) scaled_action = relative_action.copy() @@ -127,6 +142,7 @@ def main(cfg: InferConfig): target_pose = current_pose @ scaled_action iteration += 1 + input("Hold ENTER to move robot (release to continue)...") robot.send_action({"pose": target_pose, "gripper.pos": 1.0}) # 8. Exit Conditions diff --git a/alignit/robots/xarm.py b/alignit/robots/xarm.py index 80f078c..ee9125f 100644 --- a/alignit/robots/xarm.py +++ b/alignit/robots/xarm.py @@ -107,7 +107,7 @@ def reset(self, cfg: RecordConfig): self.servo_to_pose(pose=final_pose) current_pose = self.pose() pose_start = current_pose @ t3d.affines.compose( - [0, 0, -0.01], t3d.euler.euler2mat(0, 0, 0), [1, 1, 1] + [0, 0, 0.01], t3d.euler.euler2mat(0, 0, 0), [1, 1, 1] ) pose_alignment_target = current_pose From b38f6f2f749e1b392d32d24c7461a17e6e7d9e74 Mon Sep 17 00:00:00 2001 From: stojicnnn Date: Fri, 13 Mar 2026 15:35:35 +0100 Subject: [PATCH 10/11] Real robot fixed, aligning but not perfect,trying out improvements next --- alignit/config.py | 4 ++-- alignit/infere.py | 33 ++++----------------------------- alignit/record.py | 4 ++-- alignit/robots/xarm.py | 2 +- 4 files changed, 9 insertions(+), 34 deletions(-) diff --git a/alignit/config.py b/alignit/config.py index d0de32b..84913d3 100644 --- a/alignit/config.py +++ b/alignit/config.py @@ -151,10 +151,10 @@ class InferConfig: metadata={"help": "Starting pose RPY angles"}, ) lin_tolerance: float = field( - default=1e-3, metadata={"help": "Linear tolerance for convergence (meters)"} + default=2e-3, metadata={"help": "Linear tolerance for convergence (meters)"} ) ang_tolerance: float = field( - default=2, metadata={"help": "Angular tolerance for convergence (degrees)"} + default=4, metadata={"help": "Angular tolerance for convergence (degrees)"} ) max_iterations: Optional[int] = field( default=5, diff --git a/alignit/infere.py b/alignit/infere.py index a317d67..15773c7 100644 --- a/alignit/infere.py +++ b/alignit/infere.py @@ -22,7 +22,6 @@ def main(cfg: InferConfig): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"Using device: {device}") - # Initialize Model net = AlignNet( backbone_name=cfg.model.backbone, backbone_weights=cfg.model.backbone_weights, @@ -37,14 +36,12 @@ def main(cfg: InferConfig): net.to(device) net.eval() - # Initialize Robot - robot = XarmSim() + robot = Xarm() num_alignments = getattr(cfg, 'num_alignments', 5) ang_tol_rad = np.deg2rad(cfg.ang_tolerance) alignment_results = [] - # Safety limit: Total attempts allowed before declaring a trial "Failed" MAX_TOTAL_STEPS = 1000 print(f"\nRunning {num_alignments} alignment trials...\n") @@ -54,7 +51,6 @@ def main(cfg: InferConfig): print(f"Alignment Trial {alignment_trial + 1}/{num_alignments}") print(f"{'='*60}") - # 1. Randomize Start Pose start_pose = t3d.affines.compose( [0.225, 0.0, 0.275], t3d.euler.euler2mat(np.pi, 0.0, 0.0), @@ -68,7 +64,6 @@ def main(cfg: InferConfig): try: while True: - # 2. Get Observation and Preprocess observation = robot.get_observation() rgb_np = observation["rgb"].astype(np.float32) / 255.0 @@ -85,18 +80,12 @@ def main(cfg: InferConfig): .to(device) ) - # 3. Model Inference with torch.no_grad(): - # Capture the raw tensor from the network raw_model_output = net(rgb_images_batch) - # Convert to numpy and transform to 4x4 SE3 matrix - # Note: 'raw_model_output' is what we just got from the net relative_action_np = raw_model_output.squeeze(0).cpu().numpy() relative_action = sixd_se3(relative_action_np) - # --- DEBUG PRINTING --- - # 1. Handle the rotation (with NumPy 2.0 safety) rot_mat = np.array(relative_action[:3, :3], dtype=np.float64) try: euler_rad = t3d.euler.mat2euler(rot_mat) @@ -104,7 +93,6 @@ def main(cfg: InferConfig): except Exception: euler_deg = [0.0, 0.0, 0.0] - # 2. Handle the translation trans = relative_action[:3, 3] print(f"\n--- Model Prediction (Relative to EE) ---") @@ -114,30 +102,24 @@ def main(cfg: InferConfig): error_magnitude = np.linalg.norm(relative_action[:3, 3]) - # 5. Check alignment based on ORIGINAL unscaled action - # This is the true convergence check - not affected by scaling if are_tfs_close( relative_action, lin_tol=cfg.lin_tolerance, ang_tol=ang_tol_rad ): iterations_within_tolerance += 1 print(f"Step {iteration}: Within Tol ({iterations_within_tolerance}/{cfg.debouncing_count}) [error: {error_magnitude:.6f}]") else: - # Reset if we move outside the tolerance zone iterations_within_tolerance = 0 print(f"Step {iteration}: Adjusting... [error: {error_magnitude:.6f}]") - # 6. Scale action for robot movement (separate from convergence check) - # Error-magnitude based scaling: bigger errors → bigger movements, small errors → small movements min_scale = 0.0 error_scale = max(error_magnitude, min_scale) scaled_action = relative_action.copy() - scaled_action[:3, 3] *= error_scale # Scale translation + scaled_action[:3, 3] *= error_scale scaled_action[:3, :3] = np.linalg.matrix_power( scaled_action[:3, :3], int(cfg.rotation_matrix_multiplier) - ) # Apply rotation power scaling + ) - # 7. Move Robot current_pose = robot.pose() target_pose = current_pose @ scaled_action iteration += 1 @@ -145,13 +127,10 @@ def main(cfg: InferConfig): input("Hold ENTER to move robot (release to continue)...") robot.send_action({"pose": target_pose, "gripper.pos": 1.0}) - # 8. Exit Conditions - # SUCCESS: Remained close for the required number of iterations if iterations_within_tolerance >= cfg.max_iterations: print(f"✓ Converged after {iteration} total steps.") - # Finalize: Move to height offset and close gripper gripper_z_offset = np.array([ [1, 0, 0, 0], [0, 1, 0, 0], @@ -159,8 +138,6 @@ def main(cfg: InferConfig): [0, 0, 0, 1], ]) robot.servo_to_pose(pose=robot.pose() @ gripper_z_offset) - #robot.close_gripper() - #robot.gripper_off() alignment_results.append({ "trial": alignment_trial + 1, @@ -169,7 +146,6 @@ def main(cfg: InferConfig): }) break - # FAILURE: Safety timeout reached if iteration >= MAX_TOTAL_STEPS: print(f"✗ Failed: Timeout reached ({MAX_TOTAL_STEPS} steps).") alignment_results.append({ @@ -183,7 +159,6 @@ def main(cfg: InferConfig): print("\nTrial interrupted by user.") break - # Summary Statistics print(f"\n{'='*60}") print(f"INFERENCE SUMMARY") print(f"{'='*60}") @@ -195,4 +170,4 @@ def main(cfg: InferConfig): robot.disconnect() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/alignit/record.py b/alignit/record.py index d1ae283..f14bf38 100644 --- a/alignit/record.py +++ b/alignit/record.py @@ -81,7 +81,7 @@ def generate_spiral_trajectory(start_pose, cfg): @draccus.wrap() def main(cfg: RecordConfig): """Record alignment dataset using configuration parameters.""" - robot = XarmSim() + robot = Xarm() features = Features( { @@ -91,7 +91,7 @@ def main(cfg: RecordConfig): ) for episode in range(cfg.episodes): - pose_start, pose_alignment_target = robot.reset() + pose_start, pose_alignment_target = robot.reset(cfg) trajectory = generate_spiral_trajectory(pose_start, cfg.trajectory) frames = [] for pose in trajectory: diff --git a/alignit/robots/xarm.py b/alignit/robots/xarm.py index ee9125f..80f078c 100644 --- a/alignit/robots/xarm.py +++ b/alignit/robots/xarm.py @@ -107,7 +107,7 @@ def reset(self, cfg: RecordConfig): self.servo_to_pose(pose=final_pose) current_pose = self.pose() pose_start = current_pose @ t3d.affines.compose( - [0, 0, 0.01], t3d.euler.euler2mat(0, 0, 0), [1, 1, 1] + [0, 0, -0.01], t3d.euler.euler2mat(0, 0, 0), [1, 1, 1] ) pose_alignment_target = current_pose From b2b840001f55895e99c846384f2d285944d0a367 Mon Sep 17 00:00:00 2001 From: stojicnnn Date: Sat, 14 Mar 2026 12:46:21 +0100 Subject: [PATCH 11/11] Add translation multiplier to InferConfig and update scaling in inference --- alignit/config.py | 4 ++++ alignit/infere.py | 7 +++---- alignit/train.py | 4 +++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/alignit/config.py b/alignit/config.py index 84913d3..f6666ce 100644 --- a/alignit/config.py +++ b/alignit/config.py @@ -173,6 +173,10 @@ class InferConfig: "help": "Number of times to multiply the rotation matrix of relative action in order to speed up convergence" }, ) + translation_multiplier: float = field( + default=1.0, + metadata={"help": "Multiplier for the translation relative action to adjust convergence speed"} + ) manual_height: float = field( default=0.08, metadata={"help": "Height above surface for manual movement"} ) diff --git a/alignit/infere.py b/alignit/infere.py index 15773c7..ff2f49a 100644 --- a/alignit/infere.py +++ b/alignit/infere.py @@ -10,6 +10,7 @@ from alignit.utils.tfs import print_pose, are_tfs_close from alignit.robots.xarmsim import XarmSim + Xarm = None try: from alignit.robots.xarm import Xarm @@ -111,11 +112,9 @@ def main(cfg: InferConfig): iterations_within_tolerance = 0 print(f"Step {iteration}: Adjusting... [error: {error_magnitude:.6f}]") - min_scale = 0.0 - error_scale = max(error_magnitude, min_scale) - scaled_action = relative_action.copy() - scaled_action[:3, 3] *= error_scale + translation_mult = getattr(cfg, 'translation_multiplier', 1.0) + scaled_action[:3, 3] *= translation_mult scaled_action[:3, :3] = np.linalg.matrix_power( scaled_action[:3, :3], int(cfg.rotation_matrix_multiplier) ) diff --git a/alignit/train.py b/alignit/train.py index b59e817..a2197ba 100644 --- a/alignit/train.py +++ b/alignit/train.py @@ -10,6 +10,8 @@ from alignit.config import TrainConfig from alignit.models.alignnet import AlignNet from alignit.losses import InversePredictionWeightedLoss +#import mse loss for comparison +from torch.nn import MSELoss def collate_fn(batch): @@ -48,7 +50,7 @@ def main(cfg: TrainConfig): ) optimizer = Adam(net.parameters(), lr=cfg.learning_rate) - criterion = InversePredictionWeightedLoss(epsilon=0.01) + criterion = InversePredictionWeightedLoss(epsilon=0.01) net.train() for epoch in range(cfg.epochs):