From 7cc0f7aa01a900cbec8a86f5085d2de77d3b0afc Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 10:50:13 -0500 Subject: [PATCH 01/21] Initial issue commit. --- scan-batch-dir | 1119 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 874 insertions(+), 245 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index 0b70f63..5a9fa25 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -1,4 +1,5 @@ -#!/usr/bin/python3 +#!/usr/bin/python3.12 + import yaml import argparse import os @@ -11,10 +12,14 @@ import logging import openpyxl import csv import io +import re +import requests import pandas as pd from google.oauth2 import service_account from googleapiclient.discovery import build +pd.set_option('future.no_silent_downcasting', True) + # Setup the log file format. log_formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(levelname)s %(message)s',datefmt="%Y%m%d %H:%M:%S") @@ -172,7 +177,7 @@ def read_google_sheet(spreadsheet_id: str, sheet_name="Sheet1", credentials_file # Create DataFrame df = pd.DataFrame(padded_rows, columns=headers) - + return df except Exception as e: @@ -234,7 +239,7 @@ def update_dataframe(df: pd.DataFrame, match_column: str, match_value: str, upda Args: df (pandas.DataFrame): The DataFrame containing updated data. - match_column (str): + match_column (str): match_value (str): update_dict (dict): @@ -268,7 +273,7 @@ def update_dataframe(df: pd.DataFrame, match_column: str, match_value: str, upda if column in df_copy.columns: df_copy.loc[mask, column] = value else: - logger.warning(f"update_dataframe: Column:{column} does not exist - unable to update with value: {value}.") + logger.warning(f"update_dataframe: Column:{column} does not exist - unable to update with value: {value}.") print(f"update_dataframe: Column:{column} does not exist - unable to update with value: {value}.") # Clean Dataframe of NaN values. 
@@ -278,18 +283,18 @@ def update_dataframe(df: pd.DataFrame, match_column: str, match_value: str, upda except Exception as e: return df, False, f"update_dataframe - An error occurred: {str(e)}" - + def add_row_to_dataframe(df, row_data, key_column): """ Add a new row to a pandas DataFrame, ensuring the key column value is unique. - + Parameters: df (pandas.DataFrame): The input DataFrame row_data (dict): Dictionary containing the new row data with column names as keys key_column (str): The column name to check for uniqueness - + Returns: pandas.DataFrame: DataFrame with the new row appended if key is unique, original DataFrame otherwise """ @@ -302,26 +307,26 @@ def add_row_to_dataframe(df, row_data, key_column): # Check if key_column exists in DataFrame if key_column not in df.columns: raise ValueError(f"add_row_to_dataframe - Key column '{key_column}' not found in DataFrame") - + # Check if key_column exists in row_data if key_column not in row_data: raise ValueError(f"add_row_to_dataframe - Key column '{key_column}' not found in row_data") - + # Check if the key value already exists if row_data[key_column] in df[key_column].values: #print(f"Warning: Key value '{row_data[key_column]}' already exists in column '{key_column}'. Row not added.") return df, False, "add_row_to_dataframe - Row with Key column {key_column} already exists." - + # Convert row_data to DataFrame new_row = pd.DataFrame([row_data]) - + # Append the new row to DataFrame df_ret = pd.concat([df, new_row], ignore_index=True) # Remove any NaN values. 
df = df_ret.fillna('') - return df, True, f"add_row_to_dataframe - Successfully added dataframe row" + return df, True, f"add_row_to_dataframe - Successfully added dataframe row" except Exception as e: return df, False, f"add_row_to_dataframe - Failed to add dataframe row: {str(e)}" @@ -355,10 +360,10 @@ def value_exists_in_column(df, column_name, value): def scan_directory(directory): """ Recursively scan a directory and return a list of files and subdirectories. - + Args: directory (str): Path to the directory to scan. - + Returns: list: List of tuples containing file/subdirectory name, type, and path. """ @@ -381,7 +386,7 @@ def convert_tiff_to_jp2(tif_file_path, jp2_file_path): tif_file_path (str): A full path the TIFF file. jp2_file_path (str): A full path to the resulting JP2 file. Returns: - None + None """ args = [ "/usr/bin/gm", @@ -395,12 +400,12 @@ def convert_tiff_to_jp2(tif_file_path, jp2_file_path): "-define", "jp2:rate=1.0", "-define", - "jp2:lazy", + "jp2:lazy", "-define", "jp2:prg=rlcp", "-define", "jp2:mode=int", - "-define", + "-define", "jp2:ilyrrates='0.015625,0.01858,0.0221,0.025,0.03125,0.03716,0.04419,0.05,0.0625,0.075,0.088,0.1,0.125,0.15,0.18,0.21,0.25,0.3,0.35,0.4,0.5,0.6,0.7,0.84'", tif_file_path, jp2_file_path @@ -411,7 +416,8 @@ def convert_tiff_to_jp2(tif_file_path, jp2_file_path): if (result.returncode == 0): logger.info(f"Successfully converted TIFF to JP2: {tif_file_path}") except Exception as e: - raise Exception(f"Failed to convert TIFF to JP2: {str(e)}") + logger.error(f"Failed to convert TIFF to JP2: {str(e)}") + #raise Exception(f"Failed to convert TIFF to JP2: {str(e)}") def is_valid_image(image_path): """ @@ -510,12 +516,13 @@ def parse_arguments(): parser.add_argument('--config-file', dest="config_file", required=True, help='Path to the YAML configuration file.') parser.add_argument('--log-file', dest="log_file", required=True, help='Path to the log file.') parser.add_argument('--directory', dest="directory", required=True, 
help='Path to the directory to scan.') + parser.add_argument('--model', dest="model", required=True, help='The Islandora Model associated with the batch.') # Optional arguments parser.add_argument('--in-google-sheet-id', dest="in_gs_id", help='Google Sheet ID related to the directory.') parser.add_argument('--in-google-sheet-name', dest="in_gs_name", help='Google Sheet Tab Name.') parser.add_argument('--in-google-creds-file', dest="in_gs_creds", help='Google Credentials json file.') - + # Parse Arguments args = parser.parse_args() @@ -524,7 +531,7 @@ def parse_arguments(): def process_tiff(file_path:str): """ Processes a TIFF (.tif) file and converts it to a JP2 (.jp2) file. - + Args: file_path (str) The path to incoming TIFF file. @@ -543,9 +550,13 @@ def process_tiff(file_path:str): out_pid = f"{parent}-{pid}" logger.info(f"Tiff: Creating JP2: {jp2_path}") - print(f"Tiff: Creating JP2: {jp2_path} from {file_path}") if not os.path.exists(jp2_path): + logger.info("Creating JP2: In: {file_path} Out: {jp2_path}") + print(f" Creating JP2:\n In: {file_path}\n Out: {jp2_path}") convert_tiff_to_jp2(file_path, jp2_path) + else: + logger.warning("JP2 Already Exists: {jp2_path}") + print(f" JP2 Already Exists: {jp2_path}") return out_pid,jp2_path @@ -590,7 +601,7 @@ def process_transcript(file_path:str): def process_thumbnail(file_path:str): """ Determine the pid and path to the thumbnail file. - + Args: file_path (str) The path to the incoming thumbnail. @@ -622,7 +633,7 @@ def process_unknown(file_path:str): pid,parent,file,dir,ext = get_file_info(file_path) out_pid = f"{parent}-{pid}" unknown_path = f"{dir}/{pid}{ext}" - + return out_pid,unknown_path @@ -650,7 +661,31 @@ def get_file_info(file_path: str): return(pid,parent,file,dir,ext) -def add_update_dataframe(df: pd.DataFrame, pid: str, row_data, file_name: str): +def dump_df_columns(df: pd.DataFrame, columns: list): + """ + Prints a DataFrame containing only the specified columns. 
+ + Parameters: + df (pd.DataFrame): The original DataFrame. + columns (list): List of column names to include. + + Raises: + ValueError: If any requested column does not exist in the DataFrame. + """ + # Validate columns + missing_cols = [col for col in columns if col not in df.columns] + if missing_cols: + logger.error(f"The following columns do not exist in the DataFrame: {missing_cols}") + #raise ValueError(f"The following columns do not exist in the DataFrame: {missing_cols}") + + # Prints the DataFrame with selected columns + with pd.option_context( + 'display.max_rows', None, + ): + print(f"Dataframe:\n{df[columns]}") + + +def add_update_dataframe(df: pd.DataFrame, pid: str, row_data): """ Depending upon if the pid exists in the 'id' column of the dataframe either add or update the row to the dataframe. @@ -659,13 +694,12 @@ def add_update_dataframe(df: pd.DataFrame, pid: str, row_data, file_name: str): df (DataFrame) The Pandas Dataframe to work with. pid (str) The PID to find in the DataFrame 'id' column. row_data (dict) The row data to add/update the row with. - file_name (str) The file_name of the file we are working with. Returns: df (DataFrame) The updated DataFrame. 
""" if (value_exists_in_column(df,'id',pid)): - logger.info(f"Updating Dataframe: {pid},{file_name}") + logger.info(f"Updating Dataframe: {pid}") updated_df,success,msg = update_dataframe(df, 'id', pid, row_data) if not (success): logger.warning(f"Update DataFrame: Failed for PID: {pid} - {msg}") @@ -675,7 +709,7 @@ def add_update_dataframe(df: pd.DataFrame, pid: str, row_data, file_name: str): logger.info(f"Updating DataFrame: Success for PID: {pid}") return updated_df else: - logger.info(f"Adding to Dataframe: {pid},{file_name}") + logger.info(f"Adding to Dataframe: {pid}") updated_df,success,msg = add_row_to_dataframe(df,row_data,key_column='id') if not (success): logger.warning(f"Adding to DataFrame: Failed for PID: {pid} - {msg}") @@ -686,245 +720,635 @@ def add_update_dataframe(df: pd.DataFrame, pid: str, row_data, file_name: str): return updated_df -def process_object(file_type: str, file_path: str, parent: str, df: pd.DataFrame): +#def process_object(file_type: str, file_path: str, parent: str, df: pd.DataFrame): +# """ +# Process the incoming file object. +# +# Args: +# file_type (str) The type of object ('File','Directory') +# file_path (str) The file_path to the object. +# parent (str) The parent directory of the object. +# df (pd.DataFrame) The Pandas DataFrame we will be updating. +# +# Returns: +# df (pd.DataFrame) The Updated Pandas DataFrame. +# """ +# #logger.info(f"Processing an object.") +# # object files are kept within a folder of the top level folder. +# # files are .tif files which need to be converted. +# +# if not (is_valid_dataframe(df)): +# logger.warning("process_object: Incoming dataframe is invalid.") +# print(f"process_object: Incoming dataframe is invalid.") +# print(f"Invalid DataFrame: {df}") +# +# # Process File... 
+# if (file_type == "File" ) and ( parent != "" ): +# #print(f"Processing File") +# ignore_file_list = ["target.tif","manifest.csv","manifest.xlsx","manifest.xls"] +# +# dir,ext = os.path.splitext(file_path) +# file_name = os.path.basename(dir) +# pid = file_name +# +# if ( ext == ".tif" ): +# logger.info(f"Processing TIFF: {file_path}") +# print(f"Processing TIFF: {file_path}") +# outpid,outfile = process_tiff(file_path) +# if (is_valid_image(file_path) and is_valid_filesize(134,file_path) and is_valid_image(outfile) and is_valid_filesize(134,outfile)): +# row_data = {'id': outpid, 'file': outfile} +# updated_df = add_update_dataframe(df,outpid,row_data) +# return updated_df +# +# elif ( ext == ".mp3" ): +# logger.info(f"Processing MP3: {file_path}") +# print(f"Procesing MP3: {file_path}") +# outpid,outfile = process_mp3(file_path) +# row_data = {'id': outpid, 'file': outfile} +# updated_df = add_update_dataframe(df,outpid,row_data) +# return updated_df +# +# elif (( ext == ".vtt" ) or ( ext == ".srt" )): +# logger.info(f"Processing WebVTT/SRT: {file_path}") +# print(f"Processing WebVTT/SRT: {file_path}") +# outpid,outfile = process_transcript(file_path) +# row_data = {'id': outpid, 'transcript': outfile} +# updated_df = add_update_dataframe(df,outpid,row_data) +# return updated_df +# +# elif (( ext == ".jpg" ) or ( ext == ".png" )): +# logger.info(f"Processing Thumbnail: {file_path}") +# print(f"Processing Thumbnail: {file_path}") +# outpid,outfile = process_thumbnail(file_path) +# row_data = {'id': outpid, 'thumbnail': outfile} +# updated_df = add_update_dataframe(df,outpid,row_data) +# return updated_df +# +# else: +# logger.info(f"Processing Unknown: {file_path}") +# print(f"Unknown File extension: {ext} - Skipping.") +# outpid,outfile = process_unknown(file_path) +# row_data = {'id': outpid, 'file': outfile} +# updated_df = add_update_dataframe(df,outpid,row_data) +# return updated_df +# +# +# # Process Top Level file... 
+# elif (file_type == "File") and (parent == ""): +# # Top Level file. +# #print(f"Processing Top-Level File") +# ignore_file_list = ["ignore"] +# if file_path.casefold() in ignore_file_list: +# logger.info(f"Ingoring: {file_path}") +# else: +# logger.info(f"Processing Top Level file: {file_path}") +# dir,ext = os.path.splitext(file_path) +# pid = os.path.basename(dir) +# +# if ( ext == ".tif" ): +# # Top Level file is a .tif file. +# logger.info(f"Processing Top Level TIFF: {file_path}") +# print(f"Processing Top Level TIFF: {file_path}") +# outpid,outfile = process_tiff(file_path) +# row_data = {'id': pid, 'file': outfile} +# updated_df = add_update_dataframe(df,pid,row_data) +# return updated_df +# +# elif ( ext == ".mp3" ): +# logger.info(f"Processing MP3: {file_path}") +# print(f"Procesing MP3: {file_path}") +# outpid,outfile = process_mp3(file_path) +# row_data = {'id': pid, 'file': outfile} +# updated_df = add_update_dataframe(df,pid,row_data) +# return updated_df +# +# elif (( ext == ".vtt" ) or ( ext == ".srt" )): +# logger.info(f"Processing WebVTT/SRT: {file_path}") +# print(f"Processing WebVTT/SRT: {file_path}") +# outpid,outfile = process_transcript(file_path) +# row_data = {'id': pid, 'transcript': outfile} +# updated_df = add_update_dataframe(df,pid,row_data) +# return updated_df +# +# elif (( ext == ".jpg" ) or ( ext == ".png" )): +# logger.info(f"Processing Thumbnail: {file_path}") +# print(f"Processing Thumbnail: {file_path}") +# outpid,outfile = process_thumbnail(file_path) +# row_data = {'id': pid, 'thumbnail': outfile} +# updated_df = add_update_dataframe(df,pid,row_data) +# return updated_df +# +# else: +# logger.info(f"Processing Unknown: {file_path}") +# print(f"Unknown File extension: {ext} - Skipping.") +# outpid,outfile = process_unknown(file_path) +# row_data = {'id': pid, 'file': outfile} +# updated_df = add_update_dataframe(df,pid,row_data) +# return updated_df +# +# # Process Directory... 
+# elif (file_type == "Directory"): +# #print(f"Processing Directory") +# ignore_dir_list = ["ignore"] +# if file_path.casefold() in ignore_dir_list: +# logger.info(f"Ignoreing: {file_path}") +# else: +# # Continue +# logger.info(f"Processing Directory: {file_path}") +# dir,ext = os.path.splitext(file_path) +# pid = os.path.basename(dir) +# +# # Check if pid in Google Sheet. +# if (value_exists_in_column(df,'id',pid)): +# # Update Existing info. +# print(f"Found: {pid} in Google Sheet") +# logger.info(f"Found: {pid} in Google Sheet") +# +# # Set Row Data +# row_data = {'id': pid} +# +# # Update the dataframe. +# logger.info(f"Updating DataFrame: {pid}") +# #updated_df,success,msg = update_dataframe(df, 'id', pid, row_data) +# df,success,msg = update_dataframe(df, 'id', pid, row_data) +# +# if not (success): +# logger.warning(f"Update DataFrame: Failed for PID: {pid} - {msg}") +# print(f"Update DataFrame: Failed for PID: {pid} - {msg}") +# else: +# #return updated_df +# return df +# +# else: +# # Add new info. +# print(f"Not Found: Adding {pid} to DataFrame") +# logger.info(f"Not Found: Adding {pid} to DataFrame") +# +# # Set Row Data. +# row_data = {'id': pid} +# +# # Update the dataframe. +# logger.info(f"Updating DataFrame: {pid}") +# print(f"Adding: {row_data}") +# #updated_df,success,msg = add_row_to_dataframe(df,row_data,key_column='id') +# df,success,msg = add_row_to_dataframe(df,row_data,'id') +# +# if not (success): +# logger.warning(f"Add to DataFrame: failed for PID: {pid} - {msg}") +# print(f"Add to DataFrame: Failed for PID: {pid} - {msg}") +# else: +# return df +# +# else: +# print(f"Unknown object.") +# exit() + + +def get_value_from_df(df: pd.DataFrame,match_column: str,match_value,return_column:str): """ - Process the incoming file object. + Given a pandas DataFrame, find the row where match_column == match_value + and return the value from return_column. 
- Args: - file_type (str) The type of object ('File','Directory') - file_path (str) The file_path to the object. - parent (str) The parent directory of the object. - df (pd.DataFrame) The Pandas DataFrame we will be updating. + Parameters: + df (pd.DataFrame): The DataFrame to search. + match_column (str): Column name to match on. + match_value: Value to look for in match_column. + return_column (str): Column name whose value should be returned. Returns: - df (pd.DataFrame) The Updated Pandas DataFrame. - """ - #logger.info(f"Processing an object.") - # object files are kept within a folder of the top level folder. - # files are .tif files which need to be converted. - - if not (is_valid_dataframe(df)): - logger.warning("process_object: Incoming dataframe is invalid.") - print(f"process_object: Incoming dataframe is invalid.") - print(f"Invalid DataFrame: {df}") - - # Process File... - if (file_type == "File" ) and ( parent != "" ): - #print(f"Processing File") - ignore_file_list = ["target.tif","manifest.csv","manifest.xlsx","manifest.xls"] - - dir,ext = os.path.splitext(file_path) - file_name = os.path.basename(dir) - pid = file_name - - if ( ext == ".tif" ): - logger.info(f"Processing TIFF: {file_path}") - print(f"Processing TIFF: {file_path}") - outpid,outfile = process_tiff(file_path) - if (is_valid_image(file_path) and is_valid_filesize(134,file_path) and is_valid_image(outfile) and is_valid_filesize(134,outfile)): - row_data = {'id': outpid, 'file': outfile} - updated_df = add_update_dataframe(df,outpid,row_data,outfile) - return updated_df - - elif ( ext == ".mp3" ): - logger.info(f"Processing MP3: {file_path}") - print(f"Procesing MP3: {file_path}") - outpid,outfile = process_mp3(file_path) - row_data = {'id': outpid, 'file': outfile} - updated_df = add_update_dataframe(df,outpid,row_data,outfile) - return updated_df + The value from return_column if found, else None. 
+ """ - elif (( ext == ".vtt" ) or ( ext == ".srt" )): - logger.info(f"Processing WebVTT/SRT: {file_path}") - print(f"Processing WebVTT/SRT: {file_path}") - outpid,outfile = process_transcript(file_path) - row_data = {'id': outpid, 'transcript': outfile} - updated_df = add_update_dataframe(df,outpid,row_data,outfile) - return updated_df + # Validate columns + if match_column not in df.columns: + #raise ValueError(f"Column '{match_column}' does not exist in the DataFrame.") + logger.warning(f"Column '{match_column}' does not exist in the DataFrame.") + return None + if return_column not in df.columns: + #raise ValueError(f"Column '{return_column}' does not exist in the DataFrame.") + logger.warning(f"Column '{return_column}' does not exist in the DataFrame.") + return None - elif (( ext == ".jpg" ) or ( ext == ".png" )): - logger.info(f"Processing Thumbnail: {file_path}") - print(f"Processing Thumbnail: {file_path}") - outpid,outfile = process_thumbnail(file_path) - row_data = {'id': outpid, 'thumbnail': outfile} - updated_df = add_update_dataframe(df,outpid,row_data,outfile) - return updated_df + # Filter the DataFrame + filtered = df[df[match_column] == match_value] - else: - logger.info(f"Processing Unknown: {file_path}") - print(f"Unknown File extension: {ext} - Skipping.") - outpid,outfile = process_unknown(file_path) - row_data = {'id': outpid, 'file': outfile} - updated_df = add_update_dataframe(df,outpid,row_data,outfile) - return updated_df + if not filtered.empty: + return filtered.iloc[0][return_column] # Return first match + else: + return None + + +def get_taxonomy_tid(base_url, vocabulary, term_name, auth_token=None): + """ + Fetch taxonomy term ID (tid) from Drupal JSON:API given term name. + + Parameters: + base_url (str): Base URL of the Drupal site (e.g., 'https://example.com'). + vocabulary (str): Machine name of the vocabulary (e.g., 'tags'). + term_name (str): Name of the taxonomy term to search. 
+ auth_token (str): Optional Bearer token for authentication. + + Returns: + str or None: The taxonomy term ID if found, else None. + """ + # JSON:API endpoint for taxonomy terms + url = f"{base_url}/jsonapi/taxonomy_term/{vocabulary}" + + # Filter by term name + params = { + "filter[name]": term_name + } + + headers = { + "Accept": "application/vnd.api+json" + } + + if auth_token: + headers["Authorization"] = f"Bearer {auth_token}" + + try: + response = requests.get(url, headers=headers, params=params) + except Exception as e: + logger.error(f"get_taxonomy_tid - Unexpected error: {e}") + print(f"Unexpected error: {e}") - # Process Top Level file... - elif (file_type == "File") and (parent == ""): - # Top Level file. - #print(f"Processing Top-Level File") - ignore_file_list = ["ignore"] - if file_path.casefold() in ignore_file_list: - logger.info(f"Ingoring: {file_path}") + if response.status_code == 200: + data = response.json() + if data.get("data"): + return data["data"][0]["attributes"]["drupal_internal__tid"] else: - logger.info(f"Processing Top Level file: {file_path}") - dir,ext = os.path.splitext(file_path) - pid = os.path.basename(dir) - - if ( ext == ".tif" ): - # Top Level file is a .tif file. 
- logger.info(f"Processing Top Level TIFF: {file_path}") - print(f"Processing Top Level TIFF: {file_path}") - outpid,outfile = process_tiff(file_path) - row_data = {'id': pid, 'file': outfile} - updated_df = add_update_dataframe(df,pid,row_data,outfile) - return updated_df - - elif ( ext == ".mp3" ): - logger.info(f"Processing MP3: {file_path}") - print(f"Procesing MP3: {file_path}") - outpid,outfile = process_mp3(file_path) - row_data = {'id': pid, 'file': outfile} - updated_df = add_update_dataframe(df,pid,row_data,outfile) - return updated_df - - elif (( ext == ".vtt" ) or ( ext == ".srt" )): - logger.info(f"Processing WebVTT/SRT: {file_path}") - print(f"Processing WebVTT/SRT: {file_path}") - outpid,outfile = process_transcript(file_path) - row_data = {'id': pid, 'transcript': outfile} - updated_df = add_update_dataframe(df,pid,row_data,outfile) - return updated_df - - elif (( ext == ".jpg" ) or ( ext == ".png" )): - logger.info(f"Processing Thumbnail: {file_path}") - print(f"Processing Thumbnail: {file_path}") - outpid,outfile = process_thumbnail(file_path) - row_data = {'id': pid, 'thumbnail': outfile} - updated_df = add_update_dataframe(df,pid,row_data,outfile) - return updated_df + return None + else: + logger.error(f"get_taxonomy_tid - Error {response.status_code}: {response.text}") + + +def process_file(df, file_path: str, level): + logger.info(f"Function: process_file") + logger.info(f"Processing File: {file_path}") + print(f"Processing File: {file_path}") + + # Skip objects that we don't want to process. + if not any(sub in file_path for sub in skip): + file_folder = os.path.dirname(file_path) # The Full Path including Directory containing the file. + base_name = os.path.basename(file_path) # The Base filename with extension of the file give the file_path. + file_name,file_ext = os.path.splitext(base_name)# The Separated Filename and Extension of the Base filename. 
+ + # Set the file weight if the file_name is a digit or contains "-\d{4}" + file_weight = '' + if level > 1: + if file_name.isdigit(): + file_weight = int(file_name) else: - logger.info(f"Processing Unknown: {file_path}") - print(f"Unknown File extension: {ext} - Skipping.") - outpid,outfile = process_unknown(file_path) - row_data = {'id': pid, 'file': outfile} - updated_df = add_update_dataframe(df,pid,row_data,outfile) - return updated_df - - # Process Directory... - elif (file_type == "Directory"): - #print(f"Processing Directory") - ignore_dir_list = ["ignore"] - if file_path.casefold() in ignore_dir_list: - logger.info(f"Ignoreing: {file_path}") - else: - # Continue - logger.info(f"Processing Directory: {file_path}") - dir,ext = os.path.splitext(file_path) - pid = os.path.basename(dir) - - # Check if pid in Google Sheet. - if (value_exists_in_column(df,'id',pid)): - # Update Existing info. - print(f"Found: {pid} in Google Sheet") - logger.info(f"Found: {pid} in Google Sheet") - - # Set Row Data - row_data = {'id': pid} - - # Update the dataframe. - logger.info(f"Updating DataFrame: {pid}") - #updated_df,success,msg = update_dataframe(df, 'id', pid, row_data) - df,success,msg = update_dataframe(df, 'id', pid, row_data) - - if not (success): - logger.warning(f"Update DataFrame: Failed for PID: {pid} - {msg}") - print(f"Update DataFrame: Failed for PID: {pid} - {msg}") - else: - #return updated_df - return df + pattern = r'.*-(\d{4})' + match = re.search(pattern,file_name) + if match: + file_weight = int(match.group(1)) + + # Set parent information. + parent_path = os.path.dirname(file_path) + parent_folder = os.path.basename(parent_path) + + # Get the model from the map. 
+ my_model = get_model(level) + field_model = get_taxonomy_tid('https://i2.digital.library.pitt.edu','islandora_models',my_model) + logger.info(f"File is model: {my_model}, TID: {field_model}") + + # Get the resource_type from the map + model_info = get_model_info(my_model,models) + resource_type = model_info.get('resource_type','None') + + # Process any .tif files. + if (file_ext.lower() == ".tif"): + logger.info("File is type: TIFF") + print(f" Type: TIFF") + + # Create .jp2 file. + pid,outfile = process_tiff(file_path) + + # Handle top level files. + if not level == 1: + # Not in top level folder. + pid = f"{parent_folder}-{file_name}" + else: + pid = f"{file_name}" + parent_folder = '' + + # Build Row Data. + row_data = { + 'id': pid, + 'file': outfile, + 'parent_id': parent_folder, + 'field_weight': file_weight, + 'field_model': field_model, + 'model': my_model, + 'field_resource_type': resource_type, + 'level': level, + } + + # Add a page title if it is a model of 'Page'. + # if my_model == 'Page': + # row_data.update({ + # 'title': f"{pid}-{file_name}", + # }) + + logger.info(f"Row Data: {row_data}") + + # Update Dataframe. + df = add_update_dataframe(df,pid,row_data) + + # Process any .jp2 files. + if (file_ext.lower() == ".jp2"): + print(f" Type: JP2") + + # Handle Top Level files. + if not level == 1: + pid = f"{parent_folder}-{file_name}" + else: + pid = f"{file_name}" + parent_folder = '' + + # Build Row Data. + row_data = { + 'id': pid, + 'file': outfile, + 'parent_id': parent_folder, + 'field_weight': file_weight, + 'field_model': field_model, + 'model': my_model, + 'field_resource_type': resource_type, + 'level': level, + } + + logger.info(f"Row Data: {row_data}") + + # Update Dataframe. + df = add_update_dataframe(df,pid,row_data) + + # Process any audio files. + if (file_ext.lower() == ".mp3"): + print(f" Type: Audio") + + # Handle Top Level files. 
+ if not level == 1: + pid = f"{parent_folder}-{file_name}" + else: + pid = f"{file_name}" + parent_folder = '' + + # Build Row Data. + row_data = { + 'id': pid, + 'file': file_path, + 'parent_id': parent_folder, + 'level': level, + 'field_model': field_model, + 'field_weight': file_weight, + 'model': my_model, + 'field_resource_type': resource_type, + } + + logger.info(f"Row Data: {row_data}") + + # Update Dataframe. + df = add_update_dataframe(df,pid,row_data) + + # Process any video files. + if (file_ext.lower() == ".mkv" or file_ext.lower() == ".mp4"): + print(f" Type: Video") + + # Handle Top Level files. + if not level == 1: + pid = f"{parent_folder}-{file_name}" + else: + pid = f"{file_name}" + parent_folder = '' + + # Build Row Data. + row_data = { + 'id': pid, + 'file': file_path, + 'parent_id': parent_folder, + 'level': level, + 'field_model': field_model, + 'field_weight': file_weight, + 'model': my_model, + 'field_resource_type': resource_type, + } + + logger.info(f"Row Data: {row_data}") + + # Update Dataframe. + df = add_update_dataframe(df,pid,row_data) + + # Process any transcription files. + if (file_ext.lower() == ".srt" or file_ext.lower() == ".vtt"): + print(f" Type: Transcript") + + # Handle Top Level files. + if not level == 1: + pid = f"{parent_folder}-{file_name}" + else: + pid = f"{file_name}" + + # Build Row Data. + row_data = { + 'transcript': file_path, + } + logger.info(f"Row Data: {row_data}") + + # Update Dataframe. + df = add_update_dataframe(df,pid,row_data) + + # Process any PDF files. + if (file_ext.lower() == ".pdf"): + print(f" Type: PDF") + + # Handle Top Level files. + if not level == 1: + pid = f"{parent_folder}-{file_name}" else: - # Add new info. - print(f"Not Found: Adding {pid} to DataFrame") - logger.info(f"Not Found: Adding {pid} to DataFrame") - - # Set Row Data. - row_data = {'id': pid} - - # Update the dataframe. 
- logger.info(f"Updating DataFrame: {pid}") - print(f"Adding: {row_data}") - #updated_df,success,msg = add_row_to_dataframe(df,row_data,key_column='id') - df,success,msg = add_row_to_dataframe(df,row_data,'id') - - if not (success): - logger.warning(f"Add to DataFrame: failed for PID: {pid} - {msg}") - print(f"Add to DataFrame: Failed for PID: {pid} - {msg}") - else: - return df - - else: - print(f"Unknown object.") - exit() + pid = f"{file_name}" + + + # Build Row Data. + row_data = { + 'id': pid, + 'file': file_path, + } + + logger.info(f"Row Data: {row_data}") + + # Update Dataframe. + df = add_update_dataframe(df,pid,row_data) + + # Process any simple image files. + if (file_ext.lower() == ".png" or file_ext.lower() == ".jpg"): + print(f" Type: Simple Image") + + print(f"\n") + + return(df) -def process_objects(data, directory: str, df: pd.DataFrame): +def get_directory(directory): """ - Process a list of objects + Scan a directory and return a list of files and subdirectories. Args: - data (list of tupples) The result of scan_directory. - directory (str) The full directory path that we will be working with. - df (DataFrame) The Pandas DataFrame that we will be working with. + directory (str): Path to the directory to scan. Returns: - N/a. + list: List of tuples containing file/subdirectory name, type, and path. + """ + print(f"Processing directory: {directory}") + result = [] + for item in os.listdir(directory): + item_path = os.path.join(directory, item) + if os.path.isfile(item_path): + result.append((item, "File", item_path)) + elif os.path.isdir(item_path): + result.append((item, "Directory", item_path)) + result.sort(key=lambda x: (x[1] != "Directory", x[0].lower())) + return result + +def process_objects(df, directory: str, level): """ - #print(f"process_objects: Input DataFrame:") - #print(df) + Process the objects of the directory. - # Begin Processing the listing of the directory. 
- logger.info(f"Processing file data.") - print(f"Processing the directory: {directory}") + Args: + df: Pandas Dataframe + directory: Directory we are working with. + level: What level are we at directory wise. + """ + #print(f"Level: {level}, {directory}") + # Get the objects in the directory. + data = get_directory(directory) + + # Get the PID of the current directory. + pid = os.path.basename(directory) + + # Set parent information. + parent_path = os.path.dirname(directory) + parent_folder = os.path.basename(parent_path) + + # Set model. + my_model = get_model(level) + + # Get the resource_type from the map + model_info = get_model_info(my_model,models) + resource_type = model_info.get('resource_type','None') + + # if level > 1 + if level > 1: + # For this directory. + row_data = { + 'id': pid, + 'level': level, + 'parent_id': parent_folder, + 'model': my_model, + 'field_resource_type': resource_type, + } + df = add_update_dataframe(df,pid,row_data) + + if level == 1: + row_data = { + 'id': pid, + 'level': level, + 'model': my_model, + 'field_resource_type': resource_type, + } + df = add_update_dataframe(df,pid,row_data) - # Loop through the file listing. + # For each object in the directory do. for row, (file_name, file_type, file_path) in enumerate(data, start=2): - # Skip things we don't want to include. - if (( file_name == "target.tif" ) or ( file_name == "manifest.csv" ) or ( file_name == "manifest.xlsx" )): - next - else: - print(f"Inspecting: {file_path}") - # Define the parent - parent = file_path.replace(directory, "") - parent = parent.replace("/"+file_name, "") - parent = parent.replace("/","") - #print(f"File_type: {file_type}") - #print(f"File_Path: {file_path}") - #print(f"Parent: {parent}") + # Skip objects that we don't want to process. + if not any(sub in file_path for sub in skip): - df = process_object(file_type,file_path,parent,df) + # If the object is a directory. + if file_type == "Directory": + # process the directory. 
+ df = process_objects(df, file_path, level + 1) - print(f"\n") + # If the object is a file. + if file_type == "File": + # process the file. + df = process_file(df, file_path, level + 1) + #else: + # print(f"Skip: {file_path}") - # Display the DataFrame before sending it to Google Sheets. - #print(f"Result Dataframe:") - #print(df) + # End if-else + # End For - # Save the DataFrame to Google Sheets. - logger.info(f"Updating Google Sheet with DataFrame.") - print(f"Updating Google Sheet with DataFrame.") - success,msg = update_google_sheet(df, google_sheet_id, google_sheet_name, google_credentials) - if (success): - logger.info(f"Successfully Updated Google Sheet.") - print(f"Successfully Updated Google Sheet.") + return df + +def get_model_paths(start_model, models_dict): + """ + Recursively map paths from a starting model to all leaf models. + + Args: + start_model: The key of the starting model in the models dictionary + models_dict: The dictionary containing all model definitions + + Returns: + A list of paths, where each path is a list of model keys from start to leaf + """ + # Check if the starting model exists + if start_model not in models_dict: + return [] + + model_info = models_dict[start_model] + child = model_info.get('child', 'None') + model = model_info.get('model', 'None') + + # Base case: if there's no child or child is 'None', return path with just this model + if child == 'None' or child not in models_dict: + #return [[start_model]] + return [[model]] + + # Recursive case: get all paths from child and prepend current model + child_paths = get_model_paths(child, models_dict) + #return [[start_model] + path for path in child_paths] + return [[model] + path for path in child_paths] + +def get_model_info(model,models_dict): + # Check if the starting model exists + if model not in models_dict: + return [] + + model_info = models_dict[model] + return model_info + +def get_model(level): + #if model_paths[0][level-1] == "File": + # Return parent model + # 
return model_paths[0][level-2] + #else: + # Return model + model_length = len(model_paths[0]) + if model_length <= 2: + return model_paths[0][0] else: - logger.warning(f"Failed to update Google Sheet: {msg}") - print(f"Failed to update Google Sheet: {msg}") + return model_paths[0][level-1] + +def get_resource_type(level): + return 0 +def is_target_in_list(target,list): + return target in list + +def get_value(map, key, default=None): + return map.get(key, default) + +def add_column(df: pd.DataFrame, column_name: str, default=None): + if column_name not in df.columns: + df[column_name] = default + return df + def main(): """ Main Process that sets up the environment variables etc and kicks off @@ -936,6 +1360,174 @@ def main(): Returns: N/a """ + + # Build models list which produces a map based upon the model. + # Model should eventually arive at a "File" model. + # The 'model' is obtained from the list of models in Islandora 2. + # All 'model' should be referenced below as a possible type. + # The 'resource_type'... + # The 'child' points to another model with "File" model being the end. 
+ globals()['models'] = { + 'Compound Audio 1': { + 'model': 'Compound Object', + 'resource_type': 'Collection', + 'child': 'Audio', + }, + 'Compound Audio 2': { + 'model': 'Compound Object', + 'resource_type': 'Collection', + 'child': 'COA', + }, + 'COA': { + 'model': 'Compound Object', + 'resource_type': 'Collection', + 'child': 'Audio', + }, + 'Compound Book': { + 'model': 'Compound Object', + 'resource_type': 'Collection', + 'child': 'Paged Content', + }, + 'Compound Image 1': { + 'model': 'Paged Content', + 'resource_type': 'Collection', + 'child': 'Page', + }, + 'Compound Image 2': { + 'model': 'Compound Object', + 'resource_type': 'Collection', + 'child': 'Image', + }, + 'Compound Video 1': { + 'model': 'Compound Object', + 'resource_type': 'Collection', + 'child': 'Video', + }, + 'Compound Video 2': { + 'model': 'Compound Object', + 'resource_type': 'Collection', + 'child': 'COV', + }, + 'COV': { + 'model': 'Compound Object', + 'resource_type': 'Collection', + 'child': 'Video', + }, + 'Compound Object': { + 'model': 'Compound Object', + 'resource_type': 'Collection', + 'child': 'None', + }, + 'Collection': { + 'model': 'Collection', + 'resource_type': 'Collection', + 'child': 'None', + }, + 'Serial': { + 'model': 'Newspaper', + 'resource_type': 'Collection', + 'child': 'Issue', + }, + 'Newspaper': { + 'model': 'Newspaper', + 'resource_type': 'Collection', + 'child': 'Publication Issue', + }, + 'Issue': { + 'model': 'Publication Issue', + 'resource_type': 'Collection', + 'child': 'Page', + }, + 'Publication Issue': { + 'model': 'Publication Issue', + 'resource_type': 'Collection', + 'child': 'Page', + }, + 'Book': { + 'model': 'Paged Content', + 'resource_type': 'Collection', + 'child': 'Page', + }, + 'Paged Content': { + 'model': 'Paged Content', + 'resource_type': 'Collection', + 'child': 'Page', + }, + 'Digital Document': { + 'model': 'Digital Document', + 'resource_type': 'Text', + 'child': 'None', + }, + 'Page': { + 'model': 'Page', + 'resource_type': 
'Text', + 'child': 'File', + }, + 'Image': { + 'model': 'Image', + 'resource_type': 'Still Image', + 'child': 'File', + }, + 'Audio': { + 'model': 'Audio', + 'resource_type': 'Sound', + 'child': 'File', + }, + 'Video': { + 'model': 'Video', + 'resource_type': 'Moving Image', + 'child': 'File', + }, + 'PDF': { + 'model': 'Digital Document', + 'resource_type': 'Text', + 'child': 'File', + }, + 'Binary': { + 'model': 'Binary', + 'resource_type': 'Unspecified', + 'child': 'File', + }, + 'File': { + 'model': 'File', + 'child': 'None', + }, + } + + + # Valid models. + # This is the list of valid models that we will work with. + # + globals()['allowed_models'] = [ + "compound audio", + "compound video", + "compound image", + "compound book", + "compound object", + "collection", + "serial", + "newspaper", + "publication issue", + "issue", + "book", + "digital document", + "page", + "image", + "audio", + "video", + "pdf", + "binary" + ] + + ### Other models: + # "collection": "Collection", + # "digital document": "Digital Document", + # "paged content": "Paged Content", + # "publication issue": "Publication Issue", + # "compound object": "Compound Object", + # "newspaper": "Newspaper", + + # Setup the log file format. globals()['log_formatter'] = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(levelname)s %(message)s',datefmt="%Y%m%d %H:%M:%S") @@ -959,17 +1551,41 @@ def main(): globals()['google_credentials'] = args.in_gs_creds if args.in_gs_id is not None: globals()['google_sheet_id'] = args.in_gs_id - if args.in_gs_name is not None: + if args.in_gs_name is not None: globals()['google_sheet_name'] = args.in_gs_name if args.log_file is not None: globals()['log_file'] = args.log_file + # Required Sheet Columns: + globals()['required_columns'] = ["id","file","level","parent_id", + "field_weight","field_model","model","field_resource_type","transcript"] + + # Global file patterns to skip over. 
+ globals()['skip'] = ["ignore",".jp2",".metadata","meta",".opex",".fits", + "target.tif","metadata.csv","metadata.xlsx","manifest.xlsx","manifest.csv"] + # Create the Log file. - #print(f"Creating log file: {args.log_file}") - #globals()['logger'] = setup_logger('logger', args.log_file, level=logging.DEBUG) + print(f"Creating Log file: {log_file}") globals()['logger'] = setup_logger('logger', log_file, level=logging.DEBUG) logger.info(f"Begin log.") + # Read Content Models + # globals()['content_models'] = read_yaml_file('content_models.yml') + + # Get Batch Model. + globals()['model'] = args.model + logger.info(f"Batch Model: {model}") + if not is_target_in_list(model.lower(), allowed_models): + logger.error(f"Model: {model} is not in the list of allowed_models.") + print(f"Model: {model} is not in the list of allowed_models.") + + # Show Model path. + globals()['model_paths'] = get_model_paths(model,models) + logger.info(f"Model Path: {model_paths[0]}") + print(f"Model Path: {model_paths[0]}") + print(f"{json.dumps(model_paths[0],indent=4)}") + + # Get external command paths. gm_path = shutil.which("gm") @@ -977,8 +1593,8 @@ def main(): if gm_path: logger.info(f"GraphicsMagick Executable found at: {gm_path}") else: - logger.error(f"GraphicsMagick Executable 'gm' not found and is required.") - print(f"GraphicsMagick Executable 'gm' not found and is required.") + logger.error(f"GraphicsMagick Executable 'gm' not found in PATH and is required.") + print(f"GraphicsMagick Executable 'gm' not found in PATH and is required.") print(f"Exiting...") exit() @@ -987,13 +1603,28 @@ def main(): print(f"Reading Google Sheet: {google_sheet_id},{google_sheet_name}") df = read_google_sheet(google_sheet_id, google_sheet_name, google_credentials) - # Scan the directory and return a list of directory contents. - logger.info(f"Scan the directory: {args.directory}") - file_data = scan_directory(args.directory) + # Ensure all required_columns exist. 
+ for col in required_columns: + df = add_column(df,col) # Process the contents. - logger.info(f"Process the directory: {args.directory}") - process_objects(file_data,args.directory,df) + globals()['top'] = args.directory + logger.info(f"Process the directory: {top}") + updated_df = process_objects(df,top,0) + + # Save the DataFrame to Google Sheets. + logger.info(f"Updating Google Sheet with DataFrame.") + print(f"Updating Google Sheet with DataFrame.") + success,msg = update_google_sheet(updated_df, google_sheet_id, google_sheet_name, google_credentials) + if (success): + logger.info(f"Successfully Updated Google Sheet.") + print(f"Successfully Updated Google Sheet.") + else: + logger.warning(f"Failed to update Google Sheet: {msg}") + print(f"Failed to update Google Sheet: {msg}") + + # Display df + print(f"Dataframe:\n{updated_df}") exit() @@ -1005,5 +1636,3 @@ google_sheet_name = None if __name__ == "__main__": main() - - From 7075968fece02a683d402dea74f7538e971b7eb2 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 11:37:12 -0500 Subject: [PATCH 02/21] Adjusting Islandora Models. 
--- scan-batch-dir | 48 ++++++++++++++---------------------------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index 5a9fa25..268a29c 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -1373,16 +1373,6 @@ def main(): 'resource_type': 'Collection', 'child': 'Audio', }, - 'Compound Audio 2': { - 'model': 'Compound Object', - 'resource_type': 'Collection', - 'child': 'COA', - }, - 'COA': { - 'model': 'Compound Object', - 'resource_type': 'Collection', - 'child': 'Audio', - }, 'Compound Book': { 'model': 'Compound Object', 'resource_type': 'Collection', @@ -1393,26 +1383,11 @@ def main(): 'resource_type': 'Collection', 'child': 'Page', }, - 'Compound Image 2': { - 'model': 'Compound Object', - 'resource_type': 'Collection', - 'child': 'Image', - }, 'Compound Video 1': { 'model': 'Compound Object', 'resource_type': 'Collection', 'child': 'Video', }, - 'Compound Video 2': { - 'model': 'Compound Object', - 'resource_type': 'Collection', - 'child': 'COV', - }, - 'COV': { - 'model': 'Compound Object', - 'resource_type': 'Collection', - 'child': 'Video', - }, 'Compound Object': { 'model': 'Compound Object', 'resource_type': 'Collection', @@ -1423,21 +1398,26 @@ def main(): 'resource_type': 'Collection', 'child': 'None', }, - 'Serial': { + 'Serial 1': { 'model': 'Newspaper', 'resource_type': 'Collection', - 'child': 'Issue', + 'child': 'Issue 1', }, - 'Newspaper': { + 'Serial 2': { 'model': 'Newspaper', 'resource_type': 'Collection', - 'child': 'Publication Issue', + 'child': 'Issue 2', }, - 'Issue': { + 'Issue 1': { 'model': 'Publication Issue', 'resource_type': 'Collection', 'child': 'Page', }, + 'Issue 2': { + 'model': 'Publication Issue', + 'resouce_type': 'Text', + 'child': 'File', + }, 'Publication Issue': { 'model': 'Publication Issue', 'resource_type': 'Collection', @@ -1505,10 +1485,11 @@ def main(): "compound book", "compound object", "collection", - "serial", - "newspaper", + "serial 1", + "serial 2", 
"publication issue", - "issue", + "issue 1", + "issue 2", "book", "digital document", "page", @@ -1585,7 +1566,6 @@ def main(): print(f"Model Path: {model_paths[0]}") print(f"{json.dumps(model_paths[0],indent=4)}") - # Get external command paths. gm_path = shutil.which("gm") From fb439896262a64f252942cd60e983fddfa81c234 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 11:54:47 -0500 Subject: [PATCH 03/21] A bit of cleanup. --- scan-batch-dir | 214 +------------------------------------------------ 1 file changed, 4 insertions(+), 210 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index 268a29c..8c3d783 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -258,8 +258,6 @@ def update_dataframe(df: pd.DataFrame, match_column: str, match_value: str, upda # Verify columns exist if match_column not in df.columns: return df, False, f"update_dataframe - Match column '{match_column}' not found" - #if update_column not in df.columns: - # return df, False, f"update_dataframe - Update column '{update_column}' not found" # Make a copy of the dataframe df_copy = df.copy() @@ -314,7 +312,6 @@ def add_row_to_dataframe(df, row_data, key_column): # Check if the key value already exists if row_data[key_column] in df[key_column].values: - #print(f"Warning: Key value '{row_data[key_column]}' already exists in column '{key_column}'. Row not added.") return df, False, "add_row_to_dataframe - Row with Key column {key_column} already exists." 
# Convert row_data to DataFrame @@ -348,13 +345,11 @@ def value_exists_in_column(df, column_name, value): # Check if the column exists in the DataFrame if column_name not in df.columns: return False - #raise ValueError(f"The column '{column_name}' does not exist in the DataFrame") # Check if the value exists in the column return value in df[column_name].values except Exception as e: - #print(f"An unexpected error occurred: {e}") return False def scan_directory(directory): @@ -417,7 +412,6 @@ def convert_tiff_to_jp2(tif_file_path, jp2_file_path): logger.info(f"Successfully converted TIFF to JP2: {tif_file_path}") except Exception as e: logger.error(f"Failed to convert TIFF to JP2: {str(e)}") - #raise Exception(f"Failed to convert TIFF to JP2: {str(e)}") def is_valid_image(image_path): """ @@ -541,7 +535,6 @@ def process_tiff(file_path:str): """ # Process a .tif file. - #print(f"Processing a TIFF file: {file_path}.") pid,parent,file,dir,ext = get_file_info(file_path) # Create the JP2 derivative if it does not already exist. @@ -572,7 +565,6 @@ def process_mp3(file_path:str): mp3_path (str) The Path to the MP3 file. """ # Process a .mp3 file. - #print(f"Processing a MP3 file: {file_path}.") pid,parent,file,dir,ext = get_file_info(file_path) out_pid = f"{parent}-{pid}" mp3_path = f"{dir}/{pid}{ext}" @@ -591,7 +583,6 @@ def process_transcript(file_path:str): vtt_path (str) The Path to the Transcript file. """ # Process a .vtt or .srt file. - #print(f"Processing a WebVTT/SRT file: {file_path}.") pid,parent,file,dir,ext = get_file_info(file_path) out_pid = f"{parent}-{pid}" vtt_path = f"{dir}/{pid}{ext}" @@ -610,7 +601,6 @@ def process_thumbnail(file_path:str): tn_path (str) the Path to the thumbnail file. """ # Process a thumbnail (.jpg/.png). 
- #print(f"Processing a Thumbnail file: {file_path}.") pid,parent,file,dir,ext = get_file_info(file_path) out_pid = f"{parent}-{pid}" tn_path = f"{dir}/{pid}{ext}" @@ -629,7 +619,6 @@ def process_unknown(file_path:str): unknown_path (str) The path to the unknown file. """ # Process a unknown file. - #print(f"Processing an Unknown file: {file_path}.") pid,parent,file,dir,ext = get_file_info(file_path) out_pid = f"{parent}-{pid}" unknown_path = f"{dir}/{pid}{ext}" @@ -676,7 +665,6 @@ def dump_df_columns(df: pd.DataFrame, columns: list): missing_cols = [col for col in columns if col not in df.columns] if missing_cols: logger.error(f"The following columns do not exist in the DataFrame: {missing_cols}") - #raise ValueError(f"The following columns do not exist in the DataFrame: {missing_cols}") # Prints the DataFrame with selected columns with pd.option_context( @@ -719,191 +707,6 @@ def add_update_dataframe(df: pd.DataFrame, pid: str, row_data): logger.info(f"Adding to DataFrame: Success for PID: {pid}") return updated_df - -#def process_object(file_type: str, file_path: str, parent: str, df: pd.DataFrame): -# """ -# Process the incoming file object. -# -# Args: -# file_type (str) The type of object ('File','Directory') -# file_path (str) The file_path to the object. -# parent (str) The parent directory of the object. -# df (pd.DataFrame) The Pandas DataFrame we will be updating. -# -# Returns: -# df (pd.DataFrame) The Updated Pandas DataFrame. -# """ -# #logger.info(f"Processing an object.") -# # object files are kept within a folder of the top level folder. -# # files are .tif files which need to be converted. -# -# if not (is_valid_dataframe(df)): -# logger.warning("process_object: Incoming dataframe is invalid.") -# print(f"process_object: Incoming dataframe is invalid.") -# print(f"Invalid DataFrame: {df}") -# -# # Process File... 
-# if (file_type == "File" ) and ( parent != "" ): -# #print(f"Processing File") -# ignore_file_list = ["target.tif","manifest.csv","manifest.xlsx","manifest.xls"] -# -# dir,ext = os.path.splitext(file_path) -# file_name = os.path.basename(dir) -# pid = file_name -# -# if ( ext == ".tif" ): -# logger.info(f"Processing TIFF: {file_path}") -# print(f"Processing TIFF: {file_path}") -# outpid,outfile = process_tiff(file_path) -# if (is_valid_image(file_path) and is_valid_filesize(134,file_path) and is_valid_image(outfile) and is_valid_filesize(134,outfile)): -# row_data = {'id': outpid, 'file': outfile} -# updated_df = add_update_dataframe(df,outpid,row_data) -# return updated_df -# -# elif ( ext == ".mp3" ): -# logger.info(f"Processing MP3: {file_path}") -# print(f"Procesing MP3: {file_path}") -# outpid,outfile = process_mp3(file_path) -# row_data = {'id': outpid, 'file': outfile} -# updated_df = add_update_dataframe(df,outpid,row_data) -# return updated_df -# -# elif (( ext == ".vtt" ) or ( ext == ".srt" )): -# logger.info(f"Processing WebVTT/SRT: {file_path}") -# print(f"Processing WebVTT/SRT: {file_path}") -# outpid,outfile = process_transcript(file_path) -# row_data = {'id': outpid, 'transcript': outfile} -# updated_df = add_update_dataframe(df,outpid,row_data) -# return updated_df -# -# elif (( ext == ".jpg" ) or ( ext == ".png" )): -# logger.info(f"Processing Thumbnail: {file_path}") -# print(f"Processing Thumbnail: {file_path}") -# outpid,outfile = process_thumbnail(file_path) -# row_data = {'id': outpid, 'thumbnail': outfile} -# updated_df = add_update_dataframe(df,outpid,row_data) -# return updated_df -# -# else: -# logger.info(f"Processing Unknown: {file_path}") -# print(f"Unknown File extension: {ext} - Skipping.") -# outpid,outfile = process_unknown(file_path) -# row_data = {'id': outpid, 'file': outfile} -# updated_df = add_update_dataframe(df,outpid,row_data) -# return updated_df -# -# -# # Process Top Level file... 
-# elif (file_type == "File") and (parent == ""): -# # Top Level file. -# #print(f"Processing Top-Level File") -# ignore_file_list = ["ignore"] -# if file_path.casefold() in ignore_file_list: -# logger.info(f"Ingoring: {file_path}") -# else: -# logger.info(f"Processing Top Level file: {file_path}") -# dir,ext = os.path.splitext(file_path) -# pid = os.path.basename(dir) -# -# if ( ext == ".tif" ): -# # Top Level file is a .tif file. -# logger.info(f"Processing Top Level TIFF: {file_path}") -# print(f"Processing Top Level TIFF: {file_path}") -# outpid,outfile = process_tiff(file_path) -# row_data = {'id': pid, 'file': outfile} -# updated_df = add_update_dataframe(df,pid,row_data) -# return updated_df -# -# elif ( ext == ".mp3" ): -# logger.info(f"Processing MP3: {file_path}") -# print(f"Procesing MP3: {file_path}") -# outpid,outfile = process_mp3(file_path) -# row_data = {'id': pid, 'file': outfile} -# updated_df = add_update_dataframe(df,pid,row_data) -# return updated_df -# -# elif (( ext == ".vtt" ) or ( ext == ".srt" )): -# logger.info(f"Processing WebVTT/SRT: {file_path}") -# print(f"Processing WebVTT/SRT: {file_path}") -# outpid,outfile = process_transcript(file_path) -# row_data = {'id': pid, 'transcript': outfile} -# updated_df = add_update_dataframe(df,pid,row_data) -# return updated_df -# -# elif (( ext == ".jpg" ) or ( ext == ".png" )): -# logger.info(f"Processing Thumbnail: {file_path}") -# print(f"Processing Thumbnail: {file_path}") -# outpid,outfile = process_thumbnail(file_path) -# row_data = {'id': pid, 'thumbnail': outfile} -# updated_df = add_update_dataframe(df,pid,row_data) -# return updated_df -# -# else: -# logger.info(f"Processing Unknown: {file_path}") -# print(f"Unknown File extension: {ext} - Skipping.") -# outpid,outfile = process_unknown(file_path) -# row_data = {'id': pid, 'file': outfile} -# updated_df = add_update_dataframe(df,pid,row_data) -# return updated_df -# -# # Process Directory... 
-# elif (file_type == "Directory"): -# #print(f"Processing Directory") -# ignore_dir_list = ["ignore"] -# if file_path.casefold() in ignore_dir_list: -# logger.info(f"Ignoreing: {file_path}") -# else: -# # Continue -# logger.info(f"Processing Directory: {file_path}") -# dir,ext = os.path.splitext(file_path) -# pid = os.path.basename(dir) -# -# # Check if pid in Google Sheet. -# if (value_exists_in_column(df,'id',pid)): -# # Update Existing info. -# print(f"Found: {pid} in Google Sheet") -# logger.info(f"Found: {pid} in Google Sheet") -# -# # Set Row Data -# row_data = {'id': pid} -# -# # Update the dataframe. -# logger.info(f"Updating DataFrame: {pid}") -# #updated_df,success,msg = update_dataframe(df, 'id', pid, row_data) -# df,success,msg = update_dataframe(df, 'id', pid, row_data) -# -# if not (success): -# logger.warning(f"Update DataFrame: Failed for PID: {pid} - {msg}") -# print(f"Update DataFrame: Failed for PID: {pid} - {msg}") -# else: -# #return updated_df -# return df -# -# else: -# # Add new info. -# print(f"Not Found: Adding {pid} to DataFrame") -# logger.info(f"Not Found: Adding {pid} to DataFrame") -# -# # Set Row Data. -# row_data = {'id': pid} -# -# # Update the dataframe. 
-# logger.info(f"Updating DataFrame: {pid}") -# print(f"Adding: {row_data}") -# #updated_df,success,msg = add_row_to_dataframe(df,row_data,key_column='id') -# df,success,msg = add_row_to_dataframe(df,row_data,'id') -# -# if not (success): -# logger.warning(f"Add to DataFrame: failed for PID: {pid} - {msg}") -# print(f"Add to DataFrame: Failed for PID: {pid} - {msg}") -# else: -# return df -# -# else: -# print(f"Unknown object.") -# exit() - - def get_value_from_df(df: pd.DataFrame,match_column: str,match_value,return_column:str): """ Given a pandas DataFrame, find the row where match_column == match_value @@ -921,11 +724,9 @@ def get_value_from_df(df: pd.DataFrame,match_column: str,match_value,return_colu # Validate columns if match_column not in df.columns: - #raise ValueError(f"Column '{match_column}' does not exist in the DataFrame.") logger.warning(f"Column '{match_column}' does not exist in the DataFrame.") return None if return_column not in df.columns: - #raise ValueError(f"Column '{return_column}' does not exist in the DataFrame.") logger.warning(f"Column '{return_column}' does not exist in the DataFrame.") return None @@ -1172,7 +973,6 @@ def process_file(df, file_path: str, level): else: pid = f"{file_name}" - # Build Row Data. row_data = { 'id': pid, @@ -1223,7 +1023,6 @@ def process_objects(df, directory: str, level): directory: Directory we are working with. level: What level are we at directory wise. """ - #print(f"Level: {level}, {directory}") # Get the objects in the directory. data = get_directory(directory) @@ -1277,10 +1076,7 @@ def process_objects(df, directory: str, level): if file_type == "File": # process the file. 
df = process_file(df, file_path, level + 1) - #else: - # print(f"Skip: {file_path}") - - # End if-else + # End if # End For return df @@ -1306,12 +1102,10 @@ def get_model_paths(start_model, models_dict): # Base case: if there's no child or child is 'None', return path with just this model if child == 'None' or child not in models_dict: - #return [[start_model]] return [[model]] # Recursive case: get all paths from child and prepend current model child_paths = get_model_paths(child, models_dict) - #return [[start_model] + path for path in child_paths] return [[model] + path for path in child_paths] def get_model_info(model,models_dict): @@ -1550,7 +1344,7 @@ def main(): globals()['logger'] = setup_logger('logger', log_file, level=logging.DEBUG) logger.info(f"Begin log.") - # Read Content Models + # Read Content Models - Proposed for later. # globals()['content_models'] = read_yaml_file('content_models.yml') # Get Batch Model. @@ -1569,7 +1363,7 @@ def main(): # Get external command paths. gm_path = shutil.which("gm") - # Check 'gm' exists. + # Check that 'gm' program exists. if gm_path: logger.info(f"GraphicsMagick Executable found at: {gm_path}") else: @@ -1606,7 +1400,7 @@ def main(): # Display df print(f"Dataframe:\n{updated_df}") - exit() + sys.exit() # Setup global variables. From 97078c791c104671f8c08bb7151252079f6af29a Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 15:30:33 -0500 Subject: [PATCH 04/21] Built out the PDF model, updated the model map, and explicitly set the islandora model in the model map. 
--- scan-batch-dir | 59 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index 8c3d783..cd20558 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -798,7 +798,7 @@ def process_file(df, file_path: str, level): # Set the file weight if the file_name is a digit or contains "-\d{4}" file_weight = '' - if level > 1: + if level >= 1: if file_name.isdigit(): file_weight = int(file_name) else: @@ -813,12 +813,17 @@ def process_file(df, file_path: str, level): # Get the model from the map. my_model = get_model(level) - field_model = get_taxonomy_tid('https://i2.digital.library.pitt.edu','islandora_models',my_model) - logger.info(f"File is model: {my_model}, TID: {field_model}") + #field_model = get_taxonomy_tid('https://i2.digital.library.pitt.edu','islandora_models',my_model) + #logger.info(f"File is model: {my_model}, TID: {field_model}") - # Get the resource_type from the map + # Get model info from the map. model_info = get_model_info(my_model,models) resource_type = model_info.get('resource_type','None') + imodel = model_info.get('imodel','None') + + # Get the field_model from JSONAPI + field_model = get_taxonomy_tid('https://i2.digital.library.pitt.edu','islandora_models',imodel) + logger.info(f"File is model: {my_model}, TID: {field_model}") # Process any .tif files. 
if (file_ext.lower() == ".tif"): @@ -843,7 +848,7 @@ def process_file(df, file_path: str, level): 'parent_id': parent_folder, 'field_weight': file_weight, 'field_model': field_model, - 'model': my_model, + 'model': imodel, 'field_resource_type': resource_type, 'level': level, } @@ -877,7 +882,7 @@ def process_file(df, file_path: str, level): 'parent_id': parent_folder, 'field_weight': file_weight, 'field_model': field_model, - 'model': my_model, + 'model': imodel, 'field_resource_type': resource_type, 'level': level, } @@ -906,7 +911,7 @@ def process_file(df, file_path: str, level): 'level': level, 'field_model': field_model, 'field_weight': file_weight, - 'model': my_model, + 'model': imodel, 'field_resource_type': resource_type, } @@ -934,9 +939,9 @@ def process_file(df, file_path: str, level): 'level': level, 'field_model': field_model, 'field_weight': file_weight, - 'model': my_model, + 'model': imodel, 'field_resource_type': resource_type, - } + } logger.info(f"Row Data: {row_data}") @@ -977,6 +982,9 @@ def process_file(df, file_path: str, level): row_data = { 'id': pid, 'file': file_path, + 'field_weight': file_weight, + 'field_model': field_model, + 'model': imodel, } logger.info(f"Row Data: {row_data}") @@ -1164,106 +1172,133 @@ def main(): globals()['models'] = { 'Compound Audio 1': { 'model': 'Compound Object', + 'imodel': 'Compound Object', 'resource_type': 'Collection', 'child': 'Audio', }, 'Compound Book': { 'model': 'Compound Object', + 'imodel': 'Compound Object', 'resource_type': 'Collection', 'child': 'Paged Content', }, 'Compound Image 1': { 'model': 'Paged Content', + 'imodel': 'Paged Content', 'resource_type': 'Collection', 'child': 'Page', }, 'Compound Video 1': { 'model': 'Compound Object', + 'imodel': 'Compound Object', 'resource_type': 'Collection', 'child': 'Video', }, 'Compound Object': { 'model': 'Compound Object', + 'imodel': 'Compound Object', 'resource_type': 'Collection', 'child': 'None', }, 'Collection': { 'model': 'Collection', + 
'imodel': 'Collection', 'resource_type': 'Collection', 'child': 'None', }, 'Serial 1': { 'model': 'Newspaper', + 'imodel': 'Newspaper', 'resource_type': 'Collection', 'child': 'Issue 1', }, 'Serial 2': { 'model': 'Newspaper', + 'imodel': 'Newspaper', 'resource_type': 'Collection', 'child': 'Issue 2', }, 'Issue 1': { - 'model': 'Publication Issue', + 'model': 'Publication Issue 1', + 'imodel': 'Publication Issue', 'resource_type': 'Collection', 'child': 'Page', }, 'Issue 2': { - 'model': 'Publication Issue', + 'model': 'Publication Issue 2', + 'imodel': 'Publication Issue', 'resouce_type': 'Text', 'child': 'File', }, - 'Publication Issue': { + 'Publication Issue 1': { 'model': 'Publication Issue', + 'imodel': 'Publication Issue', 'resource_type': 'Collection', 'child': 'Page', }, + 'Publication Issue 2': { + 'model': 'Publication Issue', + 'imodel': 'Publication Issue', + 'resource_type': 'Text', + 'child': 'PDF', + }, 'Book': { 'model': 'Paged Content', + 'imodel': 'Paged Content', 'resource_type': 'Collection', 'child': 'Page', }, 'Paged Content': { 'model': 'Paged Content', + 'imodel': 'Paged Content', 'resource_type': 'Collection', 'child': 'Page', }, 'Digital Document': { 'model': 'Digital Document', + 'imodel': 'Digital Document', 'resource_type': 'Text', 'child': 'None', }, 'Page': { 'model': 'Page', + 'imodel': 'Page', 'resource_type': 'Text', 'child': 'File', }, 'Image': { 'model': 'Image', + 'imodel': 'Image', 'resource_type': 'Still Image', 'child': 'File', }, 'Audio': { 'model': 'Audio', + 'imodel': 'Audio', 'resource_type': 'Sound', 'child': 'File', }, 'Video': { 'model': 'Video', + 'imodel': 'Video', 'resource_type': 'Moving Image', 'child': 'File', }, 'PDF': { 'model': 'Digital Document', + 'imodel': 'Digital Document', 'resource_type': 'Text', 'child': 'File', }, 'Binary': { 'model': 'Binary', + 'imodel': 'Binary', 'resource_type': 'Unspecified', 'child': 'File', }, 'File': { 'model': 'File', + 'imodel': 'File', 'child': 'None', }, } From 
2d83e102e8c4062130e6aab3c6c2170085f82e98 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 15:56:39 -0500 Subject: [PATCH 05/21] Added additional fields for PDFs and added minimal row_data for Simple Images. --- scan-batch-dir | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/scan-batch-dir b/scan-batch-dir index cd20558..7cd2899 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -823,7 +823,7 @@ def process_file(df, file_path: str, level): # Get the field_model from JSONAPI field_model = get_taxonomy_tid('https://i2.digital.library.pitt.edu','islandora_models',imodel) - logger.info(f"File is model: {my_model}, TID: {field_model}") + logger.info(f"File is model: {imodel}, TID: {field_model}") # Process any .tif files. if (file_ext.lower() == ".tif"): @@ -985,6 +985,8 @@ def process_file(df, file_path: str, level): 'field_weight': file_weight, 'field_model': field_model, 'model': imodel, + 'field_resource_type': resource_type, + 'level': level, } logger.info(f"Row Data: {row_data}") @@ -995,6 +997,29 @@ def process_file(df, file_path: str, level): # Process any simple image files. if (file_ext.lower() == ".png" or file_ext.lower() == ".jpg"): print(f" Type: Simple Image") + + # Handle Top Level files. + if not level == 1: + pid = f"{parent_folder}-{file_name}" + else: + pid = f"{file_name}" + + # Build Row Data. + row_data = { + 'id': pid, + 'file': file_path, + 'field_weight': file_weight, + 'field_model': field_model, + 'model': imodel, + 'field_resource_type': resource_type, + 'level': level, + } + + logger.info(f"Row Data: {row_data}") + + # Update Dataframe. + df = add_update_dataframe(df,pid,row_data) + print(f"\n") @@ -1378,6 +1403,7 @@ def main(): print(f"Creating Log file: {log_file}") globals()['logger'] = setup_logger('logger', log_file, level=logging.DEBUG) logger.info(f"Begin log.") + logger.info(f"Running User: {username}") # Read Content Models - Proposed for later. 
# globals()['content_models'] = read_yaml_file('content_models.yml') From 0a9261063f8b5beb72272c9d32d40afe81e35f99 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 16:20:16 -0500 Subject: [PATCH 06/21] Added tables to the README.md file. --- README.md | 61 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 7f1c448..28f4bbf 100644 --- a/README.md +++ b/README.md @@ -7,15 +7,23 @@ few types of file directory layouts relating to these types of objects: - Books +- Compound Books + - Manuscripts -- Newspaper Issues +- Newspaper/Serial Issues (Paged) + +- Newspaper/Serial Issues (PDF) - Audio (Oral Histories or General Audio files) - Video (Oral Histories or General Videos) -- Images +- Image (Single Image) + +- PDF (Single File) + +- Page (Single Page) Currently no other object types are addressed, but the script will identify and add them as it finds them. @@ -35,27 +43,40 @@ contents): Sheet Columns: - ----------------------------------------------------------------------- - Required Columns Description - ------------------------ ---------------------------------------------- - 'id' The PID of the object. This column must exist. - - 'file' Leave this empty but the column must exist. + |------------------------|----------------------------------------------| + |Required Columns | Description | + |------------------------|----------------------------------------------| + |'id' |The PID of the object. This column must exist.| + |--- |--- | + |'file' |Leave this empty but the column must exist. This field will be updated by the script with - the full path to the file. - - Optional Columns - - 'thumbnail' Used for A/V media. If a .jpg or .png file is + the full path to the file.| + |--- |--- | + |'model' | This column will be added. This is the Islandora + Model.| + |--- |--- | + |'field_model' |This column will be added. 
This is the Islanodra + Model Taxonomy ID.| + |--- |--- | + |'field_weight' |This column will be added. This is the order of + the objects in the sheet if it can be determined.| + |--- |--- | + |'field_resource_type' |This column will be added. This is the Resource + Type of the object in the sheet.| + |--- |--- | + + + |------------------------|-----------------------------------------------| + |Optional Columns | Description | + |------------------------|-----------------------------------------------| + |'thumbnail' |Used for A/V media. If a .jpg or .png file is found the full path to the file will be added - to this column. - - 'transcript' Used for A/V media. If a .srt or .vtt file is + to this column.| + |--- |--- | + |'transcript' |Used for A/V media. If a .srt or .vtt file is found the full path to the file will be added - to this column. - - - ----------------------------------------------------------------------- + to this column.| + |------------------------|-----------------------------------------------| Script Parameters: From e946d26b539d28fb9a9c5b0bf66d5249c91664dc Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 16:22:32 -0500 Subject: [PATCH 07/21] Updated the tables in the README.md file. --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 28f4bbf..8b8410b 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,9 @@ contents): Sheet Columns: - |------------------------|----------------------------------------------| + |--- |--- | |Required Columns | Description | - |------------------------|----------------------------------------------| + |--- |--- | |'id' |The PID of the object. This column must exist.| |--- |--- | |'file' |Leave this empty but the column must exist. 
@@ -66,9 +66,9 @@ Sheet Columns: |--- |--- | - |------------------------|-----------------------------------------------| + |--- |--- | |Optional Columns | Description | - |------------------------|-----------------------------------------------| + |--- |--- | |'thumbnail' |Used for A/V media. If a .jpg or .png file is found the full path to the file will be added to this column.| @@ -76,7 +76,7 @@ Sheet Columns: |'transcript' |Used for A/V media. If a .srt or .vtt file is found the full path to the file will be added to this column.| - |------------------------|-----------------------------------------------| + |--- |--- | Script Parameters: From cd956a3d37412a6d0d2fb2fad2195fbc6be8fe62 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 16:24:56 -0500 Subject: [PATCH 08/21] Updated the tables in the README.md file. --- README.md | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 8b8410b..66f5285 100644 --- a/README.md +++ b/README.md @@ -41,9 +41,8 @@ contents): ## Google Sheet requirements: -Sheet Columns: +### Sheet Columns: - |--- |--- | |Required Columns | Description | |--- |--- | |'id' |The PID of the object. This column must exist.| @@ -52,33 +51,22 @@ Sheet Columns: This field will be updated by the script with the full path to the file.| |--- |--- | - |'model' | This column will be added. This is the Islandora - Model.| - |--- |--- | - |'field_model' |This column will be added. This is the Islanodra - Model Taxonomy ID.| + |'model' |This column will be added. This is the Islandora Model.| |--- |--- | - |'field_weight' |This column will be added. This is the order of - the objects in the sheet if it can be determined.| + |'field_model' |This column will be added. This is the Islanodra Model Taxonomy ID.| |--- |--- | - |'field_resource_type' |This column will be added. This is the Resource - Type of the object in the sheet.| + |'field_weight' |This column will be added. 
This is the order of the objects in the sheet if it can be determined.| |--- |--- | + |'field_resource_type' |This column will be added. This is the Resource Type of the object in the sheet.| - |--- |--- | |Optional Columns | Description | |--- |--- | - |'thumbnail' |Used for A/V media. If a .jpg or .png file is - found the full path to the file will be added - to this column.| + |'thumbnail' |Used for A/V media. If a .jpg or .png file is found the full path to the file will be added to this column.| |--- |--- | - |'transcript' |Used for A/V media. If a .srt or .vtt file is - found the full path to the file will be added - to this column.| - |--- |--- | + |'transcript' |Used for A/V media. If a .srt or .vtt file is found the full path to the file will be added to this column.| -Script Parameters: +### Script Parameters: ------------------------------------------------------------------------- Required Parameters Description From a6cce3653706fd9c6f5fd67135615dc4115576d4 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 16:26:17 -0500 Subject: [PATCH 09/21] Updated the tables in the README.md file. --- README.md | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/README.md b/README.md index 66f5285..aa0f930 100644 --- a/README.md +++ b/README.md @@ -46,24 +46,15 @@ contents): |Required Columns | Description | |--- |--- | |'id' |The PID of the object. This column must exist.| - |--- |--- | - |'file' |Leave this empty but the column must exist. - This field will be updated by the script with - the full path to the file.| - |--- |--- | + |'file' |Leave this empty but the column must exist. This field will be updated by the script with the full path to the file.| |'model' |This column will be added. This is the Islandora Model.| - |--- |--- | |'field_model' |This column will be added. This is the Islanodra Model Taxonomy ID.| - |--- |--- | |'field_weight' |This column will be added. 
This is the order of the objects in the sheet if it can be determined.| - |--- |--- | |'field_resource_type' |This column will be added. This is the Resource Type of the object in the sheet.| |Optional Columns | Description | - |--- |--- | |'thumbnail' |Used for A/V media. If a .jpg or .png file is found the full path to the file will be added to this column.| - |--- |--- | |'transcript' |Used for A/V media. If a .srt or .vtt file is found the full path to the file will be added to this column.| ### Script Parameters: From 05ed05cc3b1fcc18a411be39a32ac5beb87ffcb1 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 16:28:05 -0500 Subject: [PATCH 10/21] Updated the tables in the README.md file. --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index aa0f930..c00b3c0 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ contents): ## Google Sheet requirements: -### Sheet Columns: +### Sheet Required Columns: |Required Columns | Description | |--- |--- | @@ -52,8 +52,10 @@ contents): |'field_weight' |This column will be added. This is the order of the objects in the sheet if it can be determined.| |'field_resource_type' |This column will be added. This is the Resource Type of the object in the sheet.| +### Sheet Optional Columns: |Optional Columns | Description | + |--- |--- | |'thumbnail' |Used for A/V media. If a .jpg or .png file is found the full path to the file will be added to this column.| |'transcript' |Used for A/V media. If a .srt or .vtt file is found the full path to the file will be added to this column.| From 90a062479a2051c739ac2318792e7cf32e1c0a73 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 18 Feb 2026 16:32:31 -0500 Subject: [PATCH 11/21] Updated the tables in the README.md file. 
--- README.md | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index c00b3c0..ae48099 100644 --- a/README.md +++ b/README.md @@ -59,31 +59,21 @@ contents): |'thumbnail' |Used for A/V media. If a .jpg or .png file is found the full path to the file will be added to this column.| |'transcript' |Used for A/V media. If a .srt or .vtt file is found the full path to the file will be added to this column.| -### Script Parameters: +### Script Required Parameters: - ------------------------------------------------------------------------- - Required Parameters Description - ------------------------- ----------------------------------------------- - \--config-file Full or relative path to the configuration file - used for the script. - - \--log-file Full or relative path to the log file that will - be generated. - - \--directory Full path of the directory we wish to scan. - - - - Optional Parameters - - \--in-google-sheet-id The ID number of the Google Sheet. + |Required Parameters | Description | + |--- |--- | + |\--config-file |Full or relative path to the configuration file used for the script.| + |\--log-file |Full or relative path to the log file that will be generated.| + |\--directory |Full path of the directory we wish to scan.| - \--in-google-sheet-name The Name of the Tab in the Google Sheet (E.g.: - Sheet1) +### Script Optional Parameters: - \--in-google-creds-file The full or relative path to the Google - Credentials File. 
- ------------------------------------------------------------------------- + |Optional Parameters | Description | + |--- |--- | + |\--in-google-sheet-id |The ID number of the Google Sheet.| + |\--in-google-sheet-name |The Name of the Tab in the Google Sheet (E.g.: Sheet1)| + |\--in-google-creds-file |The full or relative path to the Google Credentials File.| ## Google Credentials File: From ea1316c393709b8e6fc6cf02e2099f2a98468d54 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 26 Feb 2026 13:12:12 -0500 Subject: [PATCH 12/21] Fixed spacing in function call parameters. --- scan-batch-dir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scan-batch-dir b/scan-batch-dir index 7cd2899..eb0f9f1 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -707,7 +707,7 @@ def add_update_dataframe(df: pd.DataFrame, pid: str, row_data): logger.info(f"Adding to DataFrame: Success for PID: {pid}") return updated_df -def get_value_from_df(df: pd.DataFrame,match_column: str,match_value,return_column:str): +def get_value_from_df(df: pd.DataFrame, match_column: str, match_value, return_column: str): """ Given a pandas DataFrame, find the row where match_column == match_value and return the value from return_column. From a7226d5d2cb4a7ee47a434c168d41eeb6edced02 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 26 Feb 2026 13:15:32 -0500 Subject: [PATCH 13/21] Adjust function documentation to correctly describe the return value as being the first value from the return column. --- scan-batch-dir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scan-batch-dir b/scan-batch-dir index eb0f9f1..a80b2e5 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -710,7 +710,7 @@ def add_update_dataframe(df: pd.DataFrame, pid: str, row_data): def get_value_from_df(df: pd.DataFrame, match_column: str, match_value, return_column: str): """ Given a pandas DataFrame, find the row where match_column == match_value - and return the value from return_column. 
+ and return the first value from return_column. Parameters: df (pd.DataFrame): The DataFrame to search. From d2c1561e34ec5a85ebfd4f14e59a9c966b470f57 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 26 Feb 2026 13:20:32 -0500 Subject: [PATCH 14/21] Added function documentation to process_file. --- scan-batch-dir | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scan-batch-dir b/scan-batch-dir index a80b2e5..ecf9238 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -785,6 +785,17 @@ def get_taxonomy_tid(base_url, vocabulary, term_name, auth_token=None): def process_file(df, file_path: str, level): + """ + Process the found file. + + Parameters: + df: The Pandas dataframe. + file_path (str): The path to the file. + level: The level of the path in relation to the starting directory. + + Returns: + df: The updated dataframe. + """ logger.info(f"Function: process_file") logger.info(f"Processing File: {file_path}") print(f"Processing File: {file_path}") From f04de97ca1524fd4faa8e1d9114edfb06295970e Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 26 Feb 2026 13:46:23 -0500 Subject: [PATCH 15/21] Adding some argument signatures to functions. --- scan-batch-dir | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index ecf9238..694bab0 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -784,7 +784,7 @@ def get_taxonomy_tid(base_url, vocabulary, term_name, auth_token=None): logger.error(f"get_taxonomy_tid - Error {response.status_code}: {response.text}") -def process_file(df, file_path: str, level): +def process_file(df: pd.DataFrame, file_path: str, level): """ Process the found file. @@ -1037,7 +1037,7 @@ def process_file(df, file_path: str, level): return(df) -def get_directory(directory): +def get_directory(directory: str): """ Scan a directory and return a list of files and subdirectories. 
@@ -1058,7 +1058,7 @@ def get_directory(directory): result.sort(key=lambda x: (x[1] != "Directory", x[0].lower())) return result -def process_objects(df, directory: str, level): +def process_objects(df: pd.DataFrame, directory: str, level): """ Process the objects of the directory. @@ -1125,7 +1125,7 @@ def process_objects(df, directory: str, level): return df -def get_model_paths(start_model, models_dict): +def get_model_paths(start_model: str, models_dict: dict): """ Recursively map paths from a starting model to all leaf models. @@ -1152,7 +1152,7 @@ def get_model_paths(start_model, models_dict): child_paths = get_model_paths(child, models_dict) return [[model] + path for path in child_paths] -def get_model_info(model,models_dict): +def get_model_info(model: str,models_dict: dict): # Check if the starting model exists if model not in models_dict: return [] @@ -1160,7 +1160,7 @@ def get_model_info(model,models_dict): model_info = models_dict[model] return model_info -def get_model(level): +def get_model(level:int): #if model_paths[0][level-1] == "File": # Return parent model # return model_paths[0][level-2] @@ -1172,11 +1172,11 @@ def get_model(level): else: return model_paths[0][level-1] -def get_resource_type(level): +def get_resource_type(level:int): return 0 -def is_target_in_list(target,list): +def is_target_in_list(target, list): return target in list def get_value(map, key, default=None): From 4726124c0dc830e5b5f67e7b8c4be29292157e9b Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 26 Feb 2026 13:54:51 -0500 Subject: [PATCH 16/21] Removed some unused functions. 
--- scan-batch-dir | 7 ------- 1 file changed, 7 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index 694bab0..8323606 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -1172,16 +1172,9 @@ def get_model(level:int): else: return model_paths[0][level-1] -def get_resource_type(level:int): - return 0 - - def is_target_in_list(target, list): return target in list -def get_value(map, key, default=None): - return map.get(key, default) - def add_column(df: pd.DataFrame, column_name: str, default=None): if column_name not in df.columns: df[column_name] = default From 736b240c1761caebf99188f3e1c90b29c93126dc Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 26 Feb 2026 15:49:04 -0500 Subject: [PATCH 17/21] Added the missing $ in the regex. --- scan-batch-dir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scan-batch-dir b/scan-batch-dir index 8323606..639054e 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -813,7 +813,7 @@ def process_file(df: pd.DataFrame, file_path: str, level): if file_name.isdigit(): file_weight = int(file_name) else: - pattern = r'.*-(\d{4})' + pattern = r'.*-(\d{4})$' match = re.search(pattern,file_name) if match: file_weight = int(match.group(1)) From 607681e67c80b20677fb03cb5e612f5a024ee23d Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 26 Feb 2026 15:56:59 -0500 Subject: [PATCH 18/21] Remove function in preference for in-line code. --- scan-batch-dir | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index 639054e..1a24009 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -1172,9 +1172,6 @@ def get_model(level:int): else: return model_paths[0][level-1] -def is_target_in_list(target, list): - return target in list - def add_column(df: pd.DataFrame, column_name: str, default=None): if column_name not in df.columns: df[column_name] = default @@ -1415,7 +1412,7 @@ def main(): # Get Batch Model. 
globals()['model'] = args.model logger.info(f"Batch Model: {model}") - if not is_target_in_list(model.lower(), allowed_models): + if not (model.lower() in allowed_models): logger.error(f"Model: {model} is not in the list of allowed_models.") print(f"Model: {model} is not in the list of allowed_models.") From 91d8a30ee94f2522dae45c4c2f7680c3770ae8c6 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 26 Feb 2026 16:02:26 -0500 Subject: [PATCH 19/21] Removed unused function dump_df_columns. --- scan-batch-dir | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index 1a24009..ba8734a 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -650,29 +650,6 @@ def get_file_info(file_path: str): return(pid,parent,file,dir,ext) -def dump_df_columns(df: pd.DataFrame, columns: list): - """ - Prints a DataFrame containing only the specified columns. - - Parameters: - df (pd.DataFrame): The original DataFrame. - columns (list): List of column names to include. - - Raises: - ValueError: If any requested column does not exist in the DataFrame. - """ - # Validate columns - missing_cols = [col for col in columns if col not in df.columns] - if missing_cols: - logger.error(f"The following columns do not exist in the DataFrame: {missing_cols}") - - # Prints the DataFrame with selected columns - with pd.option_context( - 'display.max_rows', None, - ): - print(f"Dataframe:\n{df[columns]}") - - def add_update_dataframe(df: pd.DataFrame, pid: str, row_data): """ Depending upon if the pid exists in the 'id' column of the dataframe From fb5ca84faf61fe57905f7c90e40d5044c7184502 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Fri, 27 Feb 2026 10:13:31 -0500 Subject: [PATCH 20/21] Moved skip patterns to the config file to allow for customization. 
--- scan-batch-dir | 12 ++++++++---- scan-batch-dir.conf-sample | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index ba8734a..1449f08 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -1358,6 +1358,7 @@ def main(): globals()['google_sheet_id'] = cfg['google_sheet_id'] globals()['google_sheet_name'] = cfg['google_sheet_name'] globals()['log_file'] = cfg['log_file'] + globals()['skip'] = cfg['skip'].split(",") # Override config file variables with command line parameters. if args.in_gs_creds is not None: @@ -1373,16 +1374,19 @@ def main(): globals()['required_columns'] = ["id","file","level","parent_id", "field_weight","field_model","model","field_resource_type","transcript"] - # Global file patterns to skip over. - globals()['skip'] = ["ignore",".jp2",".metadata","meta",".opex",".fits", - "target.tif","metadata.csv","metadata.xlsx","manifest.xlsx","manifest.csv"] - # Create the Log file. print(f"Creating Log file: {log_file}") globals()['logger'] = setup_logger('logger', log_file, level=logging.DEBUG) logger.info(f"Begin log.") logger.info(f"Running User: {username}") + # Log running config values. + logger.info(f"Log File: {log_file}") + logger.info(f"Skip Patterns: {skip}") + logger.info(f"Google Credentials: {google_credentials}") + logger.info(f"Google Sheet ID: {google_sheet_id}") + logger.info(f"Google Sheet Name: {google_sheet_name}") + # Read Content Models - Proposed for later. # globals()['content_models'] = read_yaml_file('content_models.yml') diff --git a/scan-batch-dir.conf-sample b/scan-batch-dir.conf-sample index 9ed7d6d..a6efd3e 100644 --- a/scan-batch-dir.conf-sample +++ b/scan-batch-dir.conf-sample @@ -14,3 +14,6 @@ google_sheet_name: Sheet1 # Log file log_file: /path/to/file.log +#-------------------------------------------------------------------- +# Skip patterns - Directory/File patterns to be ignored. 
+skip: ignore,meta,.jp2,.metadata,.opex,.fits,target.tif,metadata.csv,metadata.xlsx,manifest.xlsx,manifest.csv From e356d520e86ef4e3cd3e21faf4a94ad170713867 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Fri, 27 Feb 2026 10:18:42 -0500 Subject: [PATCH 21/21] Updated the README.md file to address the \'skip\' parameter. --- README.md | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/README.md b/README.md index ae48099..b793839 100644 --- a/README.md +++ b/README.md @@ -28,16 +28,7 @@ few types of file directory layouts relating to these types of objects: Currently no other object types are addressed, but the script will identify and add them as it finds them. -The script will ignore the following files and directories (including -contents): - -- Directory named 'ignore' - -- File named 'manuscript.csv' - -- File named 'manuscript.xls' - -- File named 'manuscript.xlsx' +The script will ignore the files and directories (including their contents) that match the patterns in the configuration file's 'skip' parameter. This is a comma-separated list of file patterns that you do not want the script to process. ## Google Sheet requirements: