From d21ef3b1988294f078cc1c0264a973426918d799 Mon Sep 17 00:00:00 2001 From: Brennen Heinrich Date: Sat, 15 Feb 2025 19:57:13 -0600 Subject: [PATCH] feat: Add verbose flag (-v, --verbose) for enhanced system messages - Introduced a global `Verbose` flag in `config.go` to manage debug output. - Registered `--verbose` (`-v`) flag in `main.go` for user control. - Modified `Load()` in `config.go` to conditionally display `PrintNonEmptyFields()` output when `Verbose` is enabled. - Updated CLI tests in `main_test.go` to validate proper handling of CLI arguments. - Improved `TestCLIOverrides` to dynamically verify CLI argument overrides. - Standardized flag registration and execution flow for better maintainability. - Ensured verbose output is globally accessible without needing to pass it through function calls. This change improves debugging by allowing users to toggle detailed output while keeping default behavior concise. --- cmd/scrapeycli/main.go | 49 +++++++++--- cmd/scrapeycli/main_test.go | 82 +++++++++++++------- pkg/config/config.go | 146 +++++++++++++++++++++++++++++++++--- pkg/config/config_test.go | 139 ++++++++++++++++++++++++++++------ 4 files changed, 343 insertions(+), 73 deletions(-) diff --git a/cmd/scrapeycli/main.go b/cmd/scrapeycli/main.go index c5fc429..3cd3f9e 100644 --- a/cmd/scrapeycli/main.go +++ b/cmd/scrapeycli/main.go @@ -1,5 +1,3 @@ -// File: cmd/scrapeycli/main.go - package main import ( @@ -16,34 +14,51 @@ Global variables for storing command-line arguments. - configPath: The path to the configuration file. - url: The URL to be scraped, which may override the URL in the config. +- maxDepth: Overrides the scraping depth if set. +- rateLimit: Overrides the request rate limit. +- verbose: Enables verbose output. */ var ( configPath string url string + maxDepth int + rateLimit float64 + verbose bool ) /* init registers command-line flags for configuration. 
-It sets up two flags for the config file ("config" and its shorthand "c") -and a flag for the URL override. +It sets up flags for: +- The config file ("config" and its shorthand "c"). +- URL override. +- Scraping depth override. +- Rate limit override. +- Verbose output ("verbose" and its shorthand "v"). */ func init() { flag.StringVar(&configPath, "config", "", "Path to config file") flag.StringVar(&configPath, "c", "", "Path to config file (shorthand)") flag.StringVar(&url, "url", "", "URL to scrape (overrides config)") + flag.IntVar(&maxDepth, "maxDepth", 0, "Override max crawl depth") + flag.Float64Var(&rateLimit, "rateLimit", 0, "Override request rate limit (seconds)") + flag.BoolVar(&verbose, "verbose", false, "Enable verbose output") + flag.BoolVar(&verbose, "v", false, "Enable verbose output (shorthand)") } /* main is the entry point of Scrapey CLI. It parses command-line flags, prints a welcome message, loads the configuration, -handles URL overrides, and prints confirmation messages for each step. +handles overrides, and prints confirmation messages for each step. */ func main() { // Parse CLI flags. flag.Parse() + // Store the verbose flag in global state + config.Verbose = verbose + // Print a welcome message in cyan using our PrintColored utility. utils.PrintColored("Welcome to Scrapey CLI!", "", color.FgCyan) @@ -60,16 +75,28 @@ func main() { os.Exit(1) } - // If a URL is provided via the command line, override the configuration's base URL. + // Construct a partial Config struct for CLI overrides. + cliOverrides := config.Config{} + + // Apply URL override if provided. if url != "" { - utils.PrintColored("Overriding config with URL flag: ", url, color.FgHiMagenta) - cfg.URL.Base = url + cliOverrides.URL.Base = url } - // Print confirmation of loaded config. - utils.PrintColored("Loaded config from: ", configPath, color.FgHiGreen) + // Apply maxDepth override if provided. 
+ if maxDepth > 0 { + cliOverrides.ScrapingOptions.MaxDepth = maxDepth + } - // Indicate that initialization is complete by printing a success message in green. + // Apply rateLimit override if provided. + if rateLimit > 0 { + cliOverrides.ScrapingOptions.RateLimit = rateLimit + } + + // Apply all CLI overrides dynamically. + cfg.OverrideWithCLI(cliOverrides) + + // Indicate that initialization is complete. utils.PrintColored("Scrapey CLI initialization complete.", "", color.FgGreen) // Print which routes will be scraped. diff --git a/cmd/scrapeycli/main_test.go b/cmd/scrapeycli/main_test.go index 6a4488e..f160fc5 100644 --- a/cmd/scrapeycli/main_test.go +++ b/cmd/scrapeycli/main_test.go @@ -15,7 +15,7 @@ It sets the working directory to the project root (two levels up from cmd/scrape and returns the combined output along with any error. Parameters: - - t: The current testing context (not used directly, but conforms to typical test helper function signatures). + - t: The current testing context. - args: A variadic list of arguments to be passed to the go run command. Usage: @@ -31,39 +31,39 @@ func runMainCommand(_ *testing.T, args ...string) (string, error) { /* TestFlagRegistration verifies that all necessary command-line flags are properly registered. -The application depends on these flags for configuration input and URL overrides. - -Checks: - - "config" and "c" flags - - "url" flag +The application depends on these flags for configuration input and CLI overrides. 
*/ func TestFlagRegistration(t *testing.T) { - if f := flag.Lookup("config"); f == nil { - t.Error("Expected flag 'config' to be registered") - } - if f := flag.Lookup("c"); f == nil { - t.Error("Expected shorthand flag 'c' to be registered") - } - if f := flag.Lookup("url"); f == nil { - t.Error("Expected flag 'url' to be registered") + expectedFlags := []string{"config", "c", "url", "maxDepth", "rateLimit"} + for _, flagName := range expectedFlags { + if f := flag.Lookup(flagName); f == nil { + t.Errorf("Expected flag '%s' to be registered", flagName) + } } } /* -TestMainExecution runs the main program with a valid configuration file and checks for the expected output. +TestMainExecution runs the main program without CLI arguments (no --config flag) +and ensures it initializes correctly. */ func TestMainExecution(t *testing.T) { - output, err := runMainCommand(t, "--config", "configs/default.json") + output, err := runMainCommand(t) if err != nil { t.Fatalf("Failed to run main.go: %v\nOutput: %s", err, output) } - if !strings.Contains(output, "Welcome to Scrapey CLI!") { - t.Errorf("Expected welcome message not found in output.\nOutput: %s", output) + // Define expected phrases used multiple times + requiredPhrases := []string{ + "Welcome to Scrapey CLI!", + "Scrapey CLI initialization complete.", + "Base URL: https://example.com", } - if !strings.Contains(output, "Base URL: https://example.com") { - t.Errorf("Expected base URL output not found.\nOutput: %s", output) + // Validate presence of required phrases + for _, phrase := range requiredPhrases { + if !strings.Contains(output, phrase) { + t.Errorf("Expected output to contain '%s'.\nOutput: %s", phrase, output) + } } } @@ -77,6 +77,7 @@ func TestMainConfigFailure(t *testing.T) { t.Fatalf("Expected failure due to config load error, but got success") } + // Validate correct exit behavior if exitErr, ok := err.(*exec.ExitError); ok { if exitErr.ExitCode() != 1 { t.Errorf("Expected exit code 1, got %d", 
exitErr.ExitCode()) @@ -87,17 +88,42 @@ func TestMainConfigFailure(t *testing.T) { } /* -TestURLOverride verifies that specifying a URL via CLI correctly overrides the Base URL. +TestCLIOverrides verifies that CLI arguments correctly override the configuration. + +It ensures that: + - The base URL can be overridden. + - Scraping depth (maxDepth) can be overridden. + - Rate limit can be overridden. + +The test matches "label: value" substrings of the program output rather than whole printed lines. */ -func TestURLOverride(t *testing.T) { - output, err := runMainCommand(t, "--config", "configs/default.json", "--url", "https://example.org") +func TestCLIOverrides(t *testing.T) { + // CLI argument values (used multiple times) + newBaseURL := "https://cli-example.com" + newMaxDepth := "10" + newRateLimit := "2.5" + + // Run command + output, err := runMainCommand(t, + "--url", newBaseURL, + "--maxDepth", newMaxDepth, + "--rateLimit", newRateLimit, + ) if err != nil { - t.Fatalf("Failed to run main.go with URL override: %v\nOutput: %s", err, output) + t.Fatalf("Failed to run main.go with CLI overrides: %v\nOutput: %s", err, output) } - if !strings.Contains(output, "Overriding config with URL flag:") { - t.Errorf("Expected URL override message not found in output.\nOutput: %s", output) + + // Expected CLI override outputs (used multiple times) + expectedOutputs := map[string]string{ + "Base URL: ": newBaseURL, + "ScrapingOptions.MaxDepth: ": newMaxDepth, + "ScrapingOptions.RateLimit: ": newRateLimit, } - if !strings.Contains(output, "Base URL: https://example.org") { - t.Errorf("Expected overridden URL not found in output.\nOutput: %s", output) + + // Validate overrides dynamically + for key, expected := range expectedOutputs { + if !strings.Contains(output, key+expected) { + t.Errorf("Expected override '%s%s' not found in output.\nOutput: %s", key, expected, output) + } } } diff --git a/pkg/config/config.go b/pkg/config/config.go index 040ee4f..175f3de 100644 --- a/pkg/config/config.go +++ 
b/pkg/config/config.go @@ -6,11 +6,20 @@ import ( "encoding/json" "fmt" "os" + "reflect" "github.com/fatih/color" "github.com/heinrichb/scrapey-cli/pkg/utils" ) +/* +Global Verbose flag. + +This flag determines whether verbose output is enabled. +It is set in `main.go` and used throughout the application. +*/ +var Verbose bool + /* Config holds configuration data used by Scrapey CLI. @@ -55,6 +64,48 @@ type Config struct { } `json:"dataFormatting"` } +/* +ApplyDefaults populates missing fields in the Config struct with default values. + +Usage: + + cfg.ApplyDefaults() + +Notes: + - Ensures that a missing Base URL defaults to "https://example.com". + - Sets default scraping and storage parameters. + - Provides a sensible fallback for all configurable values. +*/ +func (cfg *Config) ApplyDefaults() { + if cfg.URL.Base == "" { + cfg.URL.Base = "https://example.com" + } + if len(cfg.URL.Routes) == 0 { + cfg.URL.Routes = []string{"/"} + } + if cfg.ScrapingOptions.MaxDepth == 0 { + cfg.ScrapingOptions.MaxDepth = 2 + } + if cfg.ScrapingOptions.RateLimit == 0 { + cfg.ScrapingOptions.RateLimit = 1.5 + } + if cfg.ScrapingOptions.RetryAttempts == 0 { + cfg.ScrapingOptions.RetryAttempts = 3 + } + if cfg.ScrapingOptions.UserAgent == "" { + cfg.ScrapingOptions.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + } + if len(cfg.Storage.OutputFormats) == 0 { + cfg.Storage.OutputFormats = []string{"json"} + } + if cfg.Storage.SavePath == "" { + cfg.Storage.SavePath = "output/" + } + if cfg.Storage.FileName == "" { + cfg.Storage.FileName = "scraped_data" + } +} + /* Load reads configuration data from the specified filePath. @@ -72,33 +123,108 @@ Usage: // Handle error } // Use cfg to configure the application. - -Notes: - - This function uses os.ReadFile to read the file. - - It prints a confirmation message in high-intensity green using the PrintColored utility. - - It then calls PrintNonEmptyFields from the utils package to display non-empty config fields. 
*/ func Load(filePath string) (*Config, error) { if _, err := os.Stat(filePath); os.IsNotExist(err) { return nil, fmt.Errorf("config file %s does not exist", filePath) } - // Print confirmation that the config was loaded, using our PrintColored utility. utils.PrintColored("Loaded config from: ", filePath, color.FgHiGreen) - // Read file contents using os.ReadFile. content, err := os.ReadFile(filePath) if err != nil { return nil, fmt.Errorf("failed to read config file: %v", err) } - // Unmarshal JSON into a Config struct. var cfg Config if err := json.Unmarshal(content, &cfg); err != nil { return nil, fmt.Errorf("invalid JSON in config file: %v", err) } - // Print non-empty configuration fields using a utility function. - utils.PrintNonEmptyFields("", cfg) + // Apply default values where necessary. + cfg.ApplyDefaults() + + // **Verbose Mode: Print Non-Empty Fields** + if Verbose { + utils.PrintNonEmptyFields("", cfg) + } + return &cfg, nil } + +/* +OverrideWithCLI dynamically overrides config values based on the provided `overrides` struct. + +Parameters: + - overrides: A partial Config struct containing only the fields to override. + +Usage: + + cfg.OverrideWithCLI(Config{ + URL: struct { + Base string `json:"base"` + Routes []string `json:"routes"` + IncludeBase bool `json:"includeBase"` + }{ + Base: "https://example.org", + }, + ScrapingOptions: struct { + MaxDepth int `json:"maxDepth"` + RateLimit float64 `json:"rateLimit"` + RetryAttempts int `json:"retryAttempts"` + UserAgent string `json:"userAgent"` + }{ + MaxDepth: 5, + }, + }) + +Notes: + - Only **non-zero** values in `overrides` are applied. + - Uses **reflection** to dynamically override values while maintaining type safety. 
+*/ +func (cfg *Config) OverrideWithCLI(overrides Config) { + cfgValue := reflect.ValueOf(cfg).Elem() + overridesValue := reflect.ValueOf(overrides) + + for i := 0; i < overridesValue.NumField(); i++ { + field := overridesValue.Type().Field(i) + overrideField := overridesValue.Field(i) + configField := cfgValue.FieldByName(field.Name) + + if !configField.IsValid() || !configField.CanSet() { + continue + } + + if overrideField.Kind() == reflect.Struct { + for j := 0; j < overrideField.NumField(); j++ { + subField := overrideField.Type().Field(j) + overrideSubField := overrideField.Field(j) + configSubField := configField.FieldByName(subField.Name) + + if !configSubField.IsValid() || !configSubField.CanSet() { + continue + } + + // **Skip empty slices** + if overrideSubField.Kind() == reflect.Slice && overrideSubField.Len() == 0 { + continue + } + + if !overrideSubField.IsZero() { + utils.PrintColored(fmt.Sprintf("Overriding %s.%s: ", field.Name, subField.Name), fmt.Sprint(overrideSubField.Interface()), color.FgHiMagenta) + configSubField.Set(overrideSubField) + } + } + } else { + // **Skip empty slices** + if overrideField.Kind() == reflect.Slice && overrideField.Len() == 0 { + continue + } + + if !overrideField.IsZero() { + utils.PrintColored(fmt.Sprintf("Overriding %s: ", field.Name), fmt.Sprint(overrideField.Interface()), color.FgHiMagenta) + configField.Set(overrideField) + } + } + } +} diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 741a4dc..2a0fcee 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -4,16 +4,14 @@ package config import ( "os" + "reflect" "testing" ) /* -TestLoadValidConfig creates a temporary file with valid JSON configuration data, -writes a valid Base URL value, and attempts to load the configuration using Load. -It verifies that the returned Config object contains the expected Base URL. +TestLoadValidConfig ensures that loading a valid config file works as expected. 
*/ func TestLoadValidConfig(t *testing.T) { - // Create a temporary file with valid JSON using os.CreateTemp. tmpFile, err := os.CreateTemp("", "valid_config_*.json") if err != nil { t.Fatalf("Failed to create temp file: %v", err) @@ -24,49 +22,51 @@ func TestLoadValidConfig(t *testing.T) { if _, err := tmpFile.Write([]byte(validJSON)); err != nil { t.Fatalf("Failed to write to temp file: %v", err) } - tmpFile.Close() // Close the file so it can be read by Load. + tmpFile.Close() - // Load the configuration from the temporary file. cfg, err := Load(tmpFile.Name()) if err != nil { t.Fatalf("Expected valid config, got error: %v", err) } - // Check that the Base URL in the configuration matches the expected value. - if cfg.URL.Base != "http://example.org" { - t.Errorf("Expected Base URL 'http://example.org', got '%s'", cfg.URL.Base) + expectedBaseURL := "http://example.org" + expectedRoute := "/test" + + if cfg.URL.Base != expectedBaseURL { + t.Errorf("Expected Base URL '%s', got '%s'", expectedBaseURL, cfg.URL.Base) } - // Check that the IncludeBase field is set correctly. if !cfg.URL.IncludeBase { t.Errorf("Expected IncludeBase to be true, got false") } - // Ensure at least one route exists. - if len(cfg.URL.Routes) == 0 || cfg.URL.Routes[0] != "/test" { - t.Errorf("Expected routes to include '/test', got %v", cfg.URL.Routes) + if len(cfg.URL.Routes) == 0 || cfg.URL.Routes[0] != expectedRoute { + t.Errorf("Expected routes to include '%s', got %v", expectedRoute, cfg.URL.Routes) } } /* -TestLoadNonexistentFile attempts to load a configuration from a non-existent file -and verifies that Load returns an error. +TestLoadEmptyFile ensures that loading an empty (zero-byte) config file returns an error. */ -func TestLoadNonexistentFile(t *testing.T) { - // Attempt to load a config from a non-existent file. 
- _, err := Load("nonexistent_file.json") +func TestLoadEmptyFile(t *testing.T) { + tmpFile, err := os.CreateTemp("", "empty_config_*.json") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + tmpFile.Close() // Empty file + + _, err = Load(tmpFile.Name()) if err == nil { - t.Fatalf("Expected error for non-existent file, got nil") + t.Fatalf("Expected an error for empty config file, got nil") } } /* -TestLoadInvalidJSON creates a temporary file with invalid JSON content -(missing a closing brace) and attempts to load the configuration. -The test confirms that an error is returned due to invalid JSON. +TestLoadInvalidJSON ensures that a badly formatted JSON file correctly returns an error. */ func TestLoadInvalidJSON(t *testing.T) { - // Create a temporary file with invalid JSON using os.CreateTemp. tmpFile, err := os.CreateTemp("", "invalid_config_*.json") if err != nil { t.Fatalf("Failed to create temp file: %v", err) @@ -79,9 +79,100 @@ func TestLoadInvalidJSON(t *testing.T) { } tmpFile.Close() - // Attempt to load the configuration from the temporary file. _, err = Load(tmpFile.Name()) if err == nil { t.Fatalf("Expected error for invalid JSON, got nil") } } + +/* +TestApplyDefaults verifies that ApplyDefaults correctly sets default values. +*/ +func TestApplyDefaults(t *testing.T) { + cfg := &Config{} + cfg.ApplyDefaults() + + expectedBaseURL := "https://example.com" + expectedOutputFormat := "json" + + if cfg.URL.Base != expectedBaseURL { + t.Errorf("Expected default Base URL '%s', got '%s'", expectedBaseURL, cfg.URL.Base) + } + if len(cfg.Storage.OutputFormats) == 0 || cfg.Storage.OutputFormats[0] != expectedOutputFormat { + t.Errorf("Expected default output format '%s', got %v", expectedOutputFormat, cfg.Storage.OutputFormats) + } +} + +/* +TestOverrideWithCLI ensures that OverrideWithCLI dynamically updates config values. 
+*/ +func TestOverrideWithCLI(t *testing.T) { + cfg := &Config{} + cfg.ApplyDefaults() + + newBaseURL := "https://cli-example.com" + newRoutes := []string{"/cli-route1", "/cli-route2"} + newMaxDepth := 10 + newUserAgent := "Custom CLI UserAgent" + + overrides := Config{ + URL: struct { + Base string `json:"base"` + Routes []string `json:"routes"` + IncludeBase bool `json:"includeBase"` + }{ + Base: newBaseURL, + Routes: newRoutes, + IncludeBase: true, + }, + ScrapingOptions: struct { + MaxDepth int `json:"maxDepth"` + RateLimit float64 `json:"rateLimit"` + RetryAttempts int `json:"retryAttempts"` + UserAgent string `json:"userAgent"` + }{ + MaxDepth: newMaxDepth, + UserAgent: newUserAgent, + }, + } + + cfg.OverrideWithCLI(overrides) + + if cfg.URL.Base != newBaseURL { + t.Errorf("Expected Base URL to be overridden to '%s', got '%s'", newBaseURL, cfg.URL.Base) + } + + if len(cfg.URL.Routes) != len(newRoutes) || cfg.URL.Routes[0] != newRoutes[0] || cfg.URL.Routes[1] != newRoutes[1] { + t.Errorf("Expected Routes to be '%v', got '%v'", newRoutes, cfg.URL.Routes) + } + + if !cfg.URL.IncludeBase { + t.Errorf("Expected IncludeBase to be true, got false") + } + + if cfg.ScrapingOptions.MaxDepth != newMaxDepth { + t.Errorf("Expected MaxDepth to be overridden to %d, got '%d'", newMaxDepth, cfg.ScrapingOptions.MaxDepth) + } + + if cfg.ScrapingOptions.UserAgent != newUserAgent { + t.Errorf("Expected UserAgent to be '%s', got '%s'", newUserAgent, cfg.ScrapingOptions.UserAgent) + } +} + +/* +TestOverrideWithEmptyCLI ensures that OverrideWithCLI does nothing if no values are provided. +*/ +func TestOverrideWithEmptyCLI(t *testing.T) { + cfg := &Config{} + cfg.ApplyDefaults() + + // Take a shallow copy of the original config for comparison (slice fields still share backing arrays). + originalConfig := *cfg + + cfg.OverrideWithCLI(Config{}) // Pass an empty config override + + // Use reflect.DeepEqual to compare struct contents. 
+ if !reflect.DeepEqual(*cfg, originalConfig) { + t.Errorf("Expected config to remain unchanged when empty overrides are applied.\nExpected: %+v\nGot: %+v", originalConfig, *cfg) + } +}