Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 38 additions & 11 deletions cmd/scrapeycli/main.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// File: cmd/scrapeycli/main.go

package main

import (
Expand All @@ -16,34 +14,51 @@ Global variables for storing command-line arguments.

- configPath: The path to the configuration file.
- url: The URL to be scraped, which may override the URL in the config.
- maxDepth: Overrides the scraping depth if set.
- rateLimit: Overrides the request rate limit.
- verbose: Enables verbose output.
*/
var (
	// String inputs: the config file location ("config" / "c") and the
	// optional base-URL override ("url"). Empty means "not supplied".
	configPath, url string

	// Numeric overrides; main applies each one only when it is greater than zero.
	maxDepth  int
	rateLimit float64

	// Verbose switch ("verbose" / "v"); main copies it into config.Verbose.
	verbose bool
)

/*
init registers command-line flags for configuration.

It sets up two flags for the config file ("config" and its shorthand "c")
and a flag for the URL override.
It sets up flags for:
- The config file ("config" and its shorthand "c").
- URL override.
- Scraping depth override.
- Rate limit override.
- Verbose output ("verbose" and its shorthand "v").
*/
func init() {
	// The package-level flag.XxxVar helpers are wrappers around the default
	// flag set, so registering on it directly is equivalent.
	cli := flag.CommandLine

	// Config file path: long form and "-c" shorthand write to the same variable.
	cli.StringVar(&configPath, "c", "", "Path to config file (shorthand)")
	cli.StringVar(&configPath, "config", "", "Path to config file")

	// Per-run overrides of values that otherwise come from the config file.
	cli.StringVar(&url, "url", "", "URL to scrape (overrides config)")
	cli.IntVar(&maxDepth, "maxDepth", 0, "Override max crawl depth")
	cli.Float64Var(&rateLimit, "rateLimit", 0, "Override request rate limit (seconds)")

	// Verbose output: long form and "-v" shorthand share one variable.
	cli.BoolVar(&verbose, "v", false, "Enable verbose output (shorthand)")
	cli.BoolVar(&verbose, "verbose", false, "Enable verbose output")
}

/*
main is the entry point of Scrapey CLI.

It parses command-line flags, prints a welcome message, loads the configuration,
handles URL overrides, and prints confirmation messages for each step.
handles overrides, and prints confirmation messages for each step.
*/
func main() {
// Parse CLI flags.
flag.Parse()

// Store the verbose flag in global state
config.Verbose = verbose

// Print a welcome message in cyan using our PrintColored utility.
utils.PrintColored("Welcome to Scrapey CLI!", "", color.FgCyan)

Expand All @@ -60,16 +75,28 @@ func main() {
os.Exit(1)
}

// If a URL is provided via the command line, override the configuration's base URL.
// Construct a partial Config struct for CLI overrides.
cliOverrides := config.Config{}

// Apply URL override if provided.
if url != "" {
utils.PrintColored("Overriding config with URL flag: ", url, color.FgHiMagenta)
cfg.URL.Base = url
cliOverrides.URL.Base = url
}

// Print confirmation of loaded config.
utils.PrintColored("Loaded config from: ", configPath, color.FgHiGreen)
// Apply maxDepth override if provided.
if maxDepth > 0 {
cliOverrides.ScrapingOptions.MaxDepth = maxDepth
}

// Indicate that initialization is complete by printing a success message in green.
// Apply rateLimit override if provided.
if rateLimit > 0 {
cliOverrides.ScrapingOptions.RateLimit = rateLimit
}

// Apply all CLI overrides dynamically.
cfg.OverrideWithCLI(cliOverrides)

// Indicate that initialization is complete by printing a success message in green.
utils.PrintColored("Scrapey CLI initialization complete.", "", color.FgGreen)

// Print which routes will be scraped.
Expand Down
82 changes: 54 additions & 28 deletions cmd/scrapeycli/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ It sets the working directory to the project root (two levels up from cmd/scrape
and returns the combined output along with any error.

Parameters:
- t: The current testing context (not used directly, but conforms to typical test helper function signatures).
- t: The current testing context.
- args: A variadic list of arguments to be passed to the go run command.

Usage:
Expand All @@ -31,39 +31,39 @@ func runMainCommand(_ *testing.T, args ...string) (string, error) {

/*
TestFlagRegistration verifies that all necessary command-line flags are properly registered.
The application depends on these flags for configuration input and URL overrides.

Checks:
- "config" and "c" flags
- "url" flag
The application depends on these flags for configuration input and CLI overrides.
*/
func TestFlagRegistration(t *testing.T) {
	// Every flag name registered by init, including the "c" and "v" shorthands.
	// "verbose" and "v" are listed so that dropping either registration is
	// caught here as well.
	expectedFlags := []string{"config", "c", "url", "maxDepth", "rateLimit", "verbose", "v"}

	// flag.Lookup returns nil for a name that was never registered on the
	// default flag set. t.Errorf is used so all missing flags are reported in
	// a single run.
	for _, flagName := range expectedFlags {
		if flag.Lookup(flagName) == nil {
			t.Errorf("Expected flag '%s' to be registered", flagName)
		}
	}
}

/*
TestMainExecution runs the main program with a valid configuration file and checks for the expected output.
TestMainExecution runs the main program with a valid configuration file
and ensures it initializes correctly.
*/
func TestMainExecution(t *testing.T) {
output, err := runMainCommand(t, "--config", "configs/default.json")
output, err := runMainCommand(t)
// Abort on a failed run; the captured output is included to aid diagnosis.
if err != nil {
t.Fatalf("Failed to run main.go: %v\nOutput: %s", err, output)
}

if !strings.Contains(output, "Welcome to Scrapey CLI!") {
t.Errorf("Expected welcome message not found in output.\nOutput: %s", output)
// Substrings that must all appear somewhere in the program's output.
requiredPhrases := []string{
"Welcome to Scrapey CLI!",
"Scrapey CLI initialization complete.",
"Base URL: https://example.com",
}

if !strings.Contains(output, "Base URL: https://example.com") {
t.Errorf("Expected base URL output not found.\nOutput: %s", output)
// Report every missing phrase; t.Errorf records the failure without stopping the loop.
for _, phrase := range requiredPhrases {
if !strings.Contains(output, phrase) {
t.Errorf("Expected output to contain '%s'.\nOutput: %s", phrase, output)
}
}
}

Expand All @@ -77,6 +77,7 @@ func TestMainConfigFailure(t *testing.T) {
t.Fatalf("Expected failure due to config load error, but got success")
}

// Validate correct exit behavior
if exitErr, ok := err.(*exec.ExitError); ok {
if exitErr.ExitCode() != 1 {
t.Errorf("Expected exit code 1, got %d", exitErr.ExitCode())
Expand All @@ -87,17 +88,42 @@ func TestMainConfigFailure(t *testing.T) {
}

/*
TestURLOverride verifies that specifying a URL via CLI correctly overrides the Base URL.
TestCLIOverrides verifies that CLI arguments correctly override the configuration.

It ensures that:
- The base URL can be overridden.
- Scraping depth (maxDepth) can be overridden.
- Rate limit can be overridden.

The test checks only that each `<label><value>` fragment appears somewhere in the output, rather than matching whole printed lines, to reduce fragility.
*/
func TestURLOverride(t *testing.T) {
output, err := runMainCommand(t, "--config", "configs/default.json", "--url", "https://example.org")
func TestCLIOverrides(t *testing.T) {
// Override values, kept as strings because each is both passed as a CLI
// argument and matched verbatim in the output below.
newBaseURL := "https://cli-example.com"
newMaxDepth := "10"
newRateLimit := "2.5"

// Run the CLI with all three override flags at once.
output, err := runMainCommand(t,
"--url", newBaseURL,
"--maxDepth", newMaxDepth,
"--rateLimit", newRateLimit,
)
// Abort on a failed run; the captured output is included to aid diagnosis.
if err != nil {
t.Fatalf("Failed to run main.go with URL override: %v\nOutput: %s", err, output)
t.Fatalf("Failed to run main.go with CLI overrides: %v\nOutput: %s", err, output)
}
if !strings.Contains(output, "Overriding config with URL flag:") {
t.Errorf("Expected URL override message not found in output.\nOutput: %s", output)

// Output label -> value expected to follow it immediately.
expectedOutputs := map[string]string{
"Base URL: ": newBaseURL,
"ScrapingOptions.MaxDepth: ": newMaxDepth,
"ScrapingOptions.RateLimit: ": newRateLimit,
}
if !strings.Contains(output, "Base URL: https://example.org") {
t.Errorf("Expected overridden URL not found in output.\nOutput: %s", output)

// Check each label+value pair. Map iteration order is unspecified, so
// failures may be reported in any order.
for key, expected := range expectedOutputs {
if !strings.Contains(output, key+expected) {
t.Errorf("Expected override '%s%s' not found in output.\nOutput: %s", key, expected, output)
}
}
}
Loading