versecafe · versecafe · Jan 25, 2026 · Jan 25, 2026 · Jan 26, 2026 · Jan 26, 2026
diff --git a/README.md b/README.md
@@ -78,17 +78,16 @@ Further documentation can be found at <https://hexdocs.pm/webls>.
 
 ## Utility Support
 
-| Type       | to_string | Builder Functions | Validators |
-| ---------- | --------- | ----------------- | ---------- |
-| Sitemap    | Complete  | Complete          | None       |
-| RSS v2.0   | Complete  | Complete          | None       |
-| Robots.txt | Complete  | Complete          | None       |
-| Atom       | Complete  | Complete          | None       |
+| Type       | Builder Functions | to_string | from_string |
+| ---------- | ----------------- | --------- | ----------- |
+| Sitemap    | Complete          | Complete  | Complete    |
+| RSS v2.0   | Complete          | Complete  | Complete    |
+| Robots.txt | Complete          | Complete  | Complete    |
+| Atom       | Complete          | Complete  | None        |
 
 ## Development
 
 ```sh
-gleam run   # Run the project
 gleam test  # Run the tests
 ```
 

diff --git a/gleam.toml b/gleam.toml
@@ -1,5 +1,5 @@
 name = "webls"
-version = "1.6.1"
+version = "2.0.0"
 
 description = "A simple web utility library for RSS feeds, Sitemaps, Robots.txt, etc."
 licences = ["Apache-2.0"]
@@ -8,6 +8,7 @@ repository = { type = "github", user = "versecafe", repo = "webls" }
 [dependencies]
 gleam_stdlib = ">= 0.34.0 and < 2.0.0"
 gleam_time = ">= 1.6.0 and < 2.0.0"
+parsed_it = ">= 0.1.1 and < 0.2.0"
 
 [dev-dependencies]
 gleeunit = ">= 1.0.0 and < 2.0.0"

diff --git a/manifest.toml b/manifest.toml
@@ -4,13 +4,15 @@
 packages = [
   { name = "filepath", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "B06A9AF0BF10E51401D64B98E4B627F1D2E48C154967DA7AF4D0914780A6D40A" },
   { name = "gleam_stdlib", version = "0.68.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "F7FAEBD8EF260664E86A46C8DBA23508D1D11BB3BCC6EE1B89B3BC3E5C83FF1E" },
-  { name = "gleam_time", version = "1.6.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_time", source = "hex", outer_checksum = "0DF3834D20193F0A38D0EB21F0A78D48F2EC276C285969131B86DF8D4EF9E762" },
+  { name = "gleam_time", version = "1.7.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_time", source = "hex", outer_checksum = "56DB0EF9433826D3B99DB0B4AF7A2BFED13D09755EC64B1DAAB46F804A9AD47D" },
   { name = "gleeunit", version = "1.9.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "DA9553CE58B67924B3C631F96FE3370C49EB6D6DC6B384EC4862CC4AAA718F3C" },
+  { name = "parsed_it", version = "0.1.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "parsed_it", source = "hex", outer_checksum = "9F8BA3C634FEA847AD195E3322FD1DA51980F57C4171B02DCF069C6FC807944A" },
   { name = "simplifile", version = "2.3.2", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "E049B4DACD4D206D87843BCF4C775A50AE0F50A52031A2FFB40C9ED07D6EC70A" },
 ]
 
 [requirements]
 gleam_stdlib = { version = ">= 0.34.0 and < 2.0.0" }
 gleam_time = { version = ">= 1.6.0 and < 2.0.0" }
 gleeunit = { version = ">= 1.0.0 and < 2.0.0" }
+parsed_it = { version = ">= 0.1.1 and < 0.2.0" }
 simplifile = { version = ">= 2.3.2 and < 3.0.0" }
diff --git a/src/webls/robots.gleam b/src/webls/robots.gleam
@@ -1,16 +1,57 @@
+//// Functions for building and parsing robots.txt files.
+////
+//// ## Building a robots.txt
+////
+//// ```gleam
+//// import webls/robots
+////
+//// robots.config("https://example.com/sitemap.xml")
+//// |> robots.with_config_robot(
+////   robots.robot("*")
+////   |> robots.with_robot_disallowed_route("/admin/")
+//// )
+//// |> robots.to_string
+//// ```
+////
+//// ## Parsing a robots.txt
+////
+//// ```gleam
+//// import webls/robots
+////
+//// let assert Ok(config) = robots.from_string(robots_txt_content)
+//// // Access config.sitemap_url and config.robots
+//// ```
+////
+//// The parser handles comments, extra whitespace, and case-insensitive
+//// directives. Unknown directives are ignored. A malformed line (one with
+//// no `:`) makes `from_string` return an error.
+
 import gleam/list
+import gleam/option.{type Option, None, Some}
 import gleam/result
+import gleam/string
 
 // Stringify ------------------------------------------------------------------
 
+/// Renders a RobotsConfig as the text of a robots.txt file.
+///
+/// Layout of the result:
+/// - a `Sitemap:` line followed by a blank line, emitted only when
+///   `config.sitemap_url` is `Some`
+/// - the text of each robot, consecutive robots joined with `\n\n`
+///
+/// A config with neither a sitemap url nor robots renders as an empty
+/// string.
 pub fn to_string(config: RobotsConfig) -> String {
-  "Sitemap: "
-  <> config.sitemap_url
-  <> "\n\n"
-  <> config.robots
-  |> list.map(fn(robot) { robot |> robot_to_string })
-  |> list.reduce(fn(acc, line) { acc <> "\n\n" <> line })
-  |> result.unwrap("")
+  let robot_blocks =
+    config.robots
+    |> list.map(robot_to_string)
+    |> string.join("\n\n")
+
+  case config.sitemap_url {
+    Some(url) -> "Sitemap: " <> url <> "\n\n" <> robot_blocks
+    None -> robot_blocks
+  }
 }
 
 fn robot_to_string(robot: Robot) -> String {
@@ -28,11 +69,24 @@ fn robot_to_string(robot: Robot) -> String {
   |> result.unwrap("")
 }
 
-// Builder Patern -------------------------------------------------------------
+// Builder Pattern ------------------------------------------------------------
 
 /// Creates a robots config with a sitemap url
 pub fn config(sitemap_url: String) -> RobotsConfig {
-  RobotsConfig(sitemap_url: sitemap_url, robots: [])
+  RobotsConfig(sitemap_url: Some(sitemap_url), robots: [])
+}
+
+/// Creates an empty robots config (no robots) that declares no sitemap url
+pub fn config_without_sitemap() -> RobotsConfig {
+  RobotsConfig(sitemap_url: None, robots: [])
+}
+
+/// Sets the sitemap url on a robots config, replacing any url already set
+pub fn with_config_sitemap(
+  config: RobotsConfig,
+  sitemap_url: String,
+) -> RobotsConfig {
+  RobotsConfig(..config, sitemap_url: Some(sitemap_url))
 }
 
 /// Adds a list of robots to the robots config
@@ -58,7 +112,7 @@ pub fn with_robot_allowed_routes(robot: Robot, routes: List(String)) -> Robot {
   Robot(..robot, allowed_routes: list.flatten([robot.allowed_routes, routes]))
 }
 
-/// Adds a allowed route to the robot policy
+/// Adds an allowed route to the robot policy
 pub fn with_robot_allowed_route(robot: Robot, route: String) -> Robot {
   Robot(..robot, allowed_routes: [route, ..robot.allowed_routes])
 }
@@ -81,8 +135,8 @@ pub fn with_robot_disallowed_route(robot: Robot, route: String) -> Robot {
 /// The configuration for a robots.txt file
 pub type RobotsConfig {
   RobotsConfig(
-    /// The url of the sitemap for crawlers to use
-    sitemap_url: String,
+    /// The optional url of the sitemap for crawlers to use
+    sitemap_url: Option(String),
     /// A list of robot policies
     robots: List(Robot),
   )
@@ -99,3 +153,140 @@ pub type Robot {
     disallowed_routes: List(String),
   )
 }
+
+/// Error returned by `from_string` when a robots.txt line is malformed
+pub type RobotsParseError {
+  /// A line with no `:`; `line` is its text after comment removal and trimming
+  InvalidDirective(line: String)
+}
+
+// Parse ----------------------------------------------------------------------
+
+/// Parses the text of a robots.txt file into a RobotsConfig.
+///
+/// Each line has everything from its first `#` onward removed and is then
+/// trimmed; lines left empty are dropped. Directive names are matched
+/// case-insensitively:
+/// - `Sitemap` — the first one supplies `sitemap_url`; later ones are dropped
+/// - `User-agent` — starts a new robot
+/// - `Allow` / `Disallow` — added to the most recent robot, or ignored when
+///   no `User-agent` line precedes them
+/// - any other directive is ignored
+///
+/// Returns `Error(InvalidDirective(line))` for the first remaining line that
+/// has no `:`. Input without directives gives a config with no sitemap url
+/// and no robots.
+pub fn from_string(input: String) -> Result(RobotsConfig, RobotsParseError) {
+  let directives =
+    input
+    |> string.split("\n")
+    |> list.map(fn(raw) { raw |> strip_comment |> string.trim })
+    |> list.filter(fn(line) { line != "" })
+
+  // Reject the whole input up front if any directive line lacks a `:`.
+  use _ <- result.try(validate_lines(directives))
+
+  // Sitemap lines are file-wide, so they are removed before grouping rules.
+  let rule_lines = list.filter(directives, fn(line) { !is_sitemap_line(line) })
+  let robots = parse_robots(rule_lines, [], None)
+  Ok(RobotsConfig(sitemap_url: find_sitemap(directives), robots: robots))
+}
+
+/// Checks that every line contains a `:`; the first line without one is
+/// returned inside `InvalidDirective`. An empty list is valid.
+fn validate_lines(lines: List(String)) -> Result(Nil, RobotsParseError) {
+  let missing_colon = fn(line) { !string.contains(line, ":") }
+
+  case list.find(lines, missing_colon) {
+    // `list.find` stops at the first offender, so that is the line reported.
+    Ok(offender) -> Error(InvalidDirective(offender))
+    Error(_) -> Ok(Nil)
+  }
+}
+
+/// Removes a trailing comment: returns the text before the first `#`, or
+/// `line` unchanged when it contains no `#`.
+fn strip_comment(line: String) -> String {
+  string.split_once(line, "#")
+  |> result.map(fn(parts) { parts.0 })
+  |> result.unwrap(line)
+}
+
+/// Splits `line` at its first `:` into a `#(key, value)` pair with both
+/// parts trimmed. Returns `Error(Nil)` when the line contains no `:`.
+fn split_directive(line: String) -> Result(#(String, String), Nil) {
+  line
+  |> string.split_once(":")
+  |> result.map(fn(parts) { #(string.trim(parts.0), string.trim(parts.1)) })
+}
+
+/// True when the line's directive name is `sitemap`, compared ignoring case.
+fn is_sitemap_line(line: String) -> Bool {
+  split_directive(line)
+  |> result.map(fn(parts) { string.lowercase(parts.0) == "sitemap" })
+  |> result.unwrap(False)
+}
+
+/// Returns the value of the first `Sitemap` directive in `lines`, or `None`
+/// when no line is a sitemap directive.
+fn find_sitemap(lines: List(String)) -> Option(String) {
+  let first_match = list.find(lines, is_sitemap_line)
+
+  case result.try(first_match, split_directive) {
+    Ok(#(_, url)) -> Some(url)
+    // No line carries a sitemap directive.
+    Error(_) -> None
+  }
+}
+
+/// Folds directive lines into robots, returned in input order.
+///
+/// `acc` holds finished robots newest-first and `current` is the robot still
+/// collecting rules. A `User-agent` line finishes `current` and starts a new
+/// robot; any other directive is applied to `current`, or skipped when there
+/// is none. Lines without a `:` are skipped.
+///
+/// NOTE(review): consecutive `User-agent` lines each become their own robot,
+/// so only the last one receives the rules that follow — confirm intended.
+fn parse_robots(
+  lines: List(String),
+  acc: List(Robot),
+  current: Option(Robot),
+) -> List(Robot) {
+  // Finished robots plus the one in progress, still newest-first.
+  let closed = case current {
+    Some(robot) -> [robot, ..acc]
+    None -> acc
+  }
+
+  case lines {
+    [] -> list.reverse(closed)
+    [line, ..rest] -> {
+      let directive =
+        split_directive(line)
+        |> result.map(fn(kv) { #(string.lowercase(kv.0), kv.1) })
+
+      case directive, current {
+        Ok(#("user-agent", agent)), _ ->
+          parse_robots(rest, closed, Some(Robot(agent, [], [])))
+        Ok(#(name, value)), Some(robot) ->
+          parse_robots(rest, acc, Some(parse_directive(name, value, robot)))
+        // A rule with no robot to attach to, or a line without `:`.
+        _, _ -> parse_robots(rest, acc, current)
+      }
+    }
+  }
+}
+
+/// Applies one rule to `robot`. `key` is matched exactly: `"allow"` and
+/// `"disallow"` append `value` to the end of the matching route list; any
+/// other key returns `robot` unchanged.
+fn parse_directive(key: String, value: String, robot: Robot) -> Robot {
+  let appended = fn(routes: List(String)) { list.append(routes, [value]) }
+  case key {
+    "allow" -> Robot(..robot, allowed_routes: appended(robot.allowed_routes))
+    "disallow" ->
+      Robot(..robot, disallowed_routes: appended(robot.disallowed_routes))
+    _ -> robot
+  }
+}