Compare commits
	
		
			9 Commits
		
	
	
		
			e1b994bae1
			...
			main
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| b30c9f39fd | |||
| 5feb4de7a4 | |||
| 7793bbb3a3 | |||
| 6634406a78 | |||
| 2d8532b40e | |||
| c848824583 | |||
| b66b696391 | |||
| 516066d322 | |||
| 85f1377328 | 
							
								
								
									
										5
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,5 @@
 | 
				
			|||||||
 | 
					# Changelog
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## v1.0.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Initial release
 | 
				
			||||||
@@ -12,7 +12,7 @@ gleam add lancaster_stemmer@1
 | 
				
			|||||||
import lancaster_stemmer
 | 
					import lancaster_stemmer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn main() -> Nil {
 | 
					pub fn main() -> Nil {
 | 
				
			||||||
  // TODO: An example of the project in use
 | 
					  lancaster_stemmer.stem("breathe", lancaster_stemmer.default_rules())
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										46
									
								
								dev/benchmark.gleam
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								dev/benchmark.gleam
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,46 @@
 | 
				
			|||||||
 | 
					import gleam/list
 | 
				
			||||||
 | 
					import glychee/benchmark
 | 
				
			||||||
 | 
					import glychee/configuration
 | 
				
			||||||
 | 
					import lancaster_stemmer
 | 
				
			||||||
 | 
					import porter_stemmer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@target(erlang)
 | 
				
			||||||
 | 
					pub fn main() {
 | 
				
			||||||
 | 
					  configuration.initialize()
 | 
				
			||||||
 | 
					  configuration.set_pair(configuration.Warmup, 2)
 | 
				
			||||||
 | 
					  configuration.set_pair(configuration.Parallel, 2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // pop_benchmark()
 | 
				
			||||||
 | 
					  benchmark()
 | 
				
			||||||
 | 
					  // reg_name_benchmark()
 | 
				
			||||||
 | 
					  // ip_benchmark()
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@target(erlang)
 | 
				
			||||||
 | 
					fn benchmark() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  benchmark.run(
 | 
				
			||||||
 | 
					    [
 | 
				
			||||||
 | 
					      benchmark.Function("Lancaster", fn(data) {
 | 
				
			||||||
 | 
					        fn() { list.each(data, lancaster_stemmer.stem(_, rules)) }
 | 
				
			||||||
 | 
					      }),
 | 
				
			||||||
 | 
					      benchmark.Function("Porter", fn(data) {
 | 
				
			||||||
 | 
					        fn() { list.each(data, porter_stemmer.stem) }
 | 
				
			||||||
 | 
					      }),
 | 
				
			||||||
 | 
					    ],
 | 
				
			||||||
 | 
					    [
 | 
				
			||||||
 | 
					      benchmark.Data("10 words", [
 | 
				
			||||||
 | 
					        "abbreviate",
 | 
				
			||||||
 | 
					        "aberdeen",
 | 
				
			||||||
 | 
					        "abode",
 | 
				
			||||||
 | 
					        "abovementioned",
 | 
				
			||||||
 | 
					        "absent",
 | 
				
			||||||
 | 
					        "adherent",
 | 
				
			||||||
 | 
					        "adhesion",
 | 
				
			||||||
 | 
					        "adhesive",
 | 
				
			||||||
 | 
					        "adiabatic",
 | 
				
			||||||
 | 
					        "anisotropic",
 | 
				
			||||||
 | 
					      ]),
 | 
				
			||||||
 | 
					    ],
 | 
				
			||||||
 | 
					  )
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -19,3 +19,5 @@ splitter = ">= 1.1.0 and < 2.0.0"
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
[dev-dependencies]
 | 
					[dev-dependencies]
 | 
				
			||||||
gleeunit = ">= 1.0.0 and < 2.0.0"
 | 
					gleeunit = ">= 1.0.0 and < 2.0.0"
 | 
				
			||||||
 | 
					glychee = ">= 1.1.2 and < 2.0.0"
 | 
				
			||||||
 | 
					porter_stemmer = ">= 1.0.0 and < 2.0.0"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,15 +2,23 @@
 | 
				
			|||||||
# You typically do not need to edit this file
 | 
					# You typically do not need to edit this file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
packages = [
 | 
					packages = [
 | 
				
			||||||
 | 
					  { name = "benchee", version = "1.5.0", build_tools = ["mix"], requirements = ["deep_merge", "statistex", "table"], otp_app = "benchee", source = "hex", outer_checksum = "5B075393AEA81B8AE74EADD1C28B1D87E8A63696C649D8293DB7C4DF3EB67535" },
 | 
				
			||||||
 | 
					  { name = "deep_merge", version = "1.0.0", build_tools = ["mix"], requirements = [], otp_app = "deep_merge", source = "hex", outer_checksum = "CE708E5F094B9CD4E8F2BE4F00D2F4250C4095BE93F8CD6D018C753894885430" },
 | 
				
			||||||
  { name = "filepath", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "B06A9AF0BF10E51401D64B98E4B627F1D2E48C154967DA7AF4D0914780A6D40A" },
 | 
					  { name = "filepath", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "B06A9AF0BF10E51401D64B98E4B627F1D2E48C154967DA7AF4D0914780A6D40A" },
 | 
				
			||||||
  { name = "gleam_stdlib", version = "0.65.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "7C69C71D8C493AE11A5184828A77110EB05A7786EBF8B25B36A72F879C3EE107" },
 | 
					  { name = "gleam_stdlib", version = "0.65.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "7C69C71D8C493AE11A5184828A77110EB05A7786EBF8B25B36A72F879C3EE107" },
 | 
				
			||||||
  { name = "gleeunit", version = "1.7.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "CD701726CBCE5588B375D157B4391CFD0F2F134CD12D9B6998A395484DE05C58" },
 | 
					  { name = "gleeunit", version = "1.8.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "7AE0F64B26CC065ED705FF7CA5F4EDAB8015E72A883736FE251E46FACCCE1E08" },
 | 
				
			||||||
 | 
					  { name = "glychee", version = "1.1.2", build_tools = ["gleam"], requirements = ["benchee"], otp_app = "glychee", source = "hex", outer_checksum = "41784216C213F223095BB3FC3EDDB60CC537835B2340A868EA3931193F7F3824" },
 | 
				
			||||||
 | 
					  { name = "porter_stemmer", version = "1.0.0", build_tools = ["gleam"], requirements = ["porter_stemming"], otp_app = "porter_stemmer", source = "hex", outer_checksum = "02248CA76802B75BE1EE7EE1878BAD088088E67E791ECE6813128B965560C99C" },
 | 
				
			||||||
 | 
					  { name = "porter_stemming", version = "1.0.1", build_tools = ["rebar3"], requirements = [], otp_app = "porter_stemming", source = "hex", outer_checksum = "8531E709A731C9A6A52477C44175411A6B5F5327CF55C18D9B9F5FD701C606B0" },
 | 
				
			||||||
  { name = "simplifile", version = "2.3.0", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "0A868DAC6063D9E983477981839810DC2E553285AB4588B87E3E9C96A7FB4CB4" },
 | 
					  { name = "simplifile", version = "2.3.0", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "0A868DAC6063D9E983477981839810DC2E553285AB4588B87E3E9C96A7FB4CB4" },
 | 
				
			||||||
  { name = "splitter", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "splitter", source = "hex", outer_checksum = "05564A381580395DCDEFF4F88A64B021E8DAFA6540AE99B4623962F52976AA9D" },
 | 
					  { name = "splitter", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "splitter", source = "hex", outer_checksum = "05564A381580395DCDEFF4F88A64B021E8DAFA6540AE99B4623962F52976AA9D" },
 | 
				
			||||||
 | 
					  { name = "statistex", version = "1.1.0", build_tools = ["mix"], requirements = [], otp_app = "statistex", source = "hex", outer_checksum = "F5950EA26AD43246BA2CCE54324AC394A4E7408FDCF98B8E230F503A0CBA9CF5" },
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[requirements]
 | 
					[requirements]
 | 
				
			||||||
gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" }
 | 
					gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" }
 | 
				
			||||||
gleeunit = { version = ">= 1.0.0 and < 2.0.0" }
 | 
					gleeunit = { version = ">= 1.0.0 and < 2.0.0" }
 | 
				
			||||||
 | 
					glychee = { version = ">= 1.1.2 and < 2.0.0" }
 | 
				
			||||||
 | 
					porter_stemmer = { version = ">= 1.0.0 and < 2.0.0" }
 | 
				
			||||||
simplifile = { version = ">= 2.3.0 and < 3.0.0" }
 | 
					simplifile = { version = ">= 2.3.0 and < 3.0.0" }
 | 
				
			||||||
splitter = { version = ">= 1.1.0 and < 2.0.0" }
 | 
					splitter = { version = ">= 1.1.0 and < 2.0.0" }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,7 +1,6 @@
 | 
				
			|||||||
import gleam/bool
 | 
					import gleam/bool
 | 
				
			||||||
import gleam/dict
 | 
					import gleam/dict
 | 
				
			||||||
import gleam/int
 | 
					import gleam/int
 | 
				
			||||||
import gleam/io
 | 
					 | 
				
			||||||
import gleam/list
 | 
					import gleam/list
 | 
				
			||||||
import gleam/option.{None, Some}
 | 
					import gleam/option.{None, Some}
 | 
				
			||||||
import gleam/result
 | 
					import gleam/result
 | 
				
			||||||
@@ -237,18 +236,26 @@ const default_rules_list = [
 | 
				
			|||||||
  ),
 | 
					  ),
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn main() {
 | 
					/// Constructs the default ruleset
 | 
				
			||||||
  io.println("Hello from paicehusk!")
 | 
					 | 
				
			||||||
  let assert Ok(rules) = load_rules("paice-husk-rules.txt")
 | 
					 | 
				
			||||||
  stem("abominable", rules)
 | 
					 | 
				
			||||||
  |> echo
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn default_rules() -> Rules {
 | 
					pub fn default_rules() -> Rules {
 | 
				
			||||||
  dict.from_list(default_rules_list)
 | 
					  dict.from_list(default_rules_list)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Lancaster (Paice-Husk) stemming algorithm
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// ## Example
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// ```gleam
 | 
				
			||||||
 | 
					/// lancaster_stemmer.stem("Gleam", lancaster_stemmer.default_rules())
 | 
				
			||||||
 | 
					/// // -> gleam
 | 
				
			||||||
 | 
					/// ```
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// ```gleam
 | 
				
			||||||
 | 
					/// lancaster_stemmer.stem("fancy", lancaster_stemmer.default_rules())
 | 
				
			||||||
 | 
					/// // -> fant
 | 
				
			||||||
 | 
					/// ```
 | 
				
			||||||
pub fn stem(word: String, rules: Rules) -> String {
 | 
					pub fn stem(word: String, rules: Rules) -> String {
 | 
				
			||||||
 | 
					  let word = string.lowercase(word)
 | 
				
			||||||
  case is_valid(word) {
 | 
					  case is_valid(word) {
 | 
				
			||||||
    True -> {
 | 
					    True -> {
 | 
				
			||||||
      do_stem(word, rules, True)
 | 
					      do_stem(word, rules, True)
 | 
				
			||||||
@@ -258,45 +265,75 @@ pub fn stem(word: String, rules: Rules) -> String {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn do_stem(word: String, rules: Rules, intact: Bool) -> String {
 | 
					fn do_stem(word: String, rules: Rules, intact: Bool) -> String {
 | 
				
			||||||
  case string.reverse(word) |> string.pop_grapheme {
 | 
					  case string.reverse(word) {
 | 
				
			||||||
    Ok(#(letter, _)) -> {
 | 
					    "a" as letter <> _
 | 
				
			||||||
      case dict.get(rules, letter) {
 | 
					    | "b" as letter <> _
 | 
				
			||||||
        Ok(specific_rules) -> {
 | 
					    | "c" as letter <> _
 | 
				
			||||||
          let #(stem, restem, intact) =
 | 
					    | "d" as letter <> _
 | 
				
			||||||
            list.fold_until(
 | 
					    | "e" as letter <> _
 | 
				
			||||||
              specific_rules,
 | 
					    | "f" as letter <> _
 | 
				
			||||||
              #(word, False, intact),
 | 
					    | "g" as letter <> _
 | 
				
			||||||
              fn(state, rule) {
 | 
					    | "h" as letter <> _
 | 
				
			||||||
                case rule_matches(rule, word, intact) {
 | 
					    | "i" as letter <> _
 | 
				
			||||||
                  True -> {
 | 
					    | "j" as letter <> _
 | 
				
			||||||
                    let result = apply_rule(rule, word)
 | 
					    | "k" as letter <> _
 | 
				
			||||||
                    case is_valid(result) {
 | 
					    | "l" as letter <> _
 | 
				
			||||||
                      False -> list.Continue(state)
 | 
					    | "m" as letter <> _
 | 
				
			||||||
                      True -> {
 | 
					    | "n" as letter <> _
 | 
				
			||||||
                        list.Stop(#(result, rule.restem, False))
 | 
					    | "o" as letter <> _
 | 
				
			||||||
                      }
 | 
					    | "p" as letter <> _
 | 
				
			||||||
                    }
 | 
					    | "q" as letter <> _
 | 
				
			||||||
                  }
 | 
					    | "r" as letter <> _
 | 
				
			||||||
                  False -> list.Continue(state)
 | 
					    | "s" as letter <> _
 | 
				
			||||||
                }
 | 
					    | "t" as letter <> _
 | 
				
			||||||
              },
 | 
					    | "u" as letter <> _
 | 
				
			||||||
            )
 | 
					    | "v" as letter <> _
 | 
				
			||||||
          case restem {
 | 
					    | "w" as letter <> _
 | 
				
			||||||
            True -> do_stem(stem, rules, intact)
 | 
					    | "x" as letter <> _
 | 
				
			||||||
            False -> stem
 | 
					    | "y" as letter <> _
 | 
				
			||||||
          }
 | 
					    | "z" as letter <> _ -> {
 | 
				
			||||||
        }
 | 
					      case stem_letter(rules, letter, word, intact) {
 | 
				
			||||||
        Error(_) -> word
 | 
					        #(stem, True, intact) -> do_stem(stem, rules, intact)
 | 
				
			||||||
 | 
					        #(stem, _, _) -> stem
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    Error(_) -> word
 | 
					
 | 
				
			||||||
 | 
					    _ -> word
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn stem_letter(
 | 
				
			||||||
 | 
					  rules: dict.Dict(String, List(Rule)),
 | 
				
			||||||
 | 
					  letter: String,
 | 
				
			||||||
 | 
					  word: String,
 | 
				
			||||||
 | 
					  intact: Bool,
 | 
				
			||||||
 | 
					) -> #(String, Bool, Bool) {
 | 
				
			||||||
 | 
					  case dict.get(rules, letter) {
 | 
				
			||||||
 | 
					    Ok(specific_rules) -> {
 | 
				
			||||||
 | 
					      // let #(stem, restem, intact) =
 | 
				
			||||||
 | 
					      list.fold_until(specific_rules, #(word, False, intact), fn(state, rule) {
 | 
				
			||||||
 | 
					        case rule_matches(rule, word, intact) {
 | 
				
			||||||
 | 
					          True -> {
 | 
				
			||||||
 | 
					            let result = apply_rule(rule, word)
 | 
				
			||||||
 | 
					            case is_valid(result) {
 | 
				
			||||||
 | 
					              False -> list.Continue(state)
 | 
				
			||||||
 | 
					              True -> {
 | 
				
			||||||
 | 
					                list.Stop(#(result, rule.restem, False))
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          False -> list.Continue(state)
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      })
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    Error(_) -> #(word, False, False)
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn rule_matches(rule: Rule, word: String, stem_intact: Bool) -> Bool {
 | 
					fn rule_matches(rule: Rule, word: String, stem_intact: Bool) -> Bool {
 | 
				
			||||||
  case !stem_intact && rule.intact {
 | 
					  case stem_intact || !rule.intact {
 | 
				
			||||||
    True -> False
 | 
					    True -> string.ends_with(word, rule.suffix)
 | 
				
			||||||
    False -> string.ends_with(word, rule.suffix)
 | 
					    False -> False
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -325,7 +362,7 @@ fn is_valid_internal(word: String, length: Int) -> Bool {
 | 
				
			|||||||
    | "o" <> rest
 | 
					    | "o" <> rest
 | 
				
			||||||
    | "u" <> rest
 | 
					    | "u" <> rest
 | 
				
			||||||
    | "y" <> rest -> {
 | 
					    | "y" <> rest -> {
 | 
				
			||||||
      { length + 1 + string.length(rest) } >= 3
 | 
					      { length + 1 + string.byte_size(rest) } >= 3
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    _ -> {
 | 
					    _ -> {
 | 
				
			||||||
      is_valid_internal(string.drop_start(word, 1), length + 1)
 | 
					      is_valid_internal(string.drop_start(word, 1), length + 1)
 | 
				
			||||||
@@ -333,6 +370,21 @@ fn is_valid_internal(word: String, length: Int) -> Bool {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Constructs a ruleset from the specified file
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// Format of the file is as follows:
 | 
				
			||||||
 | 
					/// Each line contains a specific rule (order matters)
 | 
				
			||||||
 | 
					/// The rule consists of a string made up of the following parts
 | 
				
			||||||
 | 
					/// | Rule part | Description |
 | 
				
			||||||
 | 
					/// | ------ | ------ |
 | 
				
			||||||
 | 
					/// |suffix|the reverse of the required suffix, e.g. the suffix for winning, ing would be specified gni|
 | 
				
			||||||
 | 
					/// |* (optional)|if the rule is only to be used if a previous rule has not been applied then add an asterisk. For example ht*2. only applies if th is the final suffix, so the stem of breath would be brea but the stem of breathe would be breath because the suffix e has already been removed|
 | 
				
			||||||
 | 
					/// |number of chars to remove|this is the number of characters to remove after the suffix has been matched. For example psychoanalytic has the suffix ytic of which 3 characters should be removed to retain psychoanaly, this would be 'city3'. This can be 0|
 | 
				
			||||||
 | 
					/// |append string (optional)|these are the characters that are appended after the suffix has been matched and the characters removed|
 | 
				
			||||||
 | 
					/// |> or .|If > then the stemming process may continue after this rule has been applied; if . then stemming stops|
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// So for example, to stem `psychoanalytic` to `psychoanalys`, the rule would be `city3s.` (the suffix `ytic` written in reverse, remove 3 characters, append `s`, then stop)
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
pub fn load_rules(filename: String) -> Result(Rules, Nil) {
 | 
					pub fn load_rules(filename: String) -> Result(Rules, Nil) {
 | 
				
			||||||
  case simplifile.read(filename) {
 | 
					  case simplifile.read(filename) {
 | 
				
			||||||
    Error(_) -> Error(Nil)
 | 
					    Error(_) -> Error(Nil)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,7 @@
 | 
				
			|||||||
import gleeunit
 | 
					import gleeunit
 | 
				
			||||||
 | 
					import lancaster_stemmer
 | 
				
			||||||
 | 
					import simplifile
 | 
				
			||||||
 | 
					import splitter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn main() -> Nil {
 | 
					pub fn main() -> Nil {
 | 
				
			||||||
  gleeunit.main()
 | 
					  gleeunit.main()
 | 
				
			||||||
@@ -6,8 +9,29 @@ pub fn main() -> Nil {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
// gleeunit test functions end in `_test`
 | 
					// gleeunit test functions end in `_test`
 | 
				
			||||||
pub fn hello_world_test() {
 | 
					pub fn hello_world_test() {
 | 
				
			||||||
  let name = "Joe"
 | 
					  let line_split = splitter.new(["\n", "\r\n"])
 | 
				
			||||||
  let greeting = "Hello, " <> name <> "!"
 | 
					  let row_split = splitter.new([" ", "\t"])
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
  assert greeting == "Hello, Joe!"
 | 
					  let assert Ok(tests) = simplifile.read("./test/wordlist.txt")
 | 
				
			||||||
 | 
					  run_test(tests, line_split, row_split, rules)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn run_test(
 | 
				
			||||||
 | 
					  tests: String,
 | 
				
			||||||
 | 
					  line_split: splitter.Splitter,
 | 
				
			||||||
 | 
					  row_split: splitter.Splitter,
 | 
				
			||||||
 | 
					  rules: lancaster_stemmer.Rules,
 | 
				
			||||||
 | 
					) -> Nil {
 | 
				
			||||||
 | 
					  case splitter.split(line_split, tests) {
 | 
				
			||||||
 | 
					    #("", "", "") -> Nil
 | 
				
			||||||
 | 
					    #(line, _, rest) -> {
 | 
				
			||||||
 | 
					      case splitter.split(row_split, line) {
 | 
				
			||||||
 | 
					        #("", "", "") -> Nil
 | 
				
			||||||
 | 
					        #(word, _, stem) -> {
 | 
				
			||||||
 | 
					          assert lancaster_stemmer.stem(word, rules) == stem
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      run_test(rest, line_split, row_split, rules)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										51
									
								
								test/stem_cases.gleam
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								test/stem_cases.gleam
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,51 @@
 | 
				
			|||||||
 | 
					import gleeunit
 | 
				
			||||||
 | 
					import lancaster_stemmer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn main() -> Nil {
 | 
				
			||||||
 | 
					  gleeunit.main()
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn stem_abbas_test() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  assert lancaster_stemmer.stem("abbas", rules) == "abba"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn stem_abbas_case_test() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  assert lancaster_stemmer.stem("AbBaS", rules) == "abba"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn stem_accomplish_test() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  assert lancaster_stemmer.stem("accomplish", rules) == "accompl"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn stem_accomplish_upper_test() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  assert lancaster_stemmer.stem("ACCOMPLISH", rules) == "accompl"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn stem_accompaniment_test() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  assert lancaster_stemmer.stem("accompaniment", rules) == "accompany"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn stem_test_test() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  assert lancaster_stemmer.stem("test", rules) == "test"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn stem_tessellate_test() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  assert lancaster_stemmer.stem("tessellate", rules) == "tessel"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn stem_a_invalid_test() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  assert lancaster_stemmer.stem("a", rules) == "a"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn stem_i_invalid_test() {
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
 | 
					  assert lancaster_stemmer.stem("i", rules) == "i"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										25135
									
								
								test/wordlist.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25135
									
								
								test/wordlist.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user