Compare commits
	
		
			5 Commits
		
	
	
		
			e1b994bae1
			...
			v1.0.0
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 2d8532b40e | |||
| c848824583 | |||
| b66b696391 | |||
| 516066d322 | |||
| 85f1377328 | 
							
								
								
									
										5
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,5 @@
 | 
				
			|||||||
 | 
					# Changelog
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## v1.0.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Initial release
 | 
				
			||||||
@@ -12,7 +12,7 @@ gleam add lancaster_stemmer@1
 | 
				
			|||||||
import lancaster_stemmer
 | 
					import lancaster_stemmer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn main() -> Nil {
 | 
					pub fn main() -> Nil {
 | 
				
			||||||
  // TODO: An example of the project in use
 | 
					  lancaster_stemmer.stem("breathe", lancaster_stemmer.default_rules())
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -237,18 +237,26 @@ const default_rules_list = [
 | 
				
			|||||||
  ),
 | 
					  ),
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn main() {
 | 
					/// Constructs the default ruleset
 | 
				
			||||||
  io.println("Hello from paicehusk!")
 | 
					 | 
				
			||||||
  let assert Ok(rules) = load_rules("paice-husk-rules.txt")
 | 
					 | 
				
			||||||
  stem("abominable", rules)
 | 
					 | 
				
			||||||
  |> echo
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn default_rules() -> Rules {
 | 
					pub fn default_rules() -> Rules {
 | 
				
			||||||
  dict.from_list(default_rules_list)
 | 
					  dict.from_list(default_rules_list)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Lancaster (Paice-Husk) stemming algorithm
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// ## Example
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// ```gleam
 | 
				
			||||||
 | 
					/// lancaster_stemmer.stem("Gleam", lancaster_stemmer.default_rules())
 | 
				
			||||||
 | 
					/// // -> gleam
 | 
				
			||||||
 | 
					/// ```
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// ```gleam
 | 
				
			||||||
 | 
					/// lancaster_stemmer.stem("fancy", lancaster_stemmer.default_rules())
 | 
				
			||||||
 | 
					/// // -> fant
 | 
				
			||||||
 | 
					/// ```
 | 
				
			||||||
pub fn stem(word: String, rules: Rules) -> String {
 | 
					pub fn stem(word: String, rules: Rules) -> String {
 | 
				
			||||||
 | 
					  let word = string.lowercase(word)
 | 
				
			||||||
  case is_valid(word) {
 | 
					  case is_valid(word) {
 | 
				
			||||||
    True -> {
 | 
					    True -> {
 | 
				
			||||||
      do_stem(word, rules, True)
 | 
					      do_stem(word, rules, True)
 | 
				
			||||||
@@ -333,6 +341,21 @@ fn is_valid_internal(word: String, length: Int) -> Bool {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Constructs a ruleset from the specified file
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// Format of the file is as follows:
 | 
				
			||||||
 | 
					/// Each line contains a specific rule (order matters)
 | 
				
			||||||
 | 
					/// The rule consists of a string made up of the following parts
 | 
				
			||||||
 | 
					/// | Rule part | Description |
 | 
				
			||||||
 | 
					/// | ------ | ------ |
 | 
				
			||||||
 | 
					/// |suffix|the reverse of the required suffix, e.g. the suffix for winning, ing would be specified gni|
 | 
				
			||||||
 | 
					/// |* (optional)|add an asterisk if the rule may only be used when no previous rule has been applied to the word. For example, ht*2. only applies if th is the final suffix of the original word, so the stem of breath would be brea, but the stem of breathe would be breath because the suffix e has already been removed|
 | 
				
			||||||
 | 
					/// |number of chars to remove|this is the number of characters to remove after the suffix has been matched. For example psychoanalytic has the suffix ytic of which 3 characters should be removed to retain psychoanaly, this would be 'city3'. This can be 0|
 | 
				
			||||||
 | 
					/// |append string (optional)|this is the characters that are appended after the match and removal of characters|
 | 
				
			||||||
 | 
					/// |> or .|If >, the stemming process continues after this rule has been applied; if ., stemming stops|
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// So, for example, to stem `psychoanalytic` to `psychoanalys` the rule would be `city3s.` (the suffix `ytic` reversed, remove 3 characters, append `s`, then stop)
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
pub fn load_rules(filename: String) -> Result(Rules, Nil) {
 | 
					pub fn load_rules(filename: String) -> Result(Rules, Nil) {
 | 
				
			||||||
  case simplifile.read(filename) {
 | 
					  case simplifile.read(filename) {
 | 
				
			||||||
    Error(_) -> Error(Nil)
 | 
					    Error(_) -> Error(Nil)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,7 @@
 | 
				
			|||||||
import gleeunit
 | 
					import gleeunit
 | 
				
			||||||
 | 
					import lancaster_stemmer
 | 
				
			||||||
 | 
					import simplifile
 | 
				
			||||||
 | 
					import splitter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn main() -> Nil {
 | 
					pub fn main() -> Nil {
 | 
				
			||||||
  gleeunit.main()
 | 
					  gleeunit.main()
 | 
				
			||||||
@@ -6,8 +9,29 @@ pub fn main() -> Nil {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
// gleeunit test functions end in `_test`
 | 
					// gleeunit test functions end in `_test`
 | 
				
			||||||
pub fn hello_world_test() {
 | 
					pub fn hello_world_test() {
 | 
				
			||||||
  let name = "Joe"
 | 
					  let line_split = splitter.new(["\n", "\r\n"])
 | 
				
			||||||
  let greeting = "Hello, " <> name <> "!"
 | 
					  let row_split = splitter.new([" ", "\t"])
 | 
				
			||||||
 | 
					  let rules = lancaster_stemmer.default_rules()
 | 
				
			||||||
  assert greeting == "Hello, Joe!"
 | 
					  let assert Ok(tests) = simplifile.read("./test/wordlist.txt")
 | 
				
			||||||
 | 
					  run_test(tests, line_split, row_split, rules)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn run_test(
 | 
				
			||||||
 | 
					  tests: String,
 | 
				
			||||||
 | 
					  line_split: splitter.Splitter,
 | 
				
			||||||
 | 
					  row_split: splitter.Splitter,
 | 
				
			||||||
 | 
					  rules: lancaster_stemmer.Rules,
 | 
				
			||||||
 | 
					) -> Nil {
 | 
				
			||||||
 | 
					  case splitter.split(line_split, tests) {
 | 
				
			||||||
 | 
					    #("", "", "") -> Nil
 | 
				
			||||||
 | 
					    #(line, _, rest) -> {
 | 
				
			||||||
 | 
					      case splitter.split(row_split, line) |> echo {
 | 
				
			||||||
 | 
					        #("", "", "") -> Nil
 | 
				
			||||||
 | 
					        #(word, _, stem) -> {
 | 
				
			||||||
 | 
					          assert lancaster_stemmer.stem(word, rules) == stem
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      run_test(rest, line_split, row_split, rules)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										25135
									
								
								test/wordlist.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25135
									
								
								test/wordlist.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user