Compare commits
5 Commits
e1b994bae1
...
v1.0.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 2d8532b40e | |||
| c848824583 | |||
| b66b696391 | |||
| 516066d322 | |||
| 85f1377328 |
5
CHANGELOG.md
Normal file
5
CHANGELOG.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Changelog
|
||||||
|
|
||||||
|
## v1.0.0
|
||||||
|
|
||||||
|
- Initial release
|
||||||
@@ -12,7 +12,7 @@ gleam add lancaster_stemmer@1
|
|||||||
import lancaster_stemmer
|
import lancaster_stemmer
|
||||||
|
|
||||||
pub fn main() -> Nil {
|
pub fn main() -> Nil {
|
||||||
// TODO: An example of the project in use
|
lancaster_stemmer.stem("breathe", lancaster_stemmer.default_rules())
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -237,18 +237,26 @@ const default_rules_list = [
|
|||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
pub fn main() {
|
/// Constructs the default ruleset
|
||||||
io.println("Hello from paicehusk!")
|
|
||||||
let assert Ok(rules) = load_rules("paice-husk-rules.txt")
|
|
||||||
stem("abominable", rules)
|
|
||||||
|> echo
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn default_rules() -> Rules {
|
pub fn default_rules() -> Rules {
|
||||||
dict.from_list(default_rules_list)
|
dict.from_list(default_rules_list)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lancaster (Paice-Husk) stemming algorithm
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
///
|
||||||
|
/// ```gleam
|
||||||
|
/// lancaster_stemmer.stem("Gleam", lancaster_stemmer.default_rules())
|
||||||
|
/// // -> gleam
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// ```gleam
|
||||||
|
/// lancaster_stemmer.stem("fancy", lancaster_stemmer.default_rules())
|
||||||
|
/// // -> fant
|
||||||
|
/// ```
|
||||||
pub fn stem(word: String, rules: Rules) -> String {
|
pub fn stem(word: String, rules: Rules) -> String {
|
||||||
|
let word = string.lowercase(word)
|
||||||
case is_valid(word) {
|
case is_valid(word) {
|
||||||
True -> {
|
True -> {
|
||||||
do_stem(word, rules, True)
|
do_stem(word, rules, True)
|
||||||
@@ -333,6 +341,21 @@ fn is_valid_internal(word: String, length: Int) -> Bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Constructs a ruleset from the specified file
|
||||||
|
///
|
||||||
|
/// Format of the file is as follows:
|
||||||
|
/// Each line contains a specific rule (order matters)
|
||||||
|
/// The rule consists of a string made up of the following parts
|
||||||
|
/// | Rule part | Description |
|
||||||
|
/// | ------ | ------ |
|
||||||
|
/// |suffix|the reverse of the required suffix, e.g. the suffix for winning, ing would be specified gni|
|
||||||
|
/// |* (optional)|if the rule is only to be used if a previous rule has not been applied then add an asterisk. For example ht*2. only applies if th is the final suffix, so the stem of breath would be brea but the stem of breathe would be breath because the suffix e has already been removed|
|
||||||
|
/// |number of chars to remove|this is the number of characters to remove after the suffix has been matched. For example psychoanalytic has the suffix ytic of which 3 characters should be removed to retain psychoanaly, this would be 'city3'. This can be 0|
|
||||||
|
/// |append string (optional)|this is the characters that are appended after the match and removal of characters|
|
||||||
|
/// |> or .|If > then you can continue stemming process after this one, if . then stemming stops|
|
||||||
|
///
|
||||||
|
/// So, for example, to stem `psychoanalytic` to `psychoanalys`, the rule would be `city3s.` (the suffix `ytic` written in reverse, remove 3 characters, append `s`, then stop).
|
||||||
|
///
|
||||||
pub fn load_rules(filename: String) -> Result(Rules, Nil) {
|
pub fn load_rules(filename: String) -> Result(Rules, Nil) {
|
||||||
case simplifile.read(filename) {
|
case simplifile.read(filename) {
|
||||||
Error(_) -> Error(Nil)
|
Error(_) -> Error(Nil)
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
import gleeunit
|
import gleeunit
|
||||||
|
import lancaster_stemmer
|
||||||
|
import simplifile
|
||||||
|
import splitter
|
||||||
|
|
||||||
pub fn main() -> Nil {
|
pub fn main() -> Nil {
|
||||||
gleeunit.main()
|
gleeunit.main()
|
||||||
@@ -6,8 +9,29 @@ pub fn main() -> Nil {
|
|||||||
|
|
||||||
// gleeunit test functions end in `_test`
|
// gleeunit test functions end in `_test`
|
||||||
pub fn hello_world_test() {
|
pub fn hello_world_test() {
|
||||||
let name = "Joe"
|
let line_split = splitter.new(["\n", "\r\n"])
|
||||||
let greeting = "Hello, " <> name <> "!"
|
let row_split = splitter.new([" ", "\t"])
|
||||||
|
let rules = lancaster_stemmer.default_rules()
|
||||||
assert greeting == "Hello, Joe!"
|
let assert Ok(tests) = simplifile.read("./test/wordlist.txt")
|
||||||
|
run_test(tests, line_split, row_split, rules)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_test(
|
||||||
|
tests: String,
|
||||||
|
line_split: splitter.Splitter,
|
||||||
|
row_split: splitter.Splitter,
|
||||||
|
rules: lancaster_stemmer.Rules,
|
||||||
|
) -> Nil {
|
||||||
|
case splitter.split(line_split, tests) {
|
||||||
|
#("", "", "") -> Nil
|
||||||
|
#(line, _, rest) -> {
|
||||||
|
case splitter.split(row_split, line) |> echo {
|
||||||
|
#("", "", "") -> Nil
|
||||||
|
#(word, _, stem) -> {
|
||||||
|
assert lancaster_stemmer.stem(word, rules) == stem
|
||||||
|
}
|
||||||
|
}
|
||||||
|
run_test(rest, line_split, row_split, rules)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
25135
test/wordlist.txt
Normal file
25135
test/wordlist.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user