commit 1c82cef2919679298b5023cd2fdfd9709f48114b
Author: elee
Date:   Sun Mar 6 22:35:04 2022 -0600

    aaaa

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..b766723
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/profanity.iml b/.idea/profanity.iml
new file mode 100644
index 0000000..5e764c4
--- /dev/null
+++ b/.idea/profanity.iml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0059b37
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,3 @@
+
+run:
+	go run ./cmd/example
\ No newline at end of file
diff --git a/cmd/example/main.go b/cmd/example/main.go
new file mode 100644
index 0000000..2ede2e9
--- /dev/null
+++ b/cmd/example/main.go
@@ -0,0 +1,16 @@
+package main
+
+import (
+	"log"
+	"os"
+	"profanity"
+)
+
+func main() {
+	filter := profanity.New()
+	filter.MustAddFile(os.Open("./data/wordlist.txt"))
+	bad, curse, _ := filter.FilterWord("ꜰuck")
+	log.Println(bad, curse)
+	bad, curse, _ = filter.FilterWord("not a bad word")
+	log.Println(bad, curse)
+}
diff --git a/common/common.go b/common/common.go
new file mode 100644
index 0000000..e4d7bc1
--- /dev/null
+++ b/common/common.go
@@ -0,0 +1,20 @@
+package common
+
+type TransformFunc func(string) []string
+
+func FlattenTransformFunc(tx []TransformFunc) TransformFunc {
+	return func(s string) []string {
+		out := make([]string, 0, len(tx)*len(tx))
+		for _, t := range tx {
+			ans := t(s)
+			for _, t2 := range tx {
+				for _, v := range ans {
+					out = append(out, t2(v)...)
+				}
+			}
+
+			out = append(out, ans...)
+ } + return out + } +} diff --git a/common/decanter/decanter.go b/common/decanter/decanter.go new file mode 100644 index 0000000..fc1e281 --- /dev/null +++ b/common/decanter/decanter.go @@ -0,0 +1,86 @@ +package decanter + +import "strings" + +type Decanter map[rune]byte + +const abcs = "abcdefghijklmnopqrstuvwxyz" + +func New() Decanter { + out := Decanter(map[rune]byte{}) + return out +} + +func (D Decanter) AddAlphabets(as []string) Decanter { + for _, a := range as { + D.AddAlphabet(a) + } + return D +} +func (D Decanter) AddAlphabet(a string) { + idx := 0 + for _, r := range a { + if r != '_' && r != rune(abcs[idx]) { + D[r] = abcs[idx] + } + idx = idx + 1 + if idx > 25 { + break + } + } +} +func (D Decanter) AddLetter(b byte, r rune) { + D[r] = b +} + +func (D Decanter) AddLetters(b byte, rs []rune) { + for _, r := range rs { + D[r] = b + } +} + +func (D Decanter) DecantTransform(s string) []string { + forms := []string{ + D.DecantString(s), + D.DecantStringStrip(s, ' '), + D.DecantStringNoRepeat(s), + } + return forms +} + +func (D Decanter) Decant(r rune) rune { + if _, ok := D[r]; ok { + return rune(D[r]) + } + return r +} + +func (D Decanter) DecantString(s string) string { + b := new(strings.Builder) + for _, v := range s { + b.WriteRune(D.Decant(v)) + } + return b.String() +} + +func (D Decanter) DecantStringStrip(s string, strip rune) string { + b := new(strings.Builder) + for _, v := range s { + if v != strip { + b.WriteRune(D.Decant(v)) + } + } + return b.String() +} + +func (D Decanter) DecantStringNoRepeat(s string) string { + b := new(strings.Builder) + last := ' ' + for _, v := range s { + if v != last { + b.WriteRune(D.Decant(v)) + } + v = last + } + return b.String() +} diff --git a/common/decanter/decanter_test.go b/common/decanter/decanter_test.go new file mode 100644 index 0000000..cf874dd --- /dev/null +++ b/common/decanter/decanter_test.go @@ -0,0 +1,84 @@ +package decanter + +import ( + "testing" +) + +var dec = NewDefaultDecanter() + +func TestDecantCharacters(t *testing.T) { + for _, a := range alphabets { + idx := 0 + for _, r := range a { + if rune(abcs[idx]) != dec.Decant(r) { + t.Fatalf("%s, expected %v to be %v for %v", a, string(dec.Decant(r)), string(abcs[idx]), string(r)) + } + idx = idx + 1 + } + } +} + +func TestDecantStrings(t *testing.T) { + cases := [][2]string{ + {"my dog is dead", "my dog is dead"}, + {"4wesome", "awesome"}, + {"enything wrong?", "enything wrong?"}, + {"abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"}, + {"道家", "道家"}, + } + for _, c := range cases { + if dec.DecantString(c[0]) != c[1] { + t.Fatalf("expected %v to be %v", dec.DecantString(c[0]), c[1]) + } + } +} + +func BenchmarkDecantStrings(b *testing.B) { + cases := [][2]string{ + {"my dog is dead", "my dog is dead"}, + {"4wesome", "awesome"}, + {"enything wrong?", "enything wrong?"}, + {"abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"}, + {"道家", "道家"}, + } + for i := 0; i < b.N; i++ { + for _, c := range cases { + dec.DecantString(c[0]) + } + } +} + +var alphabets = []string{ + "4bcd3f9hijk|mn0pqr5tuvwxyz", + "𝔞𝔟𝔠𝔡𝔢𝔣𝔤𝔥𝔦𝔧𝔨𝔩𝔪𝔫𝔬𝔭𝔮𝔯𝔰𝔱𝔲𝔳𝔴𝔵𝔶𝔷", + "𝖆𝖇𝖈𝖉𝖊𝖋𝖌𝖍𝖎𝖏𝖐𝖑𝖒𝖓𝖔𝖕𝖖𝖗𝖘𝖙𝖚𝖛𝖜𝖝𝖞𝖟", + "Ã𝓑c𝔻𝐞𝐅𝓖ђƗĴ𝓚𝓛мŇ𝔬𝕡Ⓠ𝓇𝕊𝕥Ⓤ𝐯𝐰א¥𝓏", + "𝓪𝓫𝓬𝓭𝓮𝓯𝓰𝓱𝓲𝓳𝓴𝓵𝓶𝓷𝓸𝓹𝓺𝓻𝓼𝓽𝓾𝓿𝔀𝔁𝔂𝔃", + "𝒶𝒷𝒸𝒹𝑒𝒻𝑔𝒽𝒾𝒿𝓀𝓁𝓂𝓃𝑜𝓅𝓆𝓇𝓈𝓉𝓊𝓋𝓌𝓍𝓎𝓏", + "𝕒𝕓𝕔𝕕𝕖𝕗𝕘𝕙𝕚𝕛𝕜𝕝𝕞𝕟𝕠𝕡𝕢𝕣𝕤𝕥𝕦𝕧𝕨𝕩𝕪𝕫", + "abcdefghijklmnopqrstuvwxyz", + "ᴀʙᴄᴅᴇꜰɢʜɪᴊᴋʟᴍɴᴏᴘQʀꜱᴛᴜᴠᴡxʏᴢ", + "ₐbcdₑfgₕᵢⱼₖₗₘₙₒₚqᵣₛₜᵤᵥwₓyz", + "ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖqʳˢᵗᵘᵛʷˣʸᶻ", + "𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳", + "𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇", + "𝘢𝘣𝘤𝘥𝘦𝘧𝘨𝘩𝘪𝘫𝘬𝘭𝘮𝘯𝘰𝘱𝘲𝘳𝘴𝘵𝘶𝘷𝘸𝘹𝘺𝘻", + "𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯", + 
"abcdefghijklmnopqrstuvwxyz", + "𝚊𝚋𝚌𝚍𝚎𝚏𝚐𝚑𝚒𝚓𝚔𝚕𝚖𝚗𝚘𝚙𝚚𝚛𝚜𝚝𝚞𝚟𝚠𝚡𝚢𝚣", + "𝔄𝔅ℭ𝔇𝔈𝔉𝔊ℌℑ𝔍𝔎𝔏𝔐𝔑𝔒𝔓𝔔ℜ𝔖𝔗𝔘𝔙𝔚𝔛𝔜ℨ", + "𝕬𝕭𝕮𝕯𝕰𝕱𝕲𝕳𝕴𝕵𝕶𝕷𝕸𝕹𝕺𝕻𝕼𝕽𝕾𝕿𝖀𝖁𝖂𝖃𝖄𝖅", + "𝓐𝓑𝓒𝓓𝓔𝓕𝓖𝓗𝓘𝓙𝓚𝓛𝓜𝓝𝓞𝓟𝓠𝓡𝓢𝓣𝓤𝓥𝓦𝓧𝓨𝓩", + "𝒜𝐵𝒞𝒟𝐸𝐹𝒢𝐻𝐼𝒥𝒦𝐿𝑀𝒩𝒪𝒫𝒬𝑅𝒮𝒯𝒰𝒱𝒲𝒳𝒴𝒵", + "𝔸𝔹ℂ𝔻𝔼𝔽𝔾ℍ𝕀𝕁𝕂𝕃𝕄ℕ𝕆ℙℚℝ𝕊𝕋𝕌𝕍𝕎𝕏𝕐ℤ", + "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "𝙰𝙱𝙲𝙳𝙴𝙵𝙶𝙷𝙸𝙹𝙺𝙻𝙼𝙽𝙾𝙿𝚀𝚁𝚂𝚃𝚄𝚅𝚆𝚇𝚈𝚉", + "ᴀʙᴄᴅᴇꜰɢʜɪᴊᴋʟᴍɴᴏᴘQʀꜱᴛᴜᴠᴡxʏᴢ", + "∀ᙠƆᗡƎℲ⅁HIſ⋊˥WNOԀΌᴚS⊥∩ΛMX⅄Z", + "🄰🄱🄲🄳🄴🄵🄶🄷🄸🄹🄺🄻🄼🄽🄾🄿🅀🅁🅂🅃🅄🅅🅆🅇🅈🅉", + "ᴬᴮᶜᴰᴱᶠᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾQᴿˢᵀᵁⱽᵂˣʸᶻ", + "𝐀𝐁𝐂𝐃𝐄𝐅𝐆𝐇𝐈𝐉𝐊𝐋𝐌𝐍𝐎𝐏𝐐𝐑𝐒𝐓𝐔𝐕𝐖𝐗𝐘𝐙", + "𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭", + "𝘈𝘉𝘊𝘋𝘌𝘍𝘎𝘏𝘐𝘑𝘒𝘓𝘔𝘕𝘖𝘗𝘘𝘙𝘚𝘛𝘜𝘝𝘞𝘟𝘠𝘡", + "𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕", +} diff --git a/common/decanter/default.go b/common/decanter/default.go new file mode 100644 index 0000000..558434c --- /dev/null +++ b/common/decanter/default.go @@ -0,0 +1,42 @@ +package decanter + +func NewDefaultDecanter() Decanter { + dec := New() + dec.AddAlphabets([]string{ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "@bcd3f9hijk|mn0pqr5tuvwxyz", + "4bcd3f9hijk|mn0pqr5tuvwxyz", + "𝔞𝔟𝔠𝔡𝔢𝔣𝔤𝔥𝔦𝔧𝔨𝔩𝔪𝔫𝔬𝔭𝔮𝔯𝔰𝔱𝔲𝔳𝔴𝔵𝔶𝔷", + "𝖆𝖇𝖈𝖉𝖊𝖋𝖌𝖍𝖎𝖏𝖐𝖑𝖒𝖓𝖔𝖕𝖖𝖗𝖘𝖙𝖚𝖛𝖜𝖝𝖞𝖟", + "Ã𝓑c𝔻𝐞𝐅𝓖ђƗĴ𝓚𝓛мŇ𝔬𝕡Ⓠ𝓇𝕊𝕥Ⓤ𝐯𝐰א¥𝓏", + "𝓪𝓫𝓬𝓭𝓮𝓯𝓰𝓱𝓲𝓳𝓴𝓵𝓶𝓷𝓸𝓹𝓺𝓻𝓼𝓽𝓾𝓿𝔀𝔁𝔂𝔃", + "𝒶𝒷𝒸𝒹𝑒𝒻𝑔𝒽𝒾𝒿𝓀𝓁𝓂𝓃𝑜𝓅𝓆𝓇𝓈𝓉𝓊𝓋𝓌𝓍𝓎𝓏", + "𝕒𝕓𝕔𝕕𝕖𝕗𝕘𝕙𝕚𝕛𝕜𝕝𝕞𝕟𝕠𝕡𝕢𝕣𝕤𝕥𝕦𝕧𝕨𝕩𝕪𝕫", + "abcdefghijklmnopqrstuvwxyz", + "ᴀʙᴄᴅᴇꜰɢʜɪᴊᴋʟᴍɴᴏᴘQʀꜱᴛᴜᴠᴡxʏᴢ", + "ₐbcdₑfgₕᵢⱼₖₗₘₙₒₚqᵣₛₜᵤᵥwₓyz", + "ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖqʳˢᵗᵘᵛʷˣʸᶻ", + "𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳", + "𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇", + "𝘢𝘣𝘤𝘥𝘦𝘧𝘨𝘩𝘪𝘫𝘬𝘭𝘮𝘯𝘰𝘱𝘲𝘳𝘴𝘵𝘶𝘷𝘸𝘹𝘺𝘻", + "𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯", + "abcdefghijklmnopqrstuvwxyz", + "𝚊𝚋𝚌𝚍𝚎𝚏𝚐𝚑𝚒𝚓𝚔𝚕𝚖𝚗𝚘𝚙𝚚𝚛𝚜𝚝𝚞𝚟𝚠𝚡𝚢𝚣", + "𝔄𝔅ℭ𝔇𝔈𝔉𝔊ℌℑ𝔍𝔎𝔏𝔐𝔑𝔒𝔓𝔔ℜ𝔖𝔗𝔘𝔙𝔚𝔛𝔜ℨ", + "𝕬𝕭𝕮𝕯𝕰𝕱𝕲𝕳𝕴𝕵𝕶𝕷𝕸𝕹𝕺𝕻𝕼𝕽𝕾𝕿𝖀𝖁𝖂𝖃𝖄𝖅", + "𝓐𝓑𝓒𝓓𝓔𝓕𝓖𝓗𝓘𝓙𝓚𝓛𝓜𝓝𝓞𝓟𝓠𝓡𝓢𝓣𝓤𝓥𝓦𝓧𝓨𝓩", + "𝒜𝐵𝒞𝒟𝐸𝐹𝒢𝐻𝐼𝒥𝒦𝐿𝑀𝒩𝒪𝒫𝒬𝑅𝒮𝒯𝒰𝒱𝒲𝒳𝒴𝒵", + "𝔸𝔹ℂ𝔻𝔼𝔽𝔾ℍ𝕀𝕁𝕂𝕃𝕄ℕ𝕆ℙℚℝ𝕊𝕋𝕌𝕍𝕎𝕏𝕐ℤ", + "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "𝙰𝙱𝙲𝙳𝙴𝙵𝙶𝙷𝙸𝙹𝙺𝙻𝙼𝙽𝙾𝙿𝚀𝚁𝚂𝚃𝚄𝚅𝚆𝚇𝚈𝚉", + "ᴀʙᴄᴅᴇꜰɢʜɪᴊᴋʟᴍɴᴏᴘQʀꜱᴛᴜᴠᴡxʏᴢ", + "🄰🄱🄲🄳🄴🄵🄶🄷🄸🄹🄺🄻🄼🄽🄾🄿🅀🅁🅂🅃🅄🅅🅆🅇🅈🅉", + "∀ᙠƆᗡƎℲ⅁HIſ⋊˥WNOԀΌᴚS⊥∩ΛMX⅄Z", + "ᴬᴮᶜᴰᴱᶠᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾQᴿˢᵀᵁⱽᵂˣʸᶻ", + "𝐀𝐁𝐂𝐃𝐄𝐅𝐆𝐇𝐈𝐉𝐊𝐋𝐌𝐍𝐎𝐏𝐐𝐑𝐒𝐓𝐔𝐕𝐖𝐗𝐘𝐙", + "𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭", + "𝘈𝘉𝘊𝘋𝘌𝘍𝘎𝘏𝘐𝘑𝘒𝘓𝘔𝘕𝘖𝘗𝘘𝘙𝘚𝘛𝘜𝘝𝘞𝘟𝘠𝘡", + "𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕", + }) + return dec +} diff --git a/data/wordlist.txt b/data/wordlist.txt new file mode 100644 index 0000000..367e259 --- /dev/null +++ b/data/wordlist.txt @@ -0,0 +1,317 @@ +2g1c +2 girls 1 cup +anal +anus +arse +ass +asshole +arsehole +asswhole +assmunch +auto erotic +autoerotic +ballsack +bastard +beastial +bellend +bdsm +bestiality +bitch +bitches +bitchin +bitching +bimbo +bimbos +blow job +blowjob +blowjobs +blue waffle +boob +boobs +booobs +boooobs +booooobs +booooooobs +breasts +booty call +brown shower +brown showers +boner +bondage +buceta +bukake +bukkake +bullshit +bull shit +busty +butthole +carpet muncher +cawk +chink +cipa +clit +clits +clitoris +cnut +cock +cocks +cockface +cockhead +cockmunch +cockmuncher +cocksuck +cocksucked +cocksucking +cocksucks +cocksucker +cokmuncher +coon +cow girl +cow girls +cowgirl +cowgirls +crap +crotch +cum +cummer +cumming +cuming +cums +cumshot +cunilingus +cunillingus +cunnilingus +cunt +cuntlicker +cuntlicking +cunts +damn +dick +dickhead +dildo +dildos +dink +dinks +deepthroat +deep throat +dog style +doggie style +doggiestyle +doggy style +doggystyle +donkeyribber +doosh +douche +duche +dyke +ejaculate +ejaculated +ejaculates +ejaculating +ejaculatings +ejaculation +ejakulate +erotic +erotism +fag +faggot +fagging +faggit +faggitt +faggs +fagot +fagots +fags +fatass +femdom +fingering +footjob +foot job +fuck +fucks +fucker +fuckers +fucked +fuckhead +fuckheads +fuckin +fucking +fcuk +fcuker +fcuking +felching +fellate +fellatio +fingerfuck +fingerfucked +fingerfucker +fingerfuckers +fingerfucking +fingerfucks +fistfuck +fistfucked +fistfucker +fistfuckers 
+fistfucking +fistfuckings +fistfucks +flange +fook +fooker +fucka +fuk +fuks +fuker +fukker +fukkin +fukking +futanari +futanary +gangbang +gangbanged +gang bang +gokkun +golden shower +goldenshower +gay +gaylord +gaysex +goatse +handjob +hand job +hentai +hooker +hoer +homo +horny +incest +jackoff +jack off +jerkoff +jerk off +jizz +knob +kinbaku +labia +lesbian +masturbate +masochist +mofo +mothafuck +motherfuck +motherfucker +mothafucka +mothafuckas +mothafuckaz +mothafucked +mothafucker +mothafuckers +mothafuckin +mothafucking +mothafuckings +mothafucks +mother fucker +motherfucked +motherfucker +motherfuckers +motherfuckin +motherfucking +motherfuckings +motherfuckka +motherfucks +milf +muff +nigga +nigger +nigg +nipple +nipples +nob +nob jokey +nobhead +nobjocky +nobjokey +numbnuts +nutsack +nude +nudes +orgy +orgasm +orgasms +panty +panties +penis +playboy +porn +porno +pornography +pron +pussy +pussies +rape +raping +rapist +rectum +retard +rimming +sadist +sadism +schlong +scrotum +sex +semen +shemale +she male +shibari +shibary +shit +shitdick +shitfuck +shitfull +shithead +shiting +shitings +shits +shitted +shitters +shitting +shittings +shitty +shota +skank +slut +sluts +smut +smegma +spunk +strip club +stripclub +tit +tits +titties +titty +titfuck +tittiefucker +titties +tittyfuck +tittywank +titwank +threesome +three some +throating +twat +twathead +twatty +twunt +viagra +vagina +vulva +wank +wanker +wanky +whore +whoar +xxx +xx +yaoi +yury \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..b674de8 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module profanity + +go 1.17 diff --git a/profanity.go b/profanity.go new file mode 100644 index 0000000..aec119b --- /dev/null +++ b/profanity.go @@ -0,0 +1,110 @@ +package profanity + +import ( + "bufio" + "os" + "profanity/common" + "profanity/common/decanter" + "strings" +) + +type Filter struct { + words map[string]string + Decanter decanter.Decanter + + transforms []common.TransformFunc +} + +func New() *Filter { + dec := decanter.NewDefaultDecanter() + return &Filter{ + words: make(map[string]string, 3000), + Decanter: dec, + transforms: []common.TransformFunc{dec.DecantTransform}, + } +} + +//FilterWord +//curse is "" if bad == false, else curse is populated with the match +func (F *Filter) FilterWord(s string) (bad bool, curse string, form string) { + forms := common.FlattenTransformFunc(F.transforms)(s) + for _, form := range forms { + if curse, ok := F.words[form]; ok { + return true, curse, form + } + for _, v := range F.words { + if strings.Contains(form, v) { + return true, curse, form + } + } + } + return false, "", "" +} + +func (F *Filter) CensorSentence(s string, replacer string) (censored string) { + return F.CensorSentenceN(s, replacer, 0) +} + +func (F *Filter) CensorSentenceMany(s string, replacer string, widths ...uint8) (censored string) { + sentence := s + for _, width := range widths { + sentence = F.CensorSentenceN(sentence, replacer, width) + } + return sentence +} + +func (F *Filter) CensorSentenceToN(s string, replacer string, maxwidth uint8) (censored string) { + sentence := s + for width := uint8(0); width < maxwidth; width++ { + sentence = F.CensorSentenceN(sentence, replacer, width) + } + return sentence +} + +func (F *Filter) CensorSentenceN(s string, replacer string, width uint8) (censored string) { + sep := " " + sb := new(strings.Builder) + words := strings.Split(s, sep) + for idx := 0; idx < len(words); idx++ { + original := words[idx] + word := words[idx] + 
+		for i := 1; i <= int(width); i++ {
+			if len(words) > (idx + i) {
+				word = word + " " + words[idx+i]
+			}
+		}
+		if bad, _, form := F.FilterWord(word); bad {
+			idx = idx + int(width)
+			sb.WriteString(strings.Repeat(replacer, len(form)))
+		} else {
+			sb.WriteString(original)
+		}
+		sb.WriteString(sep)
+	}
+	return strings.TrimSpace(sb.String())
+}
+
+func (F *Filter) MustAddFile(file *os.File, err error) {
+	if err != nil {
+		panic(err)
+	}
+	F.AddFile(file)
+}
+
+func (F *Filter) AddFile(file *os.File) {
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		txt := scanner.Text()
+		F.AddWord(txt)
+	}
+}
+
+func (F *Filter) AddWords(wx []string) {
+	for _, w := range wx {
+		F.AddWord(w)
+	}
+}
+
+func (F *Filter) AddWord(w string) {
+	F.words[w] = w
+}
diff --git a/profanity_test.go b/profanity_test.go
new file mode 100644
index 0000000..635c642
--- /dev/null
+++ b/profanity_test.go
@@ -0,0 +1,28 @@
+package profanity
+
+import (
+	"os"
+	"testing"
+)
+
+func init() {
+}
+
+func TestWordFilter(t *testing.T) {
+
+	filter := New()
+	filter.MustAddFile(os.Open("./data/wordlist.txt"))
+	cases := [][2]string{
+		{
+			"That wh0re gave m3 a very good H@nD j0b.",
+			"That ***** gave m3 a very good *********",
+		},
+	}
+	for _, tc := range cases {
+		filtered := filter.CensorSentenceToN(tc[0], "*", 6)
+		if filtered != tc[1] {
+			t.Errorf("expected '%s' to be '%s'", filtered, tc[1])
+		}
+	}
+
+}
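common.FlattenTransformFunc composes every registered transform with every other one (including itself) and also keeps each transform's direct output, so with the single DecantTransform registered by New() a word yields the decanted, space-stripped, and repeat-collapsed forms plus those forms run through the decanter again. A toy sketch with two hypothetical transforms (not part of the commit):

```go
package main

import (
	"fmt"
	"strings"

	"profanity/common"
)

func main() {
	// Two illustrative transforms; real usage registers Decanter.DecantTransform.
	lower := func(s string) []string { return []string{strings.ToLower(s)} }
	nospace := func(s string) []string { return []string{strings.ReplaceAll(s, " ", "")} }

	flat := common.FlattenTransformFunc([]common.TransformFunc{lower, nospace})

	// Prints each transform's output plus every pairwise composition,
	// e.g. lowercased, space-stripped, and lowercased-then-space-stripped forms.
	fmt.Println(flat("Bad Word"))
}
```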
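The Decanter is a rune-to-byte lookup that maps look-alike characters back to plain a–z. AddAlphabet aligns a 26-character string against "abcdefghijklmnopqrstuvwxyz", skipping '_' placeholders and characters that already equal the target letter, and AddLetter registers one-off mappings. A small usage sketch; the inputs are illustrative, and the leetspeak alphabet is one of those registered by NewDefaultDecanter:

```go
package main

import (
	"fmt"

	"profanity/common/decanter"
)

func main() {
	dec := decanter.New()

	// '4' -> 'a', '3' -> 'e', '0' -> 'o', '|' -> 'l', and so on.
	dec.AddAlphabet("4bcd3f9hijk|mn0pqr5tuvwxyz")
	// One-off mapping: treat '$' as 's'.
	dec.AddLetter('s', '$')

	fmt.Println(dec.DecantString("a$$"))             // "ass"
	fmt.Println(dec.DecantString("pr0f4n1ty"))       // "profan1ty" ('1' is not mapped here)
	fmt.Println(dec.DecantStringStrip("b a d", ' ')) // "bad" (strips the given rune)
}
```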
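In DecantStringNoRepeat above, the loop ends with v = last, so last never changes from ' ' and consecutive repeats are not actually collapsed. A corrected sketch of the same method (package decanter, relying on the declarations already in the diff):

```go
// DecantStringNoRepeat decants s while collapsing consecutive repeated runes
// ("fuuuck" -> "fuck"); last starts as ' ' so a leading space is also dropped.
func (D Decanter) DecantStringNoRepeat(s string) string {
	b := new(strings.Builder)
	last := ' '
	for _, v := range s {
		if v != last {
			b.WriteRune(D.Decant(v))
		}
		last = v // remember the previous rune so repeats are skipped
	}
	return b.String()
}
```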
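In Filter.FilterWord, the strings.Contains branch returns the named result curse, which is still empty at that point (the curse bound in the map lookup is scoped to that if statement), so substring hits report bad == true with an empty curse. A sketch of the same method that returns the matched wordlist entry instead (package profanity, using the declarations already in the diff):

```go
func (F *Filter) FilterWord(s string) (bad bool, curse string, form string) {
	forms := common.FlattenTransformFunc(F.transforms)(s)
	for _, f := range forms {
		// Exact match against the wordlist.
		if w, ok := F.words[f]; ok {
			return true, w, f
		}
		// Substring match: report the wordlist entry that was found.
		for _, w := range F.words {
			if strings.Contains(f, w) {
				return true, w, f
			}
		}
	}
	return false, "", ""
}
```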
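Putting the pieces together, a minimal end-to-end sketch of the API this commit introduces (essentially an expanded cmd/example; the sample inputs are illustrative and not part of the commit):

```go
package main

import (
	"log"
	"os"

	"profanity"
)

func main() {
	// Build a filter and load the wordlist added in this commit.
	// MustAddFile takes the (file, err) pair straight from os.Open and panics on error.
	filter := profanity.New()
	filter.MustAddFile(os.Open("./data/wordlist.txt"))

	// FilterWord normalises look-alike characters before matching,
	// so obfuscated spellings such as "wh0re" are still caught.
	bad, curse, form := filter.FilterWord("wh0re")
	log.Println(bad, curse, form)

	// CensorSentenceN joins `width` extra words into each lookup window,
	// so multi-word entries such as "hand job" are caught when width >= 1.
	log.Println(filter.CensorSentenceN("a very good H@nD j0b", "*", 1))
}
```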