package common import ( "bytes" "net/url" "regexp" "strconv" "strings" "unicode/utf8" ) var SpaceGap = []byte(" ") var httpProtBytes = []byte("http://") var InvalidURL = []byte("[Invalid URL]") var InvalidTopic = []byte("[Invalid Topic]") var InvalidProfile = []byte("[Invalid Profile]") var InvalidForum = []byte("[Invalid Forum]") var unknownMedia = []byte("[Unknown Media]") var URLOpen = []byte("") var bytesSinglequote = []byte("'") var bytesGreaterthan = []byte(">") var urlMention = []byte(" class='mention'") var URLClose = []byte("") var imageOpen = []byte("") var urlPattern = `(?s)([ {1}])((http|https|ftp|mailto)*)(:{??)\/\/([\.a-zA-Z\/]+)([ {1}])` var urlReg *regexp.Regexp func init() { urlReg = regexp.MustCompile(urlPattern) } // TODO: Write a test for this func shortcodeToUnicode(msg string) string { //re := regexp.MustCompile(":(.):") msg = strings.Replace(msg, ":grinning:", "😀", -1) msg = strings.Replace(msg, ":grin:", "😁", -1) msg = strings.Replace(msg, ":joy:", "😂", -1) msg = strings.Replace(msg, ":rofl:", "đŸ¤Ŗ", -1) msg = strings.Replace(msg, ":smiley:", "😃", -1) msg = strings.Replace(msg, ":smile:", "😄", -1) msg = strings.Replace(msg, ":sweat_smile:", "😅", -1) msg = strings.Replace(msg, ":laughing:", "😆", -1) msg = strings.Replace(msg, ":satisfied:", "😆", -1) msg = strings.Replace(msg, ":wink:", "😉", -1) msg = strings.Replace(msg, ":blush:", "😊", -1) msg = strings.Replace(msg, ":yum:", "😋", -1) msg = strings.Replace(msg, ":sunglasses:", "😎", -1) msg = strings.Replace(msg, ":heart_eyes:", "😍", -1) msg = strings.Replace(msg, ":kissing_heart:", "😘", -1) msg = strings.Replace(msg, ":kissing:", "😗", -1) msg = strings.Replace(msg, ":kissing_smiling_eyes:", "😙", -1) msg = strings.Replace(msg, ":kissing_closed_eyes:", "😚", -1) msg = strings.Replace(msg, ":relaxed:", "â˜ēī¸", -1) msg = strings.Replace(msg, ":slight_smile:", "🙂", -1) msg = strings.Replace(msg, ":hugging:", "🤗", -1) msg = strings.Replace(msg, ":thinking:", "🤔", -1) msg = strings.Replace(msg, ":neutral_face:", "😐", -1) msg = strings.Replace(msg, ":expressionless:", "😑", -1) msg = strings.Replace(msg, ":no_mouth:", "đŸ˜ļ", -1) msg = strings.Replace(msg, ":rolling_eyes:", "🙄", -1) msg = strings.Replace(msg, ":smirk:", "😏", -1) msg = strings.Replace(msg, ":persevere:", "đŸ˜Ŗ", -1) msg = strings.Replace(msg, ":disappointed_relieved:", "đŸ˜Ĩ", -1) msg = strings.Replace(msg, ":open_mouth:", "😮", -1) msg = strings.Replace(msg, ":zipper_mouth:", "🤐", -1) msg = strings.Replace(msg, ":hushed:", "đŸ˜¯", -1) msg = strings.Replace(msg, ":sleepy:", "đŸ˜Ē", -1) msg = strings.Replace(msg, ":tired_face:", "đŸ˜Ģ", -1) msg = strings.Replace(msg, ":sleeping:", "😴", -1) msg = strings.Replace(msg, ":relieved:", "😌", -1) msg = strings.Replace(msg, ":nerd:", "🤓", -1) msg = strings.Replace(msg, ":stuck_out_tongue:", "😛", -1) msg = strings.Replace(msg, ":worried:", "😟", -1) msg = strings.Replace(msg, ":drooling_face:", "🤤", -1) msg = strings.Replace(msg, ":disappointed:", "😞", -1) msg = strings.Replace(msg, ":astonished:", "😲", -1) msg = strings.Replace(msg, ":slight_frown:", "🙁", -1) msg = strings.Replace(msg, ":skull_crossbones:", "☠ī¸", -1) msg = strings.Replace(msg, ":skull:", "💀", -1) msg = strings.Replace(msg, ":point_up:", "☝ī¸", -1) msg = strings.Replace(msg, ":v:", "✌ī¸ī¸", -1) msg = strings.Replace(msg, ":writing_hand:", "✍ī¸", -1) msg = strings.Replace(msg, ":heart:", "❤ī¸ī¸", -1) msg = strings.Replace(msg, ":heart_exclamation:", "âŖī¸", -1) msg = strings.Replace(msg, ":hotsprings:", "♨ī¸", -1) msg = strings.Replace(msg, ":airplane:", "✈ī¸ī¸", -1) msg = strings.Replace(msg, ":hourglass:", "⌛", -1) msg = strings.Replace(msg, ":watch:", "⌚", -1) msg = strings.Replace(msg, ":comet:", "☄ī¸", -1) msg = strings.Replace(msg, ":snowflake:", "❄ī¸", -1) msg = strings.Replace(msg, ":cloud:", "☁ī¸", -1) msg = strings.Replace(msg, ":sunny:", "☀ī¸", -1) msg = strings.Replace(msg, ":spades:", "♠ī¸", -1) msg = strings.Replace(msg, ":hearts:", "â™Ĩī¸ī¸", -1) msg = strings.Replace(msg, ":diamonds:", "â™Ļī¸", -1) msg = strings.Replace(msg, ":clubs:", "â™Ŗī¸", -1) msg = strings.Replace(msg, ":phone:", "☎ī¸", -1) msg = strings.Replace(msg, ":telephone:", "☎ī¸", -1) msg = strings.Replace(msg, ":biohazard:", "â˜Ŗī¸", -1) msg = strings.Replace(msg, ":radioactive:", "â˜ĸī¸", -1) msg = strings.Replace(msg, ":scissors:", "✂ī¸", -1) msg = strings.Replace(msg, ":arrow_upper_right:", "↗ī¸", -1) msg = strings.Replace(msg, ":arrow_right:", "➡ī¸", -1) msg = strings.Replace(msg, ":arrow_lower_right:", "↘ī¸", -1) msg = strings.Replace(msg, ":arrow_lower_left:", "↙ī¸", -1) msg = strings.Replace(msg, ":arrow_upper_left:", "↖ī¸", -1) msg = strings.Replace(msg, ":arrow_up_down:", "↕ī¸", -1) msg = strings.Replace(msg, ":left_right_arrow:", "↔ī¸", -1) msg = strings.Replace(msg, ":leftwards_arrow_with_hook:", "↩ī¸", -1) msg = strings.Replace(msg, ":arrow_right_hook:", "â†Ēī¸", -1) msg = strings.Replace(msg, ":arrow_forward:", "â–ļī¸", -1) msg = strings.Replace(msg, ":arrow_backward:", "◀ī¸", -1) msg = strings.Replace(msg, ":female:", "♀ī¸", -1) msg = strings.Replace(msg, ":male:", "♂ī¸", -1) msg = strings.Replace(msg, ":ballot_box_with_check:", "☑ī¸", -1) msg = strings.Replace(msg, ":heavy_check_mark:", "✔ī¸ī¸", -1) msg = strings.Replace(msg, ":heavy_multiplication_x:", "✖ī¸", -1) msg = strings.Replace(msg, ":pisces:", "♓", -1) msg = strings.Replace(msg, ":aquarius:", "♒", -1) msg = strings.Replace(msg, ":capricorn:", "♑", -1) msg = strings.Replace(msg, ":sagittarius:", "♐", -1) msg = strings.Replace(msg, ":scorpius:", "♏", -1) msg = strings.Replace(msg, ":libra:", "♎", -1) msg = strings.Replace(msg, ":virgo:", "♍", -1) msg = strings.Replace(msg, ":leo:", "♌", -1) msg = strings.Replace(msg, ":cancer:", "♋", -1) msg = strings.Replace(msg, ":gemini:", "♊", -1) msg = strings.Replace(msg, ":taurus:", "♉", -1) msg = strings.Replace(msg, ":aries:", "♈", -1) msg = strings.Replace(msg, ":peace:", "☎ī¸", -1) msg = strings.Replace(msg, ":eight_spoked_asterisk:", "âœŗī¸", -1) msg = strings.Replace(msg, ":eight_pointed_black_star:", "✴ī¸", -1) msg = strings.Replace(msg, ":snowman2:", "☃ī¸", -1) msg = strings.Replace(msg, ":umbrella2:", "☂ī¸", -1) msg = strings.Replace(msg, ":pencil2:", "✏ī¸", -1) msg = strings.Replace(msg, ":black_nib:", "✒ī¸", -1) msg = strings.Replace(msg, ":email:", "✉ī¸", -1) msg = strings.Replace(msg, ":envelope:", "✉ī¸", -1) msg = strings.Replace(msg, ":keyboard:", "⌨ī¸", -1) msg = strings.Replace(msg, ":white_small_square:", "â–Ģī¸", -1) msg = strings.Replace(msg, ":black_small_square:", "â–Ēī¸", -1) msg = strings.Replace(msg, ":secret:", "㊙ī¸", -1) msg = strings.Replace(msg, ":congratulations:", "㊗ī¸", -1) msg = strings.Replace(msg, ":m:", "Ⓜī¸", -1) msg = strings.Replace(msg, ":tm:", "â„ĸī¸ī¸", -1) msg = strings.Replace(msg, ":registered:", "ÂŽī¸", -1) msg = strings.Replace(msg, ":copyright:", "Šī¸", -1) msg = strings.Replace(msg, ":wavy_dash:", "〰ī¸", -1) msg = strings.Replace(msg, ":bangbang:", "â€ŧī¸", -1) msg = strings.Replace(msg, ":sparkle:", "❇ī¸", -1) msg = strings.Replace(msg, ":star_of_david:", "✡ī¸", -1) msg = strings.Replace(msg, ":wheel_of_dharma:", "☸ī¸", -1) msg = strings.Replace(msg, ":yin_yang:", "☯ī¸", -1) msg = strings.Replace(msg, ":cross:", "✝ī¸", -1) msg = strings.Replace(msg, ":orthodox_cross:", "â˜Ļī¸", -1) msg = strings.Replace(msg, ":star_and_crescent:", "â˜Ēī¸", -1) msg = strings.Replace(msg, ":frowning2:", "☚ī¸", -1) msg = strings.Replace(msg, ":information_source:", "ℹī¸", -1) msg = strings.Replace(msg, ":interrobang:", "⁉ī¸", -1) return msg } type TagToAction struct { Suffix string Do func(*TagToAction, bool, int, []rune) (int, string) // func(tagToAction,open,i,runes) (newI, output) Depth int // For use by Do PartialMode bool } // TODO: Write a test for this func tryStepForward(i int, step int, runes []rune) (int, bool) { i += step if i < len(runes) { return i, true } return i - step, false } // TODO: Preparse Markdown and normalize it into HTML? func PreparseMessage(msg string) string { //fmt.Println("initial msg: ", msg) //fmt.Println("initial []byte(msg): ", []byte(msg)) // TODO: Kick this check down a level into SanitiseBody? if !utf8.ValidString(msg) { return "" } msg = strings.Replace(msg, "


", "\n\n", -1) msg = strings.Replace(msg, "

", "\n\n", -1) msg = strings.Replace(msg, "

", "", -1) // TODO: Make this looser by moving it to the reverse HTML parser? msg = strings.Replace(msg, "
", "\n\n", -1) msg = strings.Replace(msg, "
", "\n\n", -1) // XHTML style msg = strings.Replace(msg, " ", "", -1) msg = strings.Replace(msg, "\r", "", -1) // Windows artifact //msg = strings.Replace(msg, "\n\n\n\n", "\n\n\n", -1) msg = RunSshook("preparse_preassign", msg) // There are a few useful cases for having spaces, but I'd like to stop the WYSIWYG from inserting random lines here and there msg = SanitiseBody(msg) //fmt.Println("before msg: ", msg) //fmt.Println("before []byte(msg): ", []byte(msg)) var runes = []rune(msg) msg = "" // TODO: We can maybe reduce the size of this by using an offset? // TODO: Move some of these closures out of this function to make things a little more efficient var allowedTags = [][]string{ 'e': []string{"m"}, 's': []string{"", "trong", "pan"}, 'd': []string{"el"}, 'u': []string{""}, 'b': []string{""}, 'i': []string{""}, } var buildLitMatch = func(tag string) func(*TagToAction, bool, int, []rune) (int, string) { return func(action *TagToAction, open bool, _ int, _ []rune) (int, string) { if open { action.Depth++ return -1, "<" + tag + ">" } if action.Depth <= 0 { return -1, "" } action.Depth-- return -1, "" } } var tagToAction = [][]*TagToAction{ 'e': []*TagToAction{&TagToAction{"m", buildLitMatch("em"), 0, false}}, 's': []*TagToAction{ &TagToAction{"", buildLitMatch("del"), 0, false}, &TagToAction{"trong", buildLitMatch("strong"), 0, false}, // Hides the span tags Trumbowyg loves blasting out randomly &TagToAction{"pan", func(act *TagToAction, open bool, i int, runes []rune) (int, string) { if open { act.Depth++ //fmt.Println("skipping attributes") for ; i < len(runes); i++ { if runes[i] == '&' && peekMatch(i, "gt;", runes) { //fmt.Println("found tag exit") return i + 3, " " } } return -1, " " } if act.Depth <= 0 { return -1, " " } act.Depth-- return -1, " " }, 0, true}, }, 'd': []*TagToAction{&TagToAction{"el", buildLitMatch("del"), 0, false}}, 'u': []*TagToAction{&TagToAction{"", buildLitMatch("u"), 0, false}}, 'b': []*TagToAction{&TagToAction{"", buildLitMatch("strong"), 0, false}}, 'i': []*TagToAction{&TagToAction{"", buildLitMatch("em"), 0, false}}, } // TODO: Implement a less literal parser for i := 0; i < len(runes); i++ { char := runes[i] if char == '&' && peekMatch(i, "lt;", runes) { //fmt.Println("found less than") var ok bool i, ok = tryStepForward(i, 4, runes) if !ok { msg += "<" break } char := runes[i] //fmt.Println("char: ", char) //fmt.Println("string(char): ", string(char)) if int(char) >= len(allowedTags) { //fmt.Println("sentinel char out of bounds") msg += "&" i -= 4 continue } var closeTag bool if char == '/' { //fmt.Println("found close tag") i, ok = tryStepForward(i, 1, runes) if !ok { msg += "</" break } char = runes[i] closeTag = true } tags := allowedTags[char] if len(tags) == 0 { //fmt.Println("couldn't find char in allowedTags") if closeTag { //msg += "</" msg += "&" i -= 5 } else { msg += "&" i -= 4 } continue } // TODO: Scan through tags and make sure the suffix is present to reduce the number of false positives which hit the loop below //fmt.Printf("tags: %+v\n", tags) var newI = -1 var out string toActionList := tagToAction[char] //fmt.Println("toActionList: ", toActionList) for _, toAction := range toActionList { //fmt.Printf("toAction: %+v\n", toAction) // TODO: Optimise this, maybe with goto or a function call to avoid scanning the text twice? if (toAction.PartialMode && !closeTag && peekMatch(i, toAction.Suffix, runes)) || peekMatch(i, toAction.Suffix+">", runes) { //fmt.Println("peekMatched") newI, out = toAction.Do(toAction, !closeTag, i, runes) //fmt.Println("newI: ", newI) //fmt.Println("i: ", i) //fmt.Println("string(runes[i]): ", string(runes[i])) if newI != -1 { i = newI } else if out != "" { i += len(toAction.Suffix + ">") } //fmt.Println("i: ", i) //fmt.Println("string(runes[i]): ", string(runes[i])) //fmt.Println("out: ", out) break } } if out == "" { //fmt.Println("no out") msg += "&" if closeTag { i -= 5 } else { i -= 4 } } else if out != " " { msg += out } } else { msg += string(char) } } //fmt.Println("running autoclosers") //fmt.Println("msg: ", msg) for _, actionList := range tagToAction { //if len(actionList) > 0 { // fmt.Println("actionList: ", actionList) //} for _, toAction := range actionList { //fmt.Printf("toAction: %+v\n", toAction) if toAction.Depth > 0 { //fmt.Println("autoclosing") for ; toAction.Depth > 0; toAction.Depth-- { _, out := toAction.Do(toAction, false, len(runes), runes) //fmt.Println("out: ", out) if out != "" { msg += out } } } } } //fmt.Println("msg: ", msg) return strings.TrimSpace(shortcodeToUnicode(msg)) } // TODO: Test this // TODO: Use this elsewhere in the parser? func peek(cur int, skip int, runes []rune) rune { if (cur + skip) < len(runes) { return runes[cur+skip] } return 0 // null byte } // TODO: Test this func peekMatch(cur int, phrase string, runes []rune) bool { if cur+len(phrase) > len(runes) { return false } for i, char := range phrase { if cur+i+1 >= len(runes) { return false } if runes[cur+i+1] != char { return false } } return true } // TODO: Write a test for this // TODO: We need a lot more hooks here. E.g. To add custom media types and handlers. // TODO: Use templates to reduce the amount of boilerplate? func ParseMessage(msg string, sectionID int, sectionType string /*, user User*/) string { // TODO: Word boundary detection for these to avoid mangling code msg = strings.Replace(msg, ":)", "😀", -1) msg = strings.Replace(msg, ":(", "😞", -1) msg = strings.Replace(msg, ":D", "😃", -1) msg = strings.Replace(msg, ":P", "😛", -1) msg = strings.Replace(msg, ":O", "😲", -1) msg = strings.Replace(msg, ":p", "😛", -1) msg = strings.Replace(msg, ":o", "😲", -1) msg = strings.Replace(msg, ";)", "😉", -1) // Word filter list. E.g. Swear words and other things the admins don't like wordFilters, err := WordFilters.GetAll() if err != nil { LogError(err) return "" } for _, filter := range wordFilters { msg = strings.Replace(msg, filter.Find, filter.Replacement, -1) } // Search for URLs, mentions and hashlinks in the messages... var msgbytes = []byte(msg) var outbytes []byte msgbytes = append(msgbytes, SpaceGap...) var lastItem = 0 var i = 0 for ; len(msgbytes) > (i + 1); i++ { if (i == 0 && (msgbytes[0] > 32)) || ((msgbytes[i] < 33) && (msgbytes[i+1] > 32)) { if (i != 0) || msgbytes[i] < 33 { i++ } if msgbytes[i] == '#' { if bytes.Equal(msgbytes[i+1:i+5], []byte("tid-")) { outbytes = append(outbytes, msgbytes[lastItem:i]...) i += 5 start := i tid, intLen := CoerceIntBytes(msgbytes[start:]) i += intLen topic, err := Topics.Get(tid) if err != nil || !Forums.Exists(topic.ParentID) { outbytes = append(outbytes, InvalidTopic...) lastItem = i continue } outbytes = append(outbytes, URLOpen...) var urlBit = []byte(BuildTopicURL("", tid)) outbytes = append(outbytes, urlBit...) outbytes = append(outbytes, URLOpen2...) var tidBit = []byte("#tid-" + strconv.Itoa(tid)) outbytes = append(outbytes, tidBit...) outbytes = append(outbytes, URLClose...) lastItem = i } else if bytes.Equal(msgbytes[i+1:i+5], []byte("rid-")) { outbytes = append(outbytes, msgbytes[lastItem:i]...) i += 5 start := i rid, intLen := CoerceIntBytes(msgbytes[start:]) i += intLen topic, err := TopicByReplyID(rid) if err != nil || !Forums.Exists(topic.ParentID) { outbytes = append(outbytes, InvalidTopic...) lastItem = i continue } outbytes = append(outbytes, URLOpen...) var urlBit = []byte(BuildTopicURL("", topic.ID)) outbytes = append(outbytes, urlBit...) outbytes = append(outbytes, URLOpen2...) var ridBit = []byte("#rid-" + strconv.Itoa(rid)) outbytes = append(outbytes, ridBit...) outbytes = append(outbytes, URLClose...) lastItem = i } else if bytes.Equal(msgbytes[i+1:i+5], []byte("fid-")) { outbytes = append(outbytes, msgbytes[lastItem:i]...) i += 5 start := i fid, intLen := CoerceIntBytes(msgbytes[start:]) i += intLen if !Forums.Exists(fid) { outbytes = append(outbytes, InvalidForum...) lastItem = i continue } outbytes = append(outbytes, URLOpen...) var urlBit = []byte(BuildForumURL("", fid)) outbytes = append(outbytes, urlBit...) outbytes = append(outbytes, URLOpen2...) var fidBit = []byte("#fid-" + strconv.Itoa(fid)) outbytes = append(outbytes, fidBit...) outbytes = append(outbytes, URLClose...) lastItem = i } else { // TODO: Forum Shortcode Link } } else if msgbytes[i] == '@' { outbytes = append(outbytes, msgbytes[lastItem:i]...) i++ start := i uid, intLen := CoerceIntBytes(msgbytes[start:]) i += intLen menUser, err := Users.Get(uid) if err != nil { outbytes = append(outbytes, InvalidProfile...) lastItem = i continue } outbytes = append(outbytes, URLOpen...) var urlBit = []byte(menUser.Link) outbytes = append(outbytes, urlBit...) outbytes = append(outbytes, bytesSinglequote...) outbytes = append(outbytes, urlMention...) outbytes = append(outbytes, bytesGreaterthan...) var uidBit = []byte("@" + menUser.Name) outbytes = append(outbytes, uidBit...) outbytes = append(outbytes, URLClose...) lastItem = i } else if msgbytes[i] == 'h' || msgbytes[i] == 'f' || msgbytes[i] == 'g' || msgbytes[i] == '/' { if msgbytes[i+1] == 't' && msgbytes[i+2] == 't' && msgbytes[i+3] == 'p' { if msgbytes[i+4] == 's' && msgbytes[i+5] == ':' && msgbytes[i+6] == '/' { // Do nothing } else if msgbytes[i+4] == ':' && msgbytes[i+5] == '/' { // Do nothing } else { continue } } else if msgbytes[i+1] == 't' && msgbytes[i+2] == 'p' && msgbytes[i+3] == ':' && msgbytes[i+4] == '/' { // Do nothing } else if msgbytes[i+1] == 'i' && msgbytes[i+2] == 't' && msgbytes[i+3] == ':' && msgbytes[i+4] == '/' { // Do nothing } else if msgbytes[i+1] == '/' { // Do nothing } else { continue } //log.Print("Normal URL") outbytes = append(outbytes, msgbytes[lastItem:i]...) urlLen := PartialURLBytesLen(msgbytes[i:]) if msgbytes[i+urlLen] > 32 { // space and invisibles //log.Print("INVALID URL") outbytes = append(outbytes, InvalidURL...) i += urlLen continue } media, ok := parseMediaBytes(msgbytes[i : i+urlLen]) if !ok { outbytes = append(outbytes, InvalidURL...) i += urlLen continue } // TODO: Reduce the amount of code duplication if media.Type == "attach" { outbytes = append(outbytes, imageOpen...) outbytes = append(outbytes, []byte(media.URL+"?sectionID="+strconv.Itoa(sectionID)+"§ionType="+sectionType)...) outbytes = append(outbytes, imageOpen2...) outbytes = append(outbytes, []byte(media.URL+"?sectionID="+strconv.Itoa(sectionID)+"§ionType="+sectionType)...) outbytes = append(outbytes, imageClose...) i += urlLen lastItem = i continue } else if media.Type == "image" { outbytes = append(outbytes, imageOpen...) outbytes = append(outbytes, []byte(media.URL)...) outbytes = append(outbytes, imageOpen2...) outbytes = append(outbytes, []byte(media.URL)...) outbytes = append(outbytes, imageClose...) i += urlLen lastItem = i continue } else if media.Type == "raw" { outbytes = append(outbytes, []byte(media.Body)...) i += urlLen lastItem = i continue } else if media.Type != "" { outbytes = append(outbytes, unknownMedia...) i += urlLen continue } outbytes = append(outbytes, URLOpen...) outbytes = append(outbytes, msgbytes[i:i+urlLen]...) outbytes = append(outbytes, URLOpen2...) outbytes = append(outbytes, msgbytes[i:i+urlLen]...) outbytes = append(outbytes, URLClose...) i += urlLen lastItem = i } } } if lastItem != i && len(outbytes) != 0 { calclen := len(msgbytes) - 10 if calclen <= lastItem { calclen = lastItem } outbytes = append(outbytes, msgbytes[lastItem:calclen]...) msg = string(outbytes) } msg = strings.Replace(msg, "\n", "
", -1) msg = RunSshook("parse_assign", msg) return msg } // 6, 7, 8, 6, 2, 7 // ftp://, http://, https:// git://, //, mailto: (not a URL, just here for length comparison purposes) // TODO: Write a test for this func validateURLBytes(data []byte) bool { datalen := len(data) i := 0 if datalen >= 6 { if bytes.Equal(data[0:6], []byte("ftp://")) || bytes.Equal(data[0:6], []byte("git://")) { i = 6 } else if datalen >= 7 && bytes.Equal(data[0:7], httpProtBytes) { i = 7 } else if datalen >= 8 && bytes.Equal(data[0:8], []byte("https://")) { i = 8 } } else if datalen >= 2 && data[0] == '/' && data[1] == '/' { i = 2 } // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. for ; datalen > i; i++ { if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { return false } } return true } // TODO: Write a test for this func validatedURLBytes(data []byte) (url []byte) { datalen := len(data) i := 0 if datalen >= 6 { if bytes.Equal(data[0:6], []byte("ftp://")) || bytes.Equal(data[0:6], []byte("git://")) { i = 6 } else if datalen >= 7 && bytes.Equal(data[0:7], httpProtBytes) { i = 7 } else if datalen >= 8 && bytes.Equal(data[0:8], []byte("https://")) { i = 8 } } else if datalen >= 2 && data[0] == '/' && data[1] == '/' { i = 2 } // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. for ; datalen > i; i++ { if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { return InvalidURL } } url = append(url, data...) return url } // TODO: Write a test for this func PartialURLBytes(data []byte) (url []byte) { datalen := len(data) i := 0 end := datalen - 1 if datalen >= 6 { if bytes.Equal(data[0:6], []byte("ftp://")) || bytes.Equal(data[0:6], []byte("git://")) { i = 6 } else if datalen >= 7 && bytes.Equal(data[0:7], httpProtBytes) { i = 7 } else if datalen >= 8 && bytes.Equal(data[0:8], []byte("https://")) { i = 8 } } else if datalen >= 2 && data[0] == '/' && data[1] == '/' { i = 2 } // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. for ; end >= i; i++ { if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { end = i } } url = append(url, data[0:end]...) return url } // TODO: Write a test for this func PartialURLBytesLen(data []byte) int { datalen := len(data) i := 0 if datalen >= 6 { //log.Print(string(data[0:5])) if bytes.Equal(data[0:6], []byte("ftp://")) || bytes.Equal(data[0:6], []byte("git://")) { i = 6 } else if datalen >= 7 && bytes.Equal(data[0:7], httpProtBytes) { i = 7 } else if datalen >= 8 && bytes.Equal(data[0:8], []byte("https://")) { i = 8 } } else if datalen >= 2 && data[0] == '/' && data[1] == '/' { i = 2 } // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. for ; datalen > i; i++ { if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { //log.Print("Bad Character: ", data[i]) return i } } //log.Print("Data Length: ",datalen) return datalen } type MediaEmbed struct { Type string //image URL string Body string } // TODO: Write a test for this func parseMediaBytes(data []byte) (media MediaEmbed, ok bool) { if !validateURLBytes(data) { return media, false } url, err := parseURL(data) if err != nil { return media, false } hostname := url.Hostname() scheme := url.Scheme port := url.Port() query := url.Query() var samesite = hostname == "localhost" || hostname == Site.URL if samesite { hostname = strings.Split(Site.URL, ":")[0] // ?- Test this as I'm not sure it'll do what it should. If someone's running SSL on port 80 or non-SSL on port 443 then... Well... They're in far worse trouble than this... port = Site.Port if Site.EnableSsl { scheme = "https" } } if scheme == "" { scheme = "http" } path := url.EscapedPath() pathFrags := strings.Split(path, "/") if len(pathFrags) >= 2 { if samesite && pathFrags[1] == "attachs" && (scheme == "http" || scheme == "https") { media.Type = "attach" var sport string // ? - Assumes the sysadmin hasn't mixed up the two standard ports if port != "443" && port != "80" { sport = ":" + port } media.URL = scheme + "://" + hostname + sport + path return media, true } } // ? - I don't think this hostname will hit every YT domain // TODO: Make this a more customisable handler rather than hard-coding it in here if hostname == "www.youtube.com" && path == "/watch" { video, ok := query["v"] if ok && len(video) >= 1 && video[0] != "" { media.Type = "raw" // TODO: Filter the URL to make sure no nasties end up in there media.Body = "" return media, true } } lastFrag := pathFrags[len(pathFrags)-1] if lastFrag != "" { // TODO: Write a function for getting the file extension of a string extarr := strings.Split(lastFrag, ".") if len(extarr) >= 2 { ext := extarr[len(extarr)-1] if ImageFileExts.Contains(ext) { media.Type = "image" var sport string if port != "443" && port != "80" { sport = ":" + port } media.URL = scheme + "://" + hostname + sport + path return media, true } } } return media, true } func parseURL(data []byte) (*url.URL, error) { return url.Parse(string(data)) } // TODO: Write a test for this func CoerceIntBytes(data []byte) (res int, length int) { if !(data[0] > 47 && data[0] < 58) { return 0, 1 } i := 0 for ; len(data) > i; i++ { if !(data[i] > 47 && data[i] < 58) { conv, err := strconv.Atoi(string(data[0:i])) if err != nil { return 0, i } return conv, i } } conv, err := strconv.Atoi(string(data)) if err != nil { return 0, i } return conv, i } // TODO: Write tests for this func Paginate(count int, perPage int, maxPages int) []int { if count < perPage { return []int{1} } var page int var out []int for current := 0; current < count; current += perPage { page++ out = append(out, page) if len(out) >= maxPages { break } } return out } // TODO: Write tests for this func PageOffset(count int, page int, perPage int) (int, int, int) { var offset int lastPage := (count / perPage) + 1 if page > 1 { offset = (perPage * page) - perPage } else if page == -1 { page = lastPage offset = (perPage * page) - perPage } else { page = 1 } // We don't want the offset to overflow the slices, if everything's in memory if offset >= (count - 1) { offset = 0 } return offset, page, lastPage }