From bc20c495c3b458975cb4aabe25f0a751a2ddeef4 Mon Sep 17 00:00:00 2001 From: Azareal Date: Fri, 24 May 2019 09:39:24 +1000 Subject: [PATCH] Support IPv6 hosts in the URL Parser. Support m and gaming variants of YouTube in the Media Parser. Reduce repetition in the URL utility functions slightly. Added 30 new parser test cases. --- common/parser.go | 27 +++++++++++++++++---------- parser_test.go | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/common/parser.go b/common/parser.go index 24b8afcc..e45c69b4 100644 --- a/common/parser.go +++ b/common/parser.go @@ -744,7 +744,8 @@ func validateURLString(data string) bool { // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. for ; len(data) > i; i++ { - if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { + char := data[i] + if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [ return false } } @@ -770,7 +771,8 @@ func validatedURLBytes(data []byte) (url []byte) { // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. for ; datalen > i; i++ { - if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { + char := data[i] + if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [ return InvalidURL } } @@ -797,7 +799,8 @@ func PartialURLString(data string) (url []byte) { // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. for ; end >= i; i++ { - if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { + char := data[i] + if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [ end = i } } @@ -807,6 +810,7 @@ func PartialURLString(data string) (url []byte) { } // TODO: Write a test for this +// TODO: Handle the host bits differently from the paths... func PartialURLStringLen(data string) (int, bool) { i := 0 if len(data) >= 6 { @@ -831,11 +835,12 @@ func PartialURLStringLen(data string) (int, bool) { f := i //fmt.Println("f:",f) for ; len(data) > i; i++ { - if data[i] < 33 { // space and invisibles + char := data[i] + if char < 33 { // space and invisibles //fmt.Println("e2:",i) return i, i != f - } else if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { - //log.Print("Bad Character: ", data[i]) + } else if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [ + //log.Print("Bad Character: ", char) //fmt.Println("e3") return i, false } @@ -850,6 +855,7 @@ func PartialURLStringLen(data string) (int, bool) { } // TODO: Write a test for this +// TODO: Get this to support IPv6 hosts, this isn't currently done as this is used in the bbcode plugin where it thinks the [ is a IPv6 host func PartialURLStringLen2(data string) int { i := 0 if len(data) >= 6 { @@ -867,8 +873,9 @@ func PartialURLStringLen2(data string) int { // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. for ; len(data) > i; i++ { - if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { - //log.Print("Bad Character: ", data[i]) + char := data[i] + if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && !(char > 44 && char < 58) && !(char > 64 && char < 91) && !(char > 96 && char < 123) { // 90 is Z, 91 is [ + //log.Print("Bad Character: ", char) return i } } @@ -938,12 +945,12 @@ func parseMediaString(data string) (media MediaEmbed, ok bool) { // ? - I don't think this hostname will hit every YT domain // TODO: Make this a more customisable handler rather than hard-coding it in here - if hostname == "www.youtube.com" && path == "/watch" { + if strings.HasSuffix(hostname,".youtube.com") && path == "/watch" { video, ok := query["v"] if ok && len(video) >= 1 && video[0] != "" { media.Type = "raw" // TODO: Filter the URL to make sure no nasties end up in there - media.Body = "" + media.Body = "" return media, true } } diff --git a/parser_test.go b/parser_test.go index 61ccfd03..acf9a705 100644 --- a/parser_test.go +++ b/parser_test.go @@ -190,6 +190,34 @@ func TestParser(t *testing.T) { msgList.Add("//"+c.Site.URL+"\n", "//"+c.Site.URL+"
") msgList.Add("//"+c.Site.URL+"\n//"+c.Site.URL, "//"+c.Site.URL+"
//"+c.Site.URL+"") + var local = func(url string) { + msgList.Add("//"+url, "//"+url+"") + msgList.Add("//"+url+"\n", "//"+url+"
") + msgList.Add("//"+url+"\n//"+url, "//"+url+"
//"+url+"") + } + local("localhost") + local("127.0.0.1") + local("[::1]") + + msgList.Add("https://www.youtube.com/watch?v=lalalalala","") + //msgList.Add("https://www.youtube.com/watch?v=;","") + msgList.Add("https://www.youtube.com/watch?v=d;","") + msgList.Add("https://www.youtube.com/watch?v=d;d","") + msgList.Add("https://www.youtube.com/watch?v=alert()","[Invalid URL]()") + msgList.Add("https://www.youtube.com/watch?v=js:alert()","[Invalid URL]()") + msgList.Add("https://www.youtube.com/watch?v='+><+'","[Invalid URL]'+><+'") + msgList.Add("https://www.youtube.com/watch?v='+onready='alert(\"\")'+'","[Invalid URL]'+onready='alert(\"\")'+'") + msgList.Add(" https://www.youtube.com/watch?v=lalalalala"," ") + msgList.Add("https://www.youtube.com/watch?v=lalalalala tt"," tt") + msgList.Add("https://www.youtube.com/watch?v=lalalalala&d=haha","") + msgList.Add("https://gaming.youtube.com/watch?v=lalalalala","") + msgList.Add("https://gaming.youtube.com/watch?v=lalalalala&d=haha","") + msgList.Add("https://m.youtube.com/watch?v=lalalalala","") + msgList.Add("https://m.youtube.com/watch?v=lalalalala&d=haha","") + msgList.Add("http://www.youtube.com/watch?v=lalalalala","") + msgList.Add("//www.youtube.com/watch?v=lalalalala","") + //msgList.Add("www.youtube.com/watch?v=lalalalala","") + msgList.Add("#tid-1", "#tid-1") msgList.Add("##tid-1", "##tid-1") msgList.Add("# #tid-1", "# #tid-1") @@ -201,9 +229,13 @@ func TestParser(t *testing.T) { msgList.Add("https://"+url+"/#tid-1", "https://"+url+"/#tid-1") msgList.Add("https://"+url+"/?hi=2", "https://"+url+"/?hi=2") msgList.Add("#fid-1", "#fid-1") + msgList.Add(" #fid-1", " #fid-1") msgList.Add("#fid-0", "[Invalid Forum]") + msgList.Add(" #fid-0", " [Invalid Forum]") msgList.Add("#", "#") msgList.Add("# ", "# ") + msgList.Add(" @", " @") + msgList.Add(" #", " #") msgList.Add("#@", "#@") msgList.Add("#@ ", "#@ ") msgList.Add("#@1", "#@1") @@ -224,6 +256,7 @@ func TestParser(t *testing.T) { msgList.Add("@2 ", "[Invalid Profile] ") msgList.Add("@2 @2", "[Invalid Profile] [Invalid Profile]") msgList.Add("@1", "@Admin") + msgList.Add(" @1", " @Admin") msgList.Add("@1t", "@Admint") msgList.Add("@1 ", "@Admin ") msgList.Add("@1 @1", "@Admin @Admin")