Fix the URL parser.

Improve the URL validator.
Add a few more URL parser test cases.
This commit is contained in:
Azareal 2019-10-13 17:32:05 +10:00
parent 38bbdfe0c2
commit 6147db18dd
2 changed files with 74 additions and 37 deletions

View File

@ -3,6 +3,7 @@ package common
import ( import (
"bytes" "bytes"
//"fmt" //"fmt"
//"log"
"encoding/json" "encoding/json"
"io/ioutil" "io/ioutil"
"net/url" "net/url"
@ -650,7 +651,7 @@ func ParseMessage(msg string, sectionID int, sectionType string /*, user User*/)
} else { } else {
sb.Write(URLOpenUser) sb.Write(URLOpenUser)
} }
sb.WriteString(msg[i : i+urlLen]) sb.WriteString(media.URL)
sb.Write(URLOpen2) sb.Write(URLOpen2)
sb.WriteString(media.FURL) sb.WriteString(media.FURL)
sb.Write(URLClose) sb.Write(URLClose)
@ -852,15 +853,20 @@ func parseMediaString(data string) (media MediaEmbed, ok bool) {
if !validateURLString(data) { if !validateURLString(data) {
return media, false return media, false
} }
url, err := url.Parse(data) uurl, err := url.Parse(data)
if err != nil { if err != nil {
return media, false return media, false
} }
hostname := url.Hostname() hostname := uurl.Hostname()
scheme := url.Scheme scheme := uurl.Scheme
port := url.Port() port := uurl.Port()
query := url.Query() query, err := url.ParseQuery(uurl.RawQuery)
if err != nil {
return media, false
}
//log.Print("hostname:",hostname)
//log.Print("Site.URL:",Site.URL)
samesite := hostname == "localhost" || hostname == "127.0.0.1" || hostname == "::1" || hostname == Site.URL samesite := hostname == "localhost" || hostname == "127.0.0.1" || hostname == "::1" || hostname == Site.URL
if samesite { if samesite {
@ -871,12 +877,12 @@ func parseMediaString(data string) (media MediaEmbed, ok bool) {
scheme = "https" scheme = "https"
} }
} }
if scheme == "" { if scheme != "" {
scheme = "http" scheme += ":"
} }
media.Trusted = samesite media.Trusted = samesite
path := url.EscapedPath() path := uurl.EscapedPath()
pathFrags := strings.Split(path, "/") pathFrags := strings.Split(path, "/")
if len(pathFrags) >= 2 { if len(pathFrags) >= 2 {
if samesite && pathFrags[1] == "attachs" && (scheme == "http" || scheme == "https") { if samesite && pathFrags[1] == "attachs" && (scheme == "http" || scheme == "https") {
@ -885,7 +891,7 @@ func parseMediaString(data string) (media MediaEmbed, ok bool) {
if port != "443" && port != "80" && port != "" { if port != "443" && port != "80" && port != "" {
sport = ":" + port sport = ":" + port
} }
media.URL = scheme + "://" + hostname + sport + path media.URL = scheme + "//" + hostname + sport + path
extarr := strings.Split(path, ".") extarr := strings.Split(path, ".")
if len(extarr) == 0 { if len(extarr) == 0 {
// TODO: Write a unit test for this // TODO: Write a unit test for this
@ -923,7 +929,7 @@ func parseMediaString(data string) (media MediaEmbed, ok bool) {
if port != "443" && port != "80" && port != "" { if port != "443" && port != "80" && port != "" {
sport = ":" + port sport = ":" + port
} }
media.URL = scheme + "://" + hostname + sport + path media.URL = scheme + "//" + hostname + sport + path
return media, true return media, true
} }
} }
@ -933,7 +939,16 @@ func parseMediaString(data string) (media MediaEmbed, ok bool) {
if port != "443" && port != "80" && port != "" { if port != "443" && port != "80" && port != "" {
sport = ":" + port sport = ":" + port
} }
media.FURL = hostname + sport + path var q string
if len(uurl.RawQuery) > 0 {
q = "?" + uurl.RawQuery
}
var frag string
if len(uurl.Fragment) > 0 {
frag = "#" + uurl.Fragment
}
media.URL = scheme + "//" + hostname + sport + path + q + frag
media.FURL = hostname + sport + path + q + frag
return media, true return media, true
} }
@ -943,7 +958,6 @@ func CoerceIntString(data string) (res int, length int) {
if !(data[0] > 47 && data[0] < 58) { if !(data[0] > 47 && data[0] < 58) {
return 0, 1 return 0, 1
} }
i := 0 i := 0
for ; len(data) > i; i++ { for ; len(data) > i; i++ {
if !(data[i] > 47 && data[i] < 58) { if !(data[i] > 47 && data[i] < 58) {

View File

@ -122,8 +122,7 @@ func TestPreparser(t *testing.T) {
// TODO: Do a test with invalid UTF-8 input // TODO: Do a test with invalid UTF-8 input
for _, item := range l.Items { for _, item := range l.Items {
res := c.PreparseMessage(item.Msg) if res := c.PreparseMessage(item.Msg); res != item.Expects {
if res != item.Expects {
if item.Name != "" { if item.Name != "" {
t.Error("Name: ", item.Name) t.Error("Name: ", item.Name)
} }
@ -143,7 +142,7 @@ func TestParser(t *testing.T) {
l := &METriList{nil} l := &METriList{nil}
url := "github.com/Azareal/Gosora" url := "github.com/Azareal/Gosora"
eurl := "<a rel='ugc' href='//" + url + "'>//" + url + "</a>" eurl := "<a rel='ugc' href='//" + url + "'>" + url + "</a>"
l.Add("", "") l.Add("", "")
l.Add("haha", "haha") l.Add("haha", "haha")
l.Add("<b>t</b>", "<b>t</b>") l.Add("<b>t</b>", "<b>t</b>")
@ -185,31 +184,36 @@ func TestParser(t *testing.T) {
l.Add("ss", "ss") l.Add("ss", "ss")
l.Add("haha\nhaha\nhaha", "haha<br>haha<br>haha") l.Add("haha\nhaha\nhaha", "haha<br>haha<br>haha")
l.Add("//"+url, eurl) l.Add("//"+url, eurl)
l.Add("//a", "<a rel='ugc' href='//a'>//a</a>") l.Add("//a", "<a rel='ugc' href='//a'>a</a>")
l.Add(" //a", " <a rel='ugc' href='//a'>//a</a>") l.Add(" //a", " <a rel='ugc' href='//a'>a</a>")
l.Add("//a ", "<a rel='ugc' href='//a'>//a</a> ") l.Add("//a ", "<a rel='ugc' href='//a'>a</a> ")
l.Add(" //a ", " <a rel='ugc' href='//a'>//a</a> ") l.Add(" //a ", " <a rel='ugc' href='//a'>a</a> ")
l.Add("d //a ", "d <a rel='ugc' href='//a'>//a</a> ") l.Add("d //a ", "d <a rel='ugc' href='//a'>a</a> ")
l.Add("ddd ddd //a ", "ddd ddd <a rel='ugc' href='//a'>//a</a> ") l.Add("ddd ddd //a ", "ddd ddd <a rel='ugc' href='//a'>a</a> ")
l.Add("https://"+url, "<a rel='ugc' href='https://"+url+"'>https://"+url+"</a>") l.Add("https://"+url, "<a rel='ugc' href='https://"+url+"'>"+url+"</a>")
l.Add("https://t", "<a rel='ugc' href='https://t'>https://t</a>") l.Add("https://t", "<a rel='ugc' href='https://t'>t</a>")
l.Add("http://"+url, "<a rel='ugc' href='http://"+url+"'>http://"+url+"</a>") l.Add("http://"+url, "<a rel='ugc' href='http://"+url+"'>"+url+"</a>")
l.Add("#http://"+url, "#http://"+url) l.Add("#http://"+url, "#http://"+url)
l.Add("@http://"+url, "<red>[Invalid Profile]</red>ttp://"+url) l.Add("@http://"+url, "<red>[Invalid Profile]</red>ttp://"+url)
l.Add("//"+url+"\n", "<a rel='ugc' href='//"+url+"'>//"+url+"</a><br>") l.Add("//"+url+"\n", "<a rel='ugc' href='//"+url+"'>"+url+"</a><br>")
l.Add("\n//"+url, "<br>"+eurl) l.Add("\n//"+url, "<br>"+eurl)
l.Add("\n//"+url+"\n", "<br>"+eurl+"<br>") l.Add("\n//"+url+"\n", "<br>"+eurl+"<br>")
l.Add("\n//"+url+"\n\n", "<br>"+eurl+"<br><br>") l.Add("\n//"+url+"\n\n", "<br>"+eurl+"<br><br>")
l.Add("//"+url+"\n//"+url, eurl+"<br>"+eurl) l.Add("//"+url+"\n//"+url, eurl+"<br>"+eurl)
l.Add("//"+url+"\n\n//"+url, eurl+"<br><br>"+eurl) l.Add("//"+url+"\n\n//"+url, eurl+"<br><br>"+eurl)
l.Add("//"+c.Site.URL, "<a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a>")
l.Add("//"+c.Site.URL+"\n", "<a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a><br>")
l.Add("//"+c.Site.URL+"\n//"+c.Site.URL, "<a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a><br><a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a>")
local := func(url string) { local := func(u string) {
l.Add("//"+url, "<a href='//"+url+"'>//"+url+"</a>") s := "//" + c.Site.URL
l.Add("//"+url+"\n", "<a href='//"+url+"'>//"+url+"</a><br>") fs := "http://" + c.Site.URL
l.Add("//"+url+"\n//"+url, "<a href='//"+url+"'>//"+url+"</a><br><a href='//"+url+"'>//"+url+"</a>") if c.Site.EnableSsl {
s = "https:" + s
fs = "https://" + c.Site.URL
}
l.Add("//"+u, "<a href='"+s+"'>"+c.Site.URL+"</a>")
l.Add("//"+u+"\n", "<a href='"+s+"'>"+c.Site.URL+"</a><br>")
l.Add("//"+u+"\n//"+u, "<a href='"+s+"'>"+c.Site.URL+"</a><br><a href='"+s+"'>"+c.Site.URL+"</a>")
l.Add("http://"+u, "<a href='"+fs+"'>"+c.Site.URL+"</a>")
l.Add("https://"+u, "<a href='"+fs+"'>"+c.Site.URL+"</a>")
} }
local("localhost") local("localhost")
local("127.0.0.1") local("127.0.0.1")
@ -243,8 +247,9 @@ func TestParser(t *testing.T) {
l.Add("@ #tid-@", "<red>[Invalid Profile]</red>#tid-@") l.Add("@ #tid-@", "<red>[Invalid Profile]</red>#tid-@")
l.Add("#tid-1 #tid-1", "<a href='/topic/1'>#tid-1</a> <a href='/topic/1'>#tid-1</a>") l.Add("#tid-1 #tid-1", "<a href='/topic/1'>#tid-1</a> <a href='/topic/1'>#tid-1</a>")
l.Add("#tid-0", "<red>[Invalid Topic]</red>") l.Add("#tid-0", "<red>[Invalid Topic]</red>")
l.Add("https://"+url+"/#tid-1", "<a rel='ugc' href='https://"+url+"/#tid-1'>https://"+url+"/#tid-1</a>") l.Add("https://"+url+"/#tid-1", "<a rel='ugc' href='https://"+url+"/#tid-1'>"+url+"/#tid-1</a>")
l.Add("https://"+url+"/?hi=2", "<a rel='ugc' href='https://"+url+"/?hi=2'>https://"+url+"/?hi=2</a>") l.Add("https://"+url+"/?hi=2", "<a rel='ugc' href='https://"+url+"/?hi=2'>"+url+"/?hi=2</a>")
l.Add("https://"+url+"/?hi=2#t=1", "<a rel='ugc' href='https://"+url+"/?hi=2#t=1'>"+url+"/?hi=2#t=1</a>")
l.Add("#fid-1", "<a href='/forum/1'>#fid-1</a>") l.Add("#fid-1", "<a href='/forum/1'>#fid-1</a>")
l.Add(" #fid-1", " <a href='/forum/1'>#fid-1</a>") l.Add(" #fid-1", " <a href='/forum/1'>#fid-1</a>")
l.Add("#fid-0", "<red>[Invalid Forum]</red>") l.Add("#fid-0", "<red>[Invalid Forum]</red>")
@ -283,8 +288,7 @@ func TestParser(t *testing.T) {
l.Add("@-1", "<red>[Invalid Profile]</red>1") l.Add("@-1", "<red>[Invalid Profile]</red>1")
for _, item := range l.Items { for _, item := range l.Items {
res := c.ParseMessage(item.Msg, 1, "forums") if res := c.ParseMessage(item.Msg, 1, "forums"); res != item.Expects {
if res != item.Expects {
if item.Name != "" { if item.Name != "" {
t.Error("Name: ", item.Name) t.Error("Name: ", item.Name)
} }
@ -295,6 +299,25 @@ func TestParser(t *testing.T) {
} }
} }
l = &METriList{nil}
pre := c.Site.URL // Just in case this is localhost...
c.Site.URL = "example.com"
l.Add("//"+c.Site.URL, "<a href='https://"+c.Site.URL+"'>"+c.Site.URL+"</a>")
l.Add("//"+c.Site.URL+"\n", "<a href='https://"+c.Site.URL+"'>"+c.Site.URL+"</a><br>")
l.Add("//"+c.Site.URL+"\n//"+c.Site.URL, "<a href='https://"+c.Site.URL+"'>"+c.Site.URL+"</a><br><a href='https://"+c.Site.URL+"'>"+c.Site.URL+"</a>")
for _, item := range l.Items {
if res := c.ParseMessage(item.Msg, 1, "forums"); res != item.Expects {
if item.Name != "" {
t.Error("Name: ", item.Name)
}
t.Error("Testing string '" + item.Msg + "'")
t.Error("Bad output:", "'"+res+"'")
t.Error("Expected:", "'"+item.Expects+"'")
break
}
}
c.Site.URL = pre
c.AddHashLinkType("nnid-", func(sb *strings.Builder, msg string, i *int) { c.AddHashLinkType("nnid-", func(sb *strings.Builder, msg string, i *int) {
tid, intLen := c.CoerceIntString(msg[*i:]) tid, intLen := c.CoerceIntString(msg[*i:])
*i += intLen *i += intLen