Files
mailpit/internal/linkcheck/main.go

110 lines
2.7 KiB
Go

// Package linkcheck handles message links checking
package linkcheck
import (
"regexp"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/axllent/mailpit/internal/storage"
"github.com/axllent/mailpit/internal/tools"
)
var linkRe = regexp.MustCompile(`(?im)\b(http|https):\/\/([\-\w@:%_\+'!.~#?,&\/\/=;]+)`)
// RunTests will run all tests on an HTML string
func RunTests(msg *storage.Message, followRedirects bool) (Response, error) {
s := Response{}
allLinks := extractHTMLLinks(msg)
allLinks = strUnique(append(allLinks, extractTextLinks(msg)...))
s.Links = getHTTPStatuses(allLinks, followRedirects)
for _, l := range s.Links {
if l.StatusCode >= 400 || l.StatusCode == 0 {
s.Errors++
}
}
return s, nil
}
func extractTextLinks(msg *storage.Message) []string {
testLinkRe := regexp.MustCompile(`(?im)([^<]\b)((http|https):\/\/([\-\w@:%_\+'!.~#?,&\/\/=;]+))`)
// RFC2396 appendix E states angle brackets are recommended for text/plain emails to
// recognize potential spaces in between the URL
// @see https://www.rfc-editor.org/rfc/rfc2396#appendix-E
bracketLinkRe := regexp.MustCompile(`(?im)<((http|https):\/\/([\-\w@:%_\+'!.~#?,&\/\/=;][^>]+))>`)
links := []string{}
matches := testLinkRe.FindAllStringSubmatch(msg.Text, -1)
for _, match := range matches {
if len(match) > 0 {
links = append(links, match[2])
}
}
angleMatches := bracketLinkRe.FindAllStringSubmatch(msg.Text, -1)
for _, match := range angleMatches {
if len(match) > 0 {
link := strings.ReplaceAll(match[1], "\n", "")
links = append(links, link)
}
}
return links
}
func extractHTMLLinks(msg *storage.Message) []string {
links := []string{}
reader := strings.NewReader(msg.HTML)
// Load the HTML document
doc, err := goquery.NewDocumentFromReader(reader)
if err != nil {
return links
}
aLinks := doc.Find("a[href]").Nodes
for _, link := range aLinks {
l, err := tools.GetHTMLAttributeVal(link, "href")
if err == nil && linkRe.MatchString(l) {
links = append(links, l)
}
}
cssLinks := doc.Find("link[rel=\"stylesheet\"]").Nodes
for _, link := range cssLinks {
l, err := tools.GetHTMLAttributeVal(link, "href")
if err == nil && linkRe.MatchString(l) {
links = append(links, l)
}
}
imgLinks := doc.Find("img[src]").Nodes
for _, link := range imgLinks {
l, err := tools.GetHTMLAttributeVal(link, "src")
if err == nil && linkRe.MatchString(l) {
links = append(links, l)
}
}
return links
}
// strUnique return a slice of unique strings from a slice
func strUnique(strSlice []string) []string {
keys := make(map[string]bool)
list := []string{}
for _, entry := range strSlice {
if _, value := keys[entry]; !value {
keys[entry] = true
list = append(list, entry)
}
}
return list
}