mirror of
https://github.com/axllent/mailpit.git
synced 2026-06-28 06:56:06 +00:00
110 lines
2.7 KiB
Go
110 lines
2.7 KiB
Go
// Package linkcheck checks the links found in a message.
|
|
package linkcheck
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/axllent/mailpit/internal/storage"
|
|
"github.com/axllent/mailpit/internal/tools"
|
|
)
|
|
|
|
// linkRe matches an http:// or https:// URL (case-insensitive) that starts at
// a word boundary and is followed by one or more characters from the allowed
// URL character set. extractHTMLLinks uses it to decide whether an attribute
// value should be treated as a checkable link; note that MatchString succeeds
// when the pattern occurs anywhere in the value, not only at its start.
var linkRe = regexp.MustCompile(`(?im)\b(http|https):\/\/([\-\w@:%_\+'!.~#?,&\/\/=;]+)`)
|
|
|
|
// RunTests will run all tests on an HTML string
|
|
func RunTests(msg *storage.Message, followRedirects bool) (Response, error) {
|
|
s := Response{}
|
|
|
|
allLinks := extractHTMLLinks(msg)
|
|
allLinks = strUnique(append(allLinks, extractTextLinks(msg)...))
|
|
s.Links = getHTTPStatuses(allLinks, followRedirects)
|
|
|
|
for _, l := range s.Links {
|
|
if l.StatusCode >= 400 || l.StatusCode == 0 {
|
|
s.Errors++
|
|
}
|
|
}
|
|
|
|
return s, nil
|
|
}
|
|
|
|
func extractTextLinks(msg *storage.Message) []string {
|
|
testLinkRe := regexp.MustCompile(`(?im)([^<]\b)((http|https):\/\/([\-\w@:%_\+'!.~#?,&\/\/=;]+))`)
|
|
// RFC2396 appendix E states angle brackets are recommended for text/plain emails to
|
|
// recognize potential spaces in between the URL
|
|
// @see https://www.rfc-editor.org/rfc/rfc2396#appendix-E
|
|
bracketLinkRe := regexp.MustCompile(`(?im)<((http|https):\/\/([\-\w@:%_\+'!.~#?,&\/\/=;][^>]+))>`)
|
|
|
|
links := []string{}
|
|
|
|
matches := testLinkRe.FindAllStringSubmatch(msg.Text, -1)
|
|
for _, match := range matches {
|
|
if len(match) > 0 {
|
|
links = append(links, match[2])
|
|
}
|
|
}
|
|
|
|
angleMatches := bracketLinkRe.FindAllStringSubmatch(msg.Text, -1)
|
|
for _, match := range angleMatches {
|
|
if len(match) > 0 {
|
|
link := strings.ReplaceAll(match[1], "\n", "")
|
|
links = append(links, link)
|
|
}
|
|
}
|
|
|
|
return links
|
|
}
|
|
|
|
func extractHTMLLinks(msg *storage.Message) []string {
|
|
links := []string{}
|
|
|
|
reader := strings.NewReader(msg.HTML)
|
|
|
|
// Load the HTML document
|
|
doc, err := goquery.NewDocumentFromReader(reader)
|
|
if err != nil {
|
|
return links
|
|
}
|
|
|
|
aLinks := doc.Find("a[href]").Nodes
|
|
for _, link := range aLinks {
|
|
l, err := tools.GetHTMLAttributeVal(link, "href")
|
|
if err == nil && linkRe.MatchString(l) {
|
|
links = append(links, l)
|
|
}
|
|
}
|
|
|
|
cssLinks := doc.Find("link[rel=\"stylesheet\"]").Nodes
|
|
for _, link := range cssLinks {
|
|
l, err := tools.GetHTMLAttributeVal(link, "href")
|
|
if err == nil && linkRe.MatchString(l) {
|
|
links = append(links, l)
|
|
}
|
|
}
|
|
|
|
imgLinks := doc.Find("img[src]").Nodes
|
|
for _, link := range imgLinks {
|
|
l, err := tools.GetHTMLAttributeVal(link, "src")
|
|
if err == nil && linkRe.MatchString(l) {
|
|
links = append(links, l)
|
|
}
|
|
}
|
|
|
|
return links
|
|
}
|
|
|
|
// strUnique returns the distinct strings of strSlice in order of first
// appearance. The result is always a non-nil slice, even for empty input.
func strUnique(strSlice []string) []string {
	seen := make(map[string]struct{}, len(strSlice))
	unique := []string{}

	for _, s := range strSlice {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}

	return unique
}
|