fix: get title from HTML with monolith

2026-03-03 03:47:02 +00:00 · 2025-10-10 22:48:38 -07:00
parent b21e2c6ffd
commit fbafa3df4e
3 changed files with 26 additions and 14 deletions
--- a/apps/web/lib/api/controllers/links/postLink.ts
+++ b/apps/web/lib/api/controllers/links/postLink.ts
@@ -72,7 +72,7 @@ export default async function postLink(
  }

  const { title = "", headers = new Headers() } = link.url
-    ? await fetchTitleAndHeaders(link.url)
+    ? await fetchTitleAndHeaders(link.url, null)
    : {};

  const name =
--- a/apps/web/lib/shared/fetchTitleAndHeaders.ts
+++ b/apps/web/lib/shared/fetchTitleAndHeaders.ts
@@ -3,7 +3,7 @@ import https from "https";
 import { HttpsProxyAgent } from "https-proxy-agent";
 import { SocksProxyAgent } from "socks-proxy-agent";

-export default async function fetchTitleAndHeaders(url: string) {
+export default async function fetchTitleAndHeaders(url: string, content: string | null) {
  if (!url?.startsWith("http://") && !url?.startsWith("https://"))
    return { title: "", headers: null };

@@ -45,15 +45,17 @@ export default async function fetchTitleAndHeaders(url: string) {
    const response = await Promise.race([responsePromise, timeoutPromise]);

    if ((response as any)?.status) {
-      const text = await (response as any).text();
+      let text: string;

-      // regular expression to find the <title> tag
-      let match = text.match(/<title.*>([^<]*)<\/title>/);
+      if (content) {
+        text = content
+      } else {
+        text = await (response as any).text()
+      }

-      const title = match?.[1] || "";
      const headers = (response as Response)?.headers || null;

-      return { title, headers };
+      return await fetchTitleAndHeadersFromContent(text, headers);
    } else {
      return { title: "", headers: null };
    }
@@ -62,3 +64,12 @@ export default async function fetchTitleAndHeaders(url: string) {
    return { title: "", headers: null };
  }
 }
+
+export async function fetchTitleAndHeadersFromContent(content: string, headers: Headers) {
+  // Regex-extract the <title> text from `content` (match is case-sensitive; the captured text cannot contain "<")
+  let match = content.match(/<title.*>([^<]*)<\/title>/);
+
+  const title = match?.[1] || ""; // no match or an empty capture yields ""
+
+  return { title, headers }; // `headers` is handed back exactly as received
+}
--- a/apps/web/pages/api/v1/archives/index.ts
+++ b/apps/web/pages/api/v1/archives/index.ts
@@ -146,7 +146,14 @@ async function handlePost(req: NextApiRequest, res: NextApiResponse) {
      if (!collection) {
        throw new Error("Collection not found.");
      }
-      const { title = "" } = url ? await fetchTitleAndHeaders(url) : {};
+
+      // Classify the upload by MIME type; needed right below for title extraction and further down for preview generation
+      const { mimetype } = files.file[0];
+      const isPDF = mimetype?.includes("pdf");
+      const isImage = mimetype?.includes("image");
+      const isHTML = mimetype === "text/html";
+
+      const { title = "" } = url ? await fetchTitleAndHeaders(url, isHTML && !isPreview ? fileBuffer.toString("utf-8") : null) : {};

      const link = await prisma.link.create({
        data: {
@@ -165,12 +172,6 @@ async function handlePost(req: NextApiRequest, res: NextApiResponse) {
        },
      });

-      // Generate a preview if it's an image
-      const { mimetype } = files.file[0];
-      const isPDF = mimetype?.includes("pdf");
-      const isImage = mimetype?.includes("image");
-      const isHTML = mimetype === "text/html";
-
      if (isImage) {
        const collectionId = collection.id;
        createFolder({ filePath: `archives/preview/${collectionId}` });