From 52af8d58242091b11c2cfca0c029ecbc62a51178 Mon Sep 17 00:00:00 2001 From: Toni Ros <33701571+ToniRos@users.noreply.github.com> Date: Thu, 29 Jan 2026 17:20:12 +0100 Subject: [PATCH] PDF to DOCX using LibreOffice, fixes #425 (#510) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix to issue #425 * Fix to Fix error in previous fix, and adapt tests * Fix to Fix error in previous fix, and adapt tests plus prettier * Update tests/converters/libreoffice.test.ts Thanks Co-authored-by: Emrik Östling * Update src/converters/libreoffice.ts Thanks Co-authored-by: Emrik Östling * Update src/converters/libreoffice.ts Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> --------- Co-authored-by: Emrik Östling Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> --- src/converters/libreoffice.ts | 8 +++++--- tests/converters/libreoffice.test.ts | 13 ++++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/converters/libreoffice.ts b/src/converters/libreoffice.ts index 02f389a..1fd6488 100644 --- a/src/converters/libreoffice.ts +++ b/src/converters/libreoffice.ts @@ -102,7 +102,7 @@ const filters: Record> = { odt: "writer8", ott: "writer8_template", pages: "Apple Pages", - // pdf: "writer_pdf_import", + pdf: "writer_pdf_import", psw: "PocketWord File", rtf: "Rich Text Format", sdw: "StarOffice_Writer", @@ -124,7 +124,9 @@ const filters: Record> = { }; const getFilters = (fileType: string, converto: string) => { - if (fileType in filters.text && converto in filters.text) { + if (converto === "pdf") { + return [null, null]; + } else if (fileType in filters.text && converto in filters.text) { return [filters.text[fileType], filters.text[converto]]; } else if (fileType in filters.calc && converto in filters.calc) { return [filters.calc[fileType], filters.calc[converto]]; @@ -148,7 +150,7 @@ export function convert( const [inFilter, outFilter] = getFilters(fileType, convertTo); if (inFilter) { - args.push(`--infilter="${inFilter}"`); + args.push(`--infilter=${inFilter}`); } if (outFilter) { diff --git a/tests/converters/libreoffice.test.ts b/tests/converters/libreoffice.test.ts index 4d07f1e..5d34d45 100644 --- a/tests/converters/libreoffice.test.ts +++ b/tests/converters/libreoffice.test.ts @@ -63,7 +63,7 @@ test("invokes soffice with --headless and outdir derived from targetPath", async expect(cmd).toBe("soffice"); expect(args).toEqual([ "--headless", - `--infilter="MS Word 2007 XML"`, + "--infilter=MS Word 2007 XML", "--convert-to", "odt:writer8", "--outdir", @@ -77,8 +77,15 @@ test("uses only outFilter when input has no filter (e.g., pdf -> txt)", async () const { args } = requireDefined(calls[0], "Expected at least one execFile call"); - expect(args).not.toContainEqual(expect.stringMatching(/^--infilter=/)); - expect(args).toEqual(["--headless", "--convert-to", "txt", "--outdir", "out", "in.pdf"]); + expect(args).toEqual([ + "--headless", + "--infilter=writer_pdf_import", + "--convert-to", + "txt:Text", + "--outdir", + "out", + "in.pdf", + ]); }); test("uses only infilter when convertTo has no out filter (e.g., docx -> pdf)", async () => {