mirror of
https://github.com/C4illin/ConvertX.git
synced 2026-03-03 03:47:02 +00:00
feat: markitdown implementation (#486)
* feat: markitdown implementation * fix: code review and docker file: * fix: add markitdown PATH in container * fix: feedback changes * en: readme changed
This commit is contained in:
@@ -74,9 +74,16 @@ RUN apt-get update && apt-get install -y \
|
||||
texlive-latex-extra \
|
||||
texlive-latex-recommended \
|
||||
texlive-xetex \
|
||||
python3 \
|
||||
python3-pip \
|
||||
pipx \
|
||||
--no-install-recommends \
|
||||
&& pipx install "markitdown[all]" \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Add pipx bin directory to PATH
|
||||
ENV PATH="/root/.local/bin:${PATH}"
|
||||
|
||||
# Install VTracer binary
|
||||
RUN ARCH=$(uname -m) && \
|
||||
if [ "$ARCH" = "aarch64" ]; then \
|
||||
|
||||
@@ -45,6 +45,7 @@ A self-hosted online file converter. Supports over a thousand different formats.
|
||||
| [FFmpeg](https://ffmpeg.org/) | Video | ~472 | ~199 |
|
||||
| [Potrace](https://potrace.sourceforge.net/) | Raster to vector | 4 | 11 |
|
||||
| [VTracer](https://github.com/visioncortex/vtracer) | Raster to vector | 8 | 1 |
|
||||
| [MarkItDown](https://github.com/microsoft/markitdown) | Documents | 6 | 1 |
|
||||
|
||||
<!-- many ffmpeg fileformats are duplicates -->
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ import { convert as convertresvg, properties as propertiesresvg } from "./resvg"
|
||||
import { convert as convertImage, properties as propertiesImage } from "./vips";
|
||||
import { convert as convertVtracer, properties as propertiesVtracer } from "./vtracer";
|
||||
import { convert as convertxelatex, properties as propertiesxelatex } from "./xelatex";
|
||||
import { convert as convertMarkitdown, properties as propertiesMarkitdown } from "./markitdown";
|
||||
|
||||
// This should probably be restructured so that the converter functions are not imported here; instead, each converter should hook into this registry, which would make the converters more modular
|
||||
|
||||
@@ -127,6 +128,10 @@ const properties: Record<
|
||||
properties: propertiesVtracer,
|
||||
converter: convertVtracer,
|
||||
},
|
||||
markitDown: {
|
||||
properties: propertiesMarkitdown,
|
||||
converter: convertMarkitdown,
|
||||
},
|
||||
};
|
||||
|
||||
function chunks<T>(arr: T[], size: number): T[][] {
|
||||
|
||||
39
src/converters/markitdown.ts
Normal file
39
src/converters/markitdown.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { execFile as execFileOriginal } from "node:child_process";
|
||||
import { ExecFileFn } from "./types";
|
||||
|
||||
/**
 * Formats handled by the markitdown converter.
 *
 * Every entry is a file type as passed to `convert` (i.e. an extension such
 * as "pdf"), not an application name: entries like "powerpoint" or "excel"
 * would never match an uploaded file, so the Office formats are listed by
 * their real extensions instead.
 */
export const properties = {
  from: {
    document: ["pdf", "xlsx", "xls", "docx", "pptx", "html"],
  },
  to: {
    // markitdown only produces Markdown.
    document: ["md"],
  },
};
|
||||
|
||||
/**
 * Converts a document to Markdown by running the `markitdown` CLI as
 * `markitdown <filePath> -o <targetPath>`.
 *
 * @param filePath - Path of the input document handed to markitdown.
 * @param fileType - Input file type; not used by this converter.
 * @param convertTo - Requested output type; not used by this converter.
 * @param targetPath - Path passed to markitdown's `-o` option.
 * @param options - Not used by this converter.
 * @param execFile - Process launcher; defaults to Node's `execFile` and can be
 *   replaced (e.g. in tests) with a function that has the same callback shape.
 * @returns A promise that resolves to `"Done"` when the command reports no error.
 *   It rejects with the string `markitdown error: <err>` (a string, not an
 *   `Error`) when `execFile` reports an error.
 */
export async function convert(
  filePath: string,
  fileType: string,
  convertTo: string,
  targetPath: string,
  options?: unknown,
  execFile: ExecFileFn = execFileOriginal,
): Promise<string> {
  return new Promise((resolve, reject) => {
    // Arguments are passed as an array, so no shell is involved and the paths
    // need no quoting.
    execFile("markitdown", [filePath, "-o", targetPath], (err, stdout, stderr) => {
      if (err) {
        reject(`markitdown error: ${err}`);
        return;
      }

      // Output is only surfaced for diagnostics; a non-empty stderr does not
      // fail the conversion.
      if (stdout) {
        console.log(`stdout: ${stdout}`);
      }

      if (stderr) {
        console.error(`stderr: ${stderr}`);
      }

      resolve("Done");
    });
  });
}
|
||||
Reference in New Issue
Block a user