From 1ca24c58e5a487bd4629763a97ed3732a4f2dbdb Mon Sep 17 00:00:00 2001 From: Chris W Date: Thu, 12 Oct 2023 13:38:30 -0600 Subject: [PATCH] improve language detection somewhat --- Cargo.lock | 199 +++++++++++++++++++++++++++++++--- Cargo.toml | 5 +- Dockerfile | 5 +- docker/download_nerd_fonts.sh | 13 ++- src/config.rs | 47 ++++---- src/main.rs | 2 +- 6 files changed, 228 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9b60c57..c5255f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -268,7 +268,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -363,6 +363,16 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bstr" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c79ad7fb2dd38f3dabd76b09c6a5a20c038fc0213ef1e9afd30eb777f120f019" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.14.0" @@ -1031,6 +1041,19 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +[[package]] +name = "globset" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d" +dependencies = [ + "aho-corasick", + "bstr", + "fnv", + "log", + "regex", +] + [[package]] name = "h2" version = "0.3.21" @@ -1097,6 +1120,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + [[package]] name = "http" version = "0.2.9" @@ -1174,6 +1203,26 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = 
"hyperpolyglot" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da03ba9199e5f86b1578b2bd0ce19c25d44e153b8305a0a54da6d8fa0b66360d" +dependencies = [ + "clap", + "ignore", + "lazy_static", + "num_cpus", + "pcre2", + "phf", + "phf_codegen", + "polyglot_tokenizer", + "regex", + "serde", + "serde_yaml", + "termcolor", +] + [[package]] name = "idna" version = "0.4.0" @@ -1184,6 +1233,23 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "ignore" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492" +dependencies = [ + "globset", + "lazy_static", + "log", + "memchr", + "regex", + "same-file", + "thread_local", + "walkdir", + "winapi-util", +] + [[package]] name = "image" version = "0.24.7" @@ -1238,6 +1304,7 @@ dependencies = [ "actix-web", "anyhow", "font-kit", + "hyperpolyglot", "image", "lazy_static", "reqwest", @@ -1245,8 +1312,8 @@ dependencies = [ "silicon", "structopt", "syntect", + "tempfile", "thiserror", - "umami_metrics", ] [[package]] @@ -1556,6 +1623,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.3", + "libc", +] + [[package]] name = "objc" version = "0.2.7" @@ -1746,12 +1823,72 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "pcre2" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9deb1d02d6a373ee392128ba86087352a986359f32a106e2e3b08cc90cc659c9" +dependencies = [ + "libc", + "log", + "pcre2-sys", +] + +[[package]] +name = "pcre2-sys" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae234f441970dbd52d4e29bee70f3b56ca83040081cb2b55b7df772b16e0b06e" 
+dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "percent-encoding" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared", + "rand 0.7.3", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -1797,6 +1934,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "polyglot_tokenizer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6091586d3c58239b154276ca7d7a14035605b829a27b92dbe10625e78ef909d" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -1874,6 +2017,7 @@ dependencies = [ "rand_chacha 0.2.2", "rand_core 0.5.1", "rand_hc", + "rand_pcg", ] [[package]] @@ -1943,6 +2087,15 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = 
[ + "rand_core 0.5.1", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -2220,6 +2373,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b" +dependencies = [ + "indexmap", + "ryu", + "serde", + "yaml-rust", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2249,7 +2414,7 @@ dependencies = [ [[package]] name = "silicon" version = "0.5.1" -source = "git+https://github.com/Aloxaf/silicon.git#cf3668c9ee43ebdae608db3f7b3449c588b8411f" +source = "git+https://github.com/watzon/silicon.git#8f8eded55f2725a0a92f683dbd393f558de718b1" dependencies = [ "anyhow", "clipboard", @@ -2291,6 +2456,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -2497,6 +2668,16 @@ dependencies = [ "syn 2.0.38", ] +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tiff" version = "0.9.0" @@ -2637,18 +2818,6 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "umami_metrics" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc9ec451bb0504e32cafb076fe46e0126c70ad167846e3de02f0a2bbebc6839" -dependencies = [ - "anyhow", - "reqwest", - "serde", - "serde_json", -] - 
[[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/Cargo.toml b/Cargo.toml index 94bf932..64dc5dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" [dependencies] actix-web = "4" -silicon = { git = "https://github.com/Aloxaf/silicon.git" } +silicon = { git = "https://github.com/watzon/silicon.git" } lazy_static = "1.4.0" serde = { version = "1.0.130", features = ["derive"] } structopt = "0.3.26" @@ -17,4 +17,5 @@ thiserror = "1.0.49" syntect = "5.1.0" font-kit = "0.11.0" reqwest = "0.11.22" -umami_metrics = "0.1.0" \ No newline at end of file +hyperpolyglot = "0.1.7" +tempfile = "3.8.0" \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 195d62c..b49034b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,9 +28,6 @@ RUN ls -la RUN chmod +x download_nerd_fonts.sh RUN bash ./download_nerd_fonts.sh -RUN mkdir -p /usr/share/fonts/truetype -RUN mv *.ttf /usr/share/fonts/truetype - FROM debian:buster-slim # Install dependencies @@ -41,7 +38,7 @@ RUN apt-get update && apt-get install -y \ fontconfig # Copy fonts -COPY --from=fonts /usr/share/fonts/truetype /usr/share/fonts/truetype/ +COPY --from=fonts /data/fonts/nerd_fonts/* /usr/share/fonts/truetype/ RUN fc-cache -fv # Copy binary diff --git a/docker/download_nerd_fonts.sh b/docker/download_nerd_fonts.sh index db81462..08ed057 100755 --- a/docker/download_nerd_fonts.sh +++ b/docker/download_nerd_fonts.sh @@ -59,10 +59,21 @@ all_nerd_fonts=( "VictorMono" ) +mkdir -p ./nerd_fonts + # Download each font, un-tar it, and install it for font in "${all_nerd_fonts[@]}"; do echo "Downloading $font..." 
wget "https://github.com/ryanoasis/nerd-fonts/releases/download/v3.0.2/$font.tar.xz" - tar -xf "./$font.tar.xz" + + mkdir -p "./$font" + tar -xf "./$font.tar.xz" -C "./$font" rm "$font.tar.xz" + + # Remove fonts containing "NerdFontMono" and "NerdFontPropo" in the name + rm "./$font/"*NerdFontMono* + rm "./$font/"*NerdFontProp* + + # Move the font directory to the nerd_fonts directory + mv "./$font" ./nerd_fonts done \ No newline at end of file diff --git a/src/config.rs b/src/config.rs index 98b10d3..ca4dd2e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,17 +1,17 @@ +use anyhow::Error; use silicon::formatter::{ImageFormatter, ImageFormatterBuilder}; use silicon::utils::{Background, ShadowAdder}; +use std::io::Write; use std::path::PathBuf; -use anyhow::Error; use syntect::highlighting::{Theme, ThemeSet}; use syntect::parsing::{SyntaxReference, SyntaxSet}; -use crate::rgba::{Rgba, ImageRgba}; +use crate::rgba::{ImageRgba, Rgba}; type FontList = Vec<(String, f32)>; type Lines = Vec; -#[derive(Debug, Clone)] -#[derive(serde::Deserialize)] +#[derive(Debug, Clone, serde::Deserialize)] pub struct Config { /// Background image URL pub background_image: Option>, @@ -71,7 +71,7 @@ pub struct Config { pub tab_width: u8, /// The syntax highlight theme. It can be a theme name or path to a .tmTheme file. 
- pub theme: String + pub theme: String, } impl Config { @@ -96,20 +96,29 @@ impl Config { shadow_offset_y: 0, shadow_offset_x: 0, tab_width: 4, - theme: "Dracula".to_owned() + theme: "Dracula".to_owned(), } } pub fn language<'a>(&self, ps: &'a SyntaxSet) -> Result<&'a SyntaxReference, Error> { - let possible_language = self.language.as_ref().map(|language| { - ps.find_syntax_by_token(language) - .ok_or_else(|| format_err!("Unable to determine language, please provide one explicitly")) - }); - - let language = possible_language.unwrap_or_else(|| { - ps.find_syntax_by_first_line(self.code.as_ref()) - .ok_or_else(|| format_err!("Unable to determine language, please provide one explicitly")) - })?; + let language = match &self.language { + Some(language) => ps + .find_syntax_by_token(language) + .ok_or_else(|| Error::msg(format!("Invalid language: {}", language)))?, + None => { + let first_line = self.code.lines().next().unwrap_or_default(); + ps.find_syntax_by_first_line(first_line).unwrap_or_else(|| { + // hyperpolyglot requires a file, so we need to create a temp file + let mut temp_file = tempfile::NamedTempFile::new().unwrap(); + write!(temp_file, "{}", self.code).unwrap(); + let language = hyperpolyglot::detect(temp_file.path()).unwrap(); + match language { + Some(language) => ps.find_syntax_by_token(language.language()).unwrap(), + None => ps.find_syntax_by_token("log").unwrap(), + } + }) + }, + }; Ok(language) } @@ -123,7 +132,6 @@ impl Config { } } - pub fn get_formatter(&self) -> Result { let formatter = ImageFormatterBuilder::new() .line_pad(self.line_pad) @@ -157,8 +165,7 @@ impl Config { /// Query parameters for the /generate endpoint, using Option to make all options /// with defaults optional. 
-#[derive(Debug, Clone)] -#[derive(serde::Deserialize)] +#[derive(Debug, Clone, serde::Deserialize)] pub struct ConfigQuery { /// Background image URL pub background_image: Option, @@ -218,5 +225,5 @@ pub struct ConfigQuery { pub tab_width: Option, /// The syntax highlight theme. It can be a theme name or path to a .tmTheme file. - pub theme: Option -} \ No newline at end of file + pub theme: Option, +} diff --git a/src/main.rs b/src/main.rs index 6e006cf..87707b4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -85,7 +85,7 @@ async fn help() -> impl Responder { "code": "The code to generate an image from. Required.", "language": "The language to use for syntax highlighting. Optional, will attempt to guess if not provided.", "theme": "The theme to use for syntax highlighting. Optional, defaults to Dracula.", - "font": "The font to use. Optional, defaults to Fira Code.", + "font": "The font to use. Optional.", "shadow_color": "The color of the shadow. Optional, defaults to transparent.", "background": "The background color. Optional, defaults to transparent.", "tab_width": "The tab width. Optional, defaults to 4.",