improve language detection somewhat

This commit is contained in:
Chris W 2023-10-12 13:38:30 -06:00
parent 08c78fff68
commit 1ca24c58e5
6 changed files with 228 additions and 43 deletions

199
Cargo.lock generated
View File

@ -268,7 +268,7 @@ version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [ dependencies = [
"hermit-abi", "hermit-abi 0.1.19",
"libc", "libc",
"winapi", "winapi",
] ]
@ -363,6 +363,16 @@ dependencies = [
"alloc-stdlib", "alloc-stdlib",
] ]
[[package]]
name = "bstr"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c79ad7fb2dd38f3dabd76b09c6a5a20c038fc0213ef1e9afd30eb777f120f019"
dependencies = [
"memchr",
"serde",
]
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.14.0" version = "3.14.0"
@ -1031,6 +1041,19 @@ version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
[[package]]
name = "globset"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d"
dependencies = [
"aho-corasick",
"bstr",
"fnv",
"log",
"regex",
]
[[package]] [[package]]
name = "h2" name = "h2"
version = "0.3.21" version = "0.3.21"
@ -1097,6 +1120,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "hermit-abi"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
[[package]] [[package]]
name = "http" name = "http"
version = "0.2.9" version = "0.2.9"
@ -1174,6 +1203,26 @@ dependencies = [
"tokio-native-tls", "tokio-native-tls",
] ]
[[package]]
name = "hyperpolyglot"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da03ba9199e5f86b1578b2bd0ce19c25d44e153b8305a0a54da6d8fa0b66360d"
dependencies = [
"clap",
"ignore",
"lazy_static",
"num_cpus",
"pcre2",
"phf",
"phf_codegen",
"polyglot_tokenizer",
"regex",
"serde",
"serde_yaml",
"termcolor",
]
[[package]] [[package]]
name = "idna" name = "idna"
version = "0.4.0" version = "0.4.0"
@ -1184,6 +1233,23 @@ dependencies = [
"unicode-normalization", "unicode-normalization",
] ]
[[package]]
name = "ignore"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492"
dependencies = [
"globset",
"lazy_static",
"log",
"memchr",
"regex",
"same-file",
"thread_local",
"walkdir",
"winapi-util",
]
[[package]] [[package]]
name = "image" name = "image"
version = "0.24.7" version = "0.24.7"
@ -1238,6 +1304,7 @@ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
"font-kit", "font-kit",
"hyperpolyglot",
"image", "image",
"lazy_static", "lazy_static",
"reqwest", "reqwest",
@ -1245,8 +1312,8 @@ dependencies = [
"silicon", "silicon",
"structopt", "structopt",
"syntect", "syntect",
"tempfile",
"thiserror", "thiserror",
"umami_metrics",
] ]
[[package]] [[package]]
@ -1556,6 +1623,16 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "num_cpus"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi 0.3.3",
"libc",
]
[[package]] [[package]]
name = "objc" name = "objc"
version = "0.2.7" version = "0.2.7"
@ -1746,12 +1823,72 @@ dependencies = [
"rustc_version", "rustc_version",
] ]
[[package]]
name = "pcre2"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9deb1d02d6a373ee392128ba86087352a986359f32a106e2e3b08cc90cc659c9"
dependencies = [
"libc",
"log",
"pcre2-sys",
]
[[package]]
name = "pcre2-sys"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae234f441970dbd52d4e29bee70f3b56ca83040081cb2b55b7df772b16e0b06e"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.3.0" version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]]
name = "phf"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
dependencies = [
"phf_shared",
"rand 0.7.3",
]
[[package]]
name = "phf_shared"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
dependencies = [
"siphasher",
]
[[package]] [[package]]
name = "pin-project-lite" name = "pin-project-lite"
version = "0.2.13" version = "0.2.13"
@ -1797,6 +1934,12 @@ dependencies = [
"miniz_oxide", "miniz_oxide",
] ]
[[package]]
name = "polyglot_tokenizer"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6091586d3c58239b154276ca7d7a14035605b829a27b92dbe10625e78ef909d"
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
version = "0.2.17" version = "0.2.17"
@ -1874,6 +2017,7 @@ dependencies = [
"rand_chacha 0.2.2", "rand_chacha 0.2.2",
"rand_core 0.5.1", "rand_core 0.5.1",
"rand_hc", "rand_hc",
"rand_pcg",
] ]
[[package]] [[package]]
@ -1943,6 +2087,15 @@ dependencies = [
"rand_core 0.5.1", "rand_core 0.5.1",
] ]
[[package]]
name = "rand_pcg"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
dependencies = [
"rand_core 0.5.1",
]
[[package]] [[package]]
name = "rawpointer" name = "rawpointer"
version = "0.2.1" version = "0.2.1"
@ -2220,6 +2373,18 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "serde_yaml"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b"
dependencies = [
"indexmap",
"ryu",
"serde",
"yaml-rust",
]
[[package]] [[package]]
name = "sha1" name = "sha1"
version = "0.10.6" version = "0.10.6"
@ -2249,7 +2414,7 @@ dependencies = [
[[package]] [[package]]
name = "silicon" name = "silicon"
version = "0.5.1" version = "0.5.1"
source = "git+https://github.com/Aloxaf/silicon.git#cf3668c9ee43ebdae608db3f7b3449c588b8411f" source = "git+https://github.com/watzon/silicon.git#8f8eded55f2725a0a92f683dbd393f558de718b1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clipboard", "clipboard",
@ -2291,6 +2456,12 @@ version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]] [[package]]
name = "slab" name = "slab"
version = "0.4.9" version = "0.4.9"
@ -2497,6 +2668,16 @@ dependencies = [
"syn 2.0.38", "syn 2.0.38",
] ]
[[package]]
name = "thread_local"
version = "1.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]] [[package]]
name = "tiff" name = "tiff"
version = "0.9.0" version = "0.9.0"
@ -2637,18 +2818,6 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "umami_metrics"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbc9ec451bb0504e32cafb076fe46e0126c70ad167846e3de02f0a2bbebc6839"
dependencies = [
"anyhow",
"reqwest",
"serde",
"serde_json",
]
[[package]] [[package]]
name = "unicode-bidi" name = "unicode-bidi"
version = "0.3.13" version = "0.3.13"

View File

@ -7,7 +7,7 @@ edition = "2021"
[dependencies] [dependencies]
actix-web = "4" actix-web = "4"
silicon = { git = "https://github.com/Aloxaf/silicon.git" } silicon = { git = "https://github.com/watzon/silicon.git" }
lazy_static = "1.4.0" lazy_static = "1.4.0"
serde = { version = "1.0.130", features = ["derive"] } serde = { version = "1.0.130", features = ["derive"] }
structopt = "0.3.26" structopt = "0.3.26"
@ -17,4 +17,5 @@ thiserror = "1.0.49"
syntect = "5.1.0" syntect = "5.1.0"
font-kit = "0.11.0" font-kit = "0.11.0"
reqwest = "0.11.22" reqwest = "0.11.22"
umami_metrics = "0.1.0" hyperpolyglot = "0.1.7"
tempfile = "3.8.0"

View File

@ -28,9 +28,6 @@ RUN ls -la
RUN chmod +x download_nerd_fonts.sh RUN chmod +x download_nerd_fonts.sh
RUN bash ./download_nerd_fonts.sh RUN bash ./download_nerd_fonts.sh
RUN mkdir -p /usr/share/fonts/truetype
RUN mv *.ttf /usr/share/fonts/truetype
FROM debian:buster-slim FROM debian:buster-slim
# Install dependencies # Install dependencies
@ -41,7 +38,7 @@ RUN apt-get update && apt-get install -y \
fontconfig fontconfig
# Copy fonts # Copy fonts
COPY --from=fonts /usr/share/fonts/truetype /usr/share/fonts/truetype/ COPY --from=fonts /data/fonts/nerd_fonts/* /usr/share/fonts/truetype/
RUN fc-cache -fv RUN fc-cache -fv
# Copy binary # Copy binary

View File

@ -59,10 +59,21 @@ all_nerd_fonts=(
"VictorMono" "VictorMono"
) )
mkdir -p ./nerd_fonts
# Download each font, un-tar it, and install it # Download each font, un-tar it, and install it
for font in "${all_nerd_fonts[@]}"; do for font in "${all_nerd_fonts[@]}"; do
echo "Downloading $font..." echo "Downloading $font..."
wget "https://github.com/ryanoasis/nerd-fonts/releases/download/v3.0.2/$font.tar.xz" wget "https://github.com/ryanoasis/nerd-fonts/releases/download/v3.0.2/$font.tar.xz"
tar -xf "./$font.tar.xz"
mkdir -p "./$font"
tar -xf "./$font.tar.xz" -C "./$font"
rm "$font.tar.xz" rm "$font.tar.xz"
# Remove fonts containing "NerdFontMono" and "NerdFontPropo" in the name
rm "./$font/"*NerdFontMono*
rm "./$font/"*NerdFontProp*
# Move the font directory to the nerd_fonts directory
mv "./$font" ./nerd_fonts
done done

View File

@ -1,17 +1,17 @@
use anyhow::Error;
use silicon::formatter::{ImageFormatter, ImageFormatterBuilder}; use silicon::formatter::{ImageFormatter, ImageFormatterBuilder};
use silicon::utils::{Background, ShadowAdder}; use silicon::utils::{Background, ShadowAdder};
use std::io::Write;
use std::path::PathBuf; use std::path::PathBuf;
use anyhow::Error;
use syntect::highlighting::{Theme, ThemeSet}; use syntect::highlighting::{Theme, ThemeSet};
use syntect::parsing::{SyntaxReference, SyntaxSet}; use syntect::parsing::{SyntaxReference, SyntaxSet};
use crate::rgba::{Rgba, ImageRgba}; use crate::rgba::{ImageRgba, Rgba};
type FontList = Vec<(String, f32)>; type FontList = Vec<(String, f32)>;
type Lines = Vec<u32>; type Lines = Vec<u32>;
#[derive(Debug, Clone)] #[derive(Debug, Clone, serde::Deserialize)]
#[derive(serde::Deserialize)]
pub struct Config { pub struct Config {
/// Background image URL /// Background image URL
pub background_image: Option<Vec<u8>>, pub background_image: Option<Vec<u8>>,
@ -71,7 +71,7 @@ pub struct Config {
pub tab_width: u8, pub tab_width: u8,
/// The syntax highlight theme. It can be a theme name or path to a .tmTheme file. /// The syntax highlight theme. It can be a theme name or path to a .tmTheme file.
pub theme: String pub theme: String,
} }
impl Config { impl Config {
@ -96,20 +96,29 @@ impl Config {
shadow_offset_y: 0, shadow_offset_y: 0,
shadow_offset_x: 0, shadow_offset_x: 0,
tab_width: 4, tab_width: 4,
theme: "Dracula".to_owned() theme: "Dracula".to_owned(),
} }
} }
pub fn language<'a>(&self, ps: &'a SyntaxSet) -> Result<&'a SyntaxReference, Error> { pub fn language<'a>(&self, ps: &'a SyntaxSet) -> Result<&'a SyntaxReference, Error> {
let possible_language = self.language.as_ref().map(|language| { let language = match &self.language {
ps.find_syntax_by_token(language) Some(language) => ps
.ok_or_else(|| format_err!("Unable to determine language, please provide one explicitly")) .find_syntax_by_token(language)
}); .ok_or_else(|| Error::msg(format!("Invalid language: {}", language)))?,
None => {
let language = possible_language.unwrap_or_else(|| { let first_line = self.code.lines().next().unwrap_or_default();
ps.find_syntax_by_first_line(self.code.as_ref()) ps.find_syntax_by_first_line(first_line).unwrap_or_else(|| {
.ok_or_else(|| format_err!("Unable to determine language, please provide one explicitly")) // hyperpolyglot requires a file, so we need to create a temp file
})?; let mut temp_file = tempfile::NamedTempFile::new().unwrap();
write!(temp_file, "{}", self.code).unwrap();
let language = hyperpolyglot::detect(temp_file.path()).unwrap();
match language {
Some(language) => ps.find_syntax_by_token(language.language()).unwrap(),
None => ps.find_syntax_by_token("log").unwrap(),
}
})
},
};
Ok(language) Ok(language)
} }
@ -123,7 +132,6 @@ impl Config {
} }
} }
pub fn get_formatter(&self) -> Result<ImageFormatter, Error> { pub fn get_formatter(&self) -> Result<ImageFormatter, Error> {
let formatter = ImageFormatterBuilder::new() let formatter = ImageFormatterBuilder::new()
.line_pad(self.line_pad) .line_pad(self.line_pad)
@ -157,8 +165,7 @@ impl Config {
/// Query parameters for the /generate endpoint, using Option to make all options /// Query parameters for the /generate endpoint, using Option to make all options
/// with defaults optional. /// with defaults optional.
#[derive(Debug, Clone)] #[derive(Debug, Clone, serde::Deserialize)]
#[derive(serde::Deserialize)]
pub struct ConfigQuery { pub struct ConfigQuery {
/// Background image URL /// Background image URL
pub background_image: Option<String>, pub background_image: Option<String>,
@ -218,5 +225,5 @@ pub struct ConfigQuery {
pub tab_width: Option<u8>, pub tab_width: Option<u8>,
/// The syntax highlight theme. It can be a theme name or path to a .tmTheme file. /// The syntax highlight theme. It can be a theme name or path to a .tmTheme file.
pub theme: Option<String> pub theme: Option<String>,
} }

View File

@ -85,7 +85,7 @@ async fn help() -> impl Responder {
"code": "The code to generate an image from. Required.", "code": "The code to generate an image from. Required.",
"language": "The language to use for syntax highlighting. Optional, will attempt to guess if not provided.", "language": "The language to use for syntax highlighting. Optional, will attempt to guess if not provided.",
"theme": "The theme to use for syntax highlighting. Optional, defaults to Dracula.", "theme": "The theme to use for syntax highlighting. Optional, defaults to Dracula.",
"font": "The font to use. Optional, defaults to Fira Code.", "font": "The font to use. Optional.",
"shadow_color": "The color of the shadow. Optional, defaults to transparent.", "shadow_color": "The color of the shadow. Optional, defaults to transparent.",
"background": "The background color. Optional, defaults to transparent.", "background": "The background color. Optional, defaults to transparent.",
"tab_width": "The tab width. Optional, defaults to 4.", "tab_width": "The tab width. Optional, defaults to 4.",