-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
36 changed files
with
3,365 additions
and
784 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# v0.2.0 | ||
|
||
Adds support for Java. | ||
|
||
Breaking: | ||
- Normalizations have been centralized to `snippets::parser::normalize`. | ||
- `snippets::text::buffer` has been merged into `snippets::text`. | ||
- Some vestigial traits (such as `snippets::text::ConvertCRLFToLF`) have been removed. | ||
- Implementation-specific constants such as `NODE_KIND_COMMENT` have been made private. | ||
- Removed `tree-sitter` types from the API. | ||
|
||
# v0.1.3 | ||
|
||
Adds support for C++. | ||
Adds support for C. | ||
|
||
This repository initially existed in FOSSA's [foundation-libs](https://github.com/fossas/foundation-libs/tree/master/snippets) monorepo. | ||
History for this library earlier than v0.1.3 can be viewed there. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
[package] | ||
name = "snippets" | ||
version = "0.1.3" | ||
version = "0.2.0" | ||
edition = "2021" | ||
description = "Extracts snippets of programming languages from files" | ||
|
||
# This is very FOSSA specific, so don't publish to crates.io. | ||
# Instead, just add this as a dependency using git path: `cargo add git@github.com:fossas/lib-snippets.git`. | ||
publish = false | ||
|
||
[features] | ||
default = [] | ||
|
||
|
@@ -12,28 +16,34 @@ sha2-asm = ["sha2/asm"] | |
|
||
# Enables support for each named language. | ||
# For more info, see the module documentation for the language. | ||
lang-all = ["lang-c99-tc3", "lang-cpp-98"] | ||
lang-c99-tc3 = [] | ||
lang-cpp-98 = [] | ||
lang-all = ["lang-c99-tc3", "lang-cpp-98", "lang-java-11"] | ||
lang-c99-tc3 = ["tree-sitter-c"] | ||
lang-cpp-98 = ["tree-sitter-cpp"] | ||
lang-java-11 = ["tree-sitter-java"] | ||
|
||
[dependencies] | ||
base64 = "0.21.2" | ||
bstr = "1.8.0" | ||
colored = "2.1.0" | ||
delegate = "0.10.0" | ||
derivative = "2.2.0" | ||
derive_more = "0.99.17" | ||
fallible-iterator = { version = "0.3.0", features = ["std"] } | ||
flagset = "0.4.3" | ||
getset = "0.1.2" | ||
itertools = "0.11.0" | ||
lazy-regex = { version = "3.0.2", features = ["std"] } | ||
nonempty = "0.9.0" | ||
once_cell = "1.18.0" | ||
regex = "1.9.4" | ||
sha2 = "0.10.7" | ||
strum = { version = "0.25.0", features = ["derive"] } | ||
tap = "1.0.1" | ||
thiserror = "1.0.47" | ||
tracing = "0.1.37" | ||
tree-sitter = "0.20.10" | ||
tree-sitter-c = "0.20.6" | ||
tree-sitter-cpp = "0.20.3" | ||
tree-sitter-c = { version = "0.20.6", optional = true } | ||
tree-sitter-cpp = { version = "0.20.3", optional = true } | ||
tree-sitter-java = { version = "0.20.2", optional = true } | ||
tree-sitter-traversal = "0.1.2" | ||
typed-builder = "0.15.2" | ||
|
||
|
@@ -42,6 +52,7 @@ tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } | |
snippets = { path = ".", features = ["lang-all"] } | ||
criterion = "0.5.1" | ||
pretty_assertions = "1.4.0" | ||
indoc = "2.0.4" | ||
|
||
[[bench]] | ||
name = "hashes" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
//! Specialized types for dealing with content provided to this library, | ||
//! or reading content into this library. | ||
|
||
use std::{borrow::Cow, path::Path}; | ||
|
||
use derivative::Derivative; | ||
use derive_more::Index; | ||
use tap::Pipe; | ||
|
||
/// Specialized type to indicate the original content provided to the extractor, | ||
/// distinct from a sliced section of that content. | ||
#[derive(Clone, PartialEq, Eq, Derivative, Index)] | ||
#[derivative(Debug = "transparent")] | ||
pub struct Content(Vec<u8>); | ||
|
||
impl Content { | ||
/// Create a new instance with the provided content. | ||
pub fn new(content: Vec<u8>) -> Self { | ||
Self(content) | ||
} | ||
|
||
/// Read a file on disk as content. | ||
pub fn from_file(path: impl AsRef<Path>) -> Result<Self, std::io::Error> { | ||
std::fs::read(path).map(Self::new) | ||
} | ||
|
||
/// View the content as a plain byte slice. | ||
pub fn as_bytes(&self) -> &[u8] { | ||
&self.0 | ||
} | ||
} | ||
|
||
impl<U: AsRef<[u8]>> From<U> for Content { | ||
fn from(value: U) -> Self { | ||
value.as_ref().pipe(|v| v.to_vec()).pipe(Self) | ||
} | ||
} | ||
|
||
/// Common functionality for any type indicating a section of bytes to extract from [`Content`]. | ||
pub trait ByteCoordinate { | ||
/// The byte offset at which the function starts. | ||
fn byte_start(&self) -> usize; | ||
|
||
/// The byte offset at which the function ends. | ||
fn byte_end(&self) -> usize; | ||
|
||
/// Extract the text representing this part from the specified content. | ||
fn extract_from<'a>(&self, content: &'a Content) -> &'a [u8] { | ||
&content[self.byte_start()..self.byte_end()] | ||
} | ||
|
||
/// Extract the text representing this part from the specified content as a lossy string. | ||
fn extract_from_lossy<'a>(&self, content: &'a Content) -> Cow<'a, str> { | ||
let content = self.extract_from(content); | ||
String::from_utf8_lossy(content) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
//! Extensions for types. | ||
|
||
pub mod vec; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
//! Extensions to [`Vec`] or Vec-like types. | ||
|
||
use nonempty::{nonempty, NonEmpty}; | ||
|
||
/// By-value helpers for [`Vec`]-like collections, so that a collection can be
/// built up in a single chained expression instead of through `&mut` calls.
///
/// Every method consumes `self` and hands back the updated collection.
pub trait FunctionalVec<T> {
    /// Append `new` as the last element and return the collection.
    fn pushed(self, new: T) -> Self;

    /// Append every item yielded by `new`, in order, and return the collection.
    fn extended(self, new: impl IntoIterator<Item = T>) -> Self;

    /// Return the collection with its elements in the opposite order.
    fn reversed(self) -> Self;

    /// Insert `new` as the first element and return the collection.
    fn prepended(self, new: T) -> Self;
}
|
||
impl<T> FunctionalVec<T> for Vec<T> { | ||
fn pushed(mut self, new: T) -> Self { | ||
self.push(new); | ||
self | ||
} | ||
|
||
fn extended(mut self, new: impl IntoIterator<Item = T>) -> Self { | ||
self.extend(new); | ||
self | ||
} | ||
|
||
fn reversed(mut self) -> Self { | ||
self.reverse(); | ||
self | ||
} | ||
|
||
fn prepended(self, new: T) -> Self { | ||
vec![new].extended(self) | ||
} | ||
} | ||
|
||
impl<T> FunctionalVec<T> for NonEmpty<T> { | ||
fn pushed(mut self, new: T) -> Self { | ||
self.push(new); | ||
self | ||
} | ||
|
||
fn extended(mut self, new: impl IntoIterator<Item = T>) -> Self { | ||
self.extend(new); | ||
self | ||
} | ||
|
||
fn reversed(self) -> Self { | ||
let mut tail = self.tail; | ||
let old_head = self.head; | ||
|
||
if let Some(head) = tail.pop() { | ||
NonEmpty::new(head).extended(tail.reversed().pushed(old_head)) | ||
} else { | ||
NonEmpty::new(old_head).extended(tail) | ||
} | ||
} | ||
|
||
fn prepended(self, new: T) -> Self { | ||
nonempty![new].extended(self) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use nonempty::nonempty;
    use pretty_assertions::assert_eq;

    use super::*;

    /// Exercises every [`FunctionalVec`] method on a plain [`Vec`],
    /// starting from an empty vector each time.
    #[test]
    fn vec() {
        let start = Vec::new;
        assert_eq!(Vec::<i32>::new(), start().reversed());
        assert_eq!(vec![1], start().pushed(1));
        assert_eq!(vec![1, 2], start().pushed(1).pushed(2));
        assert_eq!(vec![1, 2], start().extended([1, 2]));
        assert_eq!(vec![2, 1], start().extended([1, 2]).reversed());

        // `prepended` must work on an empty vector and keep existing order.
        assert_eq!(vec![1], start().prepended(1));
        assert_eq!(vec![1, 2, 3], start().extended([2, 3]).prepended(1));
    }

    /// Exercises every [`FunctionalVec`] method on [`NonEmpty`],
    /// starting from a single-element collection each time.
    #[test]
    fn nonempty() {
        let start = || nonempty![1];
        assert_eq!(nonempty![1], start());
        assert_eq!(nonempty![1], start().reversed());
        assert_eq!(nonempty![1, 2], start().pushed(2));
        assert_eq!(nonempty![1, 2, 3], start().extended([2, 3]));
        assert_eq!(nonempty![3, 2, 1], start().extended([2, 3]).reversed());

        // Two elements: the tail becomes empty after popping the new head.
        assert_eq!(nonempty![2, 1], start().pushed(2).reversed());

        // `prepended` replaces the head and keeps the rest in order.
        assert_eq!(nonempty![0, 1], start().prepended(0));
        assert_eq!(nonempty![0, 1, 2, 3], start().extended([2, 3]).prepended(0));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.