Skip to content

Commit

Permalink
Merge pull request #3 from busticated/feature/detect-newline-style
Browse files Browse the repository at this point in the history
feature/detect-newline-style
  • Loading branch information
busticated authored Oct 3, 2023
2 parents 24cf276 + 76c62aa commit 933a65b
Show file tree
Hide file tree
Showing 5 changed files with 411 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ A `cargo` workspace ([docs](https://doc.rust-lang.org/book/ch14-03-cargo-workspa
## Crates

<!-- crate-list-start -->
* [detect-newline-style](crates/detect-newline-style)
* Determine a string's preferred newline character
* [node-js-release-info](crates/node-js-release-info)
* Asynchronously retrieve Node.js release info by version and platform from the [downloads server](https://nodejs.org/download/release/)
<!-- crate-list-end -->
Expand Down
22 changes: 22 additions & 0 deletions crates/detect-newline-style/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[package]
name = "detect-newline-style"
description = "Determine a string's preferred newline character"
version = "0.1.0"
keywords = [
"eol",
"newline",
"line-ending",
"text"
]
categories = [
"encoding",
"filesystem",
"text-processing"
]
edition.workspace = true
license.workspace = true
authors.workspace = true
repository.workspace = true

[dependencies]
regex = "1.*"
41 changes: 41 additions & 0 deletions crates/detect-newline-style/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# detect-newline-style

[![Latest Version](https://img.shields.io/crates/v/detect-newline-style.svg)](https://crates.io/crates/detect-newline-style)
[![Documentation](https://docs.rs/detect-newline-style/badge.svg)](https://docs.rs/detect-newline-style)
[![CI Status](https://github.com/busticated/rusty/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/busticated/rusty/actions)

Determine a string's preferred newline character

## Installation

```shell
cargo add detect-newline-style
```

## Examples

```rust
use detect_newline_style::LineEnding;

fn main() {
let text = "one\rtwo\r\nthree\nfour\n";
let eol = LineEnding::find(text, LineEnding::LF);

assert_eq!(eol, LineEnding::LF);

let text = "one\rtwo\r\nthree\n";
let eol = LineEnding::find_or_use_lf(text);

assert_eq!(eol, LineEnding::LF);

let text = "one\rtwo\r\nthree\n";
let eol = LineEnding::find_or_use_crlf(text);

assert_eq!(eol, LineEnding::CRLF);

assert_eq!(format!("{}", LineEnding::CR), "\r");
assert_eq!(format!("{}", LineEnding::LF), "\n");
assert_eq!(format!("{}", LineEnding::CRLF), "\r\n");
}
```

284 changes: 284 additions & 0 deletions crates/detect-newline-style/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
#![doc = include_str!("../README.md")]

use regex::RegexBuilder;
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::str::FromStr;

const CR: &str = "\r";
const LF: &str = "\n";
const CRLF: &str = "\r\n";

#[derive(Clone, Debug, Default, PartialEq)]
pub enum LineEnding {
/// CR-style line ending (`"\r"`) rarely used, mostly on older systems
/// (e.g. classic MacOS - OS-X before 10.0)
CR,
/// LF-style line ending (`"\n"`) typically used on *nix and MacOS
#[default]
LF,
/// CRLF-style line ending (`"\r\n"`) typically used on Windows
CRLF,
}

impl LineEnding {
/// Creates a new instance - you'll almost certainly rather use one of the
/// [`find*`](crate::LineEnding::find) associated fns below :)
///
/// # Arguments
///
/// * `kind` - The line ending style you want
///
/// # Examples
///
/// ```rust
/// use detect_newline_style::LineEnding;
/// let eol = LineEnding::new("\n");
/// assert_eq!(eol, LineEnding::LF);
/// ```
pub fn new<K: AsRef<str>>(kind: K) -> LineEnding {
let kind = LineEnding::from_str(kind.as_ref());

if kind.is_err() {
return LineEnding::LF;
}

kind.unwrap()
}

/// Determines which newline style a given string uses (CR, LF, or CRLF)
///
/// # Arguments
///
/// * `text` - The text you want to analyze
/// * `default` - The default newline style to use when text has no preference
///
/// # Examples
///
/// ```rust
/// use detect_newline_style::LineEnding;
/// let eol = LineEnding::find("one\ntwo\r\nthree\n", LineEnding::CRLF);
/// assert_eq!(eol, LineEnding::LF);
/// ```
pub fn find<S: AsRef<str>>(text: S, default: LineEnding) -> LineEnding {
let text = text.as_ref();
let ptn = r"(?:\r\n?|\n)";
let re = RegexBuilder::new(ptn)
.case_insensitive(true)
.multi_line(true)
.build()
.unwrap();

let matches = re.find_iter(text);
let mut crlf_count = 0;
let mut cr_count = 0;
let mut lf_count = 0;

for item in matches {
let x = item.as_str();

if x == CRLF {
crlf_count += 1;
} else if x == LF {
lf_count += 1;
} else if x == CR {
cr_count += 1;
}
}

if crlf_count > lf_count && crlf_count > cr_count {
return LineEnding::CRLF;
} else if lf_count > crlf_count && lf_count > cr_count {
return LineEnding::LF;
} else if cr_count > lf_count && cr_count > crlf_count {
return LineEnding::CR;
}

default
}

/// Determines which newline style a given string uses (CR, LF, or CRLF)
/// defaulting to CRLF-style endings
///
/// # Arguments
///
/// * `text` - The text you want to analyze
///
/// # Examples
///
/// ```rust
/// use detect_newline_style::LineEnding;
/// let eol = LineEnding::find_or_use_crlf("one\ntwo\r\nthree\n");
/// assert_eq!(eol, LineEnding::LF);
/// let eol = LineEnding::find_or_use_crlf("one\ntwo\r\nthree\r");
/// assert_eq!(eol, LineEnding::CRLF);
/// ```
pub fn find_or_use_crlf<S: AsRef<str>>(s: S) -> LineEnding {
LineEnding::find(s, LineEnding::CRLF)
}

/// Determines which newline style a given string uses (CR, LF, or CRLF)
/// defaulting to LF-style endings
///
/// # Arguments
///
/// * `text` - The text you want to analyze
///
/// # Examples
///
/// ```rust
/// use detect_newline_style::LineEnding;
/// let eol = LineEnding::find_or_use_lf("one\r\ntwo\nthree\r\n");
/// assert_eq!(eol, LineEnding::CRLF);
/// let eol = LineEnding::find_or_use_lf("one\ntwo\r\nthree\r");
/// assert_eq!(eol, LineEnding::LF);
/// ```
pub fn find_or_use_lf<S: AsRef<str>>(s: S) -> LineEnding {
LineEnding::find(s, LineEnding::LF)
}

/// Determines which newline style a given string uses (CR, LF, or CRLF)
/// defaulting to CR-style endings
///
/// # Arguments
///
/// * `text` - The text you want to analyze
///
/// # Examples
///
/// ```rust
/// use detect_newline_style::LineEnding;
/// let eol = LineEnding::find_or_use_cr("one\ntwo\r\nthree\n");
/// assert_eq!(eol, LineEnding::LF);
/// let eol = LineEnding::find_or_use_cr("one\ntwo\r\nthree\r");
/// assert_eq!(eol, LineEnding::CR);
/// ```
pub fn find_or_use_cr<S: AsRef<str>>(s: S) -> LineEnding {
LineEnding::find(s, LineEnding::CR)
}
}

impl Display for LineEnding {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let eol = match self {
LineEnding::CR => CR,
LineEnding::LF => LF,
LineEnding::CRLF => CRLF,
};

write!(f, "{}", eol)
}
}

impl FromStr for LineEnding {
type Err = Box<dyn Error>;

fn from_str(s: &str) -> Result<LineEnding, Box<dyn Error>> {
match s.to_lowercase().as_str() {
CR => Ok(LineEnding::CR),
LF => Ok(LineEnding::LF),
CRLF => Ok(LineEnding::CRLF),
_ => Err(format!("Unrecognized input: {}", s).into()),
}
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn it_initializes_a_line_ending() {
let eol = LineEnding::new("\r");

assert_eq!(eol, LineEnding::CR);

let eol = LineEnding::new("\n");

assert_eq!(eol, LineEnding::LF);

let eol = LineEnding::new("\r\n");

assert_eq!(eol, LineEnding::CRLF);
}

#[test]
fn it_uses_lf_line_ending_when_kind_is_unrecognized() {
let eol = LineEnding::new("NOPE!");
assert_eq!(eol, LineEnding::LF);
}

#[test]
fn it_serializes_a_line_ending() {
assert_eq!("\r", format!("{}", LineEnding::CR));
assert_eq!("\n", format!("{}", LineEnding::LF));
assert_eq!("\r\n", format!("{}", LineEnding::CRLF));
}

#[test]
fn it_finds_preferred_line_ending_when_input_prefers_unix_style_endings() {
let input = "\nthis\nprefers\nunix-style endings\r\n";
let eol = LineEnding::find(input, LineEnding::CRLF);
assert_eq!(eol, LineEnding::LF);
}

#[test]
fn it_finds_preferred_line_ending_when_input_prefers_windows_style_endings() {
let input = "\r\nthis\r\nprefers\r\nwindows-style endings\n";
let eol = LineEnding::find(input, LineEnding::CRLF);
assert_eq!(eol, LineEnding::CRLF);
}

#[test]
fn it_finds_preferred_line_ending_when_input_prefers_obsolete_style_endings() {
let input = "\rthis\rprefers\r\nobsolete endings\n";
let eol = LineEnding::find(input, LineEnding::CRLF);
assert_eq!(eol, LineEnding::CR);
}

#[test]
fn it_uses_default_when_preference_cannot_be_determined() {
let input = "\r\nthis\r\nis\nambiguous\n?\r\r";
let eol = LineEnding::find(input, LineEnding::LF);
assert_eq!(eol, LineEnding::LF);
}

#[test]
fn it_finds_preferred_line_ending_defaulting_to_cr_endings() {
let input = "\rthis\rprefers\r\nobsolete endings\n";
let eol = LineEnding::find_or_use_cr(input);

assert_eq!(eol, LineEnding::CR);

let input = "\r\nthis\r\nis\nambiguous\n?\r\r";
let eol = LineEnding::find_or_use_cr(input);

assert_eq!(eol, LineEnding::CR);
}

#[test]
fn it_finds_preferred_line_ending_defaulting_to_lf_endings() {
let input = "\nthis\nprefers\nunix-style endings\r\n";
let eol = LineEnding::find_or_use_lf(input);

assert_eq!(eol, LineEnding::LF);

let input = "\r\nthis\r\nis\nambiguous\n?\r\r";
let eol = LineEnding::find_or_use_lf(input);

assert_eq!(eol, LineEnding::LF);
}

#[test]
fn it_finds_preferred_line_ending_defaulting_to_crlf_endings() {
let input = "\r\nthis\r\nprefers\r\nwindows-style endings\n";
let eol = LineEnding::find_or_use_crlf(input);

assert_eq!(eol, LineEnding::CRLF);

let input = "\r\nthis\r\nis\nambiguous\n?\r\r";
let eol = LineEnding::find_or_use_crlf(input);

assert_eq!(eol, LineEnding::CRLF);
}
}
Loading

0 comments on commit 933a65b

Please sign in to comment.