Coincidence detection as a crate
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

222 lines
6.8 KiB

use std::slice::Iter;
#[derive(Debug)]
pub struct Span {
pub start: usize,
pub end: usize,
pub what: String,
}
impl Span {
pub fn new(start: usize, end: usize, what: String) -> Span {
Span { start, end, what }
}
}
pub struct Detector<'a> {
source: &'a str,
coincidences: &'a Vec<&'a str>,
coincidence_iter: Iter<'a, &'a str>,
last_spans: Vec<Span>,
}
impl Detector<'_> {
pub fn new<'a>(input: &'a str, coincidences: &'a Vec<&'a str>) -> Detector<'a> {
Detector {
source: input,
coincidences,
coincidence_iter: coincidences.iter(),
last_spans: Vec::new(),
}
}
}
const JEWS: &'static str = include_str!("jews.txt");
pub fn get_jew_list() -> Vec<&'static str> {
JEWS.split('\n').collect()
}
impl Iterator for Detector<'_> {
type Item = Span;
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.last_spans.is_empty() {
let coincidence = self.coincidence_iter.next();
if let Some(coincidence) = coincidence {
let mut spans = self
.source
.to_lowercase()
.rmatch_indices(&coincidence.to_lowercase())
.map(|(idx, str)| {
Span::new(
idx,
idx + str.len() - 1,
self.source[idx..(idx + str.len())].to_string(),
)
})
.collect();
self.last_spans.append(&mut spans);
continue;
} else {
break None;
}
} else {
break self.last_spans.pop();
}
}
}
}
#[cfg(test)]
mod tests {
use crate::{get_jew_list, Detector};
#[test]
fn exact() {
let text = "Who is responsible for 9/11?";
let coincidences = vec!["Who"];
let mut detector = Detector::new(text, &coincidences);
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "Who", "span contains invalid string");
assert_eq!(jew.start, 0, "span contains invalid start");
assert_eq!(jew.end, 2, "span contains invalid end");
assert!(detector.next().is_none(), "detector returned something");
}
#[test]
fn coincidence_lower() {
let text = "Who is responsible for 9/11?";
let coincidences = vec!["who"];
let mut detector = Detector::new(text, &coincidences);
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "Who", "span contains invalid string");
assert_eq!(jew.start, 0, "span contains invalid start");
assert_eq!(jew.end, 2, "span contains invalid end");
}
#[test]
fn coincidence_upper() {
let text = "Who is responsible for 9/11?";
let coincidences = vec!["WHO"];
let mut detector = Detector::new(text, &coincidences);
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "Who", "span contains invalid string");
assert_eq!(jew.start, 0, "span contains invalid start");
assert_eq!(jew.end, 2, "span contains invalid end");
}
#[test]
fn source_upper() {
let text = "WHO is responsible for 9/11?";
let coincidences = vec!["Who"];
let mut detector = Detector::new(text, &coincidences);
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "WHO", "span contains invalid string");
assert_eq!(jew.start, 0, "span contains invalid start");
assert_eq!(jew.end, 2, "span contains invalid end");
}
#[test]
fn source_lower() {
let text = "who is responsible for 9/11?";
let coincidences = vec!["Who"];
let mut detector = Detector::new(text, &coincidences);
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "who", "span contains invalid string");
assert_eq!(jew.start, 0, "span contains invalid start");
assert_eq!(jew.end, 2, "span contains invalid end");
}
#[test]
fn source_multiple_same() {
let text = "Who is responsible for 9/11? the WHO is responsible for 9/11.";
let coincidences = vec!["Who"];
let mut detector = Detector::new(text, &coincidences);
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "Who", "span contains invalid string");
assert_eq!(jew.start, 0, "span contains invalid start");
assert_eq!(jew.end, 2, "span contains invalid end");
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "WHO", "span contains invalid string");
assert_eq!(jew.start, 33, "span contains invalid start");
assert_eq!(jew.end, 35, "span contains invalid end");
}
#[test]
fn source_multiple_unique() {
let text = "Who is responsible for 9/11? Israel is responsible for 9/11.";
let coincidences = vec!["Who", "israel"];
let mut detector = Detector::new(text, &coincidences);
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "Who", "span contains invalid string");
assert_eq!(jew.start, 0, "span contains invalid start");
assert_eq!(jew.end, 2, "span contains invalid end");
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "Israel", "span contains invalid string");
assert_eq!(jew.start, 29, "span contains invalid start");
assert_eq!(jew.end, 34, "span contains invalid end");
}
#[test]
fn try_jew_list() {
let text = "the most based man on earth, mark zuckerberg, created facebook";
let coincidences = get_jew_list();
let mut detector = Detector::new(text, &coincidences);
let jew = detector.next();
assert!(jew.is_some(), "detector returned none");
let jew = jew.unwrap();
assert_eq!(jew.what, "mark zuckerberg", "span contains invalid string");
assert_eq!(jew.start, 29, "span contains invalid start");
assert_eq!(jew.end, 43, "span contains invalid end");
}
}