Coincidence detection as a crate
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

223 lines
6.8 KiB

  1. use std::slice::Iter;
  2. #[derive(Debug)]
  3. pub struct Span {
  4. pub start: usize,
  5. pub end: usize,
  6. pub what: String,
  7. }
  8. impl Span {
  9. pub fn new(start: usize, end: usize, what: String) -> Span {
  10. Span { start, end, what }
  11. }
  12. }
  13. pub struct Detector<'a> {
  14. source: &'a str,
  15. coincidences: &'a Vec<&'a str>,
  16. coincidence_iter: Iter<'a, &'a str>,
  17. last_spans: Vec<Span>,
  18. }
  19. impl Detector<'_> {
  20. pub fn new<'a>(input: &'a str, coincidences: &'a Vec<&'a str>) -> Detector<'a> {
  21. Detector {
  22. source: input,
  23. coincidences,
  24. coincidence_iter: coincidences.iter(),
  25. last_spans: Vec::new(),
  26. }
  27. }
  28. }
  29. const JEWS: &'static str = include_str!("jews.txt");
  30. pub fn get_jew_list() -> Vec<&'static str> {
  31. JEWS.split('\n').collect()
  32. }
  33. impl Iterator for Detector<'_> {
  34. type Item = Span;
  35. fn next(&mut self) -> Option<Self::Item> {
  36. loop {
  37. if self.last_spans.is_empty() {
  38. let coincidence = self.coincidence_iter.next();
  39. if let Some(coincidence) = coincidence {
  40. let mut spans = self
  41. .source
  42. .to_lowercase()
  43. .rmatch_indices(&coincidence.to_lowercase())
  44. .map(|(idx, str)| {
  45. Span::new(
  46. idx,
  47. idx + str.len() - 1,
  48. self.source[idx..(idx + str.len())].to_string(),
  49. )
  50. })
  51. .collect();
  52. self.last_spans.append(&mut spans);
  53. continue;
  54. } else {
  55. break None;
  56. }
  57. } else {
  58. break self.last_spans.pop();
  59. }
  60. }
  61. }
  62. }
  63. #[cfg(test)]
  64. mod tests {
  65. use crate::{get_jew_list, Detector};
  66. #[test]
  67. fn exact() {
  68. let text = "Who is responsible for 9/11?";
  69. let coincidences = vec!["Who"];
  70. let mut detector = Detector::new(text, &coincidences);
  71. let jew = detector.next();
  72. assert!(jew.is_some(), "detector returned none");
  73. let jew = jew.unwrap();
  74. assert_eq!(jew.what, "Who", "span contains invalid string");
  75. assert_eq!(jew.start, 0, "span contains invalid start");
  76. assert_eq!(jew.end, 2, "span contains invalid end");
  77. assert!(detector.next().is_none(), "detector returned something");
  78. }
  79. #[test]
  80. fn coincidence_lower() {
  81. let text = "Who is responsible for 9/11?";
  82. let coincidences = vec!["who"];
  83. let mut detector = Detector::new(text, &coincidences);
  84. let jew = detector.next();
  85. assert!(jew.is_some(), "detector returned none");
  86. let jew = jew.unwrap();
  87. assert_eq!(jew.what, "Who", "span contains invalid string");
  88. assert_eq!(jew.start, 0, "span contains invalid start");
  89. assert_eq!(jew.end, 2, "span contains invalid end");
  90. }
  91. #[test]
  92. fn coincidence_upper() {
  93. let text = "Who is responsible for 9/11?";
  94. let coincidences = vec!["WHO"];
  95. let mut detector = Detector::new(text, &coincidences);
  96. let jew = detector.next();
  97. assert!(jew.is_some(), "detector returned none");
  98. let jew = jew.unwrap();
  99. assert_eq!(jew.what, "Who", "span contains invalid string");
  100. assert_eq!(jew.start, 0, "span contains invalid start");
  101. assert_eq!(jew.end, 2, "span contains invalid end");
  102. }
  103. #[test]
  104. fn source_upper() {
  105. let text = "WHO is responsible for 9/11?";
  106. let coincidences = vec!["Who"];
  107. let mut detector = Detector::new(text, &coincidences);
  108. let jew = detector.next();
  109. assert!(jew.is_some(), "detector returned none");
  110. let jew = jew.unwrap();
  111. assert_eq!(jew.what, "WHO", "span contains invalid string");
  112. assert_eq!(jew.start, 0, "span contains invalid start");
  113. assert_eq!(jew.end, 2, "span contains invalid end");
  114. }
  115. #[test]
  116. fn source_lower() {
  117. let text = "who is responsible for 9/11?";
  118. let coincidences = vec!["Who"];
  119. let mut detector = Detector::new(text, &coincidences);
  120. let jew = detector.next();
  121. assert!(jew.is_some(), "detector returned none");
  122. let jew = jew.unwrap();
  123. assert_eq!(jew.what, "who", "span contains invalid string");
  124. assert_eq!(jew.start, 0, "span contains invalid start");
  125. assert_eq!(jew.end, 2, "span contains invalid end");
  126. }
  127. #[test]
  128. fn source_multiple_same() {
  129. let text = "Who is responsible for 9/11? the WHO is responsible for 9/11.";
  130. let coincidences = vec!["Who"];
  131. let mut detector = Detector::new(text, &coincidences);
  132. let jew = detector.next();
  133. assert!(jew.is_some(), "detector returned none");
  134. let jew = jew.unwrap();
  135. assert_eq!(jew.what, "Who", "span contains invalid string");
  136. assert_eq!(jew.start, 0, "span contains invalid start");
  137. assert_eq!(jew.end, 2, "span contains invalid end");
  138. let jew = detector.next();
  139. assert!(jew.is_some(), "detector returned none");
  140. let jew = jew.unwrap();
  141. assert_eq!(jew.what, "WHO", "span contains invalid string");
  142. assert_eq!(jew.start, 33, "span contains invalid start");
  143. assert_eq!(jew.end, 35, "span contains invalid end");
  144. }
  145. #[test]
  146. fn source_multiple_unique() {
  147. let text = "Who is responsible for 9/11? Israel is responsible for 9/11.";
  148. let coincidences = vec!["Who", "israel"];
  149. let mut detector = Detector::new(text, &coincidences);
  150. let jew = detector.next();
  151. assert!(jew.is_some(), "detector returned none");
  152. let jew = jew.unwrap();
  153. assert_eq!(jew.what, "Who", "span contains invalid string");
  154. assert_eq!(jew.start, 0, "span contains invalid start");
  155. assert_eq!(jew.end, 2, "span contains invalid end");
  156. let jew = detector.next();
  157. assert!(jew.is_some(), "detector returned none");
  158. let jew = jew.unwrap();
  159. assert_eq!(jew.what, "Israel", "span contains invalid string");
  160. assert_eq!(jew.start, 29, "span contains invalid start");
  161. assert_eq!(jew.end, 34, "span contains invalid end");
  162. }
  163. #[test]
  164. fn try_jew_list() {
  165. let text = "the most based man on earth, mark zuckerberg, created facebook";
  166. let coincidences = get_jew_list();
  167. let mut detector = Detector::new(text, &coincidences);
  168. let jew = detector.next();
  169. assert!(jew.is_some(), "detector returned none");
  170. let jew = jew.unwrap();
  171. assert_eq!(jew.what, "mark zuckerberg", "span contains invalid string");
  172. assert_eq!(jew.start, 29, "span contains invalid start");
  173. assert_eq!(jew.end, 43, "span contains invalid end");
  174. }
  175. }