/*
 * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

use crate::char::Grapheme;
use crate::regexp::RegExpConfig;
use crate::unicode_tables::{DECIMAL_NUMBER, WHITE_SPACE, WORD};
use itertools::Itertools;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::ops::Range;
use unic_char_range::CharRange;
use unic_ucd_category::GeneralCategory;
use unicode_segmentation::UnicodeSegmentation;

/// An ordered sequence of [`Grapheme`] values together with a copy of the
/// [`RegExpConfig`] that the cluster's conversion methods consult.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct GraphemeCluster {
    // The graphemes making up this cluster, in their original order.
    graphemes: Vec<Grapheme>,
    // Owned copy of the configuration; read by `convert_to_char_classes`
    // and passed on by `convert_repetitions`.
    config: RegExpConfig,
}

impl GraphemeCluster {
    /// Builds a cluster from the extended grapheme clusters of `s`.
    ///
    /// A segment is broken up further into one `Grapheme` per `char` when it
    /// either consists of exactly two chars and starts with a backslash, or
    /// contains at least one char whose general category is a mark. Every
    /// other segment becomes a single `Grapheme`.
    pub(crate) fn from(s: &str, config: &RegExpConfig) -> Self {
        let mut graphemes = Vec::new();

        for segment in UnicodeSegmentation::graphemes(s, true) {
            let is_backslash_pair = segment.starts_with('\\') && segment.chars().count() == 2;
            let has_mark = segment
                .chars()
                .any(|c| GeneralCategory::of(c).is_mark());

            if is_backslash_pair || has_mark {
                // Split the segment so that each char is handled on its own.
                graphemes.extend(
                    segment
                        .chars()
                        .map(|c| Grapheme::from(&c.to_string(), config)),
                );
            } else {
                graphemes.push(Grapheme::from(segment, config));
            }
        }

        Self::from_graphemes(graphemes, config)
    }

    /// Wraps an already prepared list of graphemes; `config` is cloned.
    pub(crate) fn from_graphemes(graphemes: Vec<Grapheme>, config: &RegExpConfig) -> Self {
        let config = config.clone();
        Self { graphemes, config }
    }

    /// Creates a cluster that holds exactly one grapheme.
    pub(crate) fn new(grapheme: Grapheme, config: &RegExpConfig) -> Self {
        Self::from_graphemes(vec![grapheme], config)
    }

    /// Rewrites the chars of every grapheme into shorthand character classes
    /// according to the conversion flags of the stored configuration.
    ///
    /// For each char the first matching rule wins, in this order:
    /// `\d`, `\w`, `\s`, `\D`, `\W`, `\S`. A char matching no enabled rule is
    /// kept unchanged.
    pub(crate) fn convert_to_char_classes(&mut self) {
        let digit_ranges = convert_chars_to_range(DECIMAL_NUMBER);
        let word_ranges = convert_chars_to_range(WORD);
        let space_ranges = convert_chars_to_range(WHITE_SPACE);

        let digit_enabled = self.config.is_digit_converted();
        let non_digit_enabled = self.config.is_non_digit_converted();
        let space_enabled = self.config.is_space_converted();
        let non_space_enabled = self.config.is_non_space_converted();
        let word_enabled = self.config.is_word_converted();
        let non_word_enabled = self.config.is_non_word_converted();

        for grapheme in &mut self.graphemes {
            grapheme.chars = grapheme
                .chars
                .iter()
                .map(|text| {
                    let mut converted = String::new();

                    for c in text.chars() {
                        let is_digit = digit_ranges.iter().any(|range| range.contains(c));
                        let is_word = word_ranges.iter().any(|range| range.contains(c));
                        let is_space = space_ranges.iter().any(|range| range.contains(c));

                        // `None` means: no enabled rule applies, keep the char.
                        let shorthand = if digit_enabled && is_digit {
                            Some("\\d")
                        } else if word_enabled && is_word {
                            Some("\\w")
                        } else if space_enabled && is_space {
                            Some("\\s")
                        } else if non_digit_enabled && !is_digit {
                            Some("\\D")
                        } else if non_word_enabled && !is_word {
                            Some("\\W")
                        } else if non_space_enabled && !is_space {
                            Some("\\S")
                        } else {
                            None
                        };

                        match shorthand {
                            Some(class) => converted.push_str(class),
                            None => converted.push(c),
                        }
                    }

                    converted
                })
                .collect_vec();
        }
    }

    /// Runs repetition detection over the graphemes and, if it produced
    /// anything, replaces the graphemes with the result. An empty result
    /// leaves the cluster untouched.
    pub(crate) fn convert_repetitions(&mut self) {
        let mut converted = Vec::new();
        convert_repetitions(&self.graphemes, &mut converted, &self.config);

        if !converted.is_empty() {
            self.graphemes = converted;
        }
    }

    /// Concatenates the graphemes of `first` and `second` (in that order)
    /// into a new cluster that uses a clone of `config`.
    pub(crate) fn merge(
        first: &GraphemeCluster,
        second: &GraphemeCluster,
        config: &RegExpConfig,
    ) -> Self {
        let graphemes = first
            .graphemes
            .iter()
            .chain(second.graphemes.iter())
            .cloned()
            .collect_vec();

        Self::from_graphemes(graphemes, config)
    }

    /// Shared access to the underlying graphemes.
    pub(crate) fn graphemes(&self) -> &Vec<Grapheme> {
        &self.graphemes
    }

    /// Mutable access to the underlying graphemes.
    pub(crate) fn graphemes_mut(&mut self) -> &mut Vec<Grapheme> {
        &mut self.graphemes
    }

    /// Number of graphemes in this cluster.
    pub(crate) fn size(&self) -> usize {
        self.graphemes.len()
    }

    /// Sum of `Grapheme::char_count` over all graphemes, forwarding
    /// `is_non_ascii_char_escaped` to each call.
    pub(crate) fn char_count(&self, is_non_ascii_char_escaped: bool) -> usize {
        let mut total = 0;
        for grapheme in &self.graphemes {
            total += grapheme.char_count(is_non_ascii_char_escaped);
        }
        total
    }

    /// Returns `true` when the cluster holds no graphemes.
    pub(crate) fn is_empty(&self) -> bool {
        self.graphemes.is_empty()
    }
}

/// Detects repeated runs in `graphemes` and writes the converted sequence
/// into `repetitions`.
///
/// The work is a four-stage pipeline: collect candidate substrings with their
/// start indices, turn them into ranges, coalesce those ranges, and finally
/// substitute the ranges with repetition graphemes.
fn convert_repetitions(
    graphemes: &[Grapheme],
    repetitions: &mut Vec<Grapheme>,
    config: &RegExpConfig,
) {
    let coalesced = coalesce_repetitions(create_ranges_of_repetitions(
        collect_repeated_substrings(graphemes),
    ));

    replace_graphemes_with_repetitions(coalesced, graphemes, repetitions, config);
}

/// Maps every substring of `graphemes` (as a list of grapheme values) whose
/// length is between 1 and half of the input length to the start indices at
/// which it occurs, in ascending order.
fn collect_repeated_substrings(graphemes: &[Grapheme]) -> HashMap<Vec<String>, Vec<usize>> {
    let mut occurrences: HashMap<Vec<String>, Vec<usize>> = HashMap::new();
    let longest = graphemes.len() / 2;

    for start in 0..graphemes.len() {
        // A substring cannot run past the end of the input.
        let remaining = graphemes.len() - start;

        for length in 1..=longest.min(remaining) {
            let key = graphemes[start..start + length]
                .iter()
                .map(|grapheme| grapheme.value())
                .collect_vec();

            occurrences.entry(key).or_default().push(start);
        }
    }

    occurrences
}

/// Turns the occurrence map of substrings into ranges that cover two or more
/// directly adjacent occurrences of the same substring.
///
/// `repeated_substrings` maps a substring (list of grapheme values) to the
/// start indices at which it occurs; indices are processed in the order
/// given. Entries with fewer than two occurrences are ignored, and so are
/// entries with an empty substring, which would otherwise trigger a
/// remainder-by-zero panic below.
///
/// The result is ordered by substring length, longest first, and within the
/// same length by the first start index. Each element pairs a merged range
/// with the substring that repeats inside it; only ranges strictly longer
/// than a single occurrence are returned.
fn create_ranges_of_repetitions(
    repeated_substrings: HashMap<Vec<String>, Vec<usize>>,
) -> Vec<(Range<usize>, Vec<String>)> {
    let mut candidates: Vec<(&Vec<String>, &Vec<usize>)> = repeated_substrings
        .iter()
        .filter(|(prefix, indices)| !prefix.is_empty() && indices.len() > 1)
        .collect();

    // Longest substrings first, ties broken by the first occurrence. Indexing
    // with `[0]` is safe because every candidate has at least two indices.
    candidates.sort_by_key(|(prefix, indices)| (std::cmp::Reverse(prefix.len()), indices[0]));

    let mut repetitions = Vec::new();

    for (prefix, indices) in candidates {
        let prefix_length = prefix.len();

        // Alignment heuristic, kept as is: every start index must be either
        // a multiple of the substring length or even, or alternatively every
        // start index must be congruent to 1 modulo the length or odd.
        let all_even = indices
            .iter()
            .all(|it| it % prefix_length == 0 || it % 2 == 0);
        let all_odd = indices
            .iter()
            .all(|it| it % prefix_length == 1 || it % 2 == 1);

        if !(all_even || all_odd) {
            continue;
        }

        // Fuse occurrences that touch each other into a single range.
        let mut merged: Vec<Range<usize>> = Vec::new();
        for &start in indices {
            let end = start + prefix_length;
            if let Some(last) = merged.last_mut() {
                if last.end == start {
                    last.end = end;
                    continue;
                }
            }
            merged.push(start..end);
        }

        // A range of exactly one occurrence is not a repetition.
        repetitions.extend(
            merged
                .into_iter()
                .filter(|range| range.end - range.start > prefix_length)
                .map(|range| (range, prefix.clone())),
        );
    }

    repetitions
}

/// Orders the repetition ranges from back to front and drops ranges that
/// overlap a range which has already been kept.
///
/// Ranges are sorted by descending `end`; ranges with equal `end` are sorted
/// by ascending `start`. The sort is stable, so fully equal ranges keep their
/// input order. Walking the sorted list, a range is dropped when the most
/// recently kept range contains its `start` or its `end` value, unless its
/// `end` equals the kept range's `start` (the two merely touch).
///
/// The input vector is consumed and its elements are moved into the result,
/// so no range or substring is cloned.
fn coalesce_repetitions(
    ranges_of_repetitions: Vec<(Range<usize>, Vec<String>)>,
) -> Vec<(Range<usize>, Vec<String>)> {
    let mut sorted = ranges_of_repetitions;
    sorted.sort_by(|(first_range, _), (second_range, _)| {
        match second_range.end.cmp(&first_range.end) {
            Ordering::Equal => first_range.start.cmp(&second_range.start),
            other => other,
        }
    });

    let mut coalesced: Vec<(Range<usize>, Vec<String>)> = Vec::with_capacity(sorted.len());

    for candidate in sorted {
        // Only the last kept range is compared, mirroring a pairwise
        // coalescing pass in which the earlier element always survives.
        // `kept` is a shared reference on purpose: `contains` must be the
        // inherent `Range::contains`, not an iterator adaptor.
        let is_absorbed = coalesced.last().map_or(false, |(kept, _)| {
            let range = &candidate.0;
            (kept.contains(&range.start) || kept.contains(&range.end)) && range.end != kept.start
        });

        if !is_absorbed {
            coalesced.push(candidate);
        }
    }

    coalesced
}

/// Copies `graphemes` into `repetitions` and substitutes each coalesced range
/// with a single repetition grapheme, then recurses into every resulting
/// grapheme to detect nested repetitions.
///
/// Nothing is written to `repetitions` when `coalesced_repetitions` is empty.
/// Ranges are applied in the order given. Processing stops at the first range
/// that reaches past the current end of `repetitions` or whose content no
/// longer equals the expected repeated substring. Ranges that repeat no more
/// than `config.minimum_repetitions` times, or whose substring is shorter
/// than `config.minimum_substring_length`, are skipped.
fn replace_graphemes_with_repetitions(
    coalesced_repetitions: Vec<(Range<usize>, Vec<String>)>,
    graphemes: &[Grapheme],
    repetitions: &mut Vec<Grapheme>,
    config: &RegExpConfig,
) {
    if coalesced_repetitions.is_empty() {
        return;
    }

    repetitions.extend_from_slice(graphemes);

    for (range, substr) in &coalesced_repetitions {
        // Earlier substitutions shrink the vector, so a range may have
        // become out of bounds.
        if repetitions.len() < range.end {
            break;
        }

        let count = ((range.end - range.start) / substr.len()) as u32;
        let below_thresholds = count <= config.minimum_repetitions
            || substr.len() < config.minimum_substring_length as usize;

        if below_thresholds {
            continue;
        }

        // Verify that the range still holds `count` copies of the substring.
        let expected = substr.iter().join("").repeat(count as usize);
        let actual = repetitions[range.clone()]
            .iter()
            .map(|grapheme| grapheme.value())
            .join("");

        if actual != expected {
            break;
        }

        let replacement = Grapheme::new(substr.clone(), count, count, config);
        repetitions.splice(range.clone(), std::iter::once(replacement));
    }

    for grapheme in repetitions.iter_mut() {
        // Treat each chars entry of the grapheme as its own grapheme and
        // search those for repetitions as well.
        let inner = grapheme
            .chars
            .iter()
            .map(|it| Grapheme::from(it, config))
            .collect_vec();

        convert_repetitions(&inner, grapheme.repetitions.as_mut(), config);
    }
}

/// Converts a table of `(start, end)` char pairs into closed `CharRange`s,
/// preserving the order of the table.
fn convert_chars_to_range(chars: &[(char, char)]) -> Vec<CharRange> {
    let mut ranges = Vec::with_capacity(chars.len());
    for &(low, high) in chars {
        ranges.push(CharRange::closed(low, high));
    }
    ranges
}
