import GraphemeSplitter from 'grapheme-splitter'
import {
  Emphasis,
  Html,
  Paragraph,
  RootContent,
  Strong,
  TableCell,
  Text,
  Link,
  Delete,
} from 'mdast'

import * as Types from './types'
import { extractText } from './utils'

/**
 * A single diffable unit produced by the tokenizer: a piece of text together
 * with the formatting attributes that apply to it.
 */
export type TokenAttributes = {
  /** The token text: a word, a single grapheme, or a serialized inline node. */
  token: string
  /**
   * Formatting attributes in effect for the token, e.g. the types of the
   * enclosing formatting nodes or `link`.
   */
  attributes: string[]
  /**
   * Helper to tell our diff algo not to add a space either before or after
   * the token. This is useful for things like "这是" where each character
   * is a token but we don't want a space between them.
   */
  noSpace?: {
    before: boolean
    after: boolean
  }
  /**
   * Number of sibling nodes following this token's node whose content is
   * already included in the token, so the tokenizer must skip over them.
   */
  skipCount?: number
}

// An inline citation spans 3 consecutive sibling nodes, in this order:
// html, text, html.
const CITATION_ELEMENT_COUNT = 3

/**
 * Takes in a parent node and returns an array of tokens with attributes.
 * A token is a string (ie. word) with formatting attributes.
 * This allows us to compare two paragraphs and highlight the differences,
 * including formatting differences. For example, if one paragraph has a
 * word in bold and the other doesn't, we can highlight that word by doing
 * diff ({text: 'word', attributes: ['bold']}, {text: 'word', attributes: []}).
 */
export class ParagraphTokenizer {
  /**
   * Pattern the concatenated html/text/html triple must contain to be treated
   * as an inline citation. The regex is shared between calls, so it must not
   * carry the `g` flag: a global regex keeps `lastIndex` state across `test`
   * calls and would produce alternating results.
   */
  private static readonly CITATION_PATTERN =
    /<span data-hrvy-id="[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}">.*?<\/span>/s

  /**
   * Flattens the children of `node` into tokens, recursing into formatting
   * nodes so that nested text inherits the enclosing attributes.
   *
   * @param node - Parent node to tokenize; `null` or a non-parent yields `[]`.
   * @param attributes - Attributes inherited from enclosing nodes.
   * @returns The tokens in document order.
   */
  public tokenizeParagraph(
    node: TableCell | Paragraph | Strong | Emphasis | Delete | Link | null,
    attributes: string[] = []
  ): TokenAttributes[] {
    if (!node || !Types.isParent(node)) return []

    const result: TokenAttributes[] = []
    let skipCount = 0

    for (let i = 0; i < node.children.length; i++) {
      // Children already folded into a preceding citation token.
      if (skipCount > 0) {
        skipCount--
        continue
      }

      const child = node.children[i]

      if (Types.isText(child)) {
        result.push(...this.tokenizeText(child, attributes))
      } else if (
        Types.isStrong(child) ||
        Types.isEmphasis(child) ||
        Types.isDelete(child)
      ) {
        result.push(...this.tokenizeStrongOrEmphasisOrDelete(child, attributes))
      } else if (Types.isHtml(child)) {
        const tokenizedHtml = this.tokenizeHtml({
          child,
          children: node.children as RootContent[],
          index: i,
          attributes,
        })
        result.push(...tokenizedHtml)
        // A citation token swallows the siblings that follow the html node.
        skipCount = tokenizedHtml[0]?.skipCount ?? 0
      } else if (Types.isLink(child)) {
        result.push(...this.tokenizeLink(child, attributes))
      } else if (Types.isLineBreak(child)) {
        result.push({
          token: '  \n',
          attributes,
          noSpace: { before: true, after: true },
        })
      } else if (Types.isImage(child)) {
        result.push({
          // `alt` may be null/undefined; never render it as "null".
          token: `![${child.alt ?? ''}](${child.url})`,
          attributes,
        })
      } else if (Types.isFootnoteReference(child)) {
        result.push({
          token: `[^${child.identifier}]`,
          attributes,
          noSpace: { before: true, after: true },
        })
      } else {
        console.error('Unexpected node type in tokenizeParagraph:', child.type)
      }
    }

    return result
  }

  /**
   * Splits a text node into tokens. Runs of simple characters separated by
   * spaces become word tokens; every complex grapheme (see
   * `isComplexCharacter`) becomes a token of its own.
   *
   * Each token's `noSpace` flags describe its own neighbours within the text
   * node: `before` / `after` is true when the adjacent grapheme exists and is
   * not a space, i.e. when no space must be inserted on that side.
   *
   * @param child - The text node to split.
   * @param attributes - Attributes attached to every produced token.
   * @returns The tokens in order of appearance.
   */
  public tokenizeText(child: Text, attributes: string[]) {
    const tokens: Array<{
      token: string
      attributes: string[]
      noSpace: { before: boolean; after: boolean }
    }> = []

    const chars = new GraphemeSplitter().splitGraphemes(child.value)

    // True when a grapheme exists at `index` and it is not a space.
    const isGlued = (index: number): boolean =>
      index >= 0 && index < chars.length && chars[index] !== ' '

    // Emits the token covering chars[start, end).
    const pushToken = (start: number, end: number): void => {
      tokens.push({
        token: chars.slice(start, end).join(''),
        attributes,
        noSpace: { before: isGlued(start - 1), after: isGlued(end) },
      })
    }

    // Index where the word being accumulated starts, or -1 when there is none.
    let wordStart = -1

    for (let i = 0; i < chars.length; i++) {
      const char = chars[i]
      const isSpace = char === ' '
      const isComplex = !isSpace && this.isComplexCharacter(char)

      if (isSpace || isComplex) {
        // Both a space and a complex grapheme terminate the current word.
        if (wordStart !== -1) {
          pushToken(wordStart, i)
          wordStart = -1
        }
        if (isComplex) pushToken(i, i + 1)
      } else if (wordStart === -1) {
        wordStart = i
      }
    }

    // Add the last word if the text does not end with a separator.
    if (wordStart !== -1) pushToken(wordStart, chars.length)

    return tokens
  }

  /**
   * A grapheme is "complex" when it spans more than one UTF-16 code unit, or
   * when it is a single non-ASCII code unit outside the Latin extended range.
   */
  private isComplexCharacter(char: string): boolean {
    return (
      char.length > 1 ||
      (char.charCodeAt(0) > 127 && !this.isLatinExtended(char))
    )
  }

  /** True when the first code unit of `char` lies in 128..383 inclusive. */
  private isLatinExtended(char: string): boolean {
    const code = char.charCodeAt(0)
    return code >= 128 && code <= 383
  }

  /**
   * Tokenizes the children of a formatting node, adding the node's type to
   * the inherited attributes.
   */
  private tokenizeStrongOrEmphasisOrDelete(
    child: Strong | Emphasis | Delete,
    attributes: string[]
  ): TokenAttributes[] {
    const newAttributes = [...attributes, child.type]
    return this.tokenizeParagraph(child, newAttributes)
  }

  /**
   * Tokenizes an html node. When the node starts an inline citation, the
   * whole citation becomes one token whose `skipCount` tells the caller how
   * many following siblings it covers; otherwise the raw html is the token.
   */
  private tokenizeHtml(params: {
    child: Html
    children: RootContent[]
    index: number
    attributes: string[]
  }): TokenAttributes[] {
    const { child, children, index, attributes } = params
    // Only the citation-sized window is needed, not the whole tail.
    const citation = this.nextChildrenAreCitation(
      children.slice(index, index + CITATION_ELEMENT_COUNT)
    )
    if (citation.match && citation.nextThreeString !== undefined) {
      return [
        {
          token: citation.nextThreeString,
          attributes,
          // The html node itself is consumed by the caller's loop.
          skipCount: CITATION_ELEMENT_COUNT - 1,
        },
      ]
    }
    return [{ token: child.value, attributes }]
  }

  /** Serializes a link as a single `[text](url)` token with a `link` attribute. */
  private tokenizeLink(child: Link, attributes: string[]): TokenAttributes[] {
    const newAttributes = [...attributes, 'link']
    return [
      {
        token: `[${extractText(child)}](${child.url})`,
        attributes: newAttributes,
      },
    ]
  }

  /**
   * A citation is made up of 3 elements: html, text, html. They should also
   * match the regex pattern. If the next 3 elements are a citation, return
   * true and the next three elements as a string. Otherwise, return false.
   */
  private nextChildrenAreCitation(children: RootContent[]): {
    match: boolean
    nextThreeString?: string
  } {
    if (children.length < CITATION_ELEMENT_COUNT) return { match: false }

    const [first, second, third] = children
    if (Types.isHtml(first) && Types.isText(second) && Types.isHtml(third)) {
      const nextThreeString = `${first.value}${second.value}${third.value}`
      const match = ParagraphTokenizer.CITATION_PATTERN.test(nextThreeString)

      return { match, nextThreeString }
    }

    return { match: false }
  }
}
