import { justBody } from '@/std/api-contract'
import { FetchApiRouteClient } from '@/std/api-contract/route-client'
import { flow, pipe } from '@/std/function'
import { RemoteAction, RemoteResource, TR } from '@/std/remote'
import { getClientContext } from '@mindpalace/shared/client.context'
import { contract } from './contract'

/**
 * Builds a scraper function bound to the `fetch` of the current client context.
 *
 * The returned function takes the page `url`, calls the contract route with it
 * as the `url` search parameter, parses the response body as HTML and resolves
 * to `{ url, lang, node }`:
 * - `lang` is the `lang` attribute of the `<html>` element, defaulting to `'en'`;
 * - `node` is the main content element after the cleanup pipeline below.
 */
const ScrapePage = () => {
  const { fetch } = getClientContext()
  const fetchHtml = flow(FetchApiRouteClient(contract, fetch), justBody)

  return (url: URL) => {
    // Turns a parsed document into the scraped page record.
    const toPage = (document: Document) => {
      // Read the language first: the cleanup steps below mutate the document.
      const lang =
        document.querySelector('html')?.getAttribute('lang') ?? 'en'
      const node = pipe(
        document,
        extractMainContent,
        stripDoc,
        removeCustomElements,
        removeIframes,
        fixImagesSrc(url),
        fixAnchorsHref(url),
      )
      return { url, lang, node }
    }

    return pipe(
      fetchHtml({ searchParams: { url } }),
      TR.map(parseHtml),
      TR.map(toPage),
    )
  }
}

/**
 * Alias of the DOM `Element` type.
 *
 * NOTE(review): the scraper defined in this file resolves to
 * `{ url, lang, node }`, so this alias presumably describes only the `node`
 * part — confirm against callers.
 */
export type ScrapedPage = Element

/** The value produced by {@link ScrapePageResource}. */
export type ScrapePageResource = ReturnType<typeof ScrapePageResource>
/**
 * Wraps scraping of a fixed `url` in a `RemoteResource`.
 *
 * The scraper is created eagerly, when this function is called; the thunk
 * handed to `RemoteResource` only invokes it with `url`.
 */
export const ScrapePageResource = (url: URL) => {
  const scrapePage = ScrapePage()
  const load = () => scrapePage(url)
  return RemoteResource(load)
}

/** The value produced by {@link ScrapePageAction}. */
export type ScrapePageAction = ReturnType<typeof ScrapePageAction>
/** Wraps a freshly built page scraper in a `RemoteAction`. */
export const ScrapePageAction = () => {
  const scrapePage = ScrapePage()
  return RemoteAction(scrapePage)
}

/** Parses an HTML string into a document using `DOMParser` with the `text/html` type. */
const parseHtml = (html: string) =>
  new DOMParser().parseFromString(html, 'text/html')

/**
 * Picks the element holding the page's main content.
 *
 * Candidates are tried in order, stopping at the first hit:
 * 1. the first `<main>` element;
 * 2. the first element matching `[role=main]`;
 * 3. the parent element of the first `<article>`;
 * 4. a new `<div>` into which all element children of `<body>` are moved.
 */
const extractMainContent = (doc: Document) => {
  const main = doc.querySelector('main')
  if (main != null) return main

  const roleMain = doc.querySelector('[role=main]')
  if (roleMain != null) return roleMain

  const articleParent = doc.querySelector('article')?.parentElement
  if (articleParent != null) return articleParent

  // Nothing recognisable: gather the body's element children under one wrapper.
  const wrapper = doc.createElement('div')
  wrapper.append(...doc.body.children)
  return wrapper
}

/**
 * Removes the attribute `attr` from every descendant of `doc` that carries it.
 * Descendants are found with the `[attr]` selector; `doc` itself is not matched.
 */
const removeAttr = (doc: Element, attr: string) => {
  const selector = `[${attr}]`
  doc.querySelectorAll(selector).forEach((element) => {
    element.removeAttribute(attr)
  })
}

/**
 * Strips presentational attributes (`class`, then `style`) from the
 * descendants of `doc` and returns the same element for chaining.
 */
const stripDoc = (doc: Element) => {
  for (const presentational of ['class', 'style']) {
    removeAttr(doc, presentational)
  }
  return doc
}
/**
 * Neutralises custom elements (hyphenated lowercase tag names) inside `node`.
 *
 * Tag names are discovered by scanning `node.innerHTML` for opening tags of
 * the form `<name-rest`. For every matching descendant:
 * - an element without child nodes is removed from the tree;
 * - an element with child nodes is kept, but all of its attributes are removed.
 *
 * @param node Root element whose descendants are cleaned in place.
 * @returns The same `node`, for chaining.
 */
const removeCustomElements = (node: Element) => {
  const customTagPattern = /<[a-z]+-[a-z\-]+/g
  const openings = node.innerHTML.match(customTagPattern) ?? []
  // Drop the leading `<` and de-duplicate so each tag name is queried once.
  const tagNames = new Set(openings.map((s: string) => s.slice(1)))
  tagNames.forEach((tagName) => {
    for (const element of node.querySelectorAll(tagName)) {
      if (!element.hasChildNodes()) {
        element.remove()
        continue
      }
      // `attributes` is a live collection: removing entries while iterating
      // it directly skips every other attribute, so iterate over a snapshot.
      for (const { name } of Array.from(element.attributes)) {
        element.removeAttribute(name)
      }
    }
  })
  return node
}

/** Detaches every `<iframe>` descendant of `node` and returns `node` for chaining. */
const removeIframes = (node: Element) => {
  node.querySelectorAll('iframe').forEach((frame) => {
    frame.remove()
  })
  return node
}

/**
 * Returns a transformer that rewrites the `src` of every `img[src]` under
 * `root` to an absolute URL, resolving the raw attribute value against `url`.
 * The transformer returns `root` for chaining.
 */
const fixImagesSrc = (url: URL) => (root: Element) => {
  root.querySelectorAll<HTMLImageElement>('img[src]').forEach((image) => {
    const rawSrc = image.getAttribute('src') ?? ''
    image.src = new URL(rawSrc, url).href
  })
  return root
}

/**
 * Returns a transformer that rewrites the `href` of every `a[href]` under
 * `root` to an absolute URL resolved against the scraped page's `url`.
 *
 * The raw attribute value is used rather than the `href` property: the
 * property getter returns a URL already resolved against the parsed
 * document's base (the host page, not the scraped one), which corrupts
 * document-relative links (`next.html`, `../x`) and fragment links (`#top`).
 *
 * @param url Address of the scraped page, used as the resolution base.
 * @returns A function that fixes anchors in place and returns `root`.
 */
const fixAnchorsHref = (url: URL) => (root: Element) => {
  for (const anchor of root.querySelectorAll<HTMLAnchorElement>('a[href]')) {
    // The selector guarantees the attribute exists; `?? ''` only satisfies types.
    anchor.href = new URL(anchor.getAttribute('href') ?? '', url).href
  }
  return root
}
