import axios from "axios";
import cheerio from "cheerio";

/**
 * Breadth-first crawler that collects the same-site links reachable from a
 * starting URL. Page HTML is fetched through the backend proxy endpoint
 * configured by `process.env.REACT_APP_API_URL`.
 */
export class Crawler {
  /**
   * @param {string} url - Starting URL; a trailing slash is appended if missing.
   * @param {(href: string) => void} [noticeCallback] - Invoked once for every
   *   newly discovered link, at the moment it is queued.
   */
  constructor(url, noticeCallback) {
    this.url = url.endsWith("/") ? url : `${url}/`;
    this.links = [];
    this.visitedLinks = [];
    this.noticeCallback = noticeCallback;
  }

  /**
   * Crawls from the starting URL until the queue of pending links is empty.
   * Pages are processed sequentially, always taking the head of the queue.
   *
   * @returns {Promise<void>}
   * @throws {Error} Propagates the failure of any page (see getLinksFromUrl).
   */
  async fetch() {
    this.links = [this.url];
    while (this.links.length > 0) {
      await this.getLinksFromUrl(this.links[0]);
    }
  }

  /**
   * Downloads one page, queues every same-site link that is neither pending
   * nor visited, then moves `url` from the pending queue to the visited list.
   *
   * @param {string} url - Absolute URL of the page to process.
   * @returns {Promise<void>}
   * @throws {Error} "Unable to get links." with the underlying failure
   *   attached as `cause`.
   */
  async getLinksFromUrl(url) {
    try {
      const response = await this.getURL(url);
      const $ = cheerio.load(response);
      const discovered = new Set(this.parseLinks($, $("a"), url));

      for (const href of discovered) {
        const isKnown =
          this.links.includes(href) || this.visitedLinks.includes(href);
        if (!href || isKnown) {
          continue;
        }
        this.links.push(href);
        if (typeof this.noticeCallback === "function") {
          this.noticeCallback(href);
        }
      }

      // Only splice when the URL is actually queued: splice(-1, 1) would
      // silently drop the last pending link instead.
      const queueIndex = this.links.indexOf(url);
      if (queueIndex !== -1) {
        this.links.splice(queueIndex, 1);
      }
      if (!this.visitedLinks.includes(url)) {
        this.visitedLinks.push(url);
      }
    } catch (err) {
      // Keep the original message for callers, but preserve the root cause.
      throw new Error("Unable to get links.", { cause: err });
    }
  }

  /**
   * Fetches the body of `url` through the backend proxy endpoint.
   *
   * @param {string} url - Absolute URL to download.
   * @returns {Promise<*>} The `data` field of the proxy's axios response.
   * @throws Whatever `axios.get` rejects with.
   */
  async getURL(url) {
    const response = await axios.get(
      `${process.env.REACT_APP_API_URL}/api/getUrl/${encodeURIComponent(url)}`
    );
    return response.data;
  }

  /**
   * Extracts the links of a page that point to the same site as `url`.
   *
   * Each href is resolved against `url`, so root-relative, page-relative and
   * absolute links are all handled. Kept links use http(s) and have the same
   * hostname as `url`, ignoring a leading "www.". Fragments are removed so
   * in-page anchors do not register as separate pages. Anchors without an
   * href, unparsable hrefs, and other schemes (mailto:, javascript:, ...)
   * are skipped. The result may contain duplicates.
   *
   * @param {Function} $dom - Cheerio root function used to wrap elements.
   * @param {object} $links - Cheerio selection of anchor elements.
   * @param {string} url - Absolute URL of the page the anchors came from.
   * @returns {string[]} Absolute, fragment-free URLs of internal links.
   * @throws {TypeError} If `url` is not a valid absolute URL.
   */
  parseLinks($dom, $links, url) {
    const base = new URL(url);
    const withoutWww = (host) => host.replace(/^www\./, "");
    const siteHost = withoutWww(base.hostname);
    const hrefs = [];

    $links.each((_, element) => {
      const href = $dom(element).attr("href");
      if (typeof href !== "string" || href.trim() === "") {
        return;
      }

      let target;
      try {
        target = new URL(href.trim(), base);
      } catch (err) {
        // Malformed href: nothing to crawl, move on to the next anchor.
        return;
      }

      const isWeb = target.protocol === "http:" || target.protocol === "https:";
      if (!isWeb || withoutWww(target.hostname) !== siteHost) {
        return;
      }

      target.hash = "";
      hrefs.push(target.href);
    });

    return hrefs;
  }
}
