import { toNumberOrUndefined } from 'scrapers/utils';
import { NETWORK_IDS } from '../constants';
import { ErrorMsg } from '../constants/errors';

/**
 * Reports whether the path component of a url ends with a forward slash.
 * @param url The parsed url whose pathname is inspected
 * @returns True if the last character of `url.pathname` is `/`
 */
export function hasTrailingSlash(url: URL): boolean {
  // `endsWith` replaces the deprecated `String.prototype.substr`.
  return url.pathname.endsWith('/');
}

/**
 * Non-throwing companion to `validateUrl`.
 * @param maybeUrl The candidate url string
 * @returns True when `validateUrl` accepts the string, false when it throws
 */
export function isValidUrl(maybeUrl: string) {
  let accepted = true;
  try {
    validateUrl(maybeUrl);
  } catch {
    // Any failure reported by validateUrl means the string is not usable.
    accepted = false;
  }
  return accepted;
}

/**
 * Takes a string url and returns a URL object if it's valid.
 * Only the `http:` and `https:` schemes are accepted.
 * Throws an error that includes the string url itself to behave similarly
 * to pre-node 16.
 * @param maybeUrl The candidate url string
 * @returns The parsed URL
 * @throws Error with message `${ErrorMsg.INVALID_URL}: <input>` when the string
 * cannot be parsed or uses a scheme other than http/https
 */
export function validateUrl(maybeUrl: string): URL {
  let parsed: URL | undefined;
  try {
    parsed = new URL(maybeUrl);
  } catch {
    // The parser's own error is discarded on purpose: both failure modes
    // (unparseable input, unsupported scheme) surface as the same error below.
    parsed = undefined;
  }

  if (parsed && (parsed.protocol === 'http:' || parsed.protocol === 'https:')) {
    return parsed;
  }
  throw new Error(`${ErrorMsg.INVALID_URL}: ${maybeUrl}`);
}

// Matches a pathname that ends in `/photo/<digits>`.
const matchTrailingPhoto = /\/photo\/\d+$/;
/** True when the url's pathname ends in `/photo/<digits>`. */
const hasPhotoTrail = (url: URL) => matchTrailingPhoto.test(url.pathname);

/**
 * isUrlValidForScraping takes a url and returns true if it's valid for scraping.
 *
 * The url must parse as http(s), must not look like the same link pasted
 * twice, and must match the path/id shape expected for its network. Hosts that
 * match none of the networks handled below are rejected.
 *
 * A `youtu.be` link is rejected by this check, and a link on a host containing
 * `youtube` is only accepted when its pathname contains `watch`.
 *
 * @param href The url to check
 * @returns True if the url is valid for scraping, false otherwise
 * @throws Error (from `validateUrl`) if `href` is not a parseable http(s) url
 */
export function isUrlValidForScraping(href: string): boolean {
  const url = validateUrl(href);

  // Paths shorter than four characters (e.g. "/" or "/ab") are rejected up front.
  if (url.pathname.length < 4) {
    return false;
  }

  // Drop one trailing slash so the id checks below do not end on an empty segment.
  if (hasTrailingSlash(url)) {
    url.pathname = url.pathname.slice(0, -1);
  }

  // Deals with cases where a url has been pasted more than once
  if (/https?:\/\//.test(url.pathname)) {
    return false;
  }

  // Text following the last occurrence of `marker` in the pathname
  // (the whole pathname when the marker is absent).
  const pathAfter = (marker: string) => url.pathname.split(marker).pop() ?? '';

  // youtube rules
  // pathname: /watch
  // search: ?v=:id
  if (url.host.includes('youtube')) {
    return url.pathname.includes('watch') && url.search.length > 3;
  }

  // twitter rules
  // pathname: /:username/status/:id
  // `x.com` is compared against the whole hostname: a substring test would
  // also match unrelated hosts such as `netflix.com` or `dropbox.com`.
  const isXHost = url.hostname === 'x.com' || url.hostname.endsWith('.x.com');
  if (url.host.includes('twitter') || isXHost) {
    // Require the full `/status/` segment. Testing for `/status` and then
    // splitting on `status/` let id-less paths such as `/user/status` pass,
    // because the whole pathname ended up being measured as the id.
    const statusMarker = '/status/';
    const markerAt = url.pathname.lastIndexOf(statusMarker);
    if (markerAt === -1) {
      return false;
    }
    const id = url.pathname.slice(markerAt + statusMarker.length);
    return id.length > 9;
  }

  // instagram rules
  // pathname: /p/:id (or /reel/ or /tv/ or /share)
  if (url.host.includes('instagram')) {
    if (url.pathname.includes('/share/')) {
      // Very lenient with share urls
      return true;
    }
    const idMatch = url.pathname.match(/\/(?:p|tv|reel)\/([^/]+)/);
    return idMatch !== null && idMatch[1].length > 9;
  }

  // tiktok rules
  // short-link hosts are accepted as-is; otherwise the text after
  // /video/ (with an /@user segment), /t/ or /photo/ must be 9+ characters
  if (url.host.includes('tiktok')) {
    const shortLinkHosts = ['vt.tiktok', 'vm.tiktok', 'm.tiktok-style'];
    if (shortLinkHosts.some((fragment) => url.host.includes(fragment))) {
      return true;
    }
    if (url.pathname.includes('/video/') && url.pathname.includes('/@')) {
      return pathAfter('video/').length >= 9;
    }
    if (url.pathname.includes('/t/')) {
      return pathAfter('t/').length >= 9;
    }
    if (url.pathname.includes('/photo/')) {
      return pathAfter('photo/').length >= 9;
    }
    return false;
  }

  // facebook rules: any path mentioning one of the known content kinds.
  // A facebook url that does not match falls through to the checks below.
  if (url.host.includes('facebook') && url.pathname.match(/videos|posts|photos|watch|reel|share/)) {
    return true;
  }

  if (url.host.includes('fb.watch')) {
    return true;
  }

  // twitch video: the last path segment must convert to a truthy number
  if (isUrlTwitchVideo(url)) {
    const segments = url.pathname.split('/');
    const id = toNumberOrUndefined(segments[segments.length - 1]);
    return !!id;
  }

  // twitch clip: the text after the last `/clip/` (or the whole pathname when
  // that marker is absent) must be at least 5 characters long
  if (isUrlTwitchClip(url)) {
    const pieces = url.pathname.split('/clip/');
    const slug = pieces[pieces.length - 1];
    return slug.length >= 5;
  }

  return false;
}

/**
 * normalizeUrl takes a url and returns a normalized url.
 *
 * Steps, in order:
 * - `mobile.twitter.com` and `x.com` hosts become `twitter.com`
 * - a trailing `/photo/<n>` is removed from twitter urls
 * - one trailing slash is removed from the pathname
 * - instagram `/reel/`, `/tv/` and username-prefixed paths collapse to `/p/<id>`
 * - youtube `/shorts/<id>` and `youtu.be/<id>` become `/watch?v=<id>`
 * - the `www.` prefix is dropped from `www.youtube.com`
 * - tiktok `/photo/` becomes `/video/`
 * - the query string is dropped for tiktok, twitter, instagram and threads;
 *   for youtube and facebook only the `v` parameter is kept
 * - the fragment is always removed
 *
 * @param href The url to normalize
 * @returns The normalized url as a URL object
 * @throws Error (from `validateUrl`) if `href` is not a parseable http(s) url
 */
export function normalizeUrl(href: string): URL {
  const url = validateUrl(href);

  // `x.com` is compared against the whole hostname: a substring test would
  // also rewrite unrelated hosts such as `netflix.com` to twitter.com.
  const isXHost = url.hostname === 'x.com' || url.hostname.endsWith('.x.com');
  if (url.host.includes('mobile.twitter.com') || isXHost) {
    url.host = 'twitter.com';
  }

  if (url.host.includes('twitter.com') && hasPhotoTrail(url)) {
    url.pathname = url.pathname.replace(matchTrailingPhoto, '');
  }

  if (hasTrailingSlash(url)) {
    url.pathname = url.pathname.slice(0, -1);
  }

  // Amazingly, if you just replace the /reel/ or /tv/ with a /p/, it... works?
  if (url.host.includes('instagram.com')) {
    url.pathname = url.pathname.replace('/reel/', '/p/');
    url.pathname = url.pathname.replace('/tv/', '/p/');
    // if url.pathname has a username before /p/, need to remove it
    url.pathname = url.pathname.replace(/\/[\w.]+\/p\//, '/p/');

    // Keep only the id segment, discarding anything that follows it.
    const idMatch = url.pathname.match(/\/(p|tv|reel)\/([^/]+)/);
    if (idMatch) {
      url.pathname = `/p/${idMatch[2]}`;
    }
  }

  // youtube shorts: the last path segment is taken as the video id
  if (url.host.includes('youtube.com') && url.pathname.includes('/shorts/')) {
    const videoId = url.pathname.split('/').pop() || '';
    url.pathname = url.pathname.replace('shorts/', 'watch').replace(videoId, '');
    url.searchParams.set('v', videoId);
  }

  // youtu.be short links: the id has to be read before the pathname is overwritten
  if (url.host.includes('youtu.be')) {
    const videoId = url.pathname.split('/').pop() || '';
    url.pathname = '/watch';
    url.host = url.host.replace('youtu.be', 'youtube.com');
    url.searchParams.set('v', videoId);
  }

  // Inspect and edit the host, not the whole href: a `www.youtube.com` that
  // only appears in the path or query must not strip `www.` from another host.
  if (url.host.includes('www.youtube.com')) {
    url.host = url.host.replace('www.', '');
  }

  if (url.host.includes('tiktok.com')) {
    url.pathname = url.pathname.replace('/photo/', '/video/');
  }

  const dropsWholeQuery = url.host.includes('tiktok.com')
    || url.host.includes('twitter.com')
    || url.host.includes('instagram.com');
  if (dropsWholeQuery) {
    url.search = '';
  } else if (url.host.includes('youtube.com') || url.host.includes('facebook.com')) {
    // Only the `v` parameter survives on these hosts.
    const vValue = url.searchParams.get('v');
    url.search = '';
    if (vValue) {
      url.searchParams.set('v', vValue);
    }
  }

  // Remove URL fragments
  url.hash = '';

  if (url.host.includes('threads.net')) {
    url.href = urlWithoutQueryString(url);
  }
  return url;
}

/**
 * getNetworkFromUrl takes a url and returns the network id.
 * @param href The url to get the network id from
 * @returns The matching entry of `NETWORK_IDS`
 * @throws Error if `href` fails `validateUrl`, if its pathname contains an
 * embedded `http://` or `https://`, or if no known network matches the host
 */
export const getNetworkFromUrl = (href: string) => {
  const url = validateUrl(href);

  // Check both schemes: a link pasted twice is at least as likely to be https.
  if (/https?:\/\//.test(url.pathname)) {
    // The url was probably pasted twice.
    throw new Error(`Invalid url: ${url}`);
  }

  const { host, hostname } = url;

  if (host.includes('instagram.com')) {
    return NETWORK_IDS.instagram;
  }
  if (host.includes('facebook.com') || host.includes('fb.watch')) {
    return NETWORK_IDS.facebook;
  }
  if (host.includes('youtube.com') || host.includes('youtu.be')) {
    return NETWORK_IDS.youtube;
  }
  if (host.includes('tiktok.com')) {
    return NETWORK_IDS.tiktok;
  }
  // `x.com` is compared against the whole hostname: a substring test would
  // also classify unrelated hosts such as `netflix.com` as twitter.
  if (host.includes('twitter.com') || hostname === 'x.com' || hostname.endsWith('.x.com')) {
    return NETWORK_IDS.twitter;
  }
  if (host.includes('twitch.tv')) {
    return NETWORK_IDS.twitch;
  }
  if (host.includes('threads.net')) {
    return NETWORK_IDS.threads;
  }
  throw new Error(`No network found for ${url}`);
};

/**
 * isUrlTwitchClip reports whether a url points at a twitch clip.
 * A url qualifies when its host contains `twitch.tv` and either the host
 * contains `clips` or the pathname contains `/clip`.
 * @param url The url to check
 * @returns True if the url is a twitch clip, false otherwise
 */
export const isUrlTwitchClip = (url: URL) => {
  if (!url.host.includes('twitch.tv')) {
    return false;
  }
  return url.host.includes('clips') || url.pathname.includes('/clip');
};

/**
 * isUrlTwitchVideo reports whether a url points at a twitch video.
 * A url qualifies when its host contains `twitch.tv` and its pathname
 * contains `/videos/` or `/video/`.
 * @param url The url to check
 * @returns True if the url is a twitch video, false otherwise
 */
export const isUrlTwitchVideo = (url: URL) => {
  const onTwitch = url.host.includes('twitch.tv');
  const videoMarkers = ['/videos/', '/video/'];
  return onTwitch && videoMarkers.some((marker) => url.pathname.includes(marker));
};

/**
 * isUrlSupported takes a url and returns true if it's supported.
 * A url is supported when its host belongs to one of the known networks.
 * @param href The url to check
 * @returns True if the url is supported, false otherwise
 * @throws Error (from `validateUrl`) if `href` is not a parseable http(s) url
 */
export const isUrlSupported = (href: string): boolean => {
  const { host, hostname } = validateUrl(href);

  // `x.com` is compared against the whole hostname: as a substring it would
  // also match unrelated hosts such as `netflix.com` or `dropbox.com`.
  if (hostname === 'x.com' || hostname.endsWith('.x.com')) {
    return true;
  }

  // The remaining domains keep the substring match used throughout this file.
  const networks = ['instagram.com', 'facebook.com', 'fb.watch', 'youtube.com', 'youtu.be', 'tiktok.com', 'twitter.com', 'twitch.tv', 'threads.net'];
  return networks.some((n) => host.includes(n));
};

/**
 * urlWithoutQueryString returns the url's href cut off at the first `?`.
 * Everything from that `?` onwards is dropped; an href without a `?` is
 * returned unchanged.
 * @param url The url to remove the query string from
 * @returns The href up to, but not including, the first `?`
 */
export function urlWithoutQueryString(url: URL) {
  const { href } = url;
  const queryStart = href.indexOf('?');
  return queryStart === -1 ? href : href.slice(0, queryStart);
}
