import sanitizeHtmlBase, { IOptions } from 'sanitize-html';

/**
 * Hosts trusted as the source of both embedded `<script>` tags and `<iframe>` tags.
 * The iframe and script allow-lists below are derived from this list.
 */
export const ALLOWED_DOMAINS = [
    'bright.nl',
    'gpblog.com',
    'vi.nl',
    'voetbalnieuws.nl',
    'voetbalprimeur.nl',
    'babybytes.nl',
    'instagram.com',
    'platform.twitter.com',
    'player.vimeo.com',
    'twitter.com',
    'v.24liveblog.com',
    'www.instagram.com',
    'www.youtube.com',
    'youtu.be',
    'tiktok.com',
];

/** Hosts trusted as iframe sources: the shared list followed by iframe-only additions. */
export const ALLOWED_IFRAME_DOMAINS = ALLOWED_DOMAINS.concat([
    'albumizr.com',
    'art19.com',
    'dailymotion.com', // Last used in 2022 (VP), 2018 (GP)
    'embed.kijk.nl',
    'html5-player.libsyn.com', // Last used in 2022
    'omny.fm', // Last used in 2022
    'open.spotify.com',
    'podcastluisteren.nl', // Last used in 2021
    'widget.roninmedia.io',
    'www.facebook.com',
]);

/** Hosts trusted as script sources: an independent copy of the shared list. */
export const ALLOWED_SCRIPT_DOMAINS = ALLOWED_DOMAINS.slice();

/**
 * Tags handed to sanitize-html as `nonTextTags` (see PURIFY_OPTIONS below).
 * NOTE(review): per the sanitize-html docs, the *content* of these tags is dropped when the tag itself
 * is not allowed; the library default list contains `script` instead of `noscript` — confirm intent.
 */
const DISALLOWED_TAGS: (keyof HTMLElementTagNameMap)[] = ['style', 'textarea', 'option', 'noscript'];
/** Standard HTML tags allowed in addition to the sanitize-html default tag allow-list. */
const ALLOWED_EXTRA_TAGS: (keyof HTMLElementTagNameMap)[] = ['script', 'iframe', 'img'];
/** Non-standard tag allowed through the sanitizer; it is the placeholder emitted by `replaceViaplayScripts`. */
const ALLOWED_CUSTOM_TAGS = ['viaplay-embed'];

/**
 * Options passed to sanitize-html by `sanitizeHtml`.
 * NOTE(review): the per-option remarks below follow the sanitize-html documentation — confirm against
 * the installed version before relying on them.
 */
const PURIFY_OPTIONS: IOptions = {
    // Library defaults plus script/iframe/img and the custom Viaplay placeholder tag.
    allowedTags: [...sanitizeHtmlBase.defaults.allowedTags, ...ALLOWED_EXTRA_TAGS, ...ALLOWED_CUSTOM_TAGS],
    // `false` disables attribute filtering, i.e. every attribute on an allowed tag is kept.
    allowedAttributes: false,
    // Leave `style` attribute values unparsed.
    parseStyleAttributes: false,
    nonTextTags: DISALLOWED_TAGS,
    // Restrict iframe / script `src` hosts to the allow-lists defined in this module.
    allowedIframeDomains: ALLOWED_IFRAME_DOMAINS,
    allowedScriptDomains: ALLOWED_SCRIPT_DOMAINS,
    // Acknowledges that allowing `script` is intentional (silences the library's warning).
    allowVulnerableTags: true,
    // Tags that are not allowed are escaped into text instead of being discarded.
    disallowedTagsMode: 'escape',
};

const VIAPLAY_EMBED_REGEX = /embed\.viaplay\.com/;

/**
 * Placeholder origin used only so that protocol-relative sources (`//host/path`) can be parsed.
 * The `.invalid` TLD guarantees it never collides with an allowed domain.
 */
const SRC_PARSE_BASE = 'https://relative-src.invalid';

/**
 * Tells whether `src` points at one of `domains` (or a subdomain of one).
 *
 * The source is parsed as a URL the same way a browser resolves a `src` attribute, so the decision is
 * made on the real hostname rather than on a substring of the whole string. A plain substring/regex
 * test would accept `https://evil.example/x.js?ref=vi.nl`, `https://vi.nl.evil.example/` or
 * `javascript:...//www.youtube.com`.
 *
 * @param src - Value of a `src` attribute; absolute or protocol-relative URL.
 * @param domains - Allowed host names; a host matches when it equals an entry or ends with `.<entry>`.
 * @returns `true` only for http(s) URLs on an allowed host. Unparseable, relative and non-http(s)
 *          sources are rejected.
 */
const isSrcOnAllowedDomain = (src: string, domains: readonly string[]): boolean => {
    let url: URL;
    try {
        url = new URL(src, SRC_PARSE_BASE);
    } catch {
        return false;
    }

    // Schemes such as `javascript:` or `data:` can carry an allowed domain in their payload.
    if (url.protocol !== 'http:' && url.protocol !== 'https:') {
        return false;
    }

    const hostname = url.hostname.toLowerCase();
    return domains.some((domain) => {
        const allowed = domain.toLowerCase();
        return hostname === allowed || hostname.endsWith(`.${allowed}`);
    });
};

/** Returns whether a script `src` is hosted on one of the allowed script domains. */
export const checkIfAllowedScriptSrc = (src: string): boolean => isSrcOnAllowedDomain(src, ALLOWED_SCRIPT_DOMAINS);

/** Returns whether an iframe `src` is hosted on one of the allowed iframe domains. */
export const checkIfAllowedIframeSrc = (src: string): boolean => isSrcOnAllowedDomain(src, ALLOWED_IFRAME_DOMAINS);

/**
 * Returns whether the given text mentions the Viaplay embed host.
 * This stays a substring test on purpose: the input may be inline script text rather than a URL.
 */
export const checkIfViaplayEmbed = (src: string): boolean => VIAPLAY_EMBED_REGEX.test(src);

/**
 * Substitute Viaplay embeds with the custom `<viaplay-embed>` tag before sanitization, so the embed
 * survives even though its inline script does not.
 *
 * Three rewrites are applied:
 * - `<div class="ndm-load-via-play" data-tag="ID"></div>` becomes `<viaplay-embed data-tag="ID">`.
 * - `<script>` elements mentioning `ndm-load-via-script` are removed.
 * - `<script>` elements mentioning `embed.viaplay` are replaced by `<viaplay-embed data-tag="ID">`,
 *   where ID is taken from the last `("ID")` / `('ID')` call argument found in the script. When no
 *   such argument exists the script is left untouched for the sanitizer to deal with.
 *
 * Should be removed as part of PB-5395.
 * FIXME: PB-5395 - Remove the ability to embed dangerous scripts
 *
 * @param html - Raw, unsanitized HTML.
 * @returns The HTML with Viaplay markup swapped for `<viaplay-embed>` placeholders.
 */
const replaceViaplayScripts = (html: string): string => {
    // Placeholder divs carrying the video id in `data-tag`.
    const placeholderDivRegex = /<div class="ndm-load-via-play" data-tag="([^"]+)"><\/div>/g;
    // Any complete script element, including its attributes and inline body.
    const scriptRegex = /<script[\s\S]*?>[\s\S]*?<\/script>/g;
    // A quoted identifier passed as the sole call argument, e.g. `("abc-123")`.
    const videoIDRegex = /\(["']([\w-]+)["']\)/g;

    return html
        .replace(placeholderDivRegex, '<viaplay-embed data-tag="$1"></viaplay-embed>')
        .replace(scriptRegex, (script) => {
            if (script.includes('ndm-load-via-script')) {
                return '';
            }
            if (!script.includes('embed.viaplay')) {
                return script;
            }

            const videoIDMatches = [...script.matchAll(videoIDRegex)];
            const lastMatch = videoIDMatches[videoIDMatches.length - 1];
            // An empty match list is still truthy, so test the element itself; indexing into a
            // missing match would throw.
            if (!lastMatch) {
                return script;
            }
            return `<viaplay-embed data-tag="${lastMatch[1]}"></viaplay-embed>`;
        });
};

/**
 * Sanitizes untrusted HTML.
 *
 * Viaplay markup is first swapped for `<viaplay-embed>` placeholders, the result is run through
 * sanitize-html with `PURIFY_OPTIONS`, and finally every line feed and carriage return is stripped
 * from the sanitized output.
 *
 * @param html - Raw HTML to clean.
 * @returns The sanitized HTML without any `\n` or `\r` characters.
 */
export const sanitizeHtml = (html: string): string => {
    const withViaplayPlaceholders = replaceViaplayScripts(html);
    const sanitized = sanitizeHtmlBase(withViaplayPlaceholders, PURIFY_OPTIONS);

    return sanitized.replace(/\n|\r/g, '');
};
