google_forms/node_modules/eslint/lib/rules/no-misleading-character-cla...

/**
 * @author Toru Nagashima <https://github.com/mysticatea>
 */
"use strict";

const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils");
const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
const astUtils = require("./utils/ast-utils.js");
const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");

//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------

/**
 * @typedef {import('@eslint-community/regexpp').AST.Character} Character
 * @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement} CharacterClassElement
 */

/**
 * Splits the elements of a character class into runs of adjacent characters.
 *
 * A `CharacterClassRange` swallows the characters on both of its ends, so each
 * range is taken apart again: its `min` ends the current run and its `max`
 * begins the next one. Character sets, nested classes, string disjunctions and
 * class expressions terminate the current run without contributing characters.
 * @param {CharacterClassElement[]} nodes The elements of a character class.
 * @returns {IterableIterator<Character[]>} The runs of adjacent characters, in source order.
 */
function *iterateCharacterSequence(nodes) {

    // Element types that end the current run without adding a character to it.
    const runTerminators = new Set([
        "CharacterSet",
        "CharacterClass", // [[]] nesting character class
        "ClassStringDisjunction", // \q{...}
        "ExpressionCharacterClass" // [A--B]
    ]);

    /** @type {Character[]} */
    let pending = [];

    for (const element of nodes) {
        const { type } = element;

        if (type === "Character") {
            pending.push(element);
        } else if (type === "CharacterClassRange") {

            // The lower bound belongs to the run before the range, the upper bound to the run after it.
            pending.push(element.min);
            yield pending;
            pending = [element.max];
        } else if (runTerminators.has(type) && pending.length > 0) {
            yield pending;
            pending = [];
        }

        // Any other element type is ignored and leaves the current run untouched.
    }

    // Flush whatever is left after the last element.
    if (pending.length > 0) {
        yield pending;
    }
}


/**
 * Tells whether a character was written as a `\u{...}` code point escape.
 *
 * Only the raw source text of the node is inspected: it must consist of
 * `\u{`, one or more hexadecimal digits, and `}` (matched case-insensitively).
 * @param {Character} char The character node to inspect.
 * @returns {boolean} `true` if the raw text of the node is a Unicode code point escape.
 */
function isUnicodeCodePointEscape(char) {
    const { raw } = char;
    const codePointEscapePattern = /^\\u\{[\da-f]+\}$/iu;

    return codePointEscapePattern.test(raw);
}

/**
 * Detectors for each kind of misleading character sequence.
 *
 * Every detector receives one run of adjacent characters and returns `true`
 * as soon as it finds a position in that run which exhibits its problem.
 * The property names double as the rule's message ids, and their order is the
 * order in which the kinds are checked.
 * @type {Record<string, (chars: Character[]) => boolean>}
 */
const hasCharacterSequence = {

    // A surrogate pair in which neither half is written as a `\u{...}` escape.
    surrogatePairWithoutUFlag(chars) {
        for (let i = 1; i < chars.length; i++) {
            const prev = chars[i - 1];
            const curr = chars[i];

            if (
                isSurrogatePair(prev.value, curr.value) &&
                !(isUnicodeCodePointEscape(prev) || isUnicodeCodePointEscape(curr))
            ) {
                return true;
            }
        }

        return false;
    },

    // A surrogate pair in which at least one half is written as a `\u{...}` escape.
    surrogatePair(chars) {
        for (let i = 1; i < chars.length; i++) {
            const prev = chars[i - 1];
            const curr = chars[i];

            if (!isSurrogatePair(prev.value, curr.value)) {
                continue;
            }

            if (isUnicodeCodePointEscape(prev) || isUnicodeCodePointEscape(curr)) {
                return true;
            }
        }

        return false;
    },

    // A combining character directly after a character that is not itself combining.
    combiningClass(chars) {
        for (let i = 1; i < chars.length; i++) {
            if (
                isCombiningCharacter(chars[i].value) &&
                !isCombiningCharacter(chars[i - 1].value)
            ) {
                return true;
            }
        }

        return false;
    },

    // An emoji modifier directly after a character that is not itself an emoji modifier.
    emojiModifier(chars) {
        for (let i = 1; i < chars.length; i++) {
            if (
                isEmojiModifier(chars[i].value) &&
                !isEmojiModifier(chars[i - 1].value)
            ) {
                return true;
            }
        }

        return false;
    },

    // Two regional indicator symbols next to each other.
    regionalIndicatorSymbol(chars) {
        for (let i = 1; i < chars.length; i++) {
            if (
                isRegionalIndicatorSymbol(chars[i].value) &&
                isRegionalIndicatorSymbol(chars[i - 1].value)
            ) {
                return true;
            }
        }

        return false;
    },

    // U+200D (zero width joiner) with a non-joiner character on both sides.
    zwj(chars) {
        const ZWJ = 0x200d;

        // The first and the last character have only one neighbour, so they are never candidates.
        for (let i = 1; i < chars.length - 1; i++) {
            if (
                chars[i].value === ZWJ &&
                chars[i - 1].value !== ZWJ &&
                chars[i + 1].value !== ZWJ
            ) {
                return true;
            }
        }

        return false;
    }
};

// Message ids of all detectable kinds, in the order they are checked.
const kinds = Object.keys(hasCharacterSequence);

//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------

/** @type {import('../shared/types').Rule} */
module.exports = {
    meta: {
        type: "problem",

        docs: {
            description: "Disallow characters which are made with multiple code points in character class syntax",
            recommended: true,
            url: "https://eslint.org/docs/latest/rules/no-misleading-character-class"
        },

        hasSuggestions: true,

        schema: [],

        messages: {
            surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
            surrogatePair: "Unexpected surrogate pair in character class.",
            combiningClass: "Unexpected combined character in character class.",
            emojiModifier: "Unexpected modified Emoji in character class.",
            regionalIndicatorSymbol: "Unexpected national flag in character class.",
            zwj: "Unexpected joined character sequence in character class.",
            suggestUnicodeFlag: "Add unicode 'u' flag to regex."
        }
    },
    create(context) {
        const sourceCode = context.sourceCode;
        const parser = new RegExpParser();

        /**
         * Verify a given regular expression.
         * @param {Node} node The node to report.
         * @param {string} pattern The regular expression pattern to verify.
         * @param {string} flags The flags of the regular expression.
         * @param {Function} unicodeFixer Fixer for missing "u" flag.
         * @returns {void}
         */
        function verify(node, pattern, flags, unicodeFixer) {
            let patternNode;

            try {
                patternNode = parser.parsePattern(
                    pattern,
                    0,
                    pattern.length,
                    {
                        unicode: flags.includes("u"),
                        unicodeSets: flags.includes("v")
                    }
                );
            } catch {

                // Ignore regular expressions with syntax errors
                return;
            }

            const foundKinds = new Set();

            visitRegExpAST(patternNode, {
                onCharacterClassEnter(ccNode) {
                    for (const chars of iterateCharacterSequence(ccNode.elements)) {
                        for (const kind of kinds) {
                            if (hasCharacterSequence[kind](chars)) {
                                foundKinds.add(kind);
                            }
                        }
                    }
                }
            });

            for (const kind of foundKinds) {
                let suggest;

                if (kind === "surrogatePairWithoutUFlag") {
                    suggest = [{
                        messageId: "suggestUnicodeFlag",
                        fix: unicodeFixer
                    }];
                }

                context.report({
                    node,
                    messageId: kind,
                    suggest
                });
            }
        }

        return {
            "Literal[regex]"(node) {
                verify(node, node.regex.pattern, node.regex.flags, fixer => {
                    if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {
                        return null;
                    }

                    return fixer.insertTextAfter(node, "u");
                });
            },
            "Program"(node) {
                const scope = sourceCode.getScope(node);
                const tracker = new ReferenceTracker(scope);

                /*
                 * Iterate calls of RegExp.
                 * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
                 *       `const {RegExp: a} = window; new a()`, etc...
                 */
                for (const { node: refNode } of tracker.iterateGlobalReferences({
                    RegExp: { [CALL]: true, [CONSTRUCT]: true }
                })) {
                    const [patternNode, flagsNode] = refNode.arguments;
                    const pattern = getStringIfConstant(patternNode, scope);
                    const flags = getStringIfConstant(flagsNode, scope);

                    if (typeof pattern === "string") {
                        verify(refNode, pattern, flags || "", fixer => {

                            if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
                                return null;
                            }

                            if (refNode.arguments.length === 1) {
                                const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 }); // skip closing parenthesis

                                return fixer.insertTextAfter(
                                    penultimateToken,
                                    astUtils.isCommaToken(penultimateToken)
                                        ? ' "u",'
                                        : ', "u"'
                                );
                            }

                            if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
                                const range = [flagsNode.range[0], flagsNode.range[1] - 1];

                                return fixer.insertTextAfterRange(range, "u");
                            }

                            return null;
                        });
                    }
                }
            }
        };
    }
};
Your commit message 2024-08-09 12:04:48 +00:00			`/**`
			`* @author Toru Nagashima <https://github.com/mysticatea>`
			`*/`
			`"use strict";`

Your commit message 2024-08-21 06:34:30 +00:00			`const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils");`
			`const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");`
Your commit message 2024-08-09 12:04:48 +00:00			`const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");`
Your commit message 2024-08-21 06:34:30 +00:00			`const astUtils = require("./utils/ast-utils.js");`
			`const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");`
Your commit message 2024-08-09 12:04:48 +00:00
			`//------------------------------------------------------------------------------`
			`// Helpers`
			`//------------------------------------------------------------------------------`

Your commit message 2024-08-21 06:34:30 +00:00			`/**`
			`* @typedef {import('@eslint-community/regexpp').AST.Character} Character`
			`* @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement} CharacterClassElement`
			`*/`

Your commit message 2024-08-09 12:04:48 +00:00			`/**`
			`* Iterate character sequences of a given nodes.`
			`*`
			`* CharacterClassRange syntax can steal a part of character sequence,`
			`* so this function reverts CharacterClassRange syntax and restore the sequence.`
Your commit message 2024-08-21 06:34:30 +00:00			`* @param {CharacterClassElement[]} nodes The node list to iterate character sequences.`
			`* @returns {IterableIterator<Character[]>} The list of character sequences.`
Your commit message 2024-08-09 12:04:48 +00:00			`*/`
			`function *iterateCharacterSequence(nodes) {`
Your commit message 2024-08-21 06:34:30 +00:00
			`/** @type {Character[]} */`
Your commit message 2024-08-09 12:04:48 +00:00			`let seq = [];`

			`for (const node of nodes) {`
			`switch (node.type) {`
			`case "Character":`
Your commit message 2024-08-21 06:34:30 +00:00			`seq.push(node);`
Your commit message 2024-08-09 12:04:48 +00:00			`break;`

			`case "CharacterClassRange":`
Your commit message 2024-08-21 06:34:30 +00:00			`seq.push(node.min);`
Your commit message 2024-08-09 12:04:48 +00:00			`yield seq;`
Your commit message 2024-08-21 06:34:30 +00:00			`seq = [node.max];`
Your commit message 2024-08-09 12:04:48 +00:00			`break;`

			`case "CharacterSet":`
Your commit message 2024-08-21 06:34:30 +00:00			`case "CharacterClass": // [[]] nesting character class`
			`case "ClassStringDisjunction": // \q{...}`
			`case "ExpressionCharacterClass": // [A--B]`
Your commit message 2024-08-09 12:04:48 +00:00			`if (seq.length > 0) {`
			`yield seq;`
			`seq = [];`
			`}`
			`break;`

			`// no default`
			`}`
			`}`

			`if (seq.length > 0) {`
			`yield seq;`
			`}`
			`}`

Your commit message 2024-08-21 06:34:30 +00:00
			`/**`
			`* Checks whether the given character node is a Unicode code point escape or not.`
			`* @param {Character} char the character node to check.`
			* @returns {boolean} `true` if the character node is a Unicode code point escape.
			`*/`
			`function isUnicodeCodePointEscape(char) {`
			`return /^\\u\{[\da-f]+\}$/iu.test(char.raw);`
			`}`

			`/**`
			* Each function returns `true` if it detects that kind of problem.
			`* @type {Record<string, (chars: Character[]) => boolean>}`
			`*/`
Your commit message 2024-08-09 12:04:48 +00:00			`const hasCharacterSequence = {`
			`surrogatePairWithoutUFlag(chars) {`
Your commit message 2024-08-21 06:34:30 +00:00			`return chars.some((c, i) => {`
			`if (i === 0) {`
			`return false;`
			`}`
			`const c1 = chars[i - 1];`

			`return (`
			`isSurrogatePair(c1.value, c.value) &&`
			`!isUnicodeCodePointEscape(c1) &&`
			`!isUnicodeCodePointEscape(c)`
			`);`
			`});`
			`},`

			`surrogatePair(chars) {`
			`return chars.some((c, i) => {`
			`if (i === 0) {`
			`return false;`
			`}`
			`const c1 = chars[i - 1];`

			`return (`
			`isSurrogatePair(c1.value, c.value) &&`
			`(`
			`isUnicodeCodePointEscape(c1) \|\|`
			`isUnicodeCodePointEscape(c)`
			`)`
			`);`
			`});`
Your commit message 2024-08-09 12:04:48 +00:00			`},`

			`combiningClass(chars) {`
			`return chars.some((c, i) => (`
			`i !== 0 &&`
Your commit message 2024-08-21 06:34:30 +00:00			`isCombiningCharacter(c.value) &&`
			`!isCombiningCharacter(chars[i - 1].value)`
Your commit message 2024-08-09 12:04:48 +00:00			`));`
			`},`

			`emojiModifier(chars) {`
			`return chars.some((c, i) => (`
			`i !== 0 &&`
Your commit message 2024-08-21 06:34:30 +00:00			`isEmojiModifier(c.value) &&`
			`!isEmojiModifier(chars[i - 1].value)`
Your commit message 2024-08-09 12:04:48 +00:00			`));`
			`},`

			`regionalIndicatorSymbol(chars) {`
			`return chars.some((c, i) => (`
			`i !== 0 &&`
Your commit message 2024-08-21 06:34:30 +00:00			`isRegionalIndicatorSymbol(c.value) &&`
			`isRegionalIndicatorSymbol(chars[i - 1].value)`
Your commit message 2024-08-09 12:04:48 +00:00			`));`
			`},`

			`zwj(chars) {`
			`const lastIndex = chars.length - 1;`

			`return chars.some((c, i) => (`
			`i !== 0 &&`
			`i !== lastIndex &&`
Your commit message 2024-08-21 06:34:30 +00:00			`c.value === 0x200d &&`
			`chars[i - 1].value !== 0x200d &&`
			`chars[i + 1].value !== 0x200d`
Your commit message 2024-08-09 12:04:48 +00:00			`));`
			`}`
			`};`

			`const kinds = Object.keys(hasCharacterSequence);`

			`//------------------------------------------------------------------------------`
			`// Rule Definition`
			`//------------------------------------------------------------------------------`

Your commit message 2024-08-21 06:34:30 +00:00			`/** @type {import('../shared/types').Rule} */`
Your commit message 2024-08-09 12:04:48 +00:00			`module.exports = {`
			`meta: {`
			`type: "problem",`

			`docs: {`
Your commit message 2024-08-21 06:34:30 +00:00			`description: "Disallow characters which are made with multiple code points in character class syntax",`
Your commit message 2024-08-09 12:04:48 +00:00			`recommended: true,`
Your commit message 2024-08-21 06:34:30 +00:00			`url: "https://eslint.org/docs/latest/rules/no-misleading-character-class"`
Your commit message 2024-08-09 12:04:48 +00:00			`},`

Your commit message 2024-08-21 06:34:30 +00:00			`hasSuggestions: true,`

Your commit message 2024-08-09 12:04:48 +00:00			`schema: [],`

			`messages: {`
			`surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",`
Your commit message 2024-08-21 06:34:30 +00:00			`surrogatePair: "Unexpected surrogate pair in character class.",`
Your commit message 2024-08-09 12:04:48 +00:00			`combiningClass: "Unexpected combined character in character class.",`
			`emojiModifier: "Unexpected modified Emoji in character class.",`
			`regionalIndicatorSymbol: "Unexpected national flag in character class.",`
Your commit message 2024-08-21 06:34:30 +00:00			`zwj: "Unexpected joined character sequence in character class.",`
			`suggestUnicodeFlag: "Add unicode 'u' flag to regex."`
Your commit message 2024-08-09 12:04:48 +00:00			`}`
			`},`
			`create(context) {`
Your commit message 2024-08-21 06:34:30 +00:00			`const sourceCode = context.sourceCode;`
Your commit message 2024-08-09 12:04:48 +00:00			`const parser = new RegExpParser();`

			`/**`
			`* Verify a given regular expression.`
			`* @param {Node} node The node to report.`
			`* @param {string} pattern The regular expression pattern to verify.`
			`* @param {string} flags The flags of the regular expression.`
Your commit message 2024-08-21 06:34:30 +00:00			`* @param {Function} unicodeFixer Fixer for missing "u" flag.`
Your commit message 2024-08-09 12:04:48 +00:00			`* @returns {void}`
			`*/`
Your commit message 2024-08-21 06:34:30 +00:00			`function verify(node, pattern, flags, unicodeFixer) {`
Your commit message 2024-08-09 12:04:48 +00:00			`let patternNode;`

			`try {`
			`patternNode = parser.parsePattern(`
			`pattern,`
			`0,`
			`pattern.length,`
Your commit message 2024-08-21 06:34:30 +00:00			`{`
			`unicode: flags.includes("u"),`
			`unicodeSets: flags.includes("v")`
			`}`
Your commit message 2024-08-09 12:04:48 +00:00			`);`
			`} catch {`

			`// Ignore regular expressions with syntax errors`
			`return;`
			`}`

Your commit message 2024-08-21 06:34:30 +00:00			`const foundKinds = new Set();`

Your commit message 2024-08-09 12:04:48 +00:00			`visitRegExpAST(patternNode, {`
			`onCharacterClassEnter(ccNode) {`
			`for (const chars of iterateCharacterSequence(ccNode.elements)) {`
			`for (const kind of kinds) {`
Your commit message 2024-08-21 06:34:30 +00:00			`if (hasCharacterSequence[kind](chars)) {`
			`foundKinds.add(kind);`
			`}`
Your commit message 2024-08-09 12:04:48 +00:00			`}`
			`}`
			`}`
			`});`

Your commit message 2024-08-21 06:34:30 +00:00			`for (const kind of foundKinds) {`
			`let suggest;`

			`if (kind === "surrogatePairWithoutUFlag") {`
			`suggest = [{`
			`messageId: "suggestUnicodeFlag",`
			`fix: unicodeFixer`
			`}];`
Your commit message 2024-08-09 12:04:48 +00:00			`}`
Your commit message 2024-08-21 06:34:30 +00:00
			`context.report({`
			`node,`
			`messageId: kind,`
			`suggest`
			`});`
Your commit message 2024-08-09 12:04:48 +00:00			`}`
			`}`

			`return {`
			`"Literal[regex]"(node) {`
Your commit message 2024-08-21 06:34:30 +00:00			`verify(node, node.regex.pattern, node.regex.flags, fixer => {`
			`if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {`
			`return null;`
			`}`

			`return fixer.insertTextAfter(node, "u");`
			`});`
Your commit message 2024-08-09 12:04:48 +00:00			`},`
Your commit message 2024-08-21 06:34:30 +00:00			`"Program"(node) {`
			`const scope = sourceCode.getScope(node);`
Your commit message 2024-08-09 12:04:48 +00:00			`const tracker = new ReferenceTracker(scope);`

			`/*`
			`* Iterate calls of RegExp.`
			* E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
			* `const {RegExp: a} = window; new a()`, etc...
			`*/`
Your commit message 2024-08-21 06:34:30 +00:00			`for (const { node: refNode } of tracker.iterateGlobalReferences({`
Your commit message 2024-08-09 12:04:48 +00:00			`RegExp: { [CALL]: true, [CONSTRUCT]: true }`
			`})) {`
Your commit message 2024-08-21 06:34:30 +00:00			`const [patternNode, flagsNode] = refNode.arguments;`
Your commit message 2024-08-09 12:04:48 +00:00			`const pattern = getStringIfConstant(patternNode, scope);`
			`const flags = getStringIfConstant(flagsNode, scope);`

			`if (typeof pattern === "string") {`
Your commit message 2024-08-21 06:34:30 +00:00			`verify(refNode, pattern, flags \|\| "", fixer => {`

			`if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {`
			`return null;`
			`}`

			`if (refNode.arguments.length === 1) {`
			`const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 }); // skip closing parenthesis`

			`return fixer.insertTextAfter(`
			`penultimateToken,`
			`astUtils.isCommaToken(penultimateToken)`
			`? ' "u",'`
			`: ', "u"'`
			`);`
			`}`

			`if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") \|\| flagsNode.type === "TemplateLiteral") {`
			`const range = [flagsNode.range[0], flagsNode.range[1] - 1];`

			`return fixer.insertTextAfterRange(range, "u");`
			`}`

			`return null;`
			`});`
Your commit message 2024-08-09 12:04:48 +00:00			`}`
			`}`
			`}`
			`};`
			`}`
			`};`