Python: Extract variables/values from source code comments

The source code in this article can be used to extract variables and values from source code comments. The code is written in Python and uses a combination of regular expressions and Python’s built-in string functions to extract specific information from source code comments.

#!/usr/bin/env python
# Author: James Cherti
# License: MIT
# URL: https://www.jamescherti.com/python-extract-variables-values-from-source-code-comments/
"""Extract variables/values from source code comments."""

import re
from typing import Dict


def get_variables_from_comments(
        source_code_content: str,
        comment_pattern: str = r'[^\w\s]+') -> Dict[str, list]:
    r"""Extract ``name: value`` pairs from source code comments.

    A line contributes a pair when it consists of optional indentation, a
    comment marker matching ``comment_pattern``, a variable name made of
    word characters, a colon, and the value (the rest of the line).
    All other lines are ignored.

    Args:
        source_code_content: The full text of the source code to scan.
        comment_pattern: Regular expression matching the comment marker.
            The default accepts any run of characters that are neither
            word characters nor whitespace. Pass ``re.escape('#')`` (for
            example) to accept only one specific marker.

    Returns:
        A dictionary mapping each variable name to the list of its values,
        in order of appearance. Whitespace surrounding a value is removed.

    Raises:
        re.error: If ``comment_pattern`` is not a valid regular expression.

    Example:
        >>> source = '\n'.join([
        ...     '#!/usr/bin/env python',
        ...     '# This is a simple comment.',
        ...     'print("Hello world")',
        ...     '#',
        ...     '# myvar: value 1',
        ...     '# myvar: value 2',
        ...     '# myvar: value 3',
        ...     '# AnotherVar: value 1',
        ... ])
        >>> get_variables_from_comments(source)
        {'myvar': ['value 1', 'value 2', 'value 3'], 'AnotherVar': ['value 1']}

    """
    # Compile once instead of rebuilding the expression for every line.
    # - The marker sits in a non-capturing group so that an alternation
    #   such as r'#|//' stays confined to the marker.
    # - Named groups keep working even if the marker pattern contains
    #   capturing groups of its own.
    # - The value is matched lazily so the final r'\s*' actually strips
    #   trailing whitespace (a greedy '.*' would swallow it).
    line_re = re.compile(
        r'^\s*(?:' + comment_pattern + r')\s*' +
        r'(?P<name>\w+)\s*:\s*(?P<value>.*?)\s*$')

    result: Dict[str, list] = {}
    for line in source_code_content.splitlines():
        match_result = line_re.search(line)
        if match_result:
            result.setdefault(match_result.group('name'), []).append(
                match_result.group('value'))

    return result


def main():
    """Demonstrate 'get_variables_from_comments()' on a small sample.

    Pretty-prints the variables found in the '#' comments of an embedded
    Python snippet.
    """
    # Imported locally: pprint is only needed by this demonstration.
    from pprint import pprint

    source_code = (
        "#!/usr/bin/env python\n"
        "# This is a simple comment.\n"
        "print(\"Hello world\")\n"
        "#\n"
        "# myvar: value 1\n"
        "# myvar: value 2\n"
        "#\n"
        "# myvar: value 3\n"
        "# AnotherVar: value 1\n"
    )

    # Accept only '#' as the comment marker; re.escape() keeps it literal.
    pprint(get_variables_from_comments(
        source_code_content=source_code,
        comment_pattern=re.escape('#'),
    ))


if __name__ == '__main__':
    main()

Code language: Python (python)

Leave a Reply

Your email address will not be published. Required fields are marked *