"""Detect all Python scripts in HTML pages in current folder and subfolders.
"""
import os
import html.parser
import ast
# start by getting all modules in the Brython standard distribution
# they must be in brython_stdlib.js somewhere in the current directory
# or below
# Look for brython_stdlib.js in the current directory and in every directory
# below it, printing 'found' for each copy encountered.
# os.walk() yields one (dirname, dirnames, filenames) triple per directory;
# iterating over os.getcwd() directly would iterate over the characters of
# the path string instead.
for dirname, dirnames, filenames in os.walk(os.getcwd()):
    for filename in filenames:
        if filename == "brython_stdlib.js":
            print('found')
class CharsetDetector(html.parser.HTMLParser):
    """HTML parser that records the charset declared by a <meta> tag.

    After feeding a document, ``encoding`` holds the value of the last
    ``charset`` attribute seen on a ``<meta>`` tag, or "iso-8859-1" if no
    such attribute was found.
    """

    def __init__(self, *args, **kw):
        """Initialise the underlying HTMLParser and the default encoding.

        ``convert_charrefs`` defaults to True unless the caller passes it
        explicitly; all other arguments are forwarded unchanged.
        """
        kw.setdefault('convert_charrefs', True)
        try:
            super().__init__(*args, **kw)
        except TypeError:
            # convert_charrefs is only supported by Python 3.4+ : retry
            # without it
            kw.pop('convert_charrefs')
            super().__init__(*args, **kw)

        # used when the page does not declare a charset
        self.encoding = "iso-8859-1"

    def handle_starttag(self, tag, attrs):
        """Store the value of the ``charset`` attribute of <meta> tags."""
        if tag.lower() != "meta":
            return
        for name, content in attrs:
            if name == "charset":
                # no early exit : the last charset attribute wins
                self.encoding = content
class Parser(html.parser.HTMLParser):
    """HTML parser that collects the content of Python <script> tags.

    After feeding a document, ``scripts`` is the list of the non-blank
    text blocks found inside ``<script type="text/python">`` elements, in
    document order.
    """

    def __init__(self, *args, **kw):
        """Initialise the underlying HTMLParser and the collected state.

        ``convert_charrefs`` defaults to True unless the caller passes it
        explicitly; all other arguments are forwarded unchanged.
        """
        kw.setdefault('convert_charrefs', True)
        try:
            html.parser.HTMLParser.__init__(self, *args, **kw)
        except TypeError:
            # convert_charrefs is only supported by Python 3.4+
            del kw['convert_charrefs']
            html.parser.HTMLParser.__init__(self, *args, **kw)

        self.scripts = []  # source text of the Python scripts found
        self.py_tags = []  # stack of Python blocks (not used by this class)
        # one marker per <script> tag currently open : "py_script" or
        # "js_script"
        self.tag_stack = []

    def handle_starttag(self, tag, attrs):
        """Push a marker telling whether an opening <script> is Python."""
        if tag.lower() != "script":
            return
        is_python = any(
            key == 'type' and value == "text/python"
            for key, value in attrs
        )
        self.tag_stack.append("py_script" if is_python else "js_script")

    def handle_endtag(self, tag):
        """Pop the marker of the <script> tag being closed."""
        # a stray </script> without a matching opening tag must not raise
        # IndexError on an empty stack
        if tag.lower() == "script" and self.tag_stack:
            self.tag_stack.pop()

    def handle_data(self, data):
        """Store non-blank text found inside a Python <script> tag."""
        if not data.strip():
            return
        if self.tag_stack and self.tag_stack[-1] == "py_script":
            self.scripts.append(data)
class Visitor(ast.NodeVisitor):
    """AST visitor collecting the names of the modules imported by a tree.

    After ``visit(tree)``, ``imports`` is the set of module names found in
    ``import x`` statements and in absolute ``from x import y``
    statements, at any nesting level.
    """

    def __init__(self, *args, **kw):
        ast.NodeVisitor.__init__(self, *args, **kw)
        self.imports = set()  # dotted module names, as written in the source

    def visit_Import(self, node):
        """Record every module named in an ``import a, b.c`` statement."""
        for alias in node.names:
            self.imports.add(alias.name)

    def visit_ImportFrom(self, node):
        """Record the module of an absolute ``from x import y`` statement."""
        # Relative imports (level > 0) refer to the enclosing package, not
        # to a module that can be looked up by name, so they are skipped;
        # node.module is None for "from . import y".
        if node.level == 0 and node.module:
            self.imports.add(node.module)
def get_imports(src):
    """Return the names of the modules imported by the source code src.

    src is parsed with ast.parse() and the resulting tree is walked by a
    Visitor instance; the names it collected are returned as a sorted
    list, without duplicates.

    Raises SyntaxError if src is not valid Python source.
    """
    tree = ast.parse(src)
    visitor = Visitor()
    visitor.visit(tree)
    # The names are held in a set, whose iteration order for strings can
    # change from one run to the next; sort so that the result is
    # reproducible.
    return sorted(visitor.imports)
# Walk the current directory and its subdirectories, reporting :
# - for each .html file, the number of Python scripts it contains
# - for each .py file, its name and the modules it imports
# A file that cannot be decoded or parsed is reported and skipped, so that a
# single bad file does not abort the whole scan.
for dirname, dirnames, filenames in os.walk(os.getcwd()):
    for filename in filenames:
        ext = os.path.splitext(filename)[1].lower()
        path = os.path.join(dirname, filename)
        if ext == '.html':
            # First pass : only to find the charset declared in the page.
            # iso-8859-1 maps every byte to a character, so this read
            # cannot fail on decoding.
            charset_detector = CharsetDetector()
            with open(path, encoding="iso-8859-1") as fobj:
                charset_detector.feed(fobj.read())

            # Second pass : read again with the declared charset and
            # collect the Python scripts.
            parser = Parser()
            try:
                with open(path, encoding=charset_detector.encoding) as fobj:
                    parser.feed(fobj.read())
            except (LookupError, UnicodeDecodeError) as exc:
                # LookupError : the page declares a charset unknown to
                # Python ; UnicodeDecodeError : the content does not match
                # the declared charset
                print('cannot decode', path, exc)
                continue
            if parser.scripts:
                print(filename, len(parser.scripts), "scripts")
        elif ext == '.py':
            print(filename)
            try:
                with open(path, encoding="utf-8") as fobj:
                    imports = get_imports(fobj.read())
            except SyntaxError:
                print('syntax error', path)
            except ValueError as exc:
                # includes UnicodeDecodeError for files that are not
                # encoded in utf-8
                print('cannot parse', path, exc)
            else:
                if imports:
                    print(filename, 'imports', imports)