Skip to content

Tighten up rules for namedexpr parens #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
82bf0ed
Tighten up rules for namedexpr parens
dflook Apr 14, 2023
a9bdc14
Fix expression lists
dflook Apr 16, 2023
246bcea
Fix
dflook Apr 16, 2023
c252d00
Tighten up rules for namedexpr parens
dflook Apr 16, 2023
6f4e1a7
Use compressed corpus
dflook Apr 16, 2023
3088a82
expression_list can't have unparenthesized unpacking
dflook Apr 16, 2023
61451f5
Return chooses which expression list or starred list depending on cur…
dflook Apr 16, 2023
a9ef593
Return doesn't need to return a value
dflook Apr 16, 2023
00d3be2
Fix for python <3.9
dflook Apr 16, 2023
7ad7c76
Resume from existing corpus results file
dflook Apr 17, 2023
efb7b6e
Github actions is dogshit
dflook Apr 17, 2023
f20acda
Github actions is dogshit
dflook Apr 17, 2023
78a430c
Github actions is dogshit
dflook Apr 17, 2023
a2499af
Github actions is dogshit
dflook Apr 17, 2023
eb5491a
Tuple assignment target doesn't need parenthesizing
dflook Apr 17, 2023
0af651f
Tweak assignment
dflook Apr 17, 2023
243017b
progress
dflook Apr 18, 2023
140daa1
Add more tests for Python3.11
dflook Apr 19, 2023
82a7d1a
Test with python 2.7 and 3.6+
dflook Apr 19, 2023
ae1dd9d
Fix ifexp in ifexp and namedexpr in root of formattedvalue
dflook Apr 19, 2023
a725584
Fix for namedexpr in tuples
dflook Apr 20, 2023
84490d9
Fix for python<3.8
dflook Apr 20, 2023
6200fee
Fix namedexpr in slices
dflook Apr 21, 2023
8e9ef72
Fix corpus test for python2
dflook Apr 21, 2023
378e8f4
Fix with statements
dflook Apr 21, 2023
ffa15d1
Fix NamedExpr in dict
dflook Apr 21, 2023
920a3ea
Fix complex slices
dflook Apr 22, 2023
9fa57e2
Fix python2 ellipses in slice
dflook Apr 22, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_corpus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ on:
type: boolean
description: 'Regenerate results'
required: true
default: true
default: false
workflow_call:
inputs:
ref:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/xtest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:

- name: Run tests
run: |

if [[ "${{ matrix.python }}" == "python3.4" ]]; then
(cd /usr/lib64/python3.4/test && python3.4 make_ssl_certs.py)
elif [[ "${{ matrix.python }}" == "python3.5" ]]; then
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ docs/source/transforms/*.min.py
.circleci-config.yml
.coverage
.mypy_cache/
NOTES.md
166 changes: 121 additions & 45 deletions corpus_test/generate_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from result import Result, ResultReader

ENHANCED_REPORT = os.environ.get('ENHANCED_REPORT', False)
ENHANCED_REPORT = os.environ.get('ENHANCED_REPORT', True)


@dataclass
Expand Down Expand Up @@ -64,6 +64,9 @@ def mean_percent_of_original(self) -> float:
def larger_than_original(self) -> Iterable[Result]:
"""Return those entries that have a larger minified size than the original size"""
for result in self.entries.values():
if result.outcome != 'Minified':
continue

if result.original_size < result.minified_size:
yield result

Expand Down Expand Up @@ -91,10 +94,18 @@ def compare_size_increase(self, base: 'ResultSet') -> Iterable[Result]:
"""

for result in self.entries.values():
if result.outcome != 'Minified':
# This result was not minified, so we can't compare
continue

if result.corpus_entry not in base.entries:
continue

base_result = base.entries[result.corpus_entry]
if base_result.outcome != 'Minified':
# The base result was not minified, so we can't compare
continue

if result.minified_size > base_result.minified_size:
yield result

Expand All @@ -104,10 +115,17 @@ def compare_size_decrease(self, base: 'ResultSet') -> Iterable[Result]:
"""

for result in self.entries.values():
if result.outcome != 'Minified':
continue

if result.corpus_entry not in base.entries:
continue

base_result = base.entries[result.corpus_entry]
if base_result.outcome != 'Minified':
# The base result was not minified, so we can't compare
continue

if result.minified_size < base_result.minified_size:
yield result

Expand Down Expand Up @@ -164,6 +182,103 @@ def format_difference(compare: Iterable[Result], base: Iterable[Result]) -> str:
else:
return s

def report_larger_than_original(results_dir: str, python_versions: Iterable[str], minifier_sha: str) -> Iterable[str]:
    """
    Yield markdown lines for the 'Larger than original' report section.

    Lists corpus entries whose minified size exceeds their original size.

    :param results_dir: Directory containing the results files
    :param python_versions: Python versions to include in the report
    :param minifier_sha: The python-minifier sha that was tested
    """
    yield '''
## Larger than original

| Corpus Entry | Original Size | Minified Size |
|--------------|--------------:|--------------:|'''

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            # No results file for this version; skip it rather than abort the report
            continue

        larger_than_original = sorted(summary.larger_than_original(), key=lambda result: result.original_size)

        for entry in larger_than_original:
            yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - entry.original_size:+}) |'

def report_unstable(results_dir: str, python_versions: Iterable[str], minifier_sha: str) -> Iterable[str]:
    """
    Yield markdown lines for the 'Unstable' report section.

    Lists corpus entries whose minification was reported as unstable,
    per python version.

    :param results_dir: Directory containing the results files
    :param python_versions: Python versions to include in the report
    :param minifier_sha: The python-minifier sha that was tested
    """
    yield '''
## Unstable

| Corpus Entry | Python Version | Original Size |
|--------------|----------------|--------------:|'''

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            # No results file for this version; skip it rather than abort the report
            continue

        unstable = sorted(summary.unstable_minification(), key=lambda result: result.original_size)

        for entry in unstable:
            yield f'| {entry.corpus_entry} | {python_version} | {entry.original_size} |'

def report_exceptions(results_dir: str, python_versions: Iterable[str], minifier_sha: str) -> Iterable[str]:
    """
    Yield markdown lines for the 'Exceptions' report section.

    Lists corpus entries that raised an exception during minification,
    per python version. Emits a placeholder row when no exceptions occurred.

    :param results_dir: Directory containing the results files
    :param python_versions: Python versions to include in the report
    :param minifier_sha: The python-minifier sha that was tested
    """
    yield '''
## Exceptions

| Corpus Entry | Python Version | Exception |
|--------------|----------------|-----------|'''

    exceptions_found = False

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            # No results file for this version; skip it rather than abort the report
            continue

        exceptions = sorted(summary.exception(), key=lambda result: result.original_size)

        for entry in exceptions:
            exceptions_found = True
            yield f'| {entry.corpus_entry} | {python_version} | {entry.outcome} |'

    if not exceptions_found:
        yield ' None | | |'

def report_larger_than_base(results_dir: str, python_versions: Iterable[str], minifier_sha: str, base_sha: str) -> Iterable[str]:
    """
    Yield markdown lines for the 'Top 10 Larger than base' report section.

    Lists up to 10 corpus entries (per python version) whose minified size
    grew relative to the base sha. Emits a placeholder row when none grew.

    :param results_dir: Directory containing the results files
    :param python_versions: Python versions to include in the report
    :param minifier_sha: The python-minifier sha that was tested
    :param base_sha: The sha of the base commit to compare against
    """
    yield '''
## Top 10 Larger than base

| Corpus Entry | Original Size | Minified Size |
|--------------|--------------:|--------------:|'''

    there_are_some_larger_than_base = False

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            # No results file for this version; skip it rather than abort the report
            continue

        base_summary = result_summary(results_dir, python_version, base_sha)
        larger_than_original = sorted(summary.compare_size_increase(base_summary), key=lambda result: result.original_size)[:10]

        for entry in larger_than_original:
            there_are_some_larger_than_base = True
            yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - base_summary.entries[entry.corpus_entry].minified_size:+}) |'

    if not there_are_some_larger_than_base:
        yield '| N/A | N/A | N/A |'

def report_slowest(results_dir: str, python_versions: Iterable[str], minifier_sha: str) -> Iterable[str]:
    """
    Yield markdown lines for the 'Top 10 Slowest' report section.

    Lists the 10 slowest corpus entries (by minification time) per python
    version.

    :param results_dir: Directory containing the results files
    :param python_versions: Python versions to include in the report
    :param minifier_sha: The python-minifier sha that was tested
    """
    yield '''
## Top 10 Slowest

| Corpus Entry | Original Size | Minified Size | Time |
|--------------|--------------:|--------------:|-----:|'''

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            # Tolerate a missing results file, matching the other report_* sections,
            # instead of aborting the whole report
            continue

        for entry in sorted(summary.entries.values(), key=lambda entry: entry.time, reverse=True)[:10]:
            yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} | {entry.time:.3f} |'

def report(results_dir: str, minifier_ref: str, minifier_sha: str, base_ref: str, base_sha: str) -> Iterable[str]:
"""
Expand Down Expand Up @@ -236,50 +351,11 @@ def format_size_change_detail() -> str:
)

if ENHANCED_REPORT:
yield '''
## Larger than original

| Corpus Entry | Original Size | Minified Size |
|--------------|--------------:|--------------:|'''

for python_version in ['3.11']:
summary = result_summary(results_dir, python_version, minifier_sha)
larger_than_original = sorted(summary.larger_than_original(), key=lambda result: result.original_size)

for entry in larger_than_original:
yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - entry.original_size:+}) |'

yield '''
## Top 10 Larger than base

| Corpus Entry | Original Size | Minified Size |
|--------------|--------------:|--------------:|'''

there_are_some_larger_than_base = False

for python_version in ['3.11']:
summary = result_summary(results_dir, python_version, minifier_sha)
base_summary = result_summary(results_dir, python_version, base_sha)
larger_than_original = sorted(summary.compare_size_increase(base_summary), key=lambda result: result.original_size)[:10]

for entry in larger_than_original:
there_are_some_larger_than_base = True
yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - base_summary.entries[entry.corpus_entry].minified_size:+}) |'

if not there_are_some_larger_than_base:
yield '| N/A | N/A | N/A |'

yield '''
## Top 10 Slowest

| Corpus Entry | Original Size | Minified Size | Time |
|--------------|--------------:|--------------:|-----:|'''

for python_version in ['3.11']:
summary = result_summary(results_dir, python_version, minifier_sha)

for entry in sorted(summary.entries.values(), key=lambda entry: entry.time, reverse=True)[:10]:
yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} | {entry.time:.3f} |'
yield from report_larger_than_original(results_dir, ['3.11'], minifier_sha)
yield from report_larger_than_base(results_dir, ['3.11'], minifier_sha, base_sha)
yield from report_slowest(results_dir, ['3.11'], minifier_sha)
yield from report_unstable(results_dir, ['2.7', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], minifier_sha)
yield from report_exceptions(results_dir, ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], minifier_sha)


def main():
Expand Down
60 changes: 52 additions & 8 deletions corpus_test/generate_results.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import argparse
import datetime
import gzip
import os
import sys
import time


import logging


import python_minifier
from result import Result, ResultWriter

Expand All @@ -23,8 +29,13 @@ def minify_corpus_entry(corpus_path, corpus_entry):
:rtype: Result
"""

with open(os.path.join(corpus_path, corpus_entry), 'rb') as f:
source = f.read()
if os.path.isfile(os.path.join(corpus_path, corpus_entry + '.py.gz')):
with gzip.open(os.path.join(corpus_path, corpus_entry + '.py.gz'), 'rb') as f:
source = f.read()
else:
with open(os.path.join(corpus_path, corpus_entry), 'rb') as f:
source = f.read()


result = Result(corpus_entry, len(source), 0, 0, '')

Expand Down Expand Up @@ -72,21 +83,54 @@ def corpus_test(corpus_path, results_path, sha, regenerate_results):
:param str sha: The python-minifier sha we are testing
:param bool regenerate_results: Regenerate results even if they are present
"""
corpus_entries = os.listdir(corpus_path)

python_version = '.'.join([str(s) for s in sys.version_info[:2]])

log_path = 'results_' + python_version + '_' + sha + '.log'
print('Logging in GitHub Actions is absolute garbage. Logs are going to ' + log_path)

logging.basicConfig(filename=os.path.join(results_path, log_path), level=logging.DEBUG)

corpus_entries = [entry[:-len('.py.gz')] for entry in os.listdir(corpus_path)]

results_file_path = os.path.join(results_path, 'results_' + python_version + '_' + sha + '.csv')

if os.path.isfile(results_file_path) and not regenerate_results:
print('Results file already exists: %s', results_file_path)
return
if os.path.isfile(results_file_path):
logging.info('Results file already exists: %s', results_file_path)
if regenerate_results:
os.remove(results_file_path)

total_entries = len(corpus_entries)
logging.info('Testing python-minifier on %d entries' % total_entries)
tested_entries = 0

start_time = time.time()
next_checkpoint = time.time() + 60

with ResultWriter(results_file_path) as result_writer:
logging.info('%d results already present' % len(result_writer))

for entry in corpus_entries:
print(entry)
if entry in result_writer:
continue

logging.debug(entry)

result = minify_corpus_entry(corpus_path, entry)
result_writer.write(result)
tested_entries += 1

sys.stdout.flush()

if time.time() > next_checkpoint:
percent = len(result_writer) / total_entries * 100
time_per_entry = (time.time() - start_time) / tested_entries
entries_remaining = len(corpus_entries) - len(result_writer)
time_remaining = int(entries_remaining * time_per_entry)
logging.info('Tested %d/%d entries (%d%%) %s seconds remaining' % (len(result_writer), total_entries, percent, time_remaining))
sys.stdout.flush()
next_checkpoint = time.time() + 60

logging.info('Finished')

def bool_parse(value):
    """Parse a workflow string input into a bool: exactly 'true' is True."""
    if value == 'true':
        return True
    return False
Expand Down
32 changes: 31 additions & 1 deletion corpus_test/result.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os


class Result(object):

def __init__(self, corpus_entry, original_size, minified_size, time, outcome):
Expand All @@ -21,15 +24,37 @@ def __init__(self, results_path):
:param str results_path: The path to the results file
"""
self._results_path = results_path
self._size = 0
self._existing_result_set = set()

if not os.path.isfile(self._results_path):
return

with open(self._results_path, 'r') as f:
for line in f:
if line != 'corpus_entry,original_size,minified_size,time,result\n':
self._existing_result_set.add(line.split(',')[0])

self._size += len(self._existing_result_set)

def __enter__(self):
self.results = open(self._results_path, 'w')
self.results = open(self._results_path, 'a')
self.results.write('corpus_entry,original_size,minified_size,time,result\n')
return self

def __exit__(self, exc_type, exc_val, exc_tb):
self.results.close()

def __contains__(self, item):
"""
:param str item: The name of the entry in the corpus
:return bool: True if the entry already exists in the results file
"""
return item in self._existing_result_set

def __len__(self):
    # Number of results recorded so far: entries already present in the
    # results file plus any written through this writer (see write()).
    return self._size

def write(self, result):
"""
:param Result result: The result to write to the file
Expand All @@ -41,6 +66,7 @@ def write(self, result):
str(result.time) + ',' + result.outcome + '\n'
)
self.results.flush()
self._size += 1


class ResultReader:
Expand All @@ -66,7 +92,11 @@ def __next__(self):
"""
:return Result: The next result in the file
"""

line = self.results.readline()
while line == 'corpus_entry,original_size,minified_size,time,result\n':
line = self.results.readline()

if line == '':
raise StopIteration
else:
Expand Down
Loading