Skip to content

Commit

Permalink
Merge pull request #1285 from uccser/lzss-features
Browse files Browse the repository at this point in the history
Add text output and efficiency calculation to LZSS interactive
  • Loading branch information
eAlasdair committed Jun 30, 2020
2 parents f1444db + 8f9d759 commit 560f8e7
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 66 deletions.
Expand Up @@ -11,7 +11,7 @@ The interactive below illustrates this idea using a variation of the Ziv-Lempel

{interactive slug="lzss-compression" type="whole-page"}

In LZSS, a reference to where some text occurred before is actually two numbers: the first says how many characters to count back to where the previous phrase starts, and the second says how long the referenced phrase is.
In LZSS, a reference to where some text occurred before is actually two numbers: the first says where the referenced phrase starts, and the second says how long the referenced phrase is.
Each reference typically takes the space of about one or two characters, so the system makes a saving as long as two characters are replaced.
Of course, all characters count, not just letters of the alphabet, so the system can also refer back to the spaces between words.
In fact, some of the most common sequences are things like a full stop followed by a space.
Expand Down
6 changes: 3 additions & 3 deletions csfieldguide/static/interactives/city-trip/js/city-trip.js
Expand Up @@ -2,7 +2,7 @@ const cytoscape = require('cytoscape');
const noOverlap = require('cytoscape-no-overlap');
const automove = require('cytoscape-automove');
const Mathjs = require('mathjs');
const ha = require('./heapsAlgorithm.js');
const ha = require('./heaps-algorithm.js');

cytoscape.use(noOverlap);
cytoscape.use(automove);
Expand Down Expand Up @@ -489,7 +489,7 @@ function addStartingCityToPath(startingCity, cities) {
}


/** Start timer and begin finding permutations by calling methods in heapsAlgorithm.js. */
/** Start timer and begin finding permutations by calling methods in heaps-algorithm.js. */
function getNextPath(cy, cy2, intermediateCities, startingCity, seconds) {
// if permutations to do
if (!stopPathFinding) {
Expand All @@ -511,7 +511,7 @@ function getNextPath(cy, cy2, intermediateCities, startingCity, seconds) {
}


/** Get the next permutation from heapsAlgorithm.js and render the graph
/** Get the next permutation from heaps-algorithm.js and render the graph
* by calling testNewPath. */
function computeAndDisplayNextRoute(cy, cy2, intermediateCities, startingCity) {
var intermediateCities = ha.getNextPermutation();
Expand Down
Expand Up @@ -36,6 +36,7 @@ function compressText(message) {
var longestMatchOffset;
var longestMatchLength = 0;
var currentMatchLength = 0;
var windowCharacter;

for (var i = 0; i < slidingWindow; i++) {
// get next character in sliding window
Expand All @@ -47,12 +48,14 @@ function compressText(message) {
slidingWindow++;
stringToMatch.splice(0, 1);
// put next character on string to match
stringToMatch.push(message[slidingWindow + MAX_LENGTH]);
if (message.length > slidingWindow + MAX_LENGTH - 1) {
stringToMatch.push(message[slidingWindow + MAX_LENGTH - 1]);
}
// add newline to output
encodedMessage.push(newlineCharacter);
}

if (windowCharacter == stringToMatch[0]) {
if (stringToMatch[0] == windowCharacter) {
// record the current position as the start of the match in the sw
matchOffset = i;
currentMatchLength = 1;
Expand Down Expand Up @@ -90,7 +93,7 @@ function compressText(message) {
encodedMessage.push([longestMatchOffset, longestMatchLength]);
stringToMatch.splice(0, longestMatchLength);
slidingWindow += longestMatchLength;
} else {
} else if (stringToMatch[0]) {
numCharacters = 1;
encodedMessage.push(stringToMatch[0]);
stringToMatch.splice(0, 1);
Expand Down
@@ -1,4 +1,4 @@
const lzssAlgorithm = require('./lzssAlgorithm.js');
const lzssAlgorithm = require('./lzss-algorithm.js');

var encoded_message = [];
var start_index;
Expand All @@ -10,11 +10,15 @@ Some like it in the pot, nine days old.`);
const newlineCharacter = ':n';


// Set placeholder message
// Clear boxes and set placeholder message
/**
 * Page-load handler: pre-fills the input box, wires up the Compress button
 * and blanks the output areas.
 */
window.onload = function() {
  // Show the placeholder text in the input box
  var message_div = document.getElementById('message-to-encode');
  message_div.value = placeholder_message;

  // Register the click handler exactly once; a second identical
  // addEventListener call adds nothing.
  document.getElementById('lzss-compression-compress-button').addEventListener('click', compress, false);

  // Reset the text output box and the two size lines to empty
  document.getElementById('message-to-decode').value = '';
  document.getElementById('base-size').innerHTML = '';
  document.getElementById('encoded-size').innerHTML = '';
};

// Compress the message and display the encoded message
Expand All @@ -24,6 +28,8 @@ function compress() {
document.getElementById('lzss-compression-compressed-text').innerHTML = '';
encoded_message = lzssAlgorithm.compressText(message);
drawEncodedMessage(encoded_message);
writeEncodedMessage(encoded_message);
writeEncodeEfficiency(message, encoded_message);
}

// Create a new div
Expand All @@ -33,15 +39,64 @@ function newLineDiv() {
return line_div;
}

// Output the encoded message
/**
 * Write a sentence on the before/after compression text size.
 *
 * The base size is the length of the original message. The encoded size is
 * an estimate: each single character and each newline marker counts as
 * 1 byte, and each reference counts as 2 bytes.
 *
 * @param {string} message - The original message that was compressed.
 * @param {Array} encoded_message - The encoded items: single characters,
 *     the newline marker, or reference pairs.
 */
function writeEncodeEfficiency(message, encoded_message) {
  var unencoded_size = message.length;
  var encoded_size = 0;

  for (var i = 0; i < encoded_message.length; i++) {
    var item = encoded_message[i];
    if (item === newlineCharacter || item.length === 1) {
      encoded_size += 1;
    } else {
      // Assuming a 1.5 byte reference value plus a 4 bit length value.
      // This assumption is too low when looking at long encoded texts.
      encoded_size += 2;
    }
  }

  // Each item would also need a 1-bit flag saying whether it is a character
  // or a reference, rounded up to whole bytes:
  //   Math.ceil(encoded_message.length / 8)
  // TODO Decide whether to add that to encoded_size. It is accurate but may
  // confuse users, particularly since those flags aren't mentioned in the content.

  var unencoded_size_message = gettext("Base message size:") + " " + interpolate(ngettext('1 Byte', '%s Bytes', unencoded_size), [unencoded_size]);
  var encoded_size_message = gettext("Approximate encoded size:") + " " + interpolate(ngettext('1 Byte', '%s Bytes', encoded_size), [encoded_size]);

  document.getElementById('base-size').innerHTML = unencoded_size_message;
  document.getElementById('encoded-size').innerHTML = encoded_size_message;
}

/**
 * Output the encoded message as text in the 'message-to-decode' box.
 *
 * Newline markers become real line breaks, single characters are copied
 * as they are, and every other item is written as "(first,second)" using
 * its first two entries.
 *
 * @param {Array} encoded_message - The encoded items to write out.
 */
function writeEncodedMessage(encoded_message) {
  var output_box = document.getElementById('message-to-decode');
  var pieces = [];

  for (var entry of encoded_message) {
    if (entry == newlineCharacter) {
      pieces.push("\n");
      continue;
    }

    // Anything that is not a single character is treated as a reference
    var is_single_character = entry.length == 1;
    pieces.push(is_single_character ? entry : "(" + entry[0] + "," + entry[1] + ")");
  }

  output_box.value = pieces.join("");
}

// Output the encoded message (visual box)
function drawEncodedMessage(encoded_message) {
var compressed_text_div = document.getElementById('lzss-compression-compressed-text');

// create new div for first line of the message
var line_div = newLineDiv();

var index = 0;
for (var i = 0; i < encoded_message.length; i++) {
for (var i=0; i < encoded_message.length; i++) {
var string = encoded_message[i];

if (string == newlineCharacter) {
Expand All @@ -50,14 +105,11 @@ function drawEncodedMessage(encoded_message) {
// make a new div for the next line
var line_div = newLineDiv();
index += 1;
continue;
}

if (string.length == 1) { // i.e. just a single character
} else if (string.length == 1) { // i.e. just a single character
// add child div for character to line
var character_div = document.createElement('div');
character_div.classList.add('lzss-compression-encoded-character');
character_div.innerHTML = encoded_message[i];
character_div.innerHTML = string;
character_div.setAttribute('data-index', index);
line_div.append(character_div);
index += 1;
Expand Down
Expand Up @@ -8,7 +8,7 @@ body {

#lzss-compression {
width: 100%;
h3 {
h2 {
text-align: center;
}
#lzss-compression-wrapper {
Expand All @@ -27,57 +27,56 @@ body {
margin-top: 4px;
}
}
}

#lzss-compression-right-box {
#lzss-compression-compressed-text {
background-color: #FFF;
font-weight: bold;
box-sizing: border-box;
width: 100%;
font: 1rem "Courier New", monospace;
padding: 3px;
border: 1px solid #BFB9B7;
min-height: 120px;
.lzss-compression-encoded-line {
display: flex;
align-items: center;
flex-wrap: wrap;
min-height: 25px;
}
.lzss-compression-encoded-character,
#lzss-compression-bottom-box {
margin: 10px;
#lzss-compression-compressed-text {
background-color: #FFF;
font-weight: bold;
box-sizing: border-box;
width: 100%;
font: 1rem "Courier New", monospace;
padding: 3px;
border: 1px solid #BFB9B7;
min-height: 120px;
border-radius: 2px !important;
box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12);
.lzss-compression-encoded-line {
display: flex;
align-items: center;
flex-wrap: wrap;
min-height: 25px;
}
.lzss-compression-encoded-character,
input.lzss-compression-placeholder-box {
display: inline-block;
min-width: 10px;
min-height: 1.2rem;
margin: 1px;
}
.lzss-compression-reference {
display: flex;
flex-wrap: wrap;
margin: 2px;
padding: 2px;
input.lzss-compression-placeholder-box {
display: inline-block;
min-width: 10px;
min-height: 1.2rem;
margin: 1px;
}
.lzss-compression-reference {
display: flex;
flex-wrap: wrap;
margin: 2px;
padding: 2px;
input.lzss-compression-placeholder-box {
width: 1rem;
border: 1px solid #BFB9B7;
}
width: 1rem;
border: 1px solid #BFB9B7;
}
}
}

#lzss-compression-right-box #lzss-compression-compressed-text {
border-radius: 2px !important;
box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12)
}
}
}

#message-to-encode {
#message-to-encode, #message-to-decode {
font: 1rem "Courier New", monospace;
font-size: 1.2rem;
}

#char-replacement-note {
.subtext {
font-size: 0.8rem;
margin: 0px;
color: grey;
}

Expand All @@ -96,11 +95,8 @@ body {
#lzss-compression-wrapper {
flex-direction: column;
#lzss-compression-left-box,
#lzss-compression-right-box {
#lzss-compression-bottom-box {
width: unset;
p {
margin: 2px;
}
}
}
}
Expand Down
24 changes: 18 additions & 6 deletions csfieldguide/templates/interactives/lzss-compression.html
Expand Up @@ -6,25 +6,37 @@
{% block html %}
<div class="container">
<div id="lzss-compression">
<h3>{% trans "LZSS Compression" %}</h3>
<h2>{% trans "LZSS Compression" %}</h2>
<div id="lzss-compression-wrapper">
<div id="lzss-compression-left-box">
<p>{% trans "Enter the message you want to encode here" %}:</p>
<h5>{% trans "Enter the message you want to encode here" %}:</h5>
<p class="subtext">{% trans "Note: spaces are replaced with the &#9251; character." %}</p>
<div id="lzss-compression-input-text">
<textarea class="form-control" rows="5" id="message-to-encode"></textarea>
</div>
<div>
<button id="lzss-compression-compress-button" class="btn btn-primary btn-block" type="button">
{% trans "Compress" %} &rarr;
{% trans "Compress" %} &rarr;
</button>
</div>
</div>
<div id="lzss-compression-right-box">
<p>{% trans "Encoded Message" %}:</p>
<div id="lzss-compression-compressed-text"></div>
<p id="char-replacement-note">{% trans "Note: spaces are replaced with the &#9251; character." %}</p>
<h5>{% trans "Encoded Message (Text)" %}:</h5>
<p class="subtext">{% trans "Each reference is shown in parentheses: (start, length)." %}</p>
<div id="lzss-compression-output-text">
<textarea class="form-control" rows="5" id="message-to-decode" readonly></textarea>
</div>
<div id="result-note">
<div id="base-size"></div>
<div id="encoded-size"></div>
</div>
</div>
</div>
<div id="lzss-compression-bottom-box">
<h5>{% trans "Encoded Message (Visual)" %}:</h5>
<p class="subtext">{% trans "Hover over the encoded portions to see what they reference." %}</p>
<div id="lzss-compression-compressed-text"></div>
</div>
</div>
</div>
{% endblock html %}
Expand Down

0 comments on commit 560f8e7

Please sign in to comment.