In [1]:
# look at tools/set_up_magics.ipynb
yandex_metrica_allowed = True ; get_ipython().run_cell('# one_liner_str\n\nget_ipython().run_cell_magic(\'javascript\', \'\', \n    \'// setup cpp code highlighting\\n\'\n    \'IPython.CodeCell.options_default.highlight_modes["text/x-c++src"] = {\\\'reg\\\':[/^%%cpp/]} ;\'\n    \'IPython.CodeCell.options_default.highlight_modes["text/x-cmake"] = {\\\'reg\\\':[/^%%cmake/]} ;\'\n    \'IPython.CodeCell.options_default.highlight_modes["text/x-sql"] = {\\\'reg\\\':[/^%%sql/]} ;\'\n)\n\n# creating magics\nfrom IPython.core.magic import register_cell_magic, register_line_magic\nfrom IPython.display import display, Markdown, HTML\nimport argparse\nfrom subprocess import Popen, PIPE, STDOUT, check_output\nimport html\nimport random\nimport sys\nimport os\nimport re\nimport signal\nimport shutil\nimport shlex\nimport glob\nimport time\n\n@register_cell_magic\ndef save_file(args_str, cell, line_comment_start="#"):\n    parser = argparse.ArgumentParser()\n    parser.add_argument("fname")\n    parser.add_argument("--ejudge-style", action="store_true")\n    parser.add_argument("--under-spoiler-threshold", type=int, default=None)\n    args = parser.parse_args(args_str.split())\n    \n    cell = cell if cell[-1] == \'\\n\' or args.no_eof_newline else cell + "\\n"\n    cmds = []\n    with open(args.fname, "w") as f:\n        f.write(line_comment_start + " %%cpp " + args_str + "\\n")\n        for line in cell.split("\\n"):\n            line_to_write = (line if not args.ejudge_style else line.rstrip()) + "\\n"\n            if not line.startswith("%"):\n                f.write(line_to_write)\n            else:\n                f.write(line_comment_start + " " + line_to_write)\n                run_prefix = "%run "\n                md_prefix = "%MD "\n                comment_prefix = "%" + line_comment_start\n                if line.startswith(run_prefix):\n                    cmds.append(line[len(run_prefix):].strip())\n                elif line.startswith(md_prefix):\n                    cmds.append(\'#<MD>\' + line[len(md_prefix):].strip())\n                elif line.startswith(comment_prefix):\n                    cmds.append(\'#\' + line[len(comment_prefix):].strip())\n                else:\n                    raise Exception("Unknown %%save_file subcommand: \'%s\'" % line)\n                \n        f.write("" if not args.ejudge_style else line_comment_start + r" line without \\n")\n    for cmd in cmds:\n        if cmd.startswith(\'#\'):\n            if cmd.startswith(\'#<MD>\'):\n                display(Markdown(cmd[5:]))\n            else:\n                display(Markdown("\\#\\#\\#\\# `%s`" % cmd[1:]))\n        else:\n            display(Markdown("Run: `%s`" % cmd))\n            if args.under_spoiler_threshold:\n                out = check_output(cmd, stderr=STDOUT, shell=True, universal_newlines=True)\n                out = out[:-1] if out.endswith(\'\\n\') else out\n                out = html.escape(out)\n                if len(out.split(\'\\n\')) > args.under_spoiler_threshold:\n                    out = "<details> <summary> output </summary> <pre><code>%s</code></pre></details>" % out\n                elif out:\n                    out = "<pre><code>%s</code></pre>" % out\n                if out:\n                    display(HTML(out))\n            else:\n                get_ipython().system(cmd)\n\n@register_cell_magic\ndef cpp(fname, cell):\n    save_file(fname, cell, "//")\n    \n@register_cell_magic\ndef cmake(fname, cell):\n    save_file(fname, cell, "#")\n\n@register_cell_magic\ndef asm(fname, cell):\n    save_file(fname, cell, "//")\n    \n@register_cell_magic\ndef makefile(fname, cell):\n    fname = fname or "makefile"\n    assert fname.endswith("makefile")\n    save_file(fname, cell.replace(" " * 4, "\\t"))\n        \n@register_line_magic\ndef p(line):\n    line = line.strip() \n    if line[0] == \'#\':\n        display(Markdown(line[1:].strip()))\n    else:\n        try:\n            expr, comment = line.split(" #")\n            display(Markdown("`{} = {}`  # {}".format(expr.strip(), eval(expr), comment.strip())))\n        except:\n            display(Markdown("{} = {}".format(line, eval(line))))\n    \n    \ndef show_log_file(file, return_html_string=False):\n    obj = file.replace(\'.\', \'_\').replace(\'/\', \'_\') + "_obj"\n    html_string = \'\'\'\n        <!--MD_BEGIN_FILTER-->\n        <script type=text/javascript>\n        var entrance___OBJ__ = 0;\n        var errors___OBJ__ = 0;\n        function halt__OBJ__(elem, color)\n        {\n            elem.setAttribute("style", "font-size: 14px; background: " + color + "; padding: 10px; border: 3px; border-radius: 5px; color: white; ");                    \n        }\n        function refresh__OBJ__()\n        {\n            entrance___OBJ__ -= 1;\n            if (entrance___OBJ__ < 0) {\n                entrance___OBJ__ = 0;\n            }\n            var elem = document.getElementById("__OBJ__");\n            if (elem) {\n                var xmlhttp=new XMLHttpRequest();\n                xmlhttp.onreadystatechange=function()\n                {\n                    var elem = document.getElementById("__OBJ__");\n                    console.log(!!elem, xmlhttp.readyState, xmlhttp.status, entrance___OBJ__);\n                    if (elem && xmlhttp.readyState==4) {\n                        if (xmlhttp.status==200)\n                        {\n                            errors___OBJ__ = 0;\n                            if (!entrance___OBJ__) {\n                                if (elem.innerHTML != xmlhttp.responseText) {\n                                    elem.innerHTML = xmlhttp.responseText;\n                                }\n                                if (elem.innerHTML.includes("Process finished.")) {\n                                    halt__OBJ__(elem, "#333333");\n                                } else {\n                                    entrance___OBJ__ += 1;\n                                    console.log("req");\n                                    window.setTimeout("refresh__OBJ__()", 300); \n                                }\n                            }\n                            return xmlhttp.responseText;\n                        } else {\n                            errors___OBJ__ += 1;\n                            if (!entrance___OBJ__) {\n                                if (errors___OBJ__ < 6) {\n                                    entrance___OBJ__ += 1;\n                                    console.log("req");\n                                    window.setTimeout("refresh__OBJ__()", 300); \n                                } else {\n                                    halt__OBJ__(elem, "#994444");\n                                }\n                            }\n                        }\n                    }\n                }\n                xmlhttp.open("GET", "__FILE__", true);\n                xmlhttp.setRequestHeader("Cache-Control", "no-cache");\n                xmlhttp.send();     \n            }\n        }\n        \n        if (!entrance___OBJ__) {\n            entrance___OBJ__ += 1;\n            refresh__OBJ__(); \n        }\n        </script>\n\n        <p id="__OBJ__" style="font-size: 14px; background: #000000; padding: 10px; border: 3px; border-radius: 5px; color: white; ">\n        </p>\n        \n        </font>\n        <!--MD_END_FILTER-->\n        <!--MD_FROM_FILE __FILE__.md -->\n        \'\'\'.replace("__OBJ__", obj).replace("__FILE__", file)\n    if return_html_string:\n        return html_string\n    display(HTML(html_string))\n\n    \nclass TInteractiveLauncher:\n    tmp_path = "./interactive_launcher_tmp"\n    def __init__(self, cmd):\n        try:\n            os.mkdir(TInteractiveLauncher.tmp_path)\n        except:\n            pass\n        name = str(random.randint(0, 1e18))\n        self.inq_path = os.path.join(TInteractiveLauncher.tmp_path, name + ".inq")\n        self.log_path = os.path.join(TInteractiveLauncher.tmp_path, name + ".log")\n        \n        os.mkfifo(self.inq_path)\n        open(self.log_path, \'w\').close()\n        open(self.log_path + ".md", \'w\').close()\n\n        self.pid = os.fork()\n        if self.pid == -1:\n            print("Error")\n        if self.pid == 0:\n            exe_cands = glob.glob("../tools/launcher.py") + glob.glob("../../tools/launcher.py")\n            assert(len(exe_cands) == 1)\n            assert(os.execvp("python3", ["python3", exe_cands[0], "-l", self.log_path, "-i", self.inq_path, "-c", cmd]) == 0)\n        self.inq_f = open(self.inq_path, "w")\n        interactive_launcher_opened_set.add(self.pid)\n        show_log_file(self.log_path)\n\n    def write(self, s):\n        s = s.encode()\n        assert len(s) == os.write(self.inq_f.fileno(), s)\n        \n    def get_pid(self):\n        n = 100\n        for i in range(n):\n            try:\n                return int(re.findall(r"PID = (\\d+)", open(self.log_path).readline())[0])\n            except:\n                if i + 1 == n:\n                    raise\n                time.sleep(0.1)\n        \n    def input_queue_path(self):\n        return self.inq_path\n        \n    def wait_stop(self, timeout):\n        for i in range(int(timeout * 10)):\n            wpid, status = os.waitpid(self.pid, os.WNOHANG)\n            if wpid != 0:\n                return True\n            time.sleep(0.1)\n        return False\n        \n    def close(self, timeout=3):\n        self.inq_f.close()\n        if not self.wait_stop(timeout):\n            os.kill(self.get_pid(), signal.SIGKILL)\n            os.waitpid(self.pid, 0)\n        os.remove(self.inq_path)\n        # os.remove(self.log_path)\n        self.inq_path = None\n        self.log_path = None \n        interactive_launcher_opened_set.remove(self.pid)\n        self.pid = None\n        \n    @staticmethod\n    def terminate_all():\n        if "interactive_launcher_opened_set" not in globals():\n            globals()["interactive_launcher_opened_set"] = set()\n        global interactive_launcher_opened_set\n        for pid in interactive_launcher_opened_set:\n            print("Terminate pid=" + str(pid), file=sys.stderr)\n            os.kill(pid, signal.SIGKILL)\n            os.waitpid(pid, 0)\n        interactive_launcher_opened_set = set()\n        if os.path.exists(TInteractiveLauncher.tmp_path):\n            shutil.rmtree(TInteractiveLauncher.tmp_path)\n    \nTInteractiveLauncher.terminate_all()\n   \nyandex_metrica_allowed = bool(globals().get("yandex_metrica_allowed", False))\nif yandex_metrica_allowed:\n    display(HTML(\'\'\'<!-- YANDEX_METRICA_BEGIN -->\n    <script type="text/javascript" >\n       (function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)};\n       m[i].l=1*new Date();k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)})\n       (window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");\n\n       ym(59260609, "init", {\n            clickmap:true,\n            trackLinks:true,\n            accurateTrackBounce:true\n       });\n    </script>\n    <noscript><div><img src="https://mc.yandex.ru/watch/59260609" style="position:absolute; left:-9999px;" alt="" /></div></noscript>\n    <!-- YANDEX_METRICA_END -->\'\'\'))\n\ndef make_oneliner():\n    html_text = \'("–í —ç—Ç–æ—Ç –Ω–æ—É—Ç–±—É–∫ –≤—Å—Ç—Ä–æ–µ–Ω –∫–æ–¥ –Ø–Ω–¥–µ–∫—Å –ú–µ—Ç—Ä–∏–∫–∏ –¥–ª—è —Å–±–æ—Ä–∞ —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–π. –ï—Å–ª–∏ –≤—ã –Ω–µ —Ö–æ—Ç–∏—Ç–µ, —á—Ç–æ–±—ã –ø–æ –≤–∞–º —Å–æ–±–∏—Ä–∞–ª–∞—Å—å —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞, –∏—Å–ø—Ä–∞–≤—å—Ç–µ: yandex_metrica_allowed = False" if yandex_metrica_allowed else "")\'\n    html_text += \' + "<""!-- MAGICS_SETUP_PRINTING_END -->"\'\n    return \'\'.join([\n        \'# look at tools/set_up_magics.ipynb\\n\',\n        \'yandex_metrica_allowed = True ; get_ipython().run_cell(%s);\' % repr(one_liner_str),\n        \'display(HTML(%s))\' % html_text,\n        \' #\'\'MAGICS_SETUP_END\'\n    ])\n       \n\n');display(HTML(("–í —ç—Ç–æ—Ç –Ω–æ—É—Ç–±—É–∫ –≤—Å—Ç—Ä–æ–µ–Ω –∫–æ–¥ –Ø–Ω–¥–µ–∫—Å –ú–µ—Ç—Ä–∏–∫–∏ –¥–ª—è —Å–±–æ—Ä–∞ —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–π. –ï—Å–ª–∏ –≤—ã –Ω–µ —Ö–æ—Ç–∏—Ç–µ, —á—Ç–æ–±—ã –ø–æ –≤–∞–º —Å–æ–±–∏—Ä–∞–ª–∞—Å—å —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞, –∏—Å–ø—Ä–∞–≤—å—Ç–µ: yandex_metrica_allowed = False" if yandex_metrica_allowed else "") + "<""!-- MAGICS_SETUP_PRINTING_END -->")) #MAGICS_SETUP_END

<IPython.core.display.Javascript object>

# Ints & Floats & Strings encoding

[–ó–∞–ø–∏—Å—å —Å–µ–º–∏–Ω–∞—Ä–∞](https://www.youtube.com/watch?TODO)


[–†–∏–¥–∏–Ω–≥ –Ø–∫–æ–≤–ª–µ–≤–∞: –¶–µ–ª–æ—á–∏—Å–ª–µ–Ω–Ω–∞—è –∞—Ä–∏—Ñ–º–µ—Ç–∏–∫–∞](https://github.com/victor-yacovlev/mipt-diht-caos/tree/master/practice/integers) 
<br>[–†–∏–¥–∏–Ω–≥ –Ø–∫–æ–≤–ª–µ–≤–∞: –í–µ—â–µ—Å—Ç–≤–µ–Ω–Ω–∞—è –∞—Ä–∏—Ñ–º–µ—Ç–∏–∫–∞](https://github.com/victor-yacovlev/mipt-diht-caos/tree/master/practice/ieee754) 



–°–µ–≥–æ–¥–Ω—è –≤ –ø—Ä–æ–≥—Ä–∞–º–º–µ:
* <a href="#int" style="color:#856024"> –¶–µ–ª—ã–µ —á–∏—Å–ª–∞ </a>
  * <a href="#ubsan" style="color:#856024"> UBSAN </a>
  * <a href="#saturation" style="color:#856024"> –ù–∞—Å—ã—â–µ–Ω–∏–µ </a>
* <a href="#float" style="color:#856024"> –í–µ—â–µ—Å—Ç–≤–µ–Ω–Ω—ã–µ —á–∏—Å–ª–∞ </a>
* <a href="#str" style="color:#856024"> –°—Ç—Ä–æ–∫–∏ </a>
  * <a href="#ascii" style="color:#856024"> ASCII </a>
  * <a href="#utf-8" style="color:#856024"> UTF-8 </a>

## <a name="int"></a> –¶–µ–ª—ã–µ —á–∏—Å–ª–∞

–ü—Ä–æ–∏–∑–≤–æ–¥—è `+`, `-`, `*` —Å–æ —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω—ã–º–∏ —Ü–µ–ª–æ—á–∏—Å–ª–µ–Ω–Ω—ã–º–∏ —Ç–∏–ø–∞–º–∏ –≤ –ø—Ä–æ–≥—Ä–∞–º–º–µ –º—ã —Ä–∞–±–æ—Ç–∞–µ–º $\mathbb{Z}_{2^k}$, –≥–¥–µ $k$ - –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –±–∏—Ç –≤ —á–∏—Å–ª–µ. –ü—Ä–∏—á–µ–º —ç—Ç–æ –≤–µ—Ä–Ω–æ –∫–∞–∫ —Å–æ –∑–Ω–∞–∫–æ–≤—ã–º–∏, —Ç–∞–∫ –∏ –±–µ–∑–∑–Ω–∞–∫–æ–≤—ã–º–∏ —á–∏—Å–ª–∞–º–∏.

–í –ø—Ä–æ—Ü–µ—Å—Å–æ—Ä–µ –¥–ª—è —Å–ª–æ–∂–µ–Ω–∏—è –∑–Ω–∞–∫–æ–≤—ã—Ö –∏ –±–µ–∑–∑–Ω–∞–∫–æ–≤—ã—Ö —á–∏—Å–µ–ª –≤—ã–ø–æ–ª–Ω—è–µ—Ç—Å—è –æ–¥–Ω–∞ –∏ —Ç–∞ –∂–µ –∏–Ω—Å—Ç—Ä—É–∫—Ü–∏—è.

In [15]:
k = 3 # min 0, max 7 = (1 << 3) - 1
m = (1 << k)

def normalize(x):
    return ((x % m) + m) % m

def format_n(x):
    x = normalize(x)
    return "%du, %+d, 0b%s" % (x, x if x < (m >> 1) else x - m, bin(x + m)[3:])
    
for i in range(0, m):
    print("i=%d -> %s" % (i, format_n(i)))

i=0 -> 0u, +0, 0b000
i=1 -> 1u, +1, 0b001
i=2 -> 2u, +2, 0b010
i=3 -> 3u, +3, 0b011
i=4 -> 4u, -4, 0b100
i=5 -> 5u, -3, 0b101
i=6 -> 6u, -2, 0b110
i=7 -> 7u, -1, 0b111


In [16]:
def show_add(a, b):
    print("%d + %d = %d" % (a, b, a + b))
    print("    (%s) + (%s) = (%s)" % (format_n(a), format_n(b), format_n(a + b)))
show_add(2, 1)
show_add(2, -1)

2 + 1 = 3
    (2u, +2, 0b010) + (1u, +1, 0b001) = (3u, +3, 0b011)
2 + -1 = 1
    (2u, +2, 0b010) + (7u, -1, 0b111) = (1u, +1, 0b001)


In [17]:
def show_mul(a, b):
    print("%d * %d = %d" % (a, b, a * b))
    print("    (%s) * (%s) = (%s)" % (format_n(a), format_n(b), format_n(a * b)))
show_mul(2, 3)
show_mul(-2, -3)
show_mul(-1, -1)

2 * 3 = 6
    (2u, +2, 0b010) * (3u, +3, 0b011) = (6u, -2, 0b110)
-2 * -3 = 6
    (6u, -2, 0b110) * (5u, -3, 0b101) = (6u, -2, 0b110)
-1 * -1 = 1
    (7u, -1, 0b111) * (7u, -1, 0b111) = (1u, +1, 0b001)


–ù–æ –µ—Å—Ç—å –Ω–µ–∫–æ—Ç–æ—Ä—ã–µ —Ç–æ–Ω–∫–æ—Å—Ç–∏. 

–ï—Å–ª–∏ –≤—ã –ø–∏—à–µ—Ç–µ –∫–æ–¥ –Ω–∞ C/C++ —Ç–æ –∫–æ–º–ø–∏–ª—è—Ç–æ—Ä —Å—á–∏—Ç–∞–µ—Ç **–ø–µ—Ä–µ–ø–æ–ª–Ω–µ–Ω–∏–µ –∑–Ω–∞–∫–æ–≤–æ–≥–æ —Ç–∏–ø–∞ UB** (undefined behavior). –≠—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –µ–º—É –ø—Ä–æ–≤–æ–¥–∏—Ç—å –æ–ø—Ç–∏–º–∏–∑–∞—Ü–∏–∏.

–ê –ø–µ—Ä–µ–ø–æ–ª–Ω–µ–Ω–∏–µ –±–µ–∑–∑–Ω–∞–∫–æ–≤–æ–≥–æ —Ç–∏–ø–∞ - –∑–∞–∫–æ–Ω–Ω–æ–π –æ–ø–µ—Ä–∞—Ü–∏–µ–π, –ø—Ä–∏ –∫–æ—Ç–æ—Ä–æ–π –ø—Ä–æ—Å—Ç–æ –æ—Ç–±—Ä–∞—Å—ã–≤–∞—é—Ç—Å—è —Å—Ç–∞—Ä—à–∏–µ –±–∏—Ç—ã (–∏–ª–∏ –∑–Ω–∞—á–µ–Ω–∏–µ –±–µ—Ä–µ—Ç—Å—è –ø–æ –º–æ–¥—É–ª—é $2^k$, –∏–ª–∏ –ø—Ä–æ—Å—Ç–æ –æ–ø–µ—Ä–∞—Ü–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç—Å—è –≤ $\mathbb{Z}_{2^k}$ - –º–æ–∂–µ—Ç–µ –≤—ã–±–∏—Ä–∞—Ç—å –±–æ–ª–µ–µ —É–¥–æ–±–Ω—ã–π –¥–ª—è –≤–∞—Å —Å–ø–æ—Å–æ–± –Ω–∞ —ç—Ç–æ —Å–º–æ—Ç—Ä–µ—Ç—å).

In [168]:
%%cpp lib.c
%run gcc -O3 -shared -fPIC lib.c -o lib.so  -Os -Wl,--gc-sections -fno-asynchronous-unwind-tables -fcf-protection=branch -mmanual-endbr 

int check_increment(int x) {
    return x + 1 > x; // –í—Å–µ–≥–¥–∞ –ª–∏ true?
}

int unsigned_check_increment(unsigned int x) {
    return x + 1 > x; // –í—Å–µ–≥–¥–∞ –ª–∏ true?
}

Run: `gcc -O3 -shared -fPIC lib.c -o lib.so  -Os -Wl,--gc-sections -fno-asynchronous-unwind-tables -fcf-protection=branch -mmanual-endbr`

In [169]:
import ctypes

int32_max = (1 << 31) - 1
uint32_max = (1 << 32) - 1

lib = ctypes.CDLL("./lib.so")
lib.check_increment.argtypes = [ctypes.c_int]
lib.unsigned_check_increment.argtypes = [ctypes.c_uint]

%p lib.check_increment(1)
%p lib.check_increment(int32_max)

%p lib.unsigned_check_increment(1)
%p lib.unsigned_check_increment(uint32_max)

lib.check_increment(1) = 1

lib.check_increment(int32_max) = 1

lib.unsigned_check_increment(1) = 1

lib.unsigned_check_increment(uint32_max) = 0

In [28]:
!gdb lib.so -batch -ex="disass check_increment" -ex="disass unsigned_check_increment"

Dump of assembler code for function check_increment:
   0x00000000000010f9 <+0>:	mov    $0x1,%eax
   0x00000000000010fe <+5>:	retq   
End of assembler dump.
Dump of assembler code for function unsigned_check_increment:
   0x00000000000010ff <+0>:	xor    %eax,%eax
   0x0000000000001101 <+2>:	inc    %edi
   0x0000000000001103 <+4>:	setne  %al
   0x0000000000001106 <+7>:	retq   
End of assembler dump.


### <a name="ubsan"></a> UBSAN

In [137]:
# UB —Å–∞–Ω–∏—Ç–∞–π–∑–µ—Ä –≤ gcc —ç—Ç–æ–≥–æ –Ω–µ –ª–æ–≤–∏—Ç :|
# –ê –≤–æ—Ç clang –º–æ–ª–æ–¥–µ—Ü :)
!clang -O0 -shared -fPIC -fsanitize=undefined lib.c -o lib_ubsan.so

In [170]:
%%save_file run_ub.py
%run LD_PRELOAD=$(gcc -print-file-name=libubsan.so) python3 run_ub.py

import ctypes 

int32_max = (1 << 31) - 1

lib = ctypes.CDLL("./lib_ubsan.so")
lib.check_increment.argtypes = [ctypes.c_int]

print(lib.check_increment(int32_max))

Run: `LD_PRELOAD=$(gcc -print-file-name=libubsan.so) python3 run_ub.py`

[1mlib.c:5:14:[1m[31m runtime error: [1m[0m[1msigned integer overflow: 2147483647 + 1 cannot be represented in type 'int'[1m[0m
0


### <a name="saturation"></a> –ù–∞—Å—ã—â–µ–Ω–∏–µ

In [31]:
%%cpp code_sample
// –≤–æ–æ–±—Ä–∞–∂–∞–µ–º–∞—è —Å–∏—Ç—É–∞—Ü–∏—è, –∫–æ–≥–¥–∞ –ø–µ—Ä–µ–ø–æ–ª–Ω–µ–Ω–∏–µ –Ω–µ–∂–µ–ª–∞—Ç–µ–ª—å–Ω–æ
isize = 100000
n, m = 100000
for (int i = 0; i < isize && i < saturation_multiplication(n, m); ++i) {
    
}

–ò–Ω–æ–≥–¥–∞ —Ö–æ—á–µ—Ç—Å—è –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞—Ç—å –ø–µ—Ä–µ–ø–æ–ª–Ω–µ–Ω–∏—è —Ä–∞–∑—É–º–Ω—ã–º –æ–±—Ä–∞–∑–æ–º, –Ω–∞–ø—Ä–∏–º–µ—Ä, –Ω–∞—Å—ã—â–µ–Ω–∏–µ–º:

In [171]:
%%cpp main.c
%run gcc -O3 main.c -o a.exe 
%run ./a.exe

#include <assert.h>
#include <stdint.h>

unsigned int satsum(unsigned int x, unsigned int y) {
    unsigned int z;
    // –§—É–Ω–∫—Ü–∏—è, –∫–æ—Ç–æ—Ä–∞—è –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞–µ—Ç –≤—ã—Å—Ç–∞–≤–ª–µ–Ω–Ω—ã–π –ø—Ä–æ—Ü–µ—Å—Å–æ—Ä–æ–º —Ñ–ª–∞–≥ –∏ –≤–æ–∑–≤—Ä–∞—â–∞–µ—Ç –µ–≥–æ —è–≤–Ω–æ
    if (__builtin_uadd_overflow(x, y, &z)) {
        return ~0u;
    }
    return z;
}

int main() {
    assert(satsum(2000000000L, 2000000000L) == 4000000000L);
    assert(satsum(4000000000L, 4000000000L) == (unsigned int)-1);
    return 0;
}

Run: `gcc -O3 main.c -o a.exe`

Run: `./a.exe`

–î–ª—è –æ–ø–µ—Ä–∞—Ü–∏–π —Å—Ä–∞–≤–Ω–µ–Ω–∏—è –∏ –¥–µ–ª–µ–Ω–∏—è —Ü–µ–ª—ã—Ö —á–∏—Å–µ–ª —É–∂–µ –æ–¥–Ω–æ–∑–Ω–∞—á–Ω–æ –≤–∞–∂–Ω–æ, –∑–Ω–∞–∫–æ–≤—ã–µ –æ–Ω–∏ –∏–ª–∏ –Ω–µ—Ç.

In [33]:
%%cpp lib2.c
%run gcc -O3 -shared -fPIC lib2.c -o lib2.so   -Os -Wl,--gc-sections -fno-asynchronous-unwind-tables -fcf-protection=branch -mmanual-endbr 

typedef unsigned int uint;

int sum(int x, int y) { return x + y; }
uint usum(uint x, uint y) { return x + y; }

int mul(int x, int y) { return x * y; }
uint umul(uint x, uint y) { return x * y; }

int cmp(int x, int y) { return x < y; }
int ucmp(uint x, uint y) { return x < y; }

int div(int x, int y) { return x / y; }
int udiv(uint x, uint y) { return x / y; }

Run: `gcc -O3 -shared -fPIC lib2.c -o lib2.so   -Os -Wl,--gc-sections -fno-asynchronous-unwind-tables -fcf-protection=branch -mmanual-endbr`

In [35]:
# –§—É–Ω–∫—Ü–∏–∏ sum –∏ usum –∏–¥–µ–Ω—Ç–∏—á–Ω—ã
!gdb lib2.so -batch -ex="disass sum" -ex="disass usum" | grep -v "End of assembler"

Dump of assembler code for function sum:
   0x00000000000010f9 <+0>:	lea    (%rdi,%rsi,1),%eax
   0x00000000000010fc <+3>:	retq   
Dump of assembler code for function usum:
   0x00000000000010fd <+0>:	lea    (%rdi,%rsi,1),%eax
   0x0000000000001100 <+3>:	retq   


In [36]:
!gdb lib2.so -batch -ex="disass mul" -ex="disass umul" | grep -v "End of assembler"

Dump of assembler code for function mul:
   0x0000000000001101 <+0>:	mov    %edi,%eax
   0x0000000000001103 <+2>:	imul   %esi,%eax
   0x0000000000001106 <+5>:	retq   
Dump of assembler code for function umul:
   0x0000000000001107 <+0>:	mov    %edi,%eax
   0x0000000000001109 <+2>:	imul   %esi,%eax
   0x000000000000110c <+5>:	retq   


In [38]:
# –§—É–Ω–∫—Ü–∏–∏ cmp –∏ ucmp –æ—Ç–ª–∏—á–∞—é—Ç—Å—è!
!gdb lib2.so -batch -ex="disass cmp" -ex="disass ucmp" | grep -v "End of assembler"

Dump of assembler code for function cmp:
   0x000000000000110d <+0>:	xor    %eax,%eax
   0x000000000000110f <+2>:	cmp    %esi,%edi
   0x0000000000001111 <+4>:	setl   %al
   0x0000000000001114 <+7>:	retq   
Dump of assembler code for function ucmp:
   0x0000000000001115 <+0>:	xor    %eax,%eax
   0x0000000000001117 <+2>:	cmp    %esi,%edi
   0x0000000000001119 <+4>:	setb   %al
   0x000000000000111c <+7>:	retq   


In [39]:
!gdb lib2.so -batch -ex="disass div" -ex="disass udiv" | grep -v "End of assembler"

Dump of assembler code for function div:
   0x000000000000111d <+0>:	mov    %edi,%eax
   0x000000000000111f <+2>:	cltd   
   0x0000000000001120 <+3>:	idiv   %esi
   0x0000000000001122 <+5>:	retq   
Dump of assembler code for function udiv:
   0x0000000000001123 <+0>:	mov    %edi,%eax
   0x0000000000001125 <+2>:	xor    %edx,%edx
   0x0000000000001127 <+4>:	div    %esi
   0x0000000000001129 <+6>:	retq   


## <a name="size"></a> –ü—Ä–æ —Ä–∞–∑–º–µ—Ä—ã int'–æ–≤ –∏ –∑–Ω–∞–∫–æ–≤–æ—Å—Ç—å

<p> <details> <summary> ‚ñ∫ –£—Å—Ç–∞–Ω–æ–≤–∫–∞ –≤—Å—è–∫–æ–≥–æ-—Ä–∞–∑–Ω–æ–≥–æ</summary>
 
–î–ª—è `-m32`

`sudo apt-get install g++-multilib libc6-dev-i386`

–î–ª—è `qemu-arm`

`sudo apt-get install qemu-system-arm qemu-user`

`sudo apt-get install lib32z1`

–î–ª—è —Å–±–æ—Ä–∫–∏ –∏ –∑–∞–ø—É—Å–∫–∞ arm:

`wget http://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/arm-linux-gnueabi/gcc-linaro-7.3.1-2018.05-i686_arm-linux-gnueabi.tar.xz`

`tar xvf gcc-linaro-7.3.1-2018.05-i686_arm-linux-gnueabi.tar.xz`


</details> </p>


In [41]:
# Add path to compilers to PATH
import os
os.environ["PATH"] = os.environ["PATH"] + ":" + \
    "/home/pechatnov/arm/gcc-linaro-7.3.1-2018.05-i686_arm-linux-gnueabi/bin"

In [48]:
%%cpp size.c
%run gcc size.c -o size.exe && ./size.exe # –ö–æ–º–ø–∏–ª–∏—Ä—É–µ–º –æ–±—ã—á–Ω—ã–º –æ–±—Ä–∞–∑–æ–º
%run gcc -m32 size.c -o size.exe && ./size.exe # –ü–æ–¥ 32-–±–∏—Ç–Ω—É—é –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—É
%run arm-linux-gnueabi-gcc -marm size.c -o size.exe && qemu-arm -L ~/arm/gcc-linaro-7.3.1-2018.05-i686_arm-linux-gnueabi/arm-linux-gnueabi/libc ./size.exe # –ü–æ–¥ ARM

#include <stdio.h>

int main() {
    printf("is char signed = %d, ", (int)((char)(-1) > 0));
    printf("sizeof(long int) = %d\n", (int)sizeof(long int));
}

Run: `gcc size.c -o size.exe && ./size.exe # –ö–æ–º–ø–∏–ª–∏—Ä—É–µ–º –æ–±—ã—á–Ω—ã–º –æ–±—Ä–∞–∑–æ–º`

is char signed = 0, sizeof(long int) = 8


Run: `gcc -m32 size.c -o size.exe && ./size.exe # –ü–æ–¥ 32-–±–∏—Ç–Ω—É—é –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—É`

is char signed = 0, sizeof(long int) = 4


Run: `arm-linux-gnueabi-gcc -marm size.c -o size.exe && qemu-arm -L ~/arm/gcc-linaro-7.3.1-2018.05-i686_arm-linux-gnueabi/arm-linux-gnueabi/libc ./size.exe # –ü–æ–¥ ARM`

is char signed = 1, sizeof(long int) = 4


–ö–∞–∫–æ–π –∏–∑ —ç—Ç–æ–≥–æ –º–æ–∂–Ω–æ —Å–¥–µ–ª–∞—Ç—å –≤—ã–≤–æ–¥? –•–æ—Ç–∏—Ç–µ –ø–æ–Ω—è—Ç–Ω—ã–π —Ç–∏–ø - –∏—Å–ø–æ–ª—å–∑—É–π—Ç–µ —Ç–∏–ø—ã —Å –¥–µ—Ç–µ—Ä–º–∏–Ω–∏—Ä–æ–≤–∞–Ω—ã–º —Ä–∞–∑–º–µ—Ä–æ–º –∏ –∑–Ω–∞–∫–æ–≤–æ—Å—Ç—å—é - uint64_t –∏ –¥—Ä—É–≥–∏–µ –ø–æ–¥–æ–±–Ω—ã–µ 

## <a name="bit"></a> –ë–∏—Ç–æ–≤—ã–µ –æ–ø–µ—Ä–∞—Ü–∏–∏

`^`, `|`, `&`, `~`, `>>`, `<<`

In [56]:
a = 0b0110

def my_bin(x, digits=4):
    m = (1 << digits)
    x = ((x % m) + m) % m # –≠–º—É–ª–∏—Ä—É–µ–º –∫–æ–Ω–µ—á–Ω–æ–µ —á–∏—Å–ª–æ –±–∏—Ç –≤ int-–µ –≤ python 
    return "0b{:0{digits}b}".format(x, digits=digits)

%p my_bin(     a)  # 4-–±–∏—Ç–Ω–æ–µ —á–∏—Å–ª–æ
%p my_bin(    ~a)  # –ï–≥–æ –ø–æ–±–∏—Ç–æ–≤–æ–µ –æ—Ç—Ä–∏—Ü–∞–Ω–∏–µ
%p my_bin(a >> 1)  # –ï–≥–æ —Å–¥–≤–∏–≥ –≤–ø—Ä–∞–≤–æ –Ω–∞ 1
%p my_bin(a << 1)  # –ï–≥–æ —Å–¥–≤–∏–≥ –≤–ª–µ–≤–æ –Ω–∞ 1

`my_bin(     a) = 0b0110`  # 4-–±–∏—Ç–Ω–æ–µ —á–∏—Å–ª–æ

`my_bin(    ~a) = 0b1001`  # –ï–≥–æ –ø–æ–±–∏—Ç–æ–≤–æ–µ –æ—Ç—Ä–∏—Ü–∞–Ω–∏–µ

`my_bin(a >> 1) = 0b0011`  # –ï–≥–æ —Å–¥–≤–∏–≥ –≤–ø—Ä–∞–≤–æ –Ω–∞ 1

`my_bin(a << 1) = 0b1100`  # –ï–≥–æ —Å–¥–≤–∏–≥ –≤–ª–µ–≤–æ –Ω–∞ 1

In [57]:
x = 0b0011
y = 0b1001

%p my_bin(x    )  # X
%p my_bin(y    )  # Y

%p my_bin(x | y)  # –ü–æ–±–∏—Ç–æ–≤—ã–π OR
%p my_bin(x ^ y)  # –ü–æ–±–∏—Ç–æ–≤—ã–π XOR
%p my_bin(x & y)  # –ü–æ–±–∏—Ç–æ–≤—ã–π AND

`my_bin(x    ) = 0b0011`  # X

`my_bin(y    ) = 0b1001`  # Y

`my_bin(x | y) = 0b1011`  # –ü–æ–±–∏—Ç–æ–≤—ã–π OR

`my_bin(x ^ y) = 0b1010`  # –ü–æ–±–∏—Ç–æ–≤—ã–π XOR

`my_bin(x & y) = 0b0001`  # –ü–æ–±–∏—Ç–æ–≤—ã–π AND

–ó–∞–¥–∞—á–∫–∏:
1. –ü–æ–ª—É—á–∏—Ç–µ –∏–∑ —á–∏—Å–ª–∞ `a` `i`-—ã–π –±–∏—Ç 
2. –í—ã—Å—Ç–∞–≤—å—Ç–µ –≤ —Ü–µ–ª–æ–º —á–∏—Å–ª–µ `a` `i`-—ã–π –±–∏—Ç 
3. –ó–∞–Ω—É–ª–∏—Ç–µ –≤ —Ü–µ–ª–æ–º —á–∏—Å–ª–µ `a` `i`-—ã–π –±–∏—Ç 
4. –ò–Ω–≤–µ—Ä—Ç–∏—Ä—É–π—Ç–µ –≤ —Ü–µ–ª–æ–º —á–∏—Å–ª–µ `a` `i`-—ã–π –±–∏—Ç 
5. –ü–æ–ª—É—á–∏—Ç–µ –±–∏—Ç—ã —á–∏—Å–ª–∞ `a` —Å `i` –ø–æ `j` –Ω–µ–≤–∫–ª—é—á–∏—Ç–µ–ª—å–Ω–æ –∫–∞–∫ –±–µ–∑–∑–Ω–∞–∫–æ–≤–æ–µ —á–∏—Å–ª–æ
6. –°–∫–æ–ø–∏—Ä—É–π—Ç–µ –≤ –±–∏—Ç—ã —á–∏—Å–ª–∞ `a` —Å `i` –ø–æ `j` –Ω–µ–≤–∫–ª—é—á–∏—Ç–µ–ª—å–Ω–æ –º–ª–∞–¥—à–∏–µ –±–∏—Ç—ã —á–∏—Å–ª–∞ `b` 

<details> <summary>  ‚ñ∫ –†–µ—à–µ–Ω–∏—è —Å —Å–µ–º–∏–Ω–∞—Ä–∞ </summary>
<pre> <code> 

</code> </pre>
</details>

## <a name="float"></a> –í–µ—â–µ—Å—Ç–≤–µ–Ω–Ω—ã–µ —á–∏—Å–ª–∞

–î–∞–≤–∞–π—Ç–µ –ø—Ä–æ—Å—Ç–æ –ø–æ—Å–º–æ—Ç—Ä–∏–º –Ω–∞ –±–∏—Ç–æ–≤—ã–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏—è –≤–µ—â–µ—Å—Ç–≤–µ–Ω–Ω—ã—Ö —á–∏—Å–µ–ª –∏ –Ω–∞–π–¥–µ–º –∑–∞–∫–æ–Ω–æ–º–µ—Ä–Ω–æ—Å—Ç–∏ :)

In [315]:
%%cpp stand.h
// –ú–æ–∂–Ω–æ –Ω–µ –≤–Ω–∏–∫–∞—Ç—å, –ø—Ä–æ—Å—Ç–æ –ø–µ—á–∞—Ç–∞–ª–∫–∞ –±–∏—Ç–∏–∫–æ–≤

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>

#define EXTRA_INFO // –≤–∫–ª—é—á–µ–Ω–∏–µ –±–æ–ª–µ–µ –ø–æ–¥—Ä–æ–±–Ω–æ–≥–æ –≤—ã–≤–æ–¥–∞
#if defined(EXTRA_INFO)
    #define FORMULA_TEXT "(-1)^S * 2^(E-B) * (1+M/(2^Mbits))"
    #define DENORM_FORMULA_TEXT "\n  (-1)^S * M / 2^(1023 + 51)"
    #define IS_VLINE_POINT(i) (i == 63 || i == 52)
    #define DESCRIBE(d) describe_double(d)
    #define DENORM_DESCRIBE(d) denorm_describe_double(d)
typedef union {
    double double_val;
    struct {
        uint64_t mantissa_val : 52;
        uint64_t exp_val : 11;
        uint64_t sign_val : 1;
    };
} double_parser_t;

void describe_double(double x) {
    double_parser_t parser = {.double_val = x};
    assert(parser.exp_val != 0 || x == 0.0); // ensure not denorm
    printf("  (-1)^%d * 2^(%d) * 0x1.%013llx", 
           (int)parser.sign_val, parser.exp_val - 1023, (long long unsigned int)parser.mantissa_val);
}

void denorm_describe_double(double x) {
    double_parser_t parser = {.double_val = x};
    assert(parser.exp_val == 0 && x != 0.0); // ensure denorm
    printf("\n  (-1)^%d * 0x%013llx / 2^(1023 + 51)", 
           (int)parser.sign_val, (long long unsigned int)parser.mantissa_val);
}

#else
    #define FORMULA_TEXT ""
    #define DENORM_FORMULA_TEXT ""
    #define IS_VLINE_POINT(i) 0
    #define DESCRIBE(d) (void)(d)
    #define DENORM_DESCRIBE(d) (void)(d)
#endif

inline uint64_t bits_of_double(double d) {
    uint64_t result;
    memcpy(&result, &d, sizeof(result));
    return result;
}

inline void print_doubles(double* dds, _Bool denorm) {
    char line_1[70] = {0}, line_2[70] = {0}, hline[70] = {0};
    int j = 0;
    for (int i = 63; i >= 0; --i) {
        line_1[j] = (i % 10 == 0) ? ('0' + (i / 10)) : ' ';
        line_2[j] = '0' + (i % 10);
        hline[j] = '-';
        ++j;
        if (IS_VLINE_POINT(i)) {
            line_1[j] = line_2[j] = '|';
            hline[j] = '-';
            ++j;
        }
    }
    const char* prespaces = denorm ? "              " : "";
    printf("%sBit numbers: %s\n", prespaces, line_1);
    printf("%s             %s  %s\n", prespaces, line_2, denorm ? DENORM_FORMULA_TEXT : FORMULA_TEXT);
    printf("%s             %s\n", prespaces, hline);
    for (double* d = dds; *d; ++d) {
        printf(denorm ? "%24.13la   " : "%10.4lf   ", *d);
        uint64_t m = bits_of_double(*d);
        for (int i = 63; i >= 0; --i) {
            printf("%d", (int)((m >> i) & 1));
            if (IS_VLINE_POINT(i)) {
                printf("|");
            }
        }
        denorm ? DENORM_DESCRIBE(*d) : DESCRIBE(*d);
        printf("\n");
    }
}

##### –ü–æ—Å–º–æ—Ç—Ä–∏–º –Ω–∞ –ø–∞—Ä—ã —á–∏—Å–µ–ª x –∏ -x

In [316]:
%%cpp stand.cpp
%run gcc stand.cpp -o stand.exe
%run ./stand.exe

#include "stand.h"

int main() {
    double dd[] = {1, -1, 132, -132, 3.1415, -3.1415,  0};
    print_doubles(dd, /*denorm = */ false);
}

Run: `gcc stand.cpp -o stand.exe`

Run: `./stand.exe`

Bit numbers:  |  6        | 5         4         3         2         1         0
             3|21098765432|1098765432109876543210987654321098765432109876543210  (-1)^S * 2^(E-B) * (1+M/(2^Mbits))
             ------------------------------------------------------------------
    1.0000   0|01111111111|0000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.0000000000000
   -1.0000   1|01111111111|0000000000000000000000000000000000000000000000000000  (-1)^1 * 2^(0) * 0x1.0000000000000
  132.0000   0|10000000110|0000100000000000000000000000000000000000000000000000  (-1)^0 * 2^(7) * 0x1.0800000000000
 -132.0000   1|10000000110|0000100000000000000000000000000000000000000000000000  (-1)^1 * 2^(7) * 0x1.0800000000000
    3.1415   0|10000000000|1001001000011100101011000000100000110001001001101111  (-1)^0 * 2^(1) * 0x1.921cac083126f
   -3.1415   1|10000000000|1001001000011100101011000000100000110001001001101111  (-1)^1 * 2^(1) * 0x1.921cac083126f


##### –ü–æ—Å–º–æ—Ç—Ä–∏–º –Ω–∞ —Å—Ç–µ–ø–µ–Ω–∏ 2-–∫–∏

In [317]:
%%cpp stand.cpp
%run gcc stand.cpp -o stand.exe
%run ./stand.exe

#include "stand.h"

int main() {
    double dd[] = {0.125, 0.25, 0.5, 1, 2, 4, 8, 16, 0};
    print_doubles(dd, /*denorm = */ false);
}

Run: `gcc stand.cpp -o stand.exe`

Run: `./stand.exe`

Bit numbers:  |  6        | 5         4         3         2         1         0
             3|21098765432|1098765432109876543210987654321098765432109876543210  (-1)^S * 2^(E-B) * (1+M/(2^Mbits))
             ------------------------------------------------------------------
    0.1250   0|01111111100|0000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(-3) * 0x1.0000000000000
    0.2500   0|01111111101|0000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(-2) * 0x1.0000000000000
    0.5000   0|01111111110|0000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(-1) * 0x1.0000000000000
    1.0000   0|01111111111|0000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.0000000000000
    2.0000   0|10000000000|0000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(1) * 0x1.0000000000000
    4.0000   0|10000000001|0000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(2) * 0x1.0000000000000
    8.0000   0|1

##### –ü–æ—Å–º–æ—Ç—Ä–∏–º –Ω–∞ —á–∏—Å–ª–∞ –≤–∏–¥–∞ $ 1 + i \cdot 2^{(-k)}$

In [318]:
%%cpp stand.cpp
%run gcc stand.cpp -o stand.exe
%run ./stand.exe

#include "stand.h"

int main() {
    double t8 = 1.0 / 8;
    double dd[] = {1 + 0 * t8, 1 + 1 * t8, 1 + 2 * t8, 1 + 3 * t8, 1 + 4 * t8, 
                   1 + 5 * t8, 1 + 6 * t8, 1 + 7 * t8, 0};
    print_doubles(dd, /*denorm = */ false);
}

Run: `gcc stand.cpp -o stand.exe`

Run: `./stand.exe`

Bit numbers:  |  6        | 5         4         3         2         1         0
             3|21098765432|1098765432109876543210987654321098765432109876543210  (-1)^S * 2^(E-B) * (1+M/(2^Mbits))
             ------------------------------------------------------------------
    1.0000   0|01111111111|0000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.0000000000000
    1.1250   0|01111111111|0010000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.2000000000000
    1.2500   0|01111111111|0100000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.4000000000000
    1.3750   0|01111111111|0110000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.6000000000000
    1.5000   0|01111111111|1000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.8000000000000
    1.6250   0|01111111111|1010000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.a000000000000
    1.7500   0|0111

##### –ü–æ—Å–º–æ—Ç—Ä–∏–º –Ω–∞ —á–∏—Å–ª–∞ –≤–∏–¥–∞ $ 1 + i \cdot 2^{(-k)}$

In [319]:
%%cpp stand.cpp
%run gcc stand.cpp -o stand.exe
%run ./stand.exe

#include "stand.h"

int main() {
    double eps = 1.0 / (1LL << 52);
    double dd[] = {1 + 0 * eps, 1 + 1 * eps, 1 + 2 * eps, 1 + 3 * eps, 1 + 4 * eps, 0};
    print_doubles(dd, /*denorm = */ false);
}

Run: `gcc stand.cpp -o stand.exe`

Run: `./stand.exe`

Bit numbers:  |  6        | 5         4         3         2         1         0
             3|21098765432|1098765432109876543210987654321098765432109876543210  (-1)^S * 2^(E-B) * (1+M/(2^Mbits))
             ------------------------------------------------------------------
    1.0000   0|01111111111|0000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.0000000000000
    1.0000   0|01111111111|0000000000000000000000000000000000000000000000000001  (-1)^0 * 2^(0) * 0x1.0000000000001
    1.0000   0|01111111111|0000000000000000000000000000000000000000000000000010  (-1)^0 * 2^(0) * 0x1.0000000000002
    1.0000   0|01111111111|0000000000000000000000000000000000000000000000000011  (-1)^0 * 2^(0) * 0x1.0000000000003
    1.0000   0|01111111111|0000000000000000000000000000000000000000000000000100  (-1)^0 * 2^(0) * 0x1.0000000000004


##### –î–µ–Ω–æ—Ä–º–∞–ª–∏–∑–æ–≤–∞–Ω–Ω—ã–µ —á–∏—Å–ª–∞

In [320]:
%%cpp stand.cpp
%run gcc stand.cpp -o stand.exe
%run ./stand.exe

#include <math.h>
#include "stand.h"

int main() {
    double dd[] = {-1. / pow(2, 1023) / 1, 1. / pow(2, 1023) / 2, 1. / pow(2, 1023) / pow(2, 50), 1. / pow(2, 1023) / pow(2, 51), 0};
    print_doubles(dd, /*denorm = */ true);
}

Run: `gcc stand.cpp -o stand.exe`

Run: `./stand.exe`

              Bit numbers:  |  6        | 5         4         3         2         1         0
                           3|21098765432|1098765432109876543210987654321098765432109876543210  
  (-1)^S * M / 2^(1023 + 51)
                           ------------------------------------------------------------------
-0x0.8000000000000p-1022   1|00000000000|1000000000000000000000000000000000000000000000000000
  (-1)^1 * 0x8000000000000 / 2^(1023 + 51)
 0x0.4000000000000p-1022   0|00000000000|0100000000000000000000000000000000000000000000000000
  (-1)^0 * 0x4000000000000 / 2^(1023 + 51)
 0x0.0000000000002p-1022   0|00000000000|0000000000000000000000000000000000000000000000000010
  (-1)^0 * 0x0000000000002 / 2^(1023 + 51)
 0x0.0000000000001p-1022   0|00000000000|0000000000000000000000000000000000000000000000000001
  (-1)^0 * 0x0000000000001 / 2^(1023 + 51)


##### –ü–æ—Å–º–æ—Ç—Ä–∏–º –Ω–∞ —Å—É—â–µ—Å—Ç–≤–µ–Ω–Ω–æ —Ä–∞–∑–Ω—ã–µ –∑–Ω–∞—á–µ–Ω–∏—è double

In [321]:
%%cpp stand.cpp
%run gcc stand.cpp -o stand.exe
%run ./stand.exe

#include <math.h>
#include "stand.h"

int main() {
    double dd[] = {0.1, 1.5, 100, NAN, -NAN, 0.0 / 0.0, INFINITY, -INFINITY, 0};
    print_doubles(dd, /*denorm = */ false);
}

Run: `gcc stand.cpp -o stand.exe`

Run: `./stand.exe`

Bit numbers:  |  6        | 5         4         3         2         1         0
             3|21098765432|1098765432109876543210987654321098765432109876543210  (-1)^S * 2^(E-B) * (1+M/(2^Mbits))
             ------------------------------------------------------------------
    0.1000   0|01111111011|1001100110011001100110011001100110011001100110011010  (-1)^0 * 2^(-4) * 0x1.999999999999a
    1.5000   0|01111111111|1000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(0) * 0x1.8000000000000
  100.0000   0|10000000101|1001000000000000000000000000000000000000000000000000  (-1)^0 * 2^(6) * 0x1.9000000000000
       nan   0|11111111111|1000000000000000000000000000000000000000000000000000  (-1)^0 * 2^(1024) * 0x1.8000000000000
      -nan   1|11111111111|1000000000000000000000000000000000000000000000000000  (-1)^1 * 2^(1024) * 0x1.8000000000000
      -nan   1|11111111111|1000000000000000000000000000000000000000000000000000  (-1)^1 * 2^(1024) * 0x1.8000000000000
       in

–ê –∑–∞—á–µ–º –Ω—É–∂–µ–Ω -nan?

–Ø –Ω–∞–¥–µ—é—Å—å –ø–æ –ø—Ä–∏–º–µ—Ä–∞–º –≤—ã —É–ª–æ–≤–∏–ª–∏ —Å—É—Ç—å. –ü–æ–¥—Ä–æ–±–Ω–µ–µ –∑–∞ —Ç–µ–æ—Ä–∏–µ–π –º–æ–∂–Ω–æ –≤ 
[–†–∏–¥–∏–Ω–≥ –Ø–∫–æ–≤–ª–µ–≤–∞: –í–µ—â–µ—Å—Ç–≤–µ–Ω–Ω–∞—è –∞—Ä–∏—Ñ–º–µ—Ç–∏–∫–∞](https://github.com/victor-yacovlev/mipt-diht-caos/tree/master/practice/ieee754) 

# –î–æ–ø–æ–ª–Ω–µ–Ω–∏–µ –ø—Ä–æ bitcast

In [322]:
%%cpp bitcast.c
%run gcc -O2 -Wall bitcast.c -o bitcast.exe  -Os -fno-asynchronous-unwind-tables -fcf-protection=branch -mmanual-endbr 
%run ./bitcast.exe

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>

uint64_t bit_cast_memcpy(double d) {
    uint64_t result;
    memcpy(&result, &d, sizeof(result)); // –ñ–µ–ª–µ–∑–æ–±–µ—Ç–æ–Ω–Ω—ã–π —Å–ø–æ—Å–æ–±, –Ω–æ —á—É—Ç—å —Å–ª–æ–∂–Ω–µ–µ –¥–ª—è –æ–ø—Ç–∏–º–∏–∑–∞—Ç–æ—Ä–∞
    return result;
}

typedef union {
    double double_val;
    uint64_t ui64_val;
} converter_t;

uint64_t bit_cast_union(double d) {
    converter_t conv;
    conv.double_val = d;
    return conv.ui64_val;
    //return ((converter_t){.double_val = d}).ui64_val; // –í—Ä–æ–¥–µ (?) —Ö–æ—Ä–æ—à–µ–µ —Ä–µ—à–µ–Ω–∏–µ
}

uint64_t bit_cast_ptr(double d) {
    return *(uint64_t*)(void*)&d; // –ü—Ä–æ—Å—Ç–æ–µ, –Ω–æ –Ω–µ–æ–¥–Ω–æ–∑–Ω–∞—á–Ω–æ–µ —Ä–µ—à–µ–Ω–∏–µ –∏–∑-–∑–∞ –∞–ª–∏–∞—Å–∏–Ω–≥–∞
}

int main() {
    double d = 3.15;
    printf("%" PRId64 "\n", bit_cast_memcpy(d));
    printf("%" PRId64 "\n", bit_cast_union(d));
    printf("%" PRId64 "\n", bit_cast_memcpy(d));
}

Run: `gcc -O2 -Wall bitcast.c -o bitcast.exe  -Os -fno-asynchronous-unwind-tables -fcf-protection=branch -mmanual-endbr`

[01m[Kbitcast.c:[m[K In function ‚Äò[01m[Kbit_cast_ptr[m[K‚Äô:
   29 |     return *[01;35m[K(uint64_t*)(void*)&d[m[K; // –ü—Ä–æ—Å—Ç–æ–µ, –Ω–æ –Ω–µ–æ–¥–Ω–æ–∑–Ω–∞—á–Ω–æ–µ —Ä–µ—à–µ–Ω–∏–µ –∏–∑-–∑–∞ –∞–ª–∏–∞—Å–∏–Ω–≥–∞
      |             [01;35m[K^~~~~~~~~~~~~~~~~~~~[m[K


Run: `./bitcast.exe`

4614275588213125939
4614275588213125939
4614275588213125939


In [323]:
# –í—Å–µ —Å–ø–æ—Å–æ–±—ã –æ–¥–∏–Ω–∞–∫–æ–≤–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã –ø—Ä–∏ –∫–æ–º–ø–∏–ª—è—Ü–∏–∏ —Å -O2
!gdb bitcast.exe -batch -ex="disass bit_cast_memcpy" -ex="disass bit_cast_union" -ex="disass bit_cast_ptr"

Dump of assembler code for function bit_cast_memcpy:
   0x00000000000011a9 <+0>:	movq   %xmm0,%rax
   0x00000000000011ae <+5>:	retq   
End of assembler dump.
Dump of assembler code for function bit_cast_union:
   0x00000000000011af <+0>:	movq   %xmm0,%rax
   0x00000000000011b4 <+5>:	retq   
End of assembler dump.
Dump of assembler code for function bit_cast_ptr:
   0x00000000000011b5 <+0>:	movq   %xmm0,%rax
   0x00000000000011ba <+5>:	retq   
End of assembler dump.


–Ø –±—ã —Ä–µ–∫–æ–º–µ–Ω–¥–æ–≤–∞–ª –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –≤ —Ç–∞–∫–∏—Ö —Å–ª—É—á–∞—è—Ö memcpy. [–¢–∞–∫ —Å–¥–µ–ª–∞–Ω–æ –≤ std::bit_cast](https://en.cppreference.com/w/cpp/numeric/bit_cast)

[–ü—Ä–æ C++ –∞–ª–∏–∞—Å–∏–Ω–≥, –ª–æ–≤–∫–∏–µ –æ–ø—Ç–∏–º–∏–∑–∞—Ü–∏–∏ –∏ –ø–æ–¥–ª—ã–µ –±–∞–≥–∏ / –•–∞–±—Ä](https://habr.com/ru/post/114117/)


## <a name="str"></a> –°—Ç—Ä–æ–∫–∏


`pip3 install --user hexdump` - —É—Å—Ç–∞–Ω–æ–≤–∏—Ç—å.

In [324]:
print("–ü—Ä–∏–≤–µ—Ç, –º–∏—Ä! - –ü–µ—Ä–µ–≤–æ–¥ –Ω–∞ —ç–ª—å—Ñ–∏–π—Å–∫–∏–π.".encode("cp1251").decode("cp1252"))

√è√∞√®√¢√•√≤, √¨√®√∞! - √è√•√∞√•√¢√Æ√§ √≠√† √Ω√´√º√¥√®√©√±√™√®√©.


In [325]:
from hexdump import hexdump

### <a name="ascii"></a> ASCII

TLDR: ASCII - –ø–æ–ª–æ–≤–∏–Ω–∞ 8-–±–∏—Ç–Ω–æ–π –∫–æ–¥–∏—Ä–æ–≤–∫–∏. 

–¢–æ –µ—Å—Ç—å –Ω–∞ —Å–∞–º–æ–º –¥–µ–ª–µ –µ—Å—Ç—å —Ü–µ–ª–æ–µ —Å–µ–º–µ–π—Å—Ç–≤–æ 8-–±–∏—Ç–Ω—ã—Ö –∫–æ–¥–∏—Ä–æ–≤–æ–∫ (–Ω–∞–ø—Ä–∏–º–µ—Ä, KOI-8), —É –∫–æ—Ç–æ—Ä—ã—Ö –ø–µ—Ä–≤—ã–µ 128 —Å–∏–º–≤–æ–ª–æ–≤ - ASCII. 

–í ASCII –≤—Ö–æ–¥—è—Ç –¥–µ—Å—è—Ç–∏—á–Ω—ã–µ —Ü–∏—Ñ—Ä—ã, –ª–∞—Ç–∏–Ω—Ü—Å–∫–∏–π –∞–ª—Ñ–∞–≤–∏—Ç, –∑–Ω–∞–∫–∏ –ø—Ä–µ–ø–∏–Ω–∞–Ω–∏—è, ... —Å–º–æ—Ç—Ä–∏—Ç–µ –∫–∞—Ä—Ç–∏–Ω–∫—É

![ascii](./ascii.png)

In [326]:
hexdump("AABBCC__112233".encode("ascii"))
hexdump("Hello!".encode("ascii"))

00000000: 41 41 42 42 43 43 5F 5F  31 31 32 32 33 33        AABBCC__112233
00000000: 48 65 6C 6C 6F 21                                 Hello!


In [327]:
hexdump("–Ø –≤–∏–∂—É –≤–∞—Å".encode("ascii"))

UnicodeEncodeError: 'ascii' codec can't encode character '\u042f' in position 0: ordinal not in range(128)

–ü—Ä–æ KOI-8

In [328]:
hexdump("AABBCC__112233".encode("koi8-r"))
hexdump("–Ø–Ø–û–û–Å–Å__–¨–¨–£–£–ó–ó".encode("koi8-r"))
hexdump("–Ø –≤–∏–∂—É –≤–∞—Å".encode("koi8-r"))


00000000: 41 41 42 42 43 43 5F 5F  31 31 32 32 33 33        AABBCC__112233
00000000: F1 F1 EF EF B3 B3 5F 5F  F8 F8 F5 F5 FA FA        ......__......
00000000: F1 20 D7 C9 D6 D5 20 D7  C1 D3                    . .... ...


### <a name="utf-8"></a> UTF-8

UTF-8 - –∫–æ–¥–∏—Ä–æ–≤–∫–∞ –¥–ª—è Unicode.

TLDR: Unicode - –ø—Ä–æ–Ω—É–º–µ—Ä–æ–≤–∞–Ω–Ω–æ–µ –º–Ω–æ–∂–µ—Å—Ç–≤–æ —Å–∏–º–≤–æ–ª–æ–≤ (–∫–∞–∫ –µ–¥–∏–Ω–∏—Ü —Å–º—ã—Å–ª–∞, –∞ –Ω–µ –≥–ª–∏—Ñ–æ–≤) + —Å–µ–º–µ–π—Å—Ç–≤–æ –∫–æ–¥–∏—Ä–æ–≤–æ–∫ UTF

In [329]:
hexdump("AABBCC__112233".encode("ascii"))
hexdump("AABBCC__112233".encode("utf-8"))

00000000: 41 41 42 42 43 43 5F 5F  31 31 32 32 33 33        AABBCC__112233
00000000: 41 41 42 42 43 43 5F 5F  31 31 32 32 33 33        AABBCC__112233


In [330]:
hexdump("–Ø–Ø–û–û–Å–Å__–¨–¨–£–£–ó–ó".encode("koi8-r"))
print()
hexdump("–Ø–Ø–û–û–Å–Å__–¨–¨–£–£–ó–ó".encode("utf-8"))

00000000: F1 F1 EF EF B3 B3 5F 5F  F8 F8 F5 F5 FA FA        ......__......

00000000: D0 AF D0 AF D0 9E D0 9E  D0 81 D0 81 5F 5F D0 AC  ............__..
00000010: D0 AC D0 A3 D0 A3 D0 97  D0 97                    ..........


In [331]:
def add_spaces(s):
    return "".join(c + ("" if (i + 1) % 6 else " ") for i, c in enumerate(s[::-1]))[::-1]

def show_utf_8(c):
    num = c if isinstance(c, int) else ord(c) 
    print("       CHR:", chr(num))
    encoded = chr(num).encode("utf-8")
    print("   BIN NUM: {:b}".format(num))
    print("  BIN NUM2:", add_spaces("{:b}".format(num)))
    print(" UTF-8 BIN:", " ".join("{:b}".format(b) for b in encoded))

show_utf_8("üòä")
show_utf_8("–®")
show_utf_8("0")

       CHR: üòä
   BIN NUM: 11111011000001010
  BIN NUM2: 11111 011000 001010
 UTF-8 BIN: 11110000 10011111 10011000 10001010
       CHR: –®
   BIN NUM: 10000101000
  BIN NUM2: 10000 101000
 UTF-8 BIN: 11010000 10101000
       CHR: 0
   BIN NUM: 110000
  BIN NUM2:  110000
 UTF-8 BIN: 110000


In [332]:
print(chr(0x0418) + chr(0x0306))
print(chr(0x0418) + " " + chr(0x0306))

–òÃÜ
–ò ÃÜ


&#x0418;&#x0306;