In [1]:
"""modified from https://gist.github.com/endes0/0967d7c5bb1877559c4ae84be05e036c"""
from tika import parser

import torchaudio
import argparse
from sanitize_filename import sanitize
import re
from pathlib import Path
from tqdm.auto import tqdm
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_audio, load_voice, load_voices
from tortoise.utils.tokenizer import VoiceBpeTokenizer

import torch
import json
from dataclasses import dataclass
# import pysbd
from typing import List
from loguru import logger
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

@dataclass
class Writer:
    """Save audio chapters as numbered ``.ogg`` files and list them in a playlist.

    On construction a ``playlist.m3u`` file is created inside ``out_dir`` and
    kept open; every call to :meth:`write_chapter` saves one audio file and
    appends its path to that playlist.  Call :meth:`close` (or use the instance
    as a context manager) when finished.

    Attributes set in ``__post_init__``:
        m3u: the open playlist file handle.
        chapter: 1-based number used for the next chapter file name.
    """
    out_dir: Path
    # tts: TTS

    def __post_init__(self):
        self.m3u = open(self.out_dir / 'playlist.m3u', 'w')
        self.m3u.write('#EXTM3U\n')
        # Flush immediately so the playlist on disk is valid even if the
        # process dies before close() is reached.
        self.m3u.flush()
        self.chapter = 1

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def write_chapter(self, waveforms: torch.Tensor, SAMPLE_RATE=24000):
        """Save ``waveforms`` as the next chapter and add it to the playlist.

        Args:
            waveforms: audio tensor handed to ``torchaudio.save`` after being
                moved to the CPU.
            SAMPLE_RATE: sample rate in Hz passed to ``torchaudio.save``.

        Returns:
            Path of the audio file that was written.
        """
        # ``d`` instead of ``g``: ``g`` switches to exponent notation for
        # large integers, which would produce an invalid file name.
        wav_f = self.out_dir / f'{self.chapter:03d}.ogg'
        torchaudio.save(wav_f, waveforms.cpu(), SAMPLE_RATE)
        self.m3u.write(f'{wav_f}\n')
        # Keep the playlist in sync with the files already on disk.
        self.m3u.flush()
        self.chapter += 1
        return wav_f

    def close(self):
        """Close the playlist file (safe to call more than once)."""
        self.m3u.close()

def split_into_sentences(text, tokenizer, limit: int = 200) -> List[str]:
    """Split ``text`` into chunks of at most ``limit`` tokens each.

    Chunk length is measured as ``len(tokenizer.encode(chunk))``.  The text is
    split with a ``RecursiveCharacterTextSplitter`` that tries the separators
    below in order (paragraph breaks first, single characters last) and keeps
    the separators in the output.

    Args:
        text: the text to split; ``None`` or blank text yields an empty list.
        tokenizer: object exposing ``encode(str)`` returning a sized sequence.
        limit: maximum number of tokens allowed per chunk.

    Returns:
        The list of text chunks, in document order.

    Raises:
        AssertionError: if any produced chunk is longer than ``limit`` tokens.
    """
    # Nothing to split: avoids calling max() on an empty list further down.
    if text is None or not text.strip():
        return []

    def token_length(chunk: str) -> int:
        return len(tokenizer.encode(chunk))

    splitter = RecursiveCharacterTextSplitter(
        length_function=token_length,
        chunk_size=limit,
        chunk_overlap=0,
        keep_separator=True,
        strip_whitespace=True,
        separators=[
            "\n\n", "\n", "\xa0", '<div>', '<p>', '<br>', "\r", ".", "!", "?",
            '"', "'", "‘", "’", "“", "”", "„", "‟",
            "(", ")", "[", "]", "{", "}",
            "…", ":", ";", "—", "   ",
            # These two ensure there is always something to split by, so chunks
            # always fit the limit.  The comma after "   " matters: without it
            # Python joins "   " and " " into one four-space separator and the
            # single-space fallback is silently lost.
            " ", '',
        ],
    )
    texts = splitter.split_text(text)
    ls = [token_length(x) for x in texts]
    if not ls:
        return []
    logger.debug(f'split lengths {ls}. max={max(ls)} chunk_limit={limit}')
    assert all(l <= limit for l in ls), 'all senteces should be below limit'
    return texts


In [3]:
# A notebook has no __file__ of its own, so it is set by hand to this
# notebook's path; the directory containing it becomes the project root.
__file__ = '../01_epub_tortise.ipynb'
root_dir = Path(__file__).resolve().absolute().parents[0]
root_dir


PosixPath('/media/wassname/SGIronWolf/projects5/tts-ai/use-tts-mjc')

In [4]:
# Get the command line arguments
parser2 = argparse.ArgumentParser()
parser2.add_argument('--epub', type=Path, 
                     default=root_dir/'data/A Short Guide to the Inner Citadel - Massimo Pigliucci.epub',
                    #  default=root_dir/'data/golden_saying_of_epictetus.epub',
                    help='EPUB file to read')
parser2.add_argument('-o', '--out', type=Path, default=None, help='Output folder')
parser2.add_argument('-f', '--force', action='store_true', default=False, help='Overwrite')
parser2.add_argument('-t', '--test', action='store_true', default=False,
                    help='Test mode: only use the first 1000 characters of the text')
parser2.add_argument('-l', '--limit', type=int, default=400,
                    help='Maximum number of characters to synthesize at once')
parser2.add_argument('-m', '--model', type=str, 
                    default="tts_models/multilingual/multi-dataset/xtts_v1",
                    # default='facebook/fastspeech2-en-ljspeech',
                    help='fairseq model to use from HuggingFace Hub')
parser2.add_argument('-s', '--speaker', type=Path, default=root_dir / "data/speakers/donaldrobertson.wav",
                    help='Speaker wav to use from the model')
# An explicit empty list is parsed, so sys.argv is ignored and the defaults
# declared above are what the notebook actually runs with.
args = parser2.parse_args([])

if args.out is None:
    from datetime import datetime, timezone
    # datetime.utcnow() is deprecated; this produces the same UTC timestamp.
    timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H-%M-%S')
    # Join with '_' rather than ' ' so the folder name stays free of spaces,
    # consistent with the replace(' ', '_') applied to the book title.
    args.out = root_dir / 'out' / (sanitize(args.epub.stem).replace(' ', '_').lower() + '_' + timestamp)

# load epub
parsed = parser.from_file(str(args.epub))
text = parsed["content"]
if text is None:
    # NOTE(review): presumably Tika yields no content when extraction fails;
    # stop here with a clear message instead of a TypeError further down.
    raise ValueError(f'No text could be extracted from {args.epub}')
if args.test:
    text = text[:1000]


# make output directory
out_dir = Path(args.out)
if out_dir.exists():
    if not args.force:
        logger.warning('Output folder already exists. Use -f to overwrite.')
        # Raise SystemExit directly instead of calling the interactive
        # `exit` helper, which is not meant for use in code.
        raise SystemExit(1)
    else:
        import shutil
        # rmtree also copes with sub-directories, which unlink()/rmdir() did not.
        shutil.rmtree(out_dir)
# parents=True: the enclosing 'out' folder may not exist yet on a fresh checkout.
out_dir.mkdir(parents=True)
logger.info(f'Output folder: {out_dir}')


[32m2023-10-08 11:19:11.630[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m43[0m - [1mOutput folder: /media/wassname/SGIronWolf/projects5/tts-ai/use-tts-mjc/out/a_short_guide_to_the_inner_citadel_-_massimo_pigliucci20231008_03-19-11[0m


In [5]:


# write metadata to dir
from json_tricks import dump, dumps, load, loads, strip_comments
f_metadata = out_dir / 'metadata.json'
with open(f_metadata, 'w') as fo:
    dump(dict(
        epub_metadata=parsed['metadata'],
        args=vars(args),
    ), fo, indent=4)

# Reference clips should be torch tensors containing 22.05kHz waveform data.
# see https://github.com/neonbjb/tortoise-tts/blob/5bbb0e0b97ea2f62c12e90402e8ad4faee55e697/tortoise/api.py#L365C82-L365C140
REFERENCE_SAMPLE_RATE = 22050
REFERENCE_SECONDS = 12
ref, INPUT_SAMPLE_RATE = torchaudio.load(args.speaker)
if INPUT_SAMPLE_RATE != REFERENCE_SAMPLE_RATE:
    # The speaker file is loaded at its native rate; resample it so the clip
    # really is 22.05kHz audio as required above.
    ref = torchaudio.functional.resample(ref, INPUT_SAMPLE_RATE, REFERENCE_SAMPLE_RATE)
# take just the last ~12 seconds (counted at the resampled rate)
reference_clips = [ref[..., -REFERENCE_SECONDS * REFERENCE_SAMPLE_RATE:]]

# load model
use_cuda = False if args.test else torch.cuda.is_available()
logger.info(f'use_cuda {use_cuda}')

# Honour use_cuda instead of ignoring it: deepspeed and half precision are
# only requested when running on the GPU.
# NOTE(review): assumes the installed TextToSpeech accepts `device` - confirm.
tts = TextToSpeech(
    use_deepspeed=use_cuda,
    kv_cache=True,
    half=use_cuda,
    device='cuda' if use_cuda else 'cpu',
)


[32m2023-10-08 11:19:11.774[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1muse_cuda True[0m


[2023-10-08 11:19:20,690] [INFO] [logging.py:93:log_dist] [Rank -1] DeepSpeed info: version=0.8.3, git-hash=unknown, git-branch=unknown
[2023-10-08 11:19:20,692] [INFO] [logging.py:93:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1
Installed CUDA version 11.5 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination


Using /home/wassname/.cache/torch_extensions/py310_cu117 as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /home/wassname/.cache/torch_extensions/py310_cu117/transformer_inference/build.ninja...
Building extension module transformer_inference...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)


ninja: no work to do.
Time to load transformer_inference op: 0.051093101501464844 seconds
[2023-10-08 11:19:21,249] [INFO] [logging.py:93:log_dist] [Rank -1] DeepSpeed-Inference config: {'layer_id': 0, 'hidden_size': 1024, 'intermediate_size': 4096, 'heads': 16, 'num_hidden_layers': -1, 'fp16': True, 'pre_layer_norm': True, 'local_rank': -1, 'stochastic_mode': False, 'epsilon': 1e-05, 'mp_size': 1, 'q_int8': False, 'scale_attention': True, 'triangular_masking': True, 'local_attention': False, 'window_size': 1, 'rotary_dim': -1, 'rotate_half': False, 'rotate_every_two': True, 'return_tuple': True, 'mlp_after_attn': True, 'mlp_act_func_type': <ActivationFuncType.GELU: 1>, 'specialized_mode': False, 'training_mp_size': 1, 'bigscience_bloom': False, 'max_out_tokens': 1024, 'scale_attn_by_inverse_layer_idx': False, 'enable_qkv_quantization': False, 'use_mup': False, 'return_single_tuple': False}
Installed CUDA version 11.5 does not match the version torch was compiled with 11.7 but since th

Loading extension module transformer_inference...
Using /home/wassname/.cache/torch_extensions/py310_cu117 as PyTorch extensions root...
No modifications detected for re-loaded extension module transformer_inference, skipping build step...
Loading extension module transformer_inference...


In [6]:
# Sample rate (Hz) used when saving the generated audio chapters.
# NOTE(review): presumably matches the rate Tortoise generates at - confirm.
OUTPUT_SAMPLE_RATE = 24000


In [7]:
tokenizer = tts.tokenizer
segs = split_into_sentences(text, tokenizer)

# Write a chapter once more than this many samples are buffered.
# ~20G of RAM, ~2 minutes of audio output, ~7 minutes to generate
CHAPTER_MAX_SAMPLES = 10000000 // 4


def _write_buffered_chapter(writer, waveforms):
    """Join the buffered clips along the last axis, save them as one chapter and log it.

    Returns the concatenated tensor and the path of the written file.
    """
    wavs = torch.concat(waveforms, dim=-1).cpu().squeeze(0)
    # Always pass the sample rate explicitly so every chapter is saved alike.
    wav_f = writer.write_chapter(wavs, OUTPUT_SAMPLE_RATE)
    logger.warning(f"wrote chapter {wav_f}")
    return wavs, wav_f


waveforms = []
len_wav = 0  # running count of buffered samples (avoids re-summing every loop)
writer = Writer(out_dir)
try:
    for i, t in enumerate(tqdm(segs, desc='chunks')):
        t = t.replace('\n', ' ').strip()
        # Skip empty text
        if not t:
            continue
        # check if contains words or numbers
        if not re.search('[a-zA-Z0-9]', t):
            logger.debug(f'Skipping text without words or numbers `{t}`')
            continue
        logger.debug(f'current sentence `{t}`')

        # presets: ultra_fast, fast, standard; only the first chunk is verbose
        wav_t = tts.tts_with_preset(t, voice_samples=reference_clips, preset='fast', verbose=i==0)
        wav = wav_t.cpu()
        waveforms.append(wav)
        len_wav += wav.shape[-1]

        if len_wav > CHAPTER_MAX_SAMPLES:
            wavs, wav_f = _write_buffered_chapter(writer, waveforms)
            waveforms = []
            len_wav = 0

    # Whatever is left over becomes the final (shorter) chapter.
    if len(waveforms):
        wavs, wav_f = _write_buffered_chapter(writer, waveforms)
finally:
    # Close the playlist even when synthesis fails or is interrupted part-way
    # through, so the chapters already written remain listed.
    writer.close()


[32m2023-10-08 11:19:24.549[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36msplit_into_sentences[0m:[36m41[0m - [34m[1msplit lengths [22, 185, 169, 140, 123, 147, 18, 172, 30, 137, 112, 157, 174, 157, 104, 124, 125, 138, 23, 194, 120, 171, 197, 5, 126, 124, 134, 129, 166, 135, 174, 186, 130, 75, 75, 115, 97, 73, 73, 163, 114, 93, 143, 94, 147, 130, 24, 169, 142, 133, 73, 192, 136, 134, 73, 131, 89, 31, 60, 165, 1, 175, 120, 162, 96, 1, 189, 174, 59, 92, 163, 46, 150, 86, 176, 25, 196, 18, 124, 177, 139, 143, 96, 170, 51, 175, 191, 156, 186, 171, 99, 108, 17, 189, 39, 20, 144, 140, 161, 96, 82, 123, 187, 106, 116, 84, 194, 191, 110, 117, 184, 104, 140, 102, 155, 1, 197, 80, 95, 198, 191, 129, 193, 177, 113, 116, 144, 143, 158, 118, 124, 32, 190, 171, 158, 78, 148, 58, 152, 102, 135, 55, 177, 136, 138, 182, 24, 76, 158, 121, 154, 165, 172, 67, 104, 119, 123, 157, 189, 105, 43, 170, 58, 168, 190, 137, 199, 163, 41, 111, 17, 186, 112, 199, 170, 183, 149, 156, 131, 88, 160, 163, 

Generating autoregressive samples..




------------------------------------------------------
Free memory : 8.120789 (GigaBytes)  
Total memory: 10.731750 (GigaBytes)  
Requested memory: 1.687500 (GigaBytes) 
Setting maximum total tokens (input + output) to 1024 
------------------------------------------------------


100%|██████████| 12/12 [00:03<00:00,  3.42it/s]


Computing best candidates using CLVP


100%|██████████| 12/12 [00:01<00:00,  9.13it/s]


Transforming autoregressive outputs into audio..


100%|██████████| 80/80 [00:02<00:00, 27.28it/s]
chunks:   0%|          | 1/389 [00:10<1:09:21, 10.72s/it][32m2023-10-08 11:19:35.276[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `A Short Guide to The Inner Citadel On Pierre Hadot’s Classic Analysis of Marcus Aurelius’ Meditations By Massimo Pigliucci   © Massimo Pigliucci, 2021   A Short Guide to The Inner Citadel — On Pierre Hadot’s Classic Analysis of Marcus Aurelius’ Meditations`[0m
chunks:   1%|          | 2/389 [01:17<4:41:40, 43.67s/it][32m2023-10-08 11:20:42.010[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `By Massimo Pigliucci, K.D. Irani Professor of Philosophy, the City College of New York    Stoa Nova Publications   Cover: Pierre Hadot, Wikipedia   If you like this free booklet, please consider supporting my writings at Patreon or Medium figsinwinter.blog`[0m
chunks:   1%|          | 3/389 [02:17<5:27

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  17%|█▋        | 68/389 [50:42<3:58:49, 44.64s/it][32m2023-10-08 12:10:07.428[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. Rather, I wrote them down at random, in the order in which each matter presented itself to me.”`[0m
chunks:  18%|█▊        | 69/389 [51:02<3:23:40, 38.19s/it][32m2023-10-08 12:10:26.649[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `In the following century it was the turn of the Latin author Aulus Gellius to write his hypomnemata, which became known with the title of Attic Nights`[0m
chunks:  18%|█▊        | 70/389 [51:32<3:12:15, 36.16s/it][32m2023-10-08 12:10:57.221[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. In the preface he explains: “Whether I was reading a Greek or Latin book, or whether I had heard someone say something worthy of being remembered, I jotted d

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  22%|██▏       | 87/389 [1:04:54<4:53:27, 58.30s/it][32m2023-10-08 12:24:19.398[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `Hadot interestingly connects these rules to three distinct domains of reality (respectively: our faculty of judgment, universal nature, and human nature), as well as to three types of activity (respectively: judgment, desire, and impulse to action)`[0m
chunks:  23%|██▎       | 88/389 [1:05:54<4:53:53, 58.58s/it][32m2023-10-08 12:25:18.630[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. So, for instance, consenting to “fate” is logically connected to the Stoic understanding of universal nature, since the cosmos is a material ensemble governed by relations of cause and effect. This in turn is linked to desire, because such realization ought to affect what is proper or not proper for us to desire`[0m
chunks:  23%|██▎       | 89/389 [

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice c

chunks:  24%|██▍       | 94/389 [1:11:37<4:45:24, 58.05s/it][32m2023-10-08 12:31:02.428[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `’ll be lovers, misers, others who lust after consulate or kingship`[0m
chunks:  24%|██▍       | 95/389 [1:11:55<3:44:35, 45.83s/it][32m2023-10-08 12:31:19.758[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `.” (Meditations, IV.32)`[0m
chunks:  25%|██▍       | 96/389 [1:12:08<2:56:24, 36.12s/it][32m2023-10-08 12:31:33.222[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `The point here is that there is nothing new under the Sun. Not in the sense that the specifics aren’t different, of course they are. But human nature has remained pretty much the same. We still go after the same things and recoil from the same others`[0m
chunks:  25%|██▍       | 97/389 [1:12:58<3:16:20, 40.34

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  26%|██▋       | 103/389 [1:17:57<4:12:15, 52.92s/it][32m2023-10-08 12:37:21.648[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. I have to constantly reformulate ideas, come up with new metaphors and analogies. All of which helps me to internalize Stoic wisdom and, hopefully, live a better life.`[0m
chunks:  27%|██▋       | 104/389 [1:18:36<3:52:04, 48.86s/it][32m2023-10-08 12:38:01.025[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `4-The philosopher-slave and the emperor-philosopher       How many men - like Chrysippus, like Socrates, like Epictetus - has Eternity swallowed up! (Meditations, VII.19.2)`[0m
chunks:  27%|██▋       | 105/389 [1:19:25<3:51:33, 48.92s/it][32m2023-10-08 12:38:50.095[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `Eternity may have swallowed these men up, as Marcus Aure

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice c

chunks:  28%|██▊       | 107/389 [1:21:36<4:43:21, 60.29s/it][32m2023-10-08 12:41:00.937[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `Epictetus is hardly known these days, except for the very recent resurgence of Stoicism. But he was one of the most appreciated philosophers of antiquity up until the 19th century, and in his own time he was the great philosopher. He influenced early and later Christian thought, from Origen to Thomas Aquinas`[0m
chunks:  28%|██▊       | 108/389 [1:23:03<5:20:34, 68.45s/it][32m2023-10-08 12:42:28.428[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. As is well known, he began his life as a slave in Hierapolis (modern day Pamukkale, Turkey), was acquired by Nero’s secretary, Epaphroditus, and brought to Rome`[0m
chunks:  28%|██▊       | 109/389 [1:23:40<4:34:15, 58.77s/it][32m2023-10-08 12:43:04.607[0m | [34m[1mDEBUG   [0m | [3

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  31%|███       | 120/389 [1:33:15<4:06:21, 54.95s/it][32m2023-10-08 12:52:40.398[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. Then follows a rule that needs to be applied in specific cases, here the notion that if we imagine that we have been injured by an external, we are, in fact, mistaken (which also agrees with another dogma, that the only things that can truly injure us are our own bad judgments, since only those are under our control).`[0m


No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice c

chunks:  31%|███       | 121/389 [1:34:40<4:42:43, 63.30s/it][32m2023-10-08 12:54:05.165[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `The most crucial bit in chapter 4 of The Inner Citadel arrives when Hadot explains how Epictetus’ famous three disciplines (desire and aversion, action, and assent) are the key to reading the entire Meditations`[0m
chunks:  31%|███▏      | 122/389 [1:35:31<4:25:41, 59.71s/it][32m2023-10-08 12:54:55.893[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. We will talk about these in detail when we’ll get to chapters 6, 7, and 8, which are dedicated to each of the disciplines in turn. For now it is worth noting that the three disciplines are not found in any other Stoic writing, and appear therefore to be one of Epictetus’ original contributions to the philosophy`[0m
chunks:  32%|███▏      | 123/389 [1:36:35<4:30:27, 61.01s/it][32m2023

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  39%|███▉      | 152/389 [2:01:47<4:17:32, 65.20s/it][32m2023-10-08 13:21:11.864[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“It is quite remarkable that Epictetus is representing the moral life as a dialectical exercise, in which we engage in a dialogue with events, as they ask us questions. [quoting Epictetus:] ‘His ship sank.’ ‘What happened?’ ‘His ship sank.’ ‘He was sent to prison`[0m


No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  39%|███▉      | 153/389 [2:03:07<4:33:38, 69.57s/it][32m2023-10-08 13:22:31.636[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `.’ But if you add the proposition ‘a terrible thing happened to him,’ then that is coming from you.” (p. 85)`[0m
chunks:  40%|███▉      | 154/389 [2:03:31<3:39:26, 56.03s/it][32m2023-10-08 13:22:56.063[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `At this point Hadot introduces the classical distinction among the three topoi that Epictetus uses as foundational for his philosophy: desire and aversion, action, and assent`[0m
chunks:  40%|███▉      | 155/389 [2:04:14<3:22:42, 51.98s/it][32m2023-10-08 13:23:38.594[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. The first one has to do with developing the right desires (and aversions), that is with training ourselves to des

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  41%|████      | 159/389 [2:08:00<3:34:28, 55.95s/it][32m2023-10-08 13:27:25.458[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. From there it’s but a small step to directly connect the fields of studies and the disciplines, in this way, which has become standard in modern Stoicism after Hadot:`[0m
chunks:  41%|████      | 160/389 [2:08:45<3:20:05, 52.43s/it][32m2023-10-08 13:28:09.671[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `Physics <> Desire / Aversion Ethics <> Action Logic <> Assent`[0m
chunks:  41%|████▏     | 161/389 [2:09:01<2:38:36, 41.74s/it][32m2023-10-08 13:28:26.477[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `The idea is that an understanding of how the world works (physics) informs us about what is proper for us to desire (things under our control) or not (things not under 

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  42%|████▏     | 165/389 [2:12:43<3:23:18, 54.46s/it][32m2023-10-08 13:32:08.205[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. This point is controversial among modern Stoics -- particularly because Epictetus is so clear about assent coming last. But in reading Hadot, I’m inclined to agree with him: teaching philosophy is one thing, living it is another`[0m
chunks:  43%|████▎     | 166/389 [2:13:37<3:21:59, 54.35s/it][32m2023-10-08 13:33:02.301[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. While it is true that deploying logic to secure our assent only to propositions that are good for us to assent to is the most difficult and advanced task for the student of Stoicism, it’s not like we can live our lives for years before starting to use reason to assess impressions. Imperfectly, but we need to do it from the get go.`[0m
chunks:  43%|████▎     | 167/3

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  43%|████▎     | 168/389 [2:15:57<3:49:54, 62.42s/it][32m2023-10-08 13:35:22.170[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. All in the pursuit of a eudaimonic life, a life worth living:`[0m
chunks:  43%|████▎     | 169/389 [2:16:12<2:57:01, 48.28s/it][32m2023-10-08 13:35:37.467[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“The doctrine of the three exercises-themes, disciplines, or rules of life thus contains within itself the whole essence of Stoicism, recapitulated in a grandiose way.” (p. 100)`[0m
chunks:  44%|████▎     | 170/389 [2:16:56<2:50:52, 46.82s/it][32m2023-10-08 13:36:20.863[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `6-The discipline of assent`[0m
chunks:  44%|████▍     | 171/389 [2:17:07<2:11:20, 36.15s/it][32m2023-10-08 13:36:32.127[0m | [34m[1mDEBUG   [0m | [36m

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  46%|████▌     | 177/389 [2:24:15<3:59:08, 67.68s/it][32m2023-10-08 13:43:40.473[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `The important bit here is that these images in our mind are accompanied by an inner discourse, or a pre-judgment. Like: chocolate cake (from sensation) + “chocolate cake is good!” (inner discourse) = pre-reflective desire for chocolate cake (representation)`[0m


No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  46%|████▌     | 178/389 [2:25:29<4:04:32, 69.54s/it][32m2023-10-08 13:44:54.348[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. The notion, then, is that we can give or withhold assent to these representations, essentially by confirming or challenging the pre-reflective inner discourse. Like this: “no, chocolate cake is not good, because I’m diabetic.”`[0m
chunks:  46%|████▌     | 179/389 [2:26:22<3:45:21, 64.39s/it][32m2023-10-08 13:45:46.711[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `That’s why Marcus often reminds himself of the difference between the row image (which is emotionally neutral) and the judgment (which is not):`[0m
chunks:  46%|████▋     | 180/389 [2:26:47<3:03:09, 52.58s/it][32m2023-10-08 13:46:11.747[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“Don’t tell yourself anyth

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  48%|████▊     | 186/389 [2:32:44<3:31:36, 62.54s/it][32m2023-10-08 13:52:08.727[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. Another way to say this is that for the Stoics our freedom is circumscribed to our freedom of thought. And nothing else, because everything else does not depend entirely on us. Accordingly, Epictetus famously says:`[0m
chunks:  48%|████▊     | 187/389 [2:33:39<3:22:49, 60.25s/it][32m2023-10-08 13:53:03.615[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“What troubles people is not things, but their judgments about things.” (Enchiridion 5)   Even modern critics of Stoicism often make the mistake to think that the Stoics artificially separated emotions and reason. Nope, that was Plato’s mistake, as Hadot makes very clear:`[0m


No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  48%|████▊     | 188/389 [2:34:35<3:17:39, 59.00s/it][32m2023-10-08 13:53:59.720[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“[For the Stoics] there is no opposition, as the Platonists had held, between one part of the soul which is rational and good in and of itself, and another part which is irrational and bad. Rather, it is reason -- and the ego itself -- which becomes either good or bad, as a function of the judgments which it forms about things.” (p`[0m


No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  49%|████▊     | 189/389 [2:35:55<3:38:02, 65.41s/it][32m2023-10-08 13:55:20.086[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. 109)`[0m
chunks:  49%|████▉     | 191/389 [2:37:02<2:49:29, 51.36s/it][32m2023-10-08 13:56:27.035[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. All we are saying here is that there is a dynamic set of mental executive processes that make decisions for the organism. The existence of such set is a scientific fact, not a metaphysically dubious construct.`[0m
chunks:  49%|████▉     | 192/389 [2:37:57<2:52:39, 52.59s/it][32m2023-10-08 13:57:22.485[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `What is the point of practicing the discipline of assent? Hadot explains:   “Thanks to the discipline of assent, the transformation of our consciousness of the world brings about a

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice c

chunks:  51%|█████     | 197/389 [2:43:20<3:28:58, 65.30s/it][32m2023-10-08 14:02:44.564[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `IV. The course of events. The universe, for the Stoics (and for modern science), unfolds by way of a complex web of cause and effect, of which we are a part, but a tiny and rather uninfluential part. It then makes sense to focus on those parts we can actually influence and let the rest be`[0m


No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  51%|█████     | 198/389 [2:44:34<3:36:14, 67.93s/it][32m2023-10-08 14:03:58.611[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. Again, we don’t really have a choice, so what’s the point of complaining about it?`[0m
chunks:  51%|█████     | 199/389 [2:44:49<2:45:03, 52.13s/it][32m2023-10-08 14:04:13.867[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `The last bit is where a famous passage from Epictetus, which has inspired the title of this site, comes into play:`[0m
chunks:  51%|█████▏    | 200/389 [2:45:12<2:17:09, 43.54s/it][32m2023-10-08 14:04:37.375[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“Remember that what you love is mortal, and that nothing of what you love belongs to you in the proper sense of the term. It has been given to you for the time being, not forever or in such a way tha

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  54%|█████▍    | 210/389 [2:53:34<2:41:43, 54.21s/it][32m2023-10-08 14:12:59.533[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `., automatic, instinctive reactions to events, then they are what they are, and they are not going to change. But the focus here is on the “passions,” in Stoic lingo, i.e., on the fully formed emotions, which have a cognitive component, as confirmed by modern psychological research`[0m
chunks:  54%|█████▍    | 211/389 [2:54:51<3:00:26, 60.82s/it][32m2023-10-08 14:14:15.779[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. And if they have a cognitive component, then we can change them by altering that component. It is the same principle as cognitive behavioral therapy: change the way you think and that will change (over time, with repetition and effort) the way you feel.`[0m
chunks:  54%|█████▍    | 212/389 [2:55:50<2:58:16, 60.43s

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  57%|█████▋    | 220/389 [3:02:55<2:44:29, 58.40s/it][32m2023-10-08 14:22:20.044[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. The mistake most of us normally make is to desire or be averse to things we don’t actually control (Yes, a new car! No, a disease!). Marcus puts this squarely in the context of Stoic determinism:`[0m
chunks:  57%|█████▋    | 221/389 [3:03:50<2:40:16, 57.24s/it][32m2023-10-08 14:23:14.589[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“So something has happened to you? Good! Every event that you encounter has been linked to you by Destiny, and has, since the beginning, been woven together with you from the All.” (Meditations, IV.26)`[0m
chunks:  57%|█████▋    | 222/389 [3:05:00<2:50:03, 61.10s/it][32m2023-10-08 14:24:24.687[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence 

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  58%|█████▊    | 224/389 [3:07:25<3:04:31, 67.10s/it][32m2023-10-08 14:26:50.082[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. If we understand that we live in a deterministic cosmos regulated by universal laws, then we also understand what the proper attitude should be toward events. That’s also why Epictetus says:`[0m
chunks:  58%|█████▊    | 225/389 [3:08:09<2:44:33, 60.20s/it][32m2023-10-08 14:27:34.184[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“Do not seek for things to happen the way you want them to; rather, wish that what happens happen the way it happens: then you will be happy.” (Enchiridion, 8)`[0m
chunks:  58%|█████▊    | 226/389 [3:08:43<2:21:49, 52.20s/it][32m2023-10-08 14:28:07.723[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `You can see why this sounds very much like Nie

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  60%|█████▉    | 232/389 [3:14:34<2:27:52, 56.51s/it][32m2023-10-08 14:33:58.617[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [34m[1mSkipping text without words or numbers `.`[0m
[32m2023-10-08 14:33:58.618[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `This discussion is closely related to the famous “gods or atoms” moments that recur in the Meditations, where Marcus — who is definitely on board with Stoic metaphysics (i.e., with the “gods” option) — does nonetheless entertain the possibility that the Epicureans are correct (i.e`[0m
chunks:  60%|██████    | 234/389 [3:15:42<1:59:18, 46.18s/it][32m2023-10-08 14:35:06.886[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `., the “atoms” alternative), and concludes that ultimately he still has to behave properly (i.e., virtuously) toward other human beings. I will not go int

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  62%|██████▏   | 241/389 [3:22:35<2:35:33, 63.07s/it][32m2023-10-08 14:42:00.378[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“Imagine them as they are when they are eating, when they are sleeping, when they are making love, or going to the bathroom. Then imagine them when they are putting on airs; when they make those haughty gestures, or when they get angry and upbraid people with such a superior air.” (Meditations, IX.9)`[0m
chunks:  62%|██████▏   | 242/389 [3:23:40<2:35:55, 63.64s/it][32m2023-10-08 14:43:05.388[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `In reality, here and in several other passages, Marcus is simply deploying the standard Stoic technique of adopting a broader, more neutral perspective, forcing himself to redescribe things in a more objective, less emotional way`[0m
chunks:  62%|██████▏   | 243/389 [3:24:30<2:24:29, 59.38s/it][3

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  64%|██████▍   | 250/389 [3:29:59<2:02:45, 52.99s/it][32m2023-10-08 14:49:24.119[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `It is the trained mind of the philosopher`[0m
chunks:  65%|██████▍   | 251/389 [3:30:11<1:33:17, 40.56s/it][32m2023-10-08 14:49:35.650[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `(here, of course, in the broad sense of somehow who studies and practices philosophy, not in the technical sense of a professional academic philosopher) that can accept without surprise or complaint the mud on the road he travels, and at the same time appreciate the unexpected beauty of baked bread`[0m
chunks:  65%|██████▍   | 252/389 [3:31:13<1:47:38, 47.14s/it][32m2023-10-08 14:50:38.166[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [34m[1mSkipping text without words or numbers `.`[0m
[32m2023-10-08 14:50:38.166

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  71%|███████▏  | 278/389 [3:50:34<1:57:36, 63.57s/it][32m2023-10-08 15:09:59.423[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `Following a pretty tight line of reasoning, discussion of the fact that intentions to act are under our control leads Hadot to focus on the complementary fact that the outcomes of such actions, by contrast, are not under our (complete) control. Which is why Stoics are supposed to begin anything they do with a “reserve clause`[0m
chunks:  72%|███████▏  | 279/389 [3:51:47<2:01:38, 66.35s/it][32m2023-10-08 15:11:12.263[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `.” As Seneca says:`[0m
chunks:  72%|███████▏  | 280/389 [3:51:56<1:29:21, 49.18s/it][32m2023-10-08 15:11:21.392[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“I want to do thus and so, as long as nothing happens

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  73%|███████▎  | 285/389 [3:57:22<1:48:30, 62.60s/it][32m2023-10-08 15:16:46.606[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. That which impeded action thus becomes profitable to action, and that which blocked the road allows me to advance along the road.” (V.20.2)`[0m
chunks:  74%|███████▎  | 286/389 [3:57:57<1:33:37, 54.53s/it][32m2023-10-08 15:17:22.328[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `One of the fundamental attitudes that is “appropriate” to a human being, and therefore falls into our duties, is benevolence. Seneca wrote a whole book entitled On Benefits, where he says that the benefactor should not consider the person that he is helping as somehow in debt to him. Marcus agrees:`[0m
chunks:  74%|███████▍  | 287/389 [3:59:03<1:38:10, 57.75s/it][32m2023-10-08 15:18:27.572[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  76%|███████▌  | 294/389 [4:05:14<1:25:29, 54.00s/it][32m2023-10-08 15:24:38.749[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. (Interestingly, the Greek koinonikon is the same word used by early Christians to refer to the chanting during the sacrament of communion.)`[0m
chunks:  76%|███████▌  | 295/389 [4:05:47<1:14:45, 47.72s/it][32m2023-10-08 15:25:11.822[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“For rational animals, action in conformity with nature is at the same time in conformity with reason.” (VII.11)   And:`[0m
chunks:  76%|███████▌  | 296/389 [4:06:20<1:07:04, 43.28s/it][32m2023-10-08 15:25:44.730[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“My City and my Fatherland, insofar as I am an Antonine, is Rome. My City and my Fatherland, insofar as I am a man, is the world. Everyth

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice c

chunks:  77%|███████▋  | 301/389 [4:10:54<1:17:50, 53.07s/it][32m2023-10-08 15:30:19.170[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. We have already discussed the first two points, but what things have value, according to the Stoics?`[0m
chunks:  78%|███████▊  | 302/389 [4:11:15<1:02:44, 43.27s/it][32m2023-10-08 15:30:39.579[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `Broadly speaking, there are three categories of things that have the sort of value that makes a given action “appropriate” (or, to put it otherwise, a duty):`[0m
chunks:  78%|███████▊  | 303/389 [4:11:45<56:37, 39.51s/it]  [32m2023-10-08 15:31:10.309[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `(a) Things that are an integral part of living according to nature, and are therefore inherently virtuous. These include exercises of self

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice c

chunks:  84%|████████▍ | 327/389 [4:32:32<1:03:14, 61.21s/it][32m2023-10-08 15:51:57.162[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `9-Marcus Aurelius — the man himself`[0m
chunks:  84%|████████▍ | 328/389 [4:32:47<48:11, 47.40s/it]  [32m2023-10-08 15:52:12.353[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `We are now at the end of Pierre Hadot’s The Inner Citadel: The Meditations of Marcus Aurelius. It’s a long and difficult book, but it’s a crucial entry in the modern Stoic literature, which is why I spent so much time — and really put to the test my readers’ patience, I'm afraid — with this series`[0m


No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  85%|████████▍ | 329/389 [4:34:13<58:59, 58.99s/it][32m2023-10-08 15:53:38.365[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. In this last post I will skip the short chapter 9, on “Virtue and Joy,” and focus on selected passages of the very long chapter 10, “Marcus Aurelius in his Meditations,” where Hadot does his best to glean the character of the man behind the philosophy`[0m
chunks:  85%|████████▍ | 330/389 [4:35:09<56:58, 57.94s/it][32m2023-10-08 15:54:33.873[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. However, this isn’t just a biographical chapter, as Marcus’ character, life, and philosophy are deeply intertwined. Which means we are just as likely to learn about the man from his philosophy and life as we are about the philosophy by looking at how this extraordinary man attempted to put it into practice throughout his life.`[0m
chunks:  85%|██

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice c

chunks:  86%|████████▌ | 333/389 [4:38:37<1:03:03, 67.56s/it][32m2023-10-08 15:58:01.963[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `After a detour on the question of whether Marcus was an opium addict (not likely), and another one on Marcus stylistic elegance (he was a very good writer), Hadot attempts to derive some chronological signposts from the Meditations, a book that comes across as rather atemporal`[0m
chunks:  86%|████████▌ | 334/389 [4:39:33<58:46, 64.12s/it]  [32m2023-10-08 15:58:58.075[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `(which is probably one of the reasons it keeps being fascinating almost two millennia after it was written)`[0m
chunks:  86%|████████▌ | 335/389 [4:39:52<45:39, 50.73s/it][32m2023-10-08 15:59:17.543[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. We know that 

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.


chunks:  94%|█████████▍| 366/389 [5:06:32<32:35, 85.03s/it][32m2023-10-08 16:25:56.571[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `. All these men have in common the fact that they put their lives on the line to fight against tyranny and for what they regarded as liberty (albeit usually limited to the male dominant class). It is highly indicative that Marcus mentions them with admiration. Accordingly, Marcus articulates his own ideal for how to run the Roman state:`[0m
chunks:  94%|█████████▍| 367/389 [5:07:51<30:34, 83.37s/it][32m2023-10-08 16:27:16.057[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `“A State in which the laws are equal for all, administered on the basis of equality and freedom of speech, and of a monarchy that respects the freedom of its subjects above all else.” (I.14.2)`[0m
chunks:  95%|█████████▍| 368/389 [5:08:49<26:30, 75.75s/it][32m2023

No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice clips. This typically means the spoken audio is too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, try breaking up your input text.
No stop tokens found in one of the generated voice c

chunks:  99%|█████████▉| 387/389 [5:24:02<01:43, 51.68s/it][32m2023-10-08 16:43:26.672[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `8-The discipline of action, in the service of humanity 	9-Marcus Aurelius — the man himself 	About the Author`[0m
chunks: 100%|█████████▉| 388/389 [5:24:27<00:43, 43.75s/it][32m2023-10-08 16:43:51.929[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [34m[1mcurrent sentence `function Body_onLoad() { }`[0m
chunks: 100%|██████████| 389/389 [5:24:37<00:00, 50.07s/it]


AttributeError: 'list' object has no attribute 'cpu'

In [None]:
# # Test (disabled): concatenate the generated waveforms and write them out as one chapter

# len_wav = sum([w.shape[-1] for w in waveforms])
# print(len_wav)

# wavs = torch.concat(waveforms, dim=-1).cpu().squeeze(0)
# writer.write_chapter(wavs)


In [22]:
def join_folder(folder: Path):
    """Concatenate every ``*.ogg`` file in ``folder`` into a single audio file.

    Files are joined in sorted (lexicographic) filename order along dim 1,
    and the result is saved next to the folder as ``<folder>.ogg``. The
    merged file is written with the sample rate read from the source files
    rather than a hard-coded value, so the audio keeps its original speed.

    Args:
        folder: Directory holding the ``.ogg`` files to merge.

    Returns:
        The path of the merged file, as a string.

    Raises:
        ValueError: If ``folder`` contains no ``.ogg`` files, or if the files
            do not all share the same sample rate.
    """
    files = sorted(folder.glob('*.ogg'))
    if not files:
        # Fail early with a readable message instead of an opaque error
        # from concatenating an empty list of tensors.
        raise ValueError(f"no .ogg files found in {folder}")

    tensors = []
    sample_rates = set()
    for audio_file in files:
        # torchaudio.load returns (waveform, sample_rate); keep both.
        waveform, rate = torchaudio.load(audio_file)
        tensors.append(waveform)
        sample_rates.add(rate)

    if len(sample_rates) != 1:
        # Joining clips recorded at different rates would distort some of them.
        raise ValueError(
            f"cannot join files with different sample rates: {sorted(sample_rates)}"
        )
    sample_rate = sample_rates.pop()

    tensor = torch.concat(tensors, 1)
    f = str(folder) + '.ogg'
    torchaudio.save(f, tensor, sample_rate)
    return f
    
# Merge the .ogg files found in `out_dir` into one file and report where it was saved.
# `out_dir` is expected to be defined by an earlier cell.
f = join_folder(out_dir)
print(f"saved final file to {f}")


'/media/wassname/SGIronWolf/projects5/tts-ai/use-tts-mjc/out/a_short_guide_to_the_inner_citadel_-_massimo_pigliucci20231008_03-19-11.ogg'