In [None]:
from fastai import *        # Quick access to most common functionality
from fastai.text import *   # Quick access to NLP functionality

In [None]:
# `untar_data` hands back the local folder of the IMDB sample (see the PosixPath displayed below);
# presumably it downloads/extracts the archive on first use -- confirm in the fastai docs.
path = untar_data(URLs.IMDB_SAMPLE)
path

PosixPath('/home/ubuntu/notebooks/data/imdb_sample')

In [None]:
def open_text(fn:PathOrStr, enc:str='utf-8'):
    "Return the whole content of the text file `fn`, decoded with `enc`, as one string."
    # An explicit encoding keeps the result independent of the platform's locale default.
    with open(fn, 'r', encoding=enc) as f: return f.read()

In [None]:
class Category(ItemBase):
    "One label: `data` stores the class index and `cat` the class value it stands for."
    def __init__(self, idx, cat):
        self.data = idx
        self.cat = cat

    def __str__(self):
        "Show the class value rather than its index."
        return str(self.cat)

class CategoryList(ItemList):
    "Item list of labels; `class2idx` maps every entry of `classes` to its position in `classes`."
    def __init__(self, items:Iterator, classes:Collection=None):
        super().__init__(items)
        # Without explicit `classes`, they are derived from the items via `uniqueify`.
        self.classes = uniqueify(items) if classes is None else classes
        self.class2idx = {label: pos for pos, label in enumerate(self.classes)}

    def new(self, items):
        "Build a list of the same class around `items`, sharing this list's `classes`."
        cls = self.__class__
        return cls(items, self.classes)

    def get(self, o):
        "Wrap the raw label `o` in a `Category` carrying its class index."
        idx = self.class2idx[o]
        return Category(idx, o)

In [None]:
class MultiCategory(Category):
    "A `Category` whose `cat` is a collection of values, displayed joined by ';'."
    def __str__(self):
        return ';'.join(str(c) for c in self.cat)

class MultiCategoryList(CategoryList):
    "A `CategoryList` in which every item is a collection of labels."
    def __init__(self, items:Iterator, classes:Collection=None, sep=None):
        if sep is not None:
            # Each raw item is parsed by `csv.reader` with `sep` as the delimiter.
            parsed = csv.reader(items, delimiter=sep)
            items = array(list(parsed))
        if classes is None:
            # Flatten all per-item label collections before removing duplicates.
            all_labels = np.concatenate(items)
            classes = uniqueify(all_labels)
        super().__init__(items, classes)

    def get(self, o):
        "Wrap the labels `o` in a `MultiCategory` holding the index of each of them."
        idxs = [self.class2idx[label] for label in o]
        return MultiCategory(idxs, o)

In [None]:
def _treat_html(o:str)->str:
    return o.replace('\n','\\n')

def _text2html_table(items:Collection[Collection[str]], widths:Collection[int])->str:
    html_code = f"<table>"
    for w in widths: html_code += f"  <col width='{w}%'>"
    for line in items:
        html_code += "  <tr>\n"
        html_code += "\n".join([f"    <th>{_treat_html(o)}</th>" for o in line if len(o) >= 1])
        html_code += "\n  </tr>\n"
    return html_code + "</table>\n"

In [None]:
class Text(ItemBase):
    "A text item: `data` holds the ids and `text` the matching string."
    def __init__(self, ids, text):
        self.data = ids
        self.text = text

    def __str__(self):
        return str(self.text)

    def show_batch(self, idxs:Collection[int], rows:int, ds:Dataset, figsize:Tuple[int,int]=(9,10))->None:
        "Display the first `rows` entries of `idxs`, looked up in `ds`, as an HTML table of text/label pairs."
        # NOTE: `figsize` is accepted but not used by this implementation.
        from IPython.display import display, HTML
        header = ['text', 'label']
        body = [[str(x), str(y)] for x, y in (ds[i] for i in idxs[:rows])]
        table = _text2html_table([header] + body, [90,10])
        display(HTML(table))

class NumericalizedTextList(ItemList):
    "Item list of texts stored as ids, together with the `vocab` used to turn them back into text."
    def __init__(self, items:Iterator, vocab:Vocab=None, create_func:Callable=None, path:PathOrStr='.'):
        super().__init__(items, create_func, path)
        self.vocab = vocab

    def new(self, items):
        "Build a list of the same class around `items`, sharing this list's `vocab`."
        cls = self.__class__
        return cls(items, self.vocab)

    def get(self, o):
        "Wrap the ids `o` in a `Text`, along with the string `vocab.textify` produces for them."
        decoded = self.vocab.textify(o)
        return Text(o, decoded)
    
class TokenizedTextList(NumericalizedTextList):
    "A text list whose items are turned into ids by `preprocess`."

    def preprocess(self, vocab:Vocab=None, max_vocab:int=60000, min_freq:int=2):
        "Numericalize the items with `vocab`; when it is None, build one with `Vocab.create(items, max_vocab, min_freq)`."
        # Only build a vocab when none was supplied: arguments are evaluated before a call, so
        # `ifnone(vocab, Vocab.create(...))` created a full vocab on every call and then
        # discarded it whenever `vocab` was given.
        if vocab is None: vocab = Vocab.create(self.items, max_vocab, min_freq)
        self.vocab = vocab
        # Presumably reused by the caller to preprocess other lists with the same vocab -- confirm.
        self.preprocess_kwargs = {'vocab': self.vocab}
        self.items = np.array([self.vocab.numericalize(t) for t in self.items])
    
class TextList(TokenizedTextList):
    "A text list of raw strings that `preprocess` tokenizes and then hands to the parent `preprocess`."
    def preprocess(self, tokenizer:Tokenizer=None, chunksize:int=10000, vocab:Vocab=None,
                 max_vocab:int=60000, min_freq:int=2):
        "Tokenize the items `chunksize` at a time (default `Tokenizer()` if none is given), then call the parent `preprocess`."
        tok = ifnone(tokenizer, Tokenizer())
        n_items = len(self.items)
        tokenized = []
        # Work through the items one slice at a time, with a progress bar over the slices.
        for start in progress_bar(range(0, n_items, chunksize), leave=False):
            chunk = self.items[start:start+chunksize]
            tokenized.extend(tok.process_all(chunk))
        self.items = tokenized
        super().preprocess(vocab, max_vocab, min_freq)
    
class TextFilesList(TextList):
    "A `TextList` built from file names: every file in `items` is read with `open_text` at construction."
    def __init__(self, items:Iterator, create_func:Callable=None, path:PathOrStr='.'):
        texts = [open_text(fn) for fn in items]
        # The parent signature is `(items, vocab, create_func, path)`: pass by keyword so that
        # `create_func` and `path` do not end up in the `vocab` and `create_func` slots.
        super().__init__(texts, create_func=create_func, path=path)

    def new(self, items):
        "Build a list of the same class around `items`, which are already loaded texts, sharing `vocab`."
        # The inherited `new` calls `self.__class__(items, self.vocab)`; with this class's
        # constructor that would pass the vocab as `create_func` and try to open every item
        # as a file. Skip this `__init__` and run the parent initializer directly instead.
        res = self.__class__.__new__(self.__class__)
        TextList.__init__(res, items, self.vocab)
        return res

In [None]:
# Build a TextList from the 'text' column of texts.csv under `path`.
# `from_csv` is not defined in this notebook (presumably inherited from ItemList -- confirm).
il = TextList.from_csv(path, 'texts.csv', create_func=None, col='text')

In [None]:
# Attach labels held in a CategoryList (defined above), taken from column 0;
# presumably of the same CSV the texts came from -- confirm against `label_from_df`.
ll = il.label_from_df(CategoryList, cols=0)

In [None]:
# Random split of the labelled list; the split percentage is the method's default, which is
# not visible here. NOTE(review): no seed is set in this notebook, so the split presumably
# differs from run to run -- confirm.
sd = ll.random_split_by_pct()

In [None]:
# Run preprocessing with default arguments and keep the returned object under the same name.
# For a TextList, `preprocess` (defined above) tokenizes and then numericalizes the items;
# how the split object dispatches to it is not visible here.
sd = sd.preprocess()

In [None]:
# Turn the preprocessed splits into the object used below for `show_batch`
# (presumably a fastai DataBunch, going by the method name -- confirm).
data = sd.databunch()

In [None]:
# Display a few samples; the output below lists text/label pairs.
data.show_batch()

text,label
"sam mraovich should never be allowed to touch a camera again . if he does he should be arrested on the spot ... at the very least for petty xxunk . anybody who pays even a dime to rent any of his garbage should file a claim and be xxunk . this was innocently my first viewing of his "" xxunk ... and it will my last . ed wood looks awfully good to me right now . \n\n when i return this piece of crap to the video store , i will personally ask that it be taken off the shelf . an active xxunk of gay cinema , i am xxunk and xxunk that this warped , xxunk xxunk man - child be allowed to xxunk and xxunk something like this , with a xxunk pretty boy on the cover ( jamie xxunk xxunk , who , thankfully , has no other acting credits in imdb ) and an interesting synopsis on the back used as bait , and then market it as a "" movie "" rental . trust me , this has no place being on any rack anywhere ; it is simply not a movie in any sense of the word . offensive , irresponsible junk such as this can only be detrimental to the efforts being made to promote and support gay cinema ( hell , gay rights in general ! ) for those xxunk to rent this out because of the cover , you xxup will be disappointed . xxunk is not as xxunk to look at on film as he is on the cover , and he appears once or twice without a shirt -- that 's it . instead , the xxunk xxunk xxunk on us his own disgusting , sorry - looking xxunk - boy xxunk . \n\n this "" thing "" he "" xxunk "" is a xxunk xxunk project for mraovich . both he and his friend michael xxunk ( who , i understand , puts out similar sleazy garbage ) are terrible in this . mraovich is xxunk posing as a complete no - talent ( in all fields ) , desperate to grab onto any "" loser "" attention he can for himself . he is to be xxunk .",negative
"director / writer michael winner 's feature is a better than expected xxunk supernatural horror film ( although still schlock xxunk xxunk for ) , which really does by go xxunk . sure it might borrow ideas from other similar xxunk horror movies of this period , but still manages to bring its own psychological xxunk to the xxunk material ( of good vs. evil ) and a unique vision that has a fair share of impressively xxunk , if somewhat xxunk set - pieces . as a whole it 's xxunk , however remains intriguing by xxunk an ominous charge without going gang - xxunk with the scares . actually there 's always something going on amongst its busy xxunk , but it 's rather down - played with its xxunk xxunk to xxunk patterns and atmospheric xxunk , up until its vividly xxunk and grisly climax with a downbeat revelation . winner 's dressed up xxunk might feel xxunk , however it 's the ensemble cast that really holds it together as you try to spot the faces . there 's plenty too . some having more to do with the scheme of things than others , but there 's no doubts every one of them are committed , despite the xxunk crude nature of it all . it 's interesting to see names like xxunk miles ( who 's significantly creepy ! ) , beverly xxunk ( likewise ) , xxunk xxunk , xxunk xxunk , christopher walken , william xxunk ( a neat xxunk ) , jeff goldblum , jerry orbach and tom xxunk in bit parts . then you got a mild - mannered chris xxunk and xxunk gorgeous cristina raines in the leads . offering able support xxunk xxunk , martin xxunk , xxunk xxunk , john carradine , xxunk xxunk and arthur kennedy . the script does throw around many characters , as well as notions but gets xxunk xxunk by trying to xxunk all of it in . however it 's xxunk air works in its favour in establishing the xxunk and xxunk of what 's really going on here . is there a reason for all of this , and why is it surrounding raines ' character ? 
the emphasis is mainly built upon that moody angle , as it begins to slowly shed light of her inner xxunk and that of the strange / worrying experiences she encounters when she 's moves into her new apartment . this is where winner tries to pull out the xxunk shades , which projects some xxunk moments . xxunk xxunk was the man responsible for the grand , xxunk xxunk score that never misses a xxunk and richard xxunk xxunk xxunk the sweeping , xxunk - like photography .",positive
"this movie is witty , watchable and utterly touching . and now often do you get to see jean harlow ( or any actress of this era , for that matter ) give another woman a swift punch in the xxunk ? ( twice ! ) \n\n after harlow 's ruby is sent to a xxunk after getting mixed up with gable 's edward hall ( he of that cheesy yet endearing crooked smile ) , her xxunk becomes all the more complicated when she discovers that she is pregnant , and she 's convinced that this xxunk has abandoned her , but in fact , her love has xxunk him and he comes to see her , despite the fact that he will be arrested , and from the help of a xxunk , are married . \n\n the wonderful relationship that harlow shares with her fellow xxunk is second only to her electric chemistry with gable , who was her most frequent leading man . her cynical character is a perfect match for gable 's smooth - talking xxunk . what 's not to like ? \n\n "" you know , you would n't be a bad looking dame - if it was n't for your face ! "" ruby xxunk remarks to gypsy , her xxunk . "" if you 're going to get that close to me , i 'll have to open the other window ! "" \n\n priceless ! ! !",positive
"i 've seen a movie that s sort of like this , were a xxunk drugs woman and he then picks there nose with a knife and xxunk there nose to xxunk . he then xxunk there tongue and eats it . \n\n the most gruesome part of the movie is were he cuts there left eye out and starts dancing with it . he then starts to eat the woman naked . \n\n ( i 'm not sure what the movies called but i know it 's a cult movie and that it was made in germany ) . \n\n anyway xxup the xxup nose xxup xxunk is fairly crap . \n\n its a crap movie and the picture and xxunk quality is very rubbish . \n\n please do n't waste you 're time buying and watching this movie its totally crap . \n\n i prefer xxup day of xxup the xxup woman also known as i xxup spit on xxup your xxup grave ( its one of the best cult movies ever ) check out this link xxunk : / / xxunk / title / xxunk /",negative
"i wo n't waste a whole lot of time of this one because as far as i 'm concerned it is n't really a movie to start with , just a xxunk xxunk - xxunk of borrowed footage and embarrassingly amateurish new footage made solely for the purpose of xxunk the whole mess together and call it a "" boogeyman "" sequel . literally 80 % of this film is stolen from its far superior predecessor "" the boogeyman "" , a film that the writers of this garbage apparently did n't even bother to watch because they could n't even get actress xxunk love 's original character 's name ( xxunk ) right . and to add insult to xxunk the killer is invisible in the original footage and xxunk in the new footage , apparently they think their audience is as stupid as they are . 0 out of 10 and i wish imdb 's rating system went that low , the most xxunk and xxunk attempt to rip off people 's money i 've even seen , xxup you xxup have xxup been xxup warned !",negative
"come on . the new twist is nearly ok , but from xxunk the elm street children freddy is just killing people now . more of the same : special effects with no actual character development or anything . simply bad and xxunk . xxup scary .. ? nope . not at all . just bad .",negative
"someone has already mentioned "" being at the right time at the right place "" it was so true for this documentary that i had doubts about the xxunk of the scenes and thought it included perhaps some acting but it is not . it is all real . the story is nothing new for the people of the developing and / or poor countries . it xxunk light on the xxunk of the people by xxunk media , the xxunk , the artificial xxunk of the people by deliberately creating tension on the streets , sometimes to the point that the army , intelligence xxunk or even the xxunk believe , led by the us ) uses agents who attack "" any "" side to provoke the masses into violence and therefore xxunk their coups . a xxunk officer in the film mentions this also . that they wanted to see the peoples confront on the streets . all of these scenarios have been played in turkey ( xxup usa 's pet dog in the middle east ) throughout its history who has experienced 3 coups and xxunk , secret plans made by the xxunk army have been exposed , ironically through a pro - government religious / conservative media xxunk the a - religious xxunk of the army , in which a very important xxunk is xxunk by an army agent to provoke the people etc . \n\n what makes this film unique is that they were filming from inside , perhaps by chance , when the events have happened . it is clear that the directors are pro chavez . whether or not this caused the directors to xxunk and xxunk the events and the information , i would not know . \n\n and whether chavez will be xxunk the people of xxunk against the xxunk of us and the global economy without xxunk any xxunk thought with force and in the end becoming a self - indulgent xxunk , history will tell . \n\n but at least chavez is hope and i believe it is worth taking the chance .",positive
"what can i say , this is a piece of brilliant film - making that should have won an oscar . a copy should be kept safe in a secure xxunk for xxunk . it should be required viewing for all high school students across the world . sam mraovich is a genius , perhaps the most genius writer / director / producer / xxunk / xxunk / xxunk xxunk to ever grace the cinema world with his art . \n\n where do i begin with this one ? every xxunk of ben and arthur was so completely breathtaking ! and mraovich as arthur , wow , he is so attractive i 'm surprised he did n't go for mr . universe . i could n't contain myself during the nude scene . i xxunk this movie to my brother and he called me on the phone saying how arthur 's nude scene turned him gay . i am totally supportive of course , because of this film and it 's beautifully crafted lessons in xxunk . why just yesterday i burned down a church and i wrote "" for sam and arthur "" in its xxunk xxunk . \n\n the cinematography was the best thing about this film . when that fed - ex plane took to the xxunk xxunk the palm trees of xxunk , i xxunk ! why , i never even knew they had palm trees in xxunk or that people could travel on fed - ex xxunk before this film . it opened my eyes to a new realm of possibilities . this film inspired me to xxunk in sam mraovich 's school of screen writing , acting , directing , xxunk , casting , producing , production design and real estate . i just want to say , "" thank you , mr . mraovich . thank you for bringing this creation into the world . we can never re - pay you enough . """,negative
