In [None]:
from fastai import *        # Quick access to most common functionality
from fastai.text import *   # Quick access to NLP functionality

In [None]:
# Fetch the IMDB sample with `untar_data` and display the returned path as the cell output.
path = untar_data(URLs.IMDB_SAMPLE)
path

PosixPath('/home/jhoward/.fastai/data/imdb_sample')

In [None]:
def open_text(fn:PathOrStr, enc:str='utf-8')->str:
    "Read the whole text file `fn`, decoded with `enc`, and return it as a single string."
    # An explicit encoding keeps the result independent of the platform's default locale;
    # the review texts contain non-ASCII characters.
    with open(fn, 'r', encoding=enc) as f: return f.read()

In [None]:
def _treat_html(o:str)->str:
    return o.replace('\n','\\n')

def _text2html_table(items:Collection[Collection[str]], widths:Collection[int])->str:
    "Render the rows of `items` as an HTML table whose column widths (in %) come from `widths`."
    parts = ["<table>"]
    parts.extend(f"  <col width='{pct}%'>" for pct in widths)
    for row in items:
        # Empty cells are left out of the row altogether.
        cells = [f"    <th>{_treat_html(cell)}</th>" for cell in row if len(cell) > 0]
        parts.append("  <tr>\n" + "\n".join(cells) + "\n  </tr>\n")
    parts.append("</table>\n")
    return "".join(parts)

In [None]:
class Text(ItemBase):
    "A text item: `data` holds the ids it was built from and `text` the value returned by `str`."
    def __init__(self, ids, text):
        self.data = ids
        self.text = text

    def __str__(self): return str(self.text)

    def show_batch(self, idxs:Collection[int], rows:int, ds:Dataset, figsize:Tuple[int,int]=(9,10))->None:
        "Display the items of `ds` at the first `rows` indices of `idxs` as an HTML text/label table."
        from IPython.display import clear_output, display, HTML
        # Header row first, then one [text, label] row per selected item; `figsize` is not used here.
        header = ['text', 'label']
        body = [[str(x), str(y)] for x,y in (ds[i] for i in idxs[:rows])]
        display(HTML(_text2html_table([header] + body, [90,10])))

class NumericalizedTextList(ItemList):
    "An `ItemList` that carries a `vocab` and hands its items back wrapped in `Text`."
    def __init__(self, items:Iterator, vocab:Vocab=None, **kwargs):
        super().__init__(items, **kwargs)
        self.vocab = vocab

    def new(self, items:Iterator, **kwargs)->'NumericalizedTextList':
        "Delegate to the parent `new`, forwarding this list's `vocab` together with `kwargs`."
        vocab = self.vocab
        return super().new(items=items, vocab=vocab, **kwargs)

    def get(self, i):
        "Return item `i` as a `Text` holding the raw item and its `vocab.textify` rendering."
        ids = super().get(i)
        text = self.vocab.textify(ids)
        return Text(ids, text)
    
class TextList(NumericalizedTextList):
    "A `NumericalizedTextList` that falls back to a default `TextProcessor`."
    def __post_init__(self):
        "Install a fresh `TextProcessor` when no processor has been set."
        if self.processor is not None: return
        self.processor = TextProcessor()
        
class TextFilesList(TextList):
    "A `TextList` whose items are the contents of the files named in `items`."
    def __init__(self, items:Iterator, create_func:Callable=None, path:PathOrStr='.'):
        # Read every file up front: the texts themselves, not the filenames, are handed to the parent.
        texts = [open_text(fn) for fn in items]
        # `create_func` and `path` go by keyword: the parent `__init__` only takes `items` and
        # `vocab` positionally, so positional passing would bind `create_func` to `vocab` and
        # fail with a TypeError on the extra argument.
        # NOTE(review): relies on `ItemList.__init__` accepting `create_func` and `path` keywords — confirm.
        super().__init__(texts, create_func=create_func, path=path)

In [None]:
class TextProcessor(PreProcessor):
    "Tokenize the items of a dataset in chunks, then numericalize them with a `Vocab`."
    def __init__(self, tokenizer:Tokenizer=None, chunksize:int=10000,
                 vocab:Vocab=None, max_vocab:int=60000, min_freq:int=2):
        self.chunksize = chunksize
        self.max_vocab = max_vocab
        self.min_freq = min_freq
        self.tokenizer = ifnone(tokenizer, Tokenizer())
        self.vocab = vocab

    def process(self, ds):
        "Replace `ds.items` with numericalized tokens and attach the vocab used as `ds.vocab`."
        tokenized = []
        # Tokenize `chunksize` items at a time, with a progress bar over the chunks.
        for start in progress_bar(range(0, len(ds), self.chunksize), leave=False):
            chunk = ds.items[start:start+self.chunksize]
            tokenized.extend(self.tokenizer.process_all(chunk))
        ds.items = tokenized
        # A vocab is only created when this processor does not already hold one.
        if self.vocab is None:
            self.vocab = Vocab.create(ds.items, self.max_vocab, self.min_freq)
        ds.vocab = self.vocab
        ds.items = np.array([self.vocab.numericalize(toks) for toks in ds.items])

In [None]:
# Build the labelled lists from `texts.csv`: take the `text` column, split it randomly into
# train and validation parts, then attach labels from column 0 of the dataframe.
# NOTE(review): no seed is passed to `random_split_by_pct()`, so the split presumably differs
# between runs — confirm and seed it if reproducibility matters.
il = (TextList.from_csv(path, 'texts.csv', create_func=None, col='text')
        .random_split_by_pct()
        .label_from_df(cols=0)
     )

In [None]:
# Vocab sizes of the validation and train lists, in that order; the output shows they match.
len(il.valid.vocab.itos), len(il.train.vocab.itos)

(6092, 6092)

In [None]:
# Load the raw CSV with pandas and preview its first rows.
df = pd.read_csv(path/'texts.csv')
df.head()

Unnamed: 0,label,text,is_valid
0,negative,Un-bleeping-believable! Meg Ryan doesn't even ...,False
1,positive,This is a extremely well-made film. The acting...,False
2,negative,Every once in a long while a movie will come a...,False
3,positive,Name just says it all. I watched this movie wi...,False
4,negative,This movie succeeds at being one of the most u...,False


In [None]:
# Use the CSV's texts as a test set; the trailing `;` suppresses the cell output.
il.add_test(df['text'].values);

In [None]:
# Compare the label classes attached to the train, validation and test lists.
il.train.y.classes,il.valid.y.classes,il.test.y.classes

(['negative', 'positive'], ['negative', 'positive'], ['negative', 'positive'])

In [None]:
# Vocab sizes of the validation, train and test lists, in that order.
len(il.valid.vocab.itos), len(il.train.vocab.itos), len(il.test.vocab.itos)

(6092, 6092, 6092)

In [None]:
# Turn the labelled lists into the `data` object used for batching below.
data = il.databunch()

In [None]:
# Display a batch taken from the test set.
data.show_batch(ds_type=DatasetType.Test)

text,label
"un - xxunk - believable ! meg ryan does n't even look her usual xxunk lovable self in this , which normally makes me forgive her shallow xxunk acting xxunk . hard to believe she was the producer on this dog . plus kevin kline : what kind of suicide trip has his career been on ? xxunk ... xxunk ! ! ! finally this was directed by the guy who did big xxunk ? must be a replay of jonestown - hollywood style . xxunk !",negative
"this is a extremely well - made film . the acting , script and camera - work are all first - rate . the music is good , too , though it is mostly early in the film , when things are still relatively xxunk . there are no really xxunk in the cast , though several faces will be familiar . the entire cast does an excellent job with the script . \n\n but it is hard to watch , because there is no good end to a situation like the one presented . it is now xxunk to blame the british for setting hindus and muslims against each other , and then xxunk xxunk them into two countries . there is some merit in this view , but it 's also true that no one forced hindus and muslims in the region to xxunk each other as they did around the time of partition . it seems more likely that the british simply saw the xxunk between the xxunk and were clever enough to exploit them to their own ends . \n\n the result is that there is much cruelty and inhumanity in the situation and this is very unpleasant to remember and to see on the screen . but it is never painted as a black - and - white case . there is xxunk and xxunk on both sides , and also the hope for change in the younger generation . \n\n there is redemption of a sort , in the end , when xxunk has to make a hard choice between a man who has ruined her life , but also truly loved her , and her family which has xxunk her , then later come looking for her . but by that point , she has no xxunk that is without great pain for her . \n\n this film carries the message that both muslims and hindus have their grave faults , and also that both can be xxunk and caring people . the reality of partition makes that xxunk all the more wrenching , since there can never be real xxunk across the india / pakistan border . in that sense , it is similar to "" mr & mrs xxunk "" . \n\n in the end , we were glad to have seen the film , even though the resolution was heartbreaking . 
if the uk and us could deal with their own xxunk of racism with this kind of xxunk , they would certainly be better off .",negative
"every once in a long while a movie will come along that will be so awful that i feel compelled to warn people . if i labor all my days and i can save but one soul from watching this movie , how great will be my joy . \n\n where to begin my discussion of pain . for xxunk , there was a musical montage every five minutes . there was no character development . every character was a stereotype . we had xxunk guy , fat guy who eats donuts , goofy foreign guy , etc . the script felt as if it were being written as the movie was being shot . the production value was so incredibly low that it felt like i was watching a junior high video presentation . have the directors , producers , etc . ever even seen a movie before ? xxunk is getting worse and worse with every new entry . the concept for this movie sounded so funny . how could you go wrong with gary coleman and a handful of somewhat legitimate actors . but trust me when i say this , things went wrong , xxup very xxup wrong .",negative
"name just says it all . i watched this movie with my dad when it came out and having served in xxunk he had great admiration for the man . the disappointing thing about this film is that it only concentrate on a short period of the man 's life - interestingly enough the man 's entire life would have made such an epic bio - xxunk that it is staggering to imagine the cost for production . \n\n some posters xxunk to the flawed xxunk about the man , which are cheap shots . the theme of the movie "" duty , honor , country "" are not just mere words xxunk from the lips of a high - xxunk officer - it is the deep xxunk of one man 's total devotion to his country . \n\n ironically xxunk being the liberal that he was xxunk a better understanding of the man . he does a great job showing the xxunk general xxunk with the xxunk side of the man .",negative
"this movie succeeds at being one of the most unique movies you 've seen . however this comes from the fact that you ca n't make heads or xxunk of this mess . it almost seems as a series of challenges set up to determine whether or not you are willing to walk out of the movie and give up the money you just paid . if you do n't want to feel xxunk you 'll sit through this horrible film and develop a real sense of pity for the actors involved , they 've all seen better days , but then you realize they actually got paid quite a bit of money to do this and you 'll lose pity for them just like you 've already done for the film . i ca n't go on enough about this horrible movie , its almost something that ed wood would have made and in that case it surely would have been his masterpiece . \n\n to start you are forced to sit through an opening dialogue the likes of which you 've never seen / heard , this thing has got to be five minutes long . on top of that it is narrated , as to suggest that you the viewer can not read . then we meet mr . xxunk and the xxunk of terrible lines gets xxunk , it is as if he is operating solely to get lines on to the movie poster tag line . soon we meet stephen xxunk , who i typically enjoy ) and he does his best not to drown in this but ultimately he does . then comes the ultimate insult , tara reid playing an intelligent role , oh help us ! tara reid is not a very talented actress and somehow she xxunk gets roles in movies , in my opinion though she should stick to movies of the american pie type . \n\n all in all you just may want to see this for yourself when it comes out on video , i know that i got a kick out of it , i mean lets all be honest here , sometimes its xxunk to xxunk in the shortcomings of others .",negative
"from the start , you know how this movie will end . it 's so full of clichés your typical xxup xxunk member will not even like this movie . i give it 2 out of 10 , only because of the acting of william xxunk . i ca n't believe people xxunk 6 + for this movie . it 's so biased towards a ' certain point of view ' ( once a thief ... ) . people are n't born bad . neither are they born good . they are born with a clean xxunk . it 's society , parents and education what makes them who they are . and if they take the wrong turn , somewhere down the line , it certainly is n't going to be the american xxunk system that gets them back on track ! anyway , avoid this movie like the xxunk . i bet you have better things to do with your time than waste it on this piece of crap . \n\n",negative
"there were a lot of truly great horror movies produced in the seventies - but this film certainly is n't one of them ! it 's a shame the child is n't better as it works from a decent idea that takes in a couple of sometimes successful horror themes . we have the idea of a xxunk child , which worked so well in classic films such as the bad xxunk and then we have the central zombie theme , which of course has been the xxunk of many a successful horror movie . the plot is basically this : young girl blames a load of people for the death of her mother , so she goes to the graveyard and raises the dead to get revenge ( as you do ) . this is all well and good , except for the fact that it 's boring ! nothing happens for most of the film , and although it does pick up at the end with some nice gore ; it 's not enough of a finale to justify sitting through the rest of it . the film was obviously shot on a budget as the locations look cheap and all the actors are rubbish . there 's really not much i can say about the film overall as there is n't much to it . the child is a xxunk seventies horror flick and i certainly do n't recommend it .",negative
"i was xxunk enough to meet george pal ( and still have my xxunk : xxup xxunk poster xxunk by him ) at a convention shortly after the release , and asked him why he chose to do the film "" camp "" . before he could answer , two studio flacks xxunk and xxunk me on how the studio "" knew best "" and how "" no one will take such a film seriously "" . i had been reading the xxunk xxunk for a couple of years thanks to a friend ( xxunk xxunk of the 1970s will recall xxunk and his band ? i was in a couple of years of that with him ) , and had higher hopes than what we got . \n\n the flacks xxunk that no high adventure would ever be done seriously , and so doing ' camp ' was the only way . several other fans jumped in on my side , with pal listening as best he could . at the end of the little event , pal came up to us and xxunk , xxunk he could have done more and better . \n\n xxup star xxup wars put the lie to the flacks , and a year after pal 's death , spielberg and lucas proved that doc savage could have easily been the next major movie franchise ... if it had n't been for the flacks . \n\n tear out the memory or history of doc , and the film would have been worth a 6 / 10 rating as nothing more than a mindless popcorn xxunk . \n\n but destroying the legacy like that was no less an xxunk than killing a baby in the xxunk . \n\n doc savage can still come to the screen , and survive the inevitable comparisons by the ill - informed to indiana jones , but it would have to be done in all xxunk and earnest to xxunk the glory that we should expect from the first american superhero . \n\n xxup xxunk : yes , there was a second script for xxup xxunk of xxup evil , and it 's a lot more serious . yes , there was xxunk footage shot , but mostly establishing shots and very little with actors . and , yes , there _ is _ a one - xxunk of ron xxunk xxunk over a xxunk wall and xxunk at something over his shoulder with a specially built bronze xxunk . 
xxunk 's wearing a xxunk over a button down white shirt with a bronze tie , and the words "" xxup doc xxup savage : xxup xxunk of xxup evil ... coming next summer ! "" xxup xxunk : if anyone knows who the studio flacks were that accompanied george pal in 1975 to san diego for the convention , xxunk the idiots up the side of the head and call them the idiots that they are . at the time , they were doing xxunk and fu xxunk in xxunk and xxunk xxunk pants , and carrying xxunk xxunk .",negative
"this film is the xxunk effort of xxunk xxunk and her new production company . while it suffers from a few problems , as every low budget production does , it is a good start for ms . xxunk and her company . \n\n the story is not terribly new having been done in films like the burning and every friday the xxunk since part 2 . but , the performances are xxunk . so many big budget movies just have the actors going through the xxunk , its always nice to see actors really trying to xxunk their craft . \n\n the story deals with the xxunk possible return ) of a xxunk xxunk . the others are xxunk to xxunk , but the xxunk of the event xxunk each person in different directions in their xxunk . ten years later , the friends are murdered one by one by a gruesome xxunk known as "" the xxunk "" . who will survive ? you have to watch . \n\n if you are roger ebert or any number of arrogant critics , you probably should n't bother . but if your taste run more towards joe bob xxunk and you want to see a group of people xxunk their craft , then check out "" the xxunk "" .",negative
