In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.22.2-py3-none-any.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 15.2 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 41.8 MB/s 
Collecting huggingface-hub<1.0,>=0.9.0
  Downloading huggingface_hub-0.10.0-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 77.5 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.0 tokenizers-0.12.1 transformers-4.22.2


In [None]:
from transformers import pipeline, set_seed  # pipeline provides pre-trained model; set_seed fixes RNG seeds

In [None]:
# Reference article:
# https://note.com/npaka/n/n5bb043191cc9

# Text positive/negative classification.
# The model and revision are pinned explicitly to the checkpoint the task
# default resolved to; the library warns against relying on an unpinned
# default, which may change between releases.
# Alternative model you can use:
#   pipeline('sentiment-analysis', model='nlptown/bert-base-multilingual-uncased-sentiment')
nlp = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    revision="af0f99b",
)

# Each call returns a list with one {'label': ..., 'score': ...} dict per input.
print(nlp("I like an apple!"))
print(nlp("I hate an apple!"))

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

[{'label': 'POSITIVE', 'score': 0.9997887015342712}]
[{'label': 'NEGATIVE', 'score': 0.9996813535690308}]


In [None]:
# Question answering: extract the answer span for a question from a context.
# The model and revision are pinned explicitly to the checkpoint the task
# default resolved to, instead of relying on an unpinned default.
nlp = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
    revision="626af31",
)

context = 'I thought I eat curry rice initially yesterday, but I ate hamburgers finally. What should I eat tomorrow... Oh! Let\'s have ramen tomorrow.'

# Each result is a dict with 'score', 'start', 'end' (character offsets into
# `context`) and the extracted 'answer' text.
print(nlp(question="What did he eat yesterday?", context=context))
print(nlp(question="What he will eat tomorrow?", context=context))

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


{'score': 0.9226830005645752, 'start': 16, 'end': 26, 'answer': 'curry rice'}
{'score': 0.6353263854980469, 'start': 123, 'end': 128, 'answer': 'ramen'}


In [None]:
# Fill masked text: predict candidates for the masked position.
# The model and revision are pinned explicitly to the checkpoint the task
# default resolved to, instead of relying on an unpinned default.
nlp = pipeline(
    "fill-mask",
    model="distilroberta-base",
    revision="ec58a5b",
)

# Use the tokenizer's own mask token rather than hard-coding it, so the prompt
# stays valid if the model is swapped.
display(nlp(f'Python is the very convenient programming language that makes {nlp.tokenizer.mask_token} models'))

No model was supplied, defaulted to distilroberta-base and revision ec58a5b (https://huggingface.co/distilroberta-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'score': 0.14385366439819336,
  'token': 30412,
  'token_str': ' mathematical',
  'sequence': 'Python is the very convenient programming language that makes mathematical models'},
 {'score': 0.07993928343057632,
  'token': 2632,
  'token_str': ' complex',
  'sequence': 'Python is the very convenient programming language that makes complex models'},
 {'score': 0.050539638847112656,
  'token': 31909,
  'token_str': ' scalable',
  'sequence': 'Python is the very convenient programming language that makes scalable models'},
 {'score': 0.03419441729784012,
  'token': 27930,
  'token_str': ' predictive',
  'sequence': 'Python is the very convenient programming language that makes predictive models'},
 {'score': 0.018205706030130386,
  'token': 10813,
  'token_str': ' interactive',
  'sequence': 'Python is the very convenient programming language that makes interactive models'}]

In [None]:
# Generate text: continue a prompt.
# The model and revision are pinned explicitly to the checkpoint the task
# default resolved to, instead of relying on an unpinned default.
text_generator = pipeline(
    "text-generation",
    model="gpt2",
    revision="6c0e608",
)

# Generation may sample tokens randomly; fix the RNG seeds right before the
# call so that Restart & Run All reproduces the same continuation.
set_seed(42)

# max_length caps the total length (prompt + continuation) in tokens.
display(text_generator("The deep learning revolution started around 2010.", max_length=100))

No model was supplied, defaulted to gpt2 and revision 6c0e608 (https://huggingface.co/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'The deep learning revolution started around 2010. We are now able to scale and grow such sophisticated methods using traditional computer science techniques. The biggest lesson from 2010 is that AI is a highly complex and often difficult problem. However, when it comes to deep learning, we will see this from two very different perspectives: that of our past progress, and as part of the AI system designed by Deep Learning to solve problems in every field of the future.\n\nA number of problems that we will have to deal'}]

In [None]:
# Extract named entities (token-level tags).
# The model and revision are pinned explicitly to the checkpoint the task
# default resolved to, instead of relying on an unpinned default.
nlp = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    revision="f2482bf",
)

# The result holds one dict per tagged token (sub-word pieces appear as
# separate entries prefixed with '##'), with 'entity', 'score', 'index',
# 'word', 'start' and 'end' keys.
display(nlp("TESLA Inc. was established in 2003 by Martin Eberhard and Marc Tarpenning in the America."))

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'entity': 'I-ORG',
  'score': 0.9990362,
  'index': 1,
  'word': 'T',
  'start': 0,
  'end': 1},
 {'entity': 'I-ORG',
  'score': 0.9989913,
  'index': 2,
  'word': '##ES',
  'start': 1,
  'end': 3},
 {'entity': 'I-ORG',
  'score': 0.9988979,
  'index': 3,
  'word': '##LA',
  'start': 3,
  'end': 5},
 {'entity': 'I-ORG',
  'score': 0.9993414,
  'index': 4,
  'word': 'Inc',
  'start': 6,
  'end': 9},
 {'entity': 'I-PER',
  'score': 0.99963415,
  'index': 11,
  'word': 'Martin',
  'start': 38,
  'end': 44},
 {'entity': 'I-PER',
  'score': 0.9997868,
  'index': 12,
  'word': 'E',
  'start': 45,
  'end': 46},
 {'entity': 'I-PER',
  'score': 0.99601907,
  'index': 13,
  'word': '##ber',
  'start': 46,
  'end': 49},
 {'entity': 'I-PER',
  'score': 0.9994281,
  'index': 14,
  'word': '##hard',
  'start': 49,
  'end': 53},
 {'entity': 'I-PER',
  'score': 0.9995648,
  'index': 16,
  'word': 'Marc',
  'start': 58,
  'end': 62},
 {'entity': 'I-PER',
  'score': 0.9996847,
  'index': 17,
  'word':

In [None]:
# Summarize text.
# The model and revision are pinned explicitly to the checkpoint the task
# default resolved to, instead of relying on an unpinned default.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    revision="a4f8f3e",
)

# text from wikipedia
text = """ Toyota Motor Corporation is a Japanese multinational automotive manufacturer headquartered in Toyota City, Aichi, Japan. 
It was founded by Kiichiro Toyoda and incorporated on August 28, 1937. 
Toyota is one of the largest automobile manufacturers in the world, producing about 10 million vehicles per year. 
"""

# min_length / max_length bound the length of the generated summary.
display(summarizer(text, max_length=30, min_length=10))

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'summary_text': ' Toyota Motor Corporation is a Japanese multinational automotive manufacturer . It was founded by Kiichiro Toyoda and incorporated on August 28, 1937 .'}]

In [None]:
# Translation (English -> German).
# The model and revision are pinned explicitly to the checkpoint the task
# default resolved to, instead of relying on an unpinned default.
translator = pipeline(
    "translation_en_to_de",
    model="t5-base",
    revision="686f1db",
)

# Returns a list with one {'translation_text': ...} dict per input.
print(translator("California is a place where the weather is always sunny.", max_length=30))

No model was supplied, defaulted to t5-base and revision 686f1db (https://huggingface.co/t5-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/892M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


[{'translation_text': 'Kalifornien ist ein Ort, wo das Wetter immer sonnig ist.'}]
