In [53]:
%%writefile static/index.html
<!doctype html>
<html>
    <head>
        <meta charset="utf-8">
        <title>KoGPT2novel</title>
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/css/materialize.min.css">
        <style>
        /* Cards play the slide-up animation once (0.4s) when rendered. */
        .card{
            animation: slide-up 0.4s ease;
        }
        /* slide-up: fade in while moving up 100px into the final position. */
        @keyframes slide-up {
            0% {
                opacity: 0;
                transform: translateY(100px);
            }
            100% {
                opacity: 1;
                transform: translateY(0);
            }
        }
        /* Spinner container: hidden by default, toggled from the page script. */
        #loading{
            display:none;
        }
        /* Result paragraph: hidden by default, toggled from the page script. */
        #output{
            font-size: 20px;
            display:none;
        }
        /* Blinking "|" cursor rendered after the result text. */
        #output::after{
            content: "|";
            animation: blink .75s step-end infinite;
        }
        /* blink: cursor is transparent at the ends of the cycle and coloured at the midpoint. */
        @keyframes blink {
            from, to { color: transparent }
            50% { color: DeepSkyBlue }
        }
        </style>
    </head>
    <body>
        <nav>
            <div class="nav-wrapper blue">
                <a href="#" class="brand-logo center">KoGPT2novel</a>
            </div>
        </nav>
        <div class="container">
            <div class="row center">
                <div class="col s12 m9 offset-m1 l8 offset-l2">
                    <div class="card ">
                        <div class="card-content">
                            <div class="row center ">
                              <div class="col s10 offset-s1">
                                
                                <div class="row">
                                    <div class="input-field col s12">
                                      <input type="text" id="textInput">
                                      <label for="textInput">Write Text</label>
                                    </div>
                                </div>
                                
                                
                                <div class="row">
                                    <a class="waves-effect waves-light btn-small blue" id="generate">Generate</a>                            
                                </div>
                                

                                <div class="row">
                                    <div id="loading">
                                        <div class="preloader-wrapper big active">
                                          <div class="spinner-layer spinner-blue-only">
                                            <div class="circle-clipper left">
                                              <div class="circle"></div>
                                            </div><div class="gap-patch">
                                              <div class="circle"></div>
                                            </div><div class="circle-clipper right">
                                              <div class="circle"></div>
                                            </div>
                                          </div>
                                        </div>
                                    </div>                           
                                    <p class="header col s12 light" id="output"></p> 
                                </div>

                              </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>


        <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
        <script src="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/js/materialize.min.js"></script>
        <script src="https://unpkg.com/axios/dist/axios.min.js"></script>
        <script>

        let processing=false;
        let intervalId=null;
        
        // Start generation when Enter (key code 13) is pressed inside the text input...
        $('#textInput').on('keypress', function (event) {
            if (event.which != 13) return;
            processText();
        });
        // ...or when the Generate button is clicked.
        $("#generate").on('click', processText);
        
        /**
         * Read the prompt from #textInput, POST it to `/predict`, and display the
         * generated text (or an error message) in #output with the typing effect.
         *
         * Calls made while a request is already in flight are ignored; the
         * `processing` flag is always released, even if something throws.
         */
        async function processText() {
            if (processing) return;
            processing = true;

            try {
                const inputText = $("#textInput").val();
                let result = "";
                $('#output').hide();
                $('#loading').show();

                if (inputText.length > 1) {
                    try {
                        const response = await axios.post('/predict', {"text": inputText});
                        result = response.data["result"];
                    } catch (error) {
                        console.error(error);
                        // `error.response` only exists when the server actually answered;
                        // for network failures/timeouts it is undefined, so guard the access
                        // instead of throwing a TypeError from inside the catch block.
                        if (error.response && error.response.status == 504) {
                            result = "Server is busy";
                        } else {
                            result = error.stack;
                        }
                    }
                } else {
                    result = "Input is too short";
                }

                $('#loading').hide();
                $('#output').show();
                doTypingEffect(result);
            } finally {
                // Release the guard unconditionally so the UI can never get stuck.
                processing = false;
            }
        };
        
        /**
         * Reveal `str` inside #output one character every 50 ms.
         *
         * Any typing effect still running is cancelled first. `null`/`undefined`
         * are shown as an empty string and other non-string values are stringified,
         * so a malformed server response cannot make this function throw.
         */
        function doTypingEffect(str){
            clearInterval(intervalId);
            // Array.from iterates by code point, so surrogate pairs (e.g. emoji) are
            // emitted as one character instead of two broken halves.
            const chars = Array.from(String(str == null ? "" : str));
            const output = $("#output");
            let i = 0;
            output.text("");
            intervalId = setInterval(function () {
                if (i >= chars.length) {
                    clearInterval(intervalId);
                    return; // nothing left to type; do not append `undefined`
                }
                // Append a text node: jQuery's .append() treats string arguments as HTML.
                output.append(document.createTextNode(chars[i++]));
            }, 50);
        }
        </script>
    </body>
</html>

Overwriting static/index.html


In [54]:
%%writefile BentomlService.py

import bentoml
import json
from bentoml.adapters import JsonInput
from bentoml.frameworks.transformers import TransformersModelArtifact
from bentoml.types import JsonSerializable, InferenceError, InferenceResult



@bentoml.env(pip_packages=[
    "torch==1.7.1",
    "transformers==4.10.2"
])
@bentoml.artifacts([TransformersModelArtifact("model")])
@bentoml.web_static_content('./static')
class TransformerService(bentoml.BentoService):
    """BentoML service that continues a text prompt with the packed language model.

    The "model" artifact is read as a mapping with "model" and "tokenizer"
    entries. Files under ./static are served as the web front end.
    """

    # Token budget used both for prompt truncation and for generate()'s max_length.
    # NOTE(review): with both limits equal, a prompt that reaches the limit appears
    # to leave no room for newly generated tokens -- confirm this is intended.
    MAX_LENGTH = 128
    REPETITION_PENALTY = 2.0

    @bentoml.api(input=JsonInput(), batch=False)
    def predict(self, parsed_json: JsonSerializable):
        """Generate a continuation for the prompt in ``parsed_json["text"]``.

        Args:
            parsed_json: Decoded JSON request body. Must be an object whose
                "text" field is a non-blank string.

        Returns:
            InferenceResult with HTTP 200 and a JSON body ``{"result": <text>}``
            on success, or InferenceError with HTTP 400 when the request body
            does not contain a usable "text" field.
        """
        # Reject malformed requests explicitly instead of letting the tokenizer
        # fail with an opaque 500.
        text = parsed_json.get("text") if isinstance(parsed_json, dict) else None
        if not isinstance(text, str) or not text.strip():
            return InferenceError(
                err_msg='Request body must be a JSON object with a non-empty string field "text".',
                http_status=400,
            )

        packed = self.artifacts.model
        model = packed.get("model")
        tokenizer = packed.get("tokenizer")

        # Encode the prompt, generate, then decode the first returned sequence.
        inputs = tokenizer.encode(
            text,
            return_tensors="pt",
            max_length=self.MAX_LENGTH,
            truncation=True,
        )
        generated = model.generate(
            inputs,
            max_length=self.MAX_LENGTH,
            repetition_penalty=self.REPETITION_PENALTY,
            use_cache=True,
        )
        output = tokenizer.decode(generated[0], skip_special_tokens=True)

        # Serialise manually so the Content-Type header can be set explicitly.
        json_out = json.dumps({
            "result": output
        })
        return InferenceResult(
            data=json_out,
            http_status=200,
            http_headers={"Content-Type": "application/json"},
        )

Overwriting BentomlService.py


In [56]:
from transformers import GPT2LMHeadModel, PreTrainedTokenizerFast
from BentomlService import TransformerService

    
# Checkpoint identifier shared by the model and the tokenizer.
model_name = "ttop324/kogpt2novel"

# Load the GPT-2 language-model weights for that checkpoint.
model = GPT2LMHeadModel.from_pretrained(model_name)

# Load the matching fast tokenizer, spelling out every special token explicitly.
tokenizer = PreTrainedTokenizerFast.from_pretrained(
    model_name,
    bos_token='</s>',
    eos_token='</s>',
    unk_token='<unk>',
    pad_token='<pad>',
    mask_token='<mask>',
)

Downloading:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/513M [00:00<?, ?B/s]

In [57]:
# Create the service, attach the model/tokenizer pair as its "model" artifact,
# and save the bundle; save() is the last expression so its return value is
# shown as the cell output.
service = TransformerService()
service.pack("model", dict(model=model, tokenizer=tokenizer))
service.save()

[2021-09-19 17:12:11,037] INFO - BentoService bundle 'TransformerService:20210919171206_228F50' saved to: /home/user/bentoml/repository/TransformerService/20210919171206_228F50


'/home/user/bentoml/repository/TransformerService/20210919171206_228F50'

In [58]:
# Build a Docker image from the latest saved TransformerService bundle and tag it
# as ttop324/transformer-service:latest.
!bentoml containerize TransformerService:latest -t ttop324/transformer-service:latest
# Push the tagged image to the registry (the captured output refers to
# docker.io/ttop324/transformer-service).
!docker push ttop324/transformer-service:latest     


[2021-09-19 17:12:15,981] INFO - Getting latest version TransformerService:20210919171206_228F50
[39mFound Bento: /home/user/bentoml/repository/TransformerService/20210919171206_228F50[0m
Containerizing TransformerService:20210919171206_228F50 with local YataiService and docker daemon from local environment|[32m
Build container image: ttop324/transformer-service:latest[0m
The push refers to repository [docker.io/ttop324/transformer-service]

[1B66859589: Preparing 
[1B1e372a5e: Preparing 
[1B39ea78d6: Preparing 
[1B4b8d575f: Preparing 
[1B19e4f924: Preparing 
[1Baadfe5cc: Preparing 
[1B61d7a0be: Preparing 
[1B93cf5172: Preparing 
[1B500408ed: Preparing 
[1B59c9d32c: Preparing 
[1B5605a904: Preparing 
[1Bc8cc8a1b: Preparing 
[1B7e2bf845: Preparing 
[1B404288d7: Preparing 
[1B8beeb7d5: Preparing 
[1B2b510000: Preparing 
[1Ba5b53a93: Preparing 
[1B08ab7cf3: Preparing 
[1B5b992fc1: Preparing 
[1B8986f350: Preparing 
[1Bab4c463e: Preparing 
[21Be372a5e: Pushed   51

In [16]:
# Smoke-test the image locally: publish container port 5000 on host port 23776 and
# start the server with one worker and micro-batching enabled.
# NOTE(review): there is no -d flag, so the container presumably runs in the
# foreground and this cell keeps running until it is stopped -- confirm.
!docker run -p 23776:5000 ttop324/transformer-service:latest --workers=1 --enable-microbatch

# The string below is not executed; it is an example curl request against the
# /predict endpoint on the published host port.
"""
curl -i \
  --header "Content-Type: application/json" \
  --request POST \
  --data '{"text": "전자발찌(위치추적 전자장치) 훼손 전후 여성 2명을 살해한 "}' \
  http://localhost:23776/predict       
"""


[2021-09-19 04:09:46,420] INFO - Starting BentoML proxy in production mode..
[2021-09-19 04:09:46,420] INFO - Starting BentoML API server in production mode..
[2021-09-19 04:09:46,550] INFO - Running micro batch service on :5000
[2021-09-19 04:09:46 +0000] [25] [INFO] Starting gunicorn 20.1.0
[2021-09-19 04:09:46 +0000] [25] [INFO] Listening at: http://0.0.0.0:55795 (25)
[2021-09-19 04:09:46 +0000] [25] [INFO] Using worker: sync
[2021-09-19 04:09:46 +0000] [26] [INFO] Booting worker with pid: 26
[2021-09-19 04:09:46 +0000] [1] [INFO] Starting gunicorn 20.1.0
[2021-09-19 04:09:46 +0000] [1] [INFO] Listening at: http://0.0.0.0:5000 (1)
[2021-09-19 04:09:46 +0000] [1] [INFO] Using worker: aiohttp.worker.GunicornWebWorker
[2021-09-19 04:09:46 +0000] [27] [INFO] Booting worker with pid: 27
[2021-09-19 04:09:46,666] INFO - Your system nofile limit is 1048576, which means each instance of microbatch service is able to hold this number of connections at same time. You can increase the number o

'\ncurl -i   --header "Content-Type: application/json"   --request POST   --data \'{"text": "위치추적 전자장치(전자발찌) 훼손 "}\'   http://localhost:23776/predict       \n'

In [None]:
# kubernautic.com deployment manifests (YAML, not Python -- this cell is not meant
# to be executed by the kernel).
# pod.yaml: a single Pod running the pushed service image.
apiVersion: v1
kind: Pod
metadata:
  name: kogpt2novel
  labels:
    # Label that the Service selector in service.yaml matches on.
    app: kogpt2novel
spec:
  containers:
  - name: kogpt2novel
    image: ttop324/transformer-service:latest
    ports:
    # Same port the container is published from in the local docker test (-p 23776:5000).
    - containerPort: 5000
    resources:
      # Upper bounds for the container: one CPU and 1Gi of memory.
      limits:
        cpu: "1"
        memory: 1Gi
# service.yaml: Service that exposes the kogpt2novel Pod (a separate manifest from pod.yaml).
apiVersion: v1        
kind: Service
metadata:
  name: kogpt2novelservice
spec:
  # Routes to Pods carrying the label app=kogpt2novel.
  selector:
    app: kogpt2novel
  ports:
  # Service port 10000 forwards to the Pod's containerPort 5000.
  - protocol: TCP
    port: 10000
    targetPort: 5000


        
# cloud.okteto.com configuration (alternative to the kubernautic.com manifests above)
services:
  web:
    image: ttop324/transformer-service:latest
    ports:
      # Maps port 8080 to the container's port 5000.
      - 8080:5000
    # Run five replicas of the service.
    replicas: 5
