#### Install the Google Video Intelligence package

In [1]:
!pip install --upgrade google-cloud-videointelligence

Collecting google-cloud-videointelligence
  Using cached https://files.pythonhosted.org/packages/78/ca/092cf1f664de8255bee5aca6f35306763fbe2ffe67558f58c4445d5692b7/google_cloud_videointelligence-1.3.0-py2.py3-none-any.whl
Collecting google-api-core[grpc]<2.0.0dev,>=0.1.0 (from google-cloud-videointelligence)
  Using cached https://files.pythonhosted.org/packages/ab/d0/8393a844c1a4cf9f74908ca1b1a137665519157643e15e112a4a21b71d56/google_api_core-1.3.0-py2.py3-none-any.whl
Collecting pytz (from google-api-core[grpc]<2.0.0dev,>=0.1.0->google-cloud-videointelligence)
  Using cached https://files.pythonhosted.org/packages/30/4e/27c34b62430286c6d59177a0842ed90dc789ce5d1ed740887653b898779a/pytz-2018.5-py2.py3-none-any.whl
Collecting requests<3.0.0dev,>=2.18.0 (from google-api-core[grpc]<2.0.0dev,>=0.1.0->google-cloud-videointelligence)
  Using cached https://files.pythonhosted.org/packages/65/47/7e02164a2a3db50ed6d8a6ab1d6d60b69c4c3fdf57a284257925dfc12bda/requests-2.19.1-py2.py3-none-any.whl
C

In [1]:
from google.cloud import videointelligence

#### Initialize the VideoIntelligenceServiceClient

In [2]:
# Client object whose annotate_video() method submits the analysis requests.
video_client = videointelligence.VideoIntelligenceServiceClient()

#### Set the features which need to be extracted from a video
Here, we perform a LABEL_DETECTION. We can also do a FACE_DETECTION or EXPLICIT_CONTENT_DETECTION and a few other tasks

In [3]:
# Features to request from the API; passed as `features=` to annotate_video().
features = [
    videointelligence.enums.Feature.LABEL_DETECTION,
]

#### Set the video file which will be scanned
We have created a video montage from files downloaded from the site videos.pexels.com. The individual clips are: <br />
* https://videos.pexels.com/videos/aerial-shot-of-city-854336
* https://videos.pexels.com/videos/man-texting-on-the-street-855574
* https://videos.pexels.com/videos/video-of-flower-blooming-855183
* https://videos.pexels.com/videos/dog-eating-854132

In [4]:
# Cloud Storage (gs://) location of the video montage to analyse.
input_uri = 'gs://cloud-ml-api/video_montage.mp4'

#### Define the asynchronous operation which will be performed on the video
The annotate_video() function will perform a label detection on the video

In [5]:
# Submit the request. The call hands back an operation handle, not the
# annotations themselves; the annotations are obtained from it afterwards.
operation = video_client.annotate_video(
    input_uri=input_uri,
    features=features,
)

#### Gather the results in the results object
Since the operation runs asynchronously, we call `result()` on it to wait for completion and gather the annotations in a result object. We set a timeout so that we do not wait indefinitely if the operation takes too long.

In [6]:
# Wait for the operation to finish and fetch its response, giving up once the
# timeout of 180 elapses (seconds, per google-api-core — confirm for your version).
result = operation.result(timeout=180)

#### Retrieve the annotations from the result
The annotations used here are segment labels (labels that apply to a whole video segment rather than to an individual shot)

In [7]:
# Take the first entry of annotation_results (presumably one entry per input
# video — only one was submitted) and read its segment-level label annotations.
video_labels = result.annotation_results[0].segment_label_annotations

#### View the labels
We iterate over the annotations and display the following: <br />
* The entity description for the video segment (video_label.entity.description)
* The category labels associated with the entity for that video segment (e.g. an entity "golden retriever" will have a category "dog")
* Specific details about each segment in the video:
 * its start and end times 
 * the confidence the API has in the details for the segment

In [8]:
# Print every label with its categories and, for each segment the label
# applies to, the segment's time range and the reported confidence.
for video_label in video_labels:
    print('Video label description: {}'.format(
        video_label.entity.description))

    for category_entity in video_label.category_entities:
        print('\tLabel category description: {}'.format(
            category_entity.description))

    # The segment index gets its own name; the original reused `i` for both
    # the outer and inner loops, so the inner loop clobbered the outer index.
    for segment_index, segment in enumerate(video_label.segments):
        # Offsets carry whole seconds and nanoseconds separately; combine
        # them into fractional seconds.
        start_time = (segment.segment.start_time_offset.seconds +
                      segment.segment.start_time_offset.nanos / 1e9)
        end_time = (segment.segment.end_time_offset.seconds +
                    segment.segment.end_time_offset.nanos / 1e9)

        positions = '{}s to {}s'.format(start_time, end_time)
        confidence = segment.confidence

        print('\tVideo {}: {}'.format(segment_index, positions))
        print('\tConfidence: {}'.format(confidence))

    # Visual separator between labels.
    print('='*50, '\n')

Video label description: flowering plant
	Label category description: plant
	Video 0: 0.0s to 42.208833s
	Confidence: 0.7932839393615723

Video label description: urban area
	Label category description: city
	Video 0: 0.0s to 42.208833s
	Confidence: 0.4772138297557831

Video label description: petal
	Video 0: 0.0s to 42.208833s
	Confidence: 0.6524234414100647

Video label description: dog
	Label category description: pet
	Video 0: 0.0s to 42.208833s
	Confidence: 0.8028566241264343

Video label description: pet
	Label category description: animal
	Video 0: 0.0s to 42.208833s
	Confidence: 0.630623996257782

Video label description: garden roses
	Label category description: rose
	Video 0: 0.0s to 42.208833s
	Confidence: 0.6506539583206177

Video label description: puppy
	Label category description: dog
	Video 0: 0.0s to 42.208833s
	Confidence: 0.5478790998458862

Video label description: retriever
	Label category description: dog
	Video 0: 0.0s to 42.208833s
	Confidence: 0.878660142421722

#### Analyzing for explicit content
We modify the features to look for in the video. Here, we scan for explicit content

In [9]:
# Replace the requested feature list with explicit-content detection.
features = [
    videointelligence.enums.Feature.EXPLICIT_CONTENT_DETECTION,
]

In [10]:
# Submit a new request for the same video with the current feature list.
operation = video_client.annotate_video(
    input_uri=input_uri,
    features=features,
)

# Wait for the operation to complete (timeout of 180) and keep its response.
result = operation.result(timeout=180)

#### View the results of the operation
The results appear in the form of frames and contain details about that frame which includes: <br />
* The start of that frame in terms of seconds and nanoseconds from the beginning
* The likelihood of explicit content in that frame

In [20]:
# Walk every frame annotation in the first annotation result.
for frame in result.annotation_results[0].explicit_annotation.frames:
    # Dump the raw frame message, preceded by a newline.
    print('\n', frame)

    # Combine whole seconds and nanoseconds into fractional seconds.
    frame_time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
    print('Time: {}s'.format(frame_time))

    # Visual separator between frames.
    print('\n', '='*50)


 time_offset {
  nanos: 645533000
}
pornography_likelihood: VERY_UNLIKELY

Time: 0.645533s


 time_offset {
  seconds: 1
  nanos: 479450000
}
pornography_likelihood: VERY_UNLIKELY

Time: 1.47945s


 time_offset {
  seconds: 2
  nanos: 299484000
}
pornography_likelihood: VERY_UNLIKELY

Time: 2.299484s


 time_offset {
  seconds: 3
  nanos: 270922000
}
pornography_likelihood: VERY_UNLIKELY

Time: 3.270922s


 time_offset {
  seconds: 4
  nanos: 285030000
}
pornography_likelihood: VERY_UNLIKELY

Time: 4.28503s


 time_offset {
  seconds: 5
  nanos: 227728000
}
pornography_likelihood: VERY_UNLIKELY

Time: 5.227728s


 time_offset {
  seconds: 6
  nanos: 270038000
}
pornography_likelihood: VERY_UNLIKELY

Time: 6.270038s


 time_offset {
  seconds: 7
  nanos: 420623000
}
pornography_likelihood: VERY_UNLIKELY

Time: 7.420623s


 time_offset {
  seconds: 8
  nanos: 511086000
}
pornography_likelihood: VERY_UNLIKELY

Time: 8.511086s


 time_offset {
  seconds: 9
  nanos: 324985000
}
pornography