This repository has been archived by the owner on Nov 26, 2021. It is now read-only.

Update SQL training to Flink 1.9.0
* Update Docker training image
  * Move build code completely into Dockerfile. Remove build.sh
  * Upgrade the base image of the SQL client image
  * Upgrade UDF dependencies to Flink 1.9.0
  * Add Java-based data producer (see the sketch below)
  * Remove Python dependencies and Kafka tools

* Update docker-compose.yml
* Upgrade Kafka image and Flink SQL Kafka connector
* Upgrade Zookeeper image
fhueske committed Sep 12, 2019
1 parent 098bf1d commit 90cb56e
Showing 77 changed files with 1,320 additions and 2,228 deletions.
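
The new Java data producer module itself is not included in the excerpt below. As a rough illustration only, a minimal, hypothetical sketch of a file-to-Kafka producer of this kind might look as follows; the class name, argument order, and serialization choices are assumptions, not code from this commit.

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Properties;
import java.util.zip.GZIPInputStream;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

// Hypothetical sketch: reads a gzipped file of JSON events line by line and
// publishes each line to a Kafka topic. Not the actual
// sql-training-data-producer code from this commit.
public class DataProducerSketch {

    public static void main(String[] args) throws Exception {
        // Assumed arguments: <bootstrap.servers> <topic> <gzipped input file>
        String bootstrapServers = args[0];
        String topic = args[1];
        String inputFile = args[2];

        Properties props = new Properties();
        props.put("bootstrap.servers", bootstrapServers);
        props.put("key.serializer", StringSerializer.class.getName());
        props.put("value.serializer", StringSerializer.class.getName());

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props);
             BufferedReader reader = new BufferedReader(new InputStreamReader(
                     new GZIPInputStream(new FileInputStream(inputFile))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Each line is assumed to be one JSON-encoded event (ride, fare, or driver change).
                producer.send(new ProducerRecord<>(topic, line));
            }
            producer.flush();
        }
    }
}
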
4 changes: 3 additions & 1 deletion .gitignore
@@ -1 +1,3 @@
build-image/sql-udfs/target
**/target
**/dependency-reduced-pom.xml
**/.idea
62 changes: 43 additions & 19 deletions build-image/Dockerfile
@@ -16,29 +16,53 @@
# limitations under the License.
###############################################################################

FROM flink:1.7.2-scala_2.12
###############################################################################
# Build Click Count Job
###############################################################################

ADD VERSION .
FROM maven:3.6-jdk-8-slim AS builder

WORKDIR /opt/sql-client
# Get UDF code and compile it
COPY ./java/sql-training-udfs /opt/sql-udfs
RUN cd /opt/sql-udfs; \
mvn clean install

ENV SQL_CLIENT_HOME /opt/sql-client
# Get data producer code and compile it
COPY ./java/sql-training-data-producer /opt/data-producer
RUN cd /opt/data-producer; \
mvn clean install

###############################################################################
# Build SQL Playground Image
###############################################################################

FROM flink:1.9.0-scala_2.11

# Copy sql-client dependencies
ADD VERSION .

# Copy sql-client configuration
COPY sql-client/ /opt/sql-client
COPY kafka-client /opt/kafka-client

# Copy playground UDFs
COPY --from=builder /opt/sql-udfs/target/sql-training-udfs-*.jar /opt/sql-client/lib/

# Copy data producer
COPY --from=builder /opt/data-producer/target/sql-training-data-producer-*.jar /opt/data/data-producer.jar

# Copy configuration
COPY conf/* /opt/flink/conf/

# Download connector libraries
RUN wget -P /opt/sql-client/lib/ http://central.maven.org/maven2/org/apache/flink/flink-json/${FLINK_VERSION}/flink-json-${FLINK_VERSION}.jar; \
wget -P /opt/sql-client/lib/ http://central.maven.org/maven2/org/apache/flink/flink-sql-connector-kafka_2.11/${FLINK_VERSION}/flink-sql-connector-kafka_2.11-${FLINK_VERSION}.jar; \
wget -P /opt/sql-client/lib/ http://central.maven.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch6_2.11/${FLINK_VERSION}/flink-sql-connector-elasticsearch6_2.11-${FLINK_VERSION}.jar; \
# Create data folders
RUN mkdir -p /opt/data
RUN mkdir -p /opt/data/kafka
RUN mkdir -p /opt/data/batch

RUN apt-get update; apt-get -y install python2.7 && \
chmod +x /opt/sql-client/sql-client.sh && \
wget -P /opt/sql-client/lib/ http://central.maven.org/maven2/org/apache/flink/flink-json/${FLINK_VERSION}/flink-json-${FLINK_VERSION}.jar && \
wget -P /opt/sql-client/lib/ http://central.maven.org/maven2/org/apache/flink/flink-connector-kafka-0.11_2.12/${FLINK_VERSION}/flink-connector-kafka-0.11_2.12-${FLINK_VERSION}-sql-jar.jar && \
wget -P /opt/sql-client/lib/ http://central.maven.org/maven2/org/apache/flink/flink-connector-elasticsearch6_2.12/${FLINK_VERSION}/flink-connector-elasticsearch6_2.12-${FLINK_VERSION}-sql-jar.jar

COPY docker-entrypoint.sh /
# Configure container
ENTRYPOINT ["/docker-entrypoint.sh"]
mkdir -p /opt/data; \
mkdir -p /opt/data/stream; \
# Download data files
wget -O /opt/data/driverChanges.txt.gz 'https://drive.google.com/uc?export=download&id=1pf4tfv-YpoVQ9_O0948M8oXeCfVH-0MH'; \
wget -O /opt/data/fares.txt.gz 'https://drive.google.com/uc?export=download&id=1SriiwcIdMvY7uJsWSY4Hhh32iO3F4ND2'; \
wget -O /opt/data/rides.txt.gz 'https://drive.google.com/uc?export=download&id=1gY8W07OFvB7_4lHlAyingM4WQzs0_8lT';

WORKDIR /opt/sql-client
ENV SQL_CLIENT_HOME /opt/sql-client
14 changes: 0 additions & 14 deletions build-image/README.md

This file was deleted.

48 changes: 0 additions & 48 deletions build-image/build.sh

This file was deleted.

17 changes: 17 additions & 0 deletions build-image/conf/flink-conf.yaml
@@ -0,0 +1,17 @@
################################################################################
# Copyright 2019 Ververica GmbH
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

jobmanager.rpc.address: jobmanager
@@ -1,18 +1,16 @@
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# Copyright 2019 Ververica GmbH
#
# http://www.apache.org/licenses/LICENSE-2.0
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

@@ -56,14 +54,14 @@ tables:
connector:
property-version: 1
type: kafka
version: 0.11
version: universal
topic: Rides
startup-mode: earliest-offset
properties:
- key: zookeeper.connect
value: ${ZOOKEEPER}:2181
value: zookeeper:2181
- key: bootstrap.servers
value: ${KAFKA}:9092
value: kafka:9092
- key: group.id
value: testGroup
format:
@@ -96,14 +94,14 @@ tables:
connector:
property-version: 1
type: kafka
version: 0.11
version: universal
topic: Fares
startup-mode: earliest-offset
properties:
- key: zookeeper.connect
value: ${ZOOKEEPER}:2181
value: zookeeper:2181
- key: bootstrap.servers
value: ${KAFKA}:9092
value: kafka:9092
- key: group.id
value: testGroup
format:
@@ -130,14 +128,14 @@ tables:
connector:
property-version: 1
type: kafka
version: 0.11
version: universal
topic: DriverChanges
startup-mode: earliest-offset
properties:
- key: zookeeper.connect
value: ${ZOOKEEPER}:2181
value: zookeeper:2181
- key: bootstrap.servers
value: ${KAFKA}:9092
value: kafka:9092
- key: group.id
value: testGroup
format:
@@ -163,7 +161,7 @@ tables:
connector:
property-version: 1
type: kafka
version: 0.11
version: universal
topic: TenMinPsgCnts
startup-mode: earliest-offset
properties:
@@ -203,16 +201,16 @@
functions:
- name: timeDiff
from: class
class: com.dataartisans.udfs.TimeDiff
class: com.ververica.sql_training.udfs.TimeDiff
- name: isInNYC
from: class
class: com.dataartisans.udfs.IsInNYC
class: com.ververica.sql_training.udfs.IsInNYC
- name: toAreaId
from: class
class: com.dataartisans.udfs.ToAreaId
class: com.ververica.sql_training.udfs.ToAreaId
- name: toCoords
from: class
class: com.dataartisans.udfs.ToCoords
class: com.ververica.sql_training.udfs.ToCoords

#==============================================================================
# Execution properties
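
The UDF classes registered above are not included in this excerpt. For orientation, a minimal sketch of what a scalar UDF of this kind could look like against the Flink 1.9 Table API is shown below; the eval signature and the time-difference logic are assumptions, not the actual TimeDiff implementation from this commit.

package com.ververica.sql_training.udfs;

import java.sql.Timestamp;

import org.apache.flink.table.functions.ScalarFunction;

// Hypothetical sketch of a scalar UDF in the shape the SQL client
// configuration above expects. The real TimeDiff class may differ.
public class TimeDiff extends ScalarFunction {

    // Assumed signature: difference between two event timestamps in milliseconds.
    public long eval(Timestamp start, Timestamp end) {
        return end.getTime() - start.getTime();
    }
}
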
16 changes: 0 additions & 16 deletions build-image/docker-entrypoint.sh

This file was deleted.

