In [102]:
import pyspark
from pyspark import SQLContext
from pyspark.sql.types import StructType, StructField, IntegerType, FloatType, StringType
from pyspark.sql.functions import udf
from pyspark.sql import Row


# Spark entry points: reuse a running SparkContext if there is one, otherwise
# start one from a default SparkConf, then wrap it in a SQLContext.
conf = pyspark.SparkConf()
sc = pyspark.SparkContext.getOrCreate(conf=conf)
spark = SQLContext(sc)

# Three nullable columns: sales (float), employee (string), ID (integer).
schema = StructType(
    [
        StructField("sales", FloatType(), nullable=True),
        StructField("employee", StringType(), nullable=True),
        StructField("ID", IntegerType(), nullable=True),
    ]
)

# A single sample record, listed in schema column order.
data = [[10.2, "Fred", 123]]

df = spark.createDataFrame(data, schema=schema)

In [103]:
# Wrap toInt as a Spark UDF whose declared return type is IntegerType.
# The lambda looks toInt up only when the UDF is invoked, so it is fine that
# toInt is defined further down in this cell.
# NOTE(review): the tables shown below report 1394624364 for 'Fred', while the
# RDD path at the end of the notebook prints 70114101100 for the same name;
# 1394624364 equals 70114101100 modulo 2**32, so the value looks truncated to
# 32 bits by the IntegerType declaration - confirm and consider a wider type.
colsInt = udf(lambda z: toInt(z), IntegerType())
# Make the UDF callable from SQL text under the name "colsInt".
spark.udf.register("colsInt", colsInt)

def toInt(s):
    """Encode a string as one integer built from its characters' code points.

    Every character is replaced by the decimal text of ``ord(char)``; the
    pieces are concatenated and parsed as a single int, e.g.
    ``"Fred"`` -> ``"70" + "114" + "101" + "100"`` -> ``70114101100``.

    Args:
        s: Value to encode. Only ``str`` instances are encoded.

    Returns:
        The encoded ``int`` for a non-empty string, otherwise ``None``
        (non-string input, including ``None``, or an empty string, which has
        no digits to parse).

    Note:
        Python ints are unbounded, so the result grows with the length of
        ``s`` and can exceed the range of a fixed-width integer column.
    """
    # Guard first: non-strings and "" have nothing to encode. Returning None
    # (not the undefined name ``Null``) lets Spark store a null for that row.
    if not isinstance(s, str) or not s:
        return None
    return int(''.join(str(ord(ch)) for ch in s))

In [104]:
# Add a column 'semployee' holding the colsInt UDF applied to the 'employee' column.
df2 = df.withColumn( 'semployee',colsInt('employee'))


In [105]:
# Print df2 as a text table to check the added 'semployee' column.
df2.show()

+-----+--------+---+----------+
|sales|employee| ID| semployee|
+-----+--------+---+----------+
| 10.2|    Fred|123|1394624364|
+-----+--------+---+----------+



In [106]:
# Expose df to Spark SQL as the temporary view "dftab". Calling the DataFrame
# method directly performs the same registration as
# SQLContext.registerDataFrameAsTable without relying on that SQLContext-only
# helper, so this cell keeps working if `spark` is later built as a SparkSession.
df.createOrReplaceTempView("dftab")

In [107]:
# Same transformation as the withColumn cell, expressed in SQL: the registered
# colsInt UDF is applied to employee and the result is aliased as iemployee.
df3 = spark.sql("select sales, employee, ID, colsInt(employee) as iemployee from dftab")

In [108]:
# Print df3 as a text table to check the 'iemployee' column produced by the SQL query.
df3.show()

+-----+--------+---+----------+
|sales|employee| ID| iemployee|
+-----+--------+---+----------+
| 10.2|    Fred|123|1394624364|
+-----+--------+---+----------+



In [109]:
# Take the RDD view of df.
# NOTE(review): this binding is replaced a few cells later by
# df.rdd.map(toIntEmployee) without being read in between - looks removable; confirm.
rdd = df.rdd

In [110]:
def toIntEmployee(rdd):
    """Build a four-field Row: the record's fields plus an encoded employee value.

    Despite its name, ``rdd`` is a single record that is indexed by field name
    ("sales", "employee", "ID"); the function is passed to ``map`` so it is
    called once per record.

    The extra fourth field is the employee string turned into an integer by
    concatenating the decimal code point of each character (``"Fred"`` ->
    ``70114101100``). A non-string employee value is copied through unchanged.

    Args:
        rdd: One record supporting ``record["field"]`` lookups.

    Returns:
        ``Row(sales, employee, ID, encoded_employee)`` with positional fields.
    """
    employee = rdd["employee"]

    if isinstance(employee, str):
        # Join the per-character code points into one digit string, then parse it.
        code_points = (str(ord(ch)) for ch in employee)
        encoded = int(''.join(code_points))
    else:
        # Nothing to encode; keep whatever value was there.
        encoded = employee

    return Row(rdd["sales"], rdd["employee"], rdd["ID"], encoded)
    

In [111]:
# Apply toIntEmployee to every record of df's RDD; each result is a four-field Row.
rdd = df.rdd.map(toIntEmployee) 

In [112]:
# Collect every mapped record and print it; the sample data has a single row.
for x in rdd.collect():
    print(x)

<Row(10.199999809265137, 'Fred', 123, 70114101100)>
