In [1]:
from pyspark.sql.types import StructType,StructField,IntegerType,StringType,DoubleType

In [2]:
from pyspark.ml.feature import StringIndexer
from pyspark.ml import Pipeline

In [3]:
# The import cells above bring in only SQL types and ML classes, so
# SparkSession has to be imported here or this cell raises NameError on a
# fresh kernel.
from pyspark.sql import SparkSession

# Reuse the active session if one exists, otherwise start one named "pysparkARM".
spark = SparkSession.builder.appName("pysparkARM").getOrCreate()

In [None]:
# Explicit schema with eleven nullable columns: rank, name, platform, year,
# genre, publisher and five sales figures.
# NOTE(review): nothing visible in this notebook passes `schema` to a reader;
# the CSV is loaded with inferSchema instead — confirm whether this is still needed.
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ('Rank', IntegerType()),
        ('Name', StringType()),
        ('Platform', StringType()),
        ('Year', IntegerType()),
        ('Genre', StringType()),
        ('Publisher', StringType()),
        ('NA_Sales', DoubleType()),
        ('EU_Sales', DoubleType()),
        ('JP_Sales', DoubleType()),
        ('Other_Sales', DoubleType()),
        ('Global_Sales', DoubleType()),
    )
])

In [4]:
# Load the sales CSV: the first line is the header and Spark infers column types.
df = spark.read.options(header='true', inferSchema=True).csv('vgsales.csv')

In [None]:
# Fit one StringIndexer per categorical column; each writes its result to a
# "<column>_index" column.
indexers = [
    StringIndexer(inputCol=name, outputCol=name + "_index").fit(df)
    for name in ("Genre", "Publisher", "Platform")
]

# Chain the fitted indexers and append all index columns to df in one pass.
pipeline = Pipeline(stages=indexers)
df = pipeline.fit(df).transform(df)

In [6]:
def ARMformat(a, b, tb, max_items=10, frame=None):
    """Build association-rule transactions: one ``(key, items)`` pair per distinct value of ``a``.

    Args:
        a: Name of the column whose distinct values become the transaction ids.
        b: Name of the column whose distinct, non-null values become the items.
        tb: Name under which the source DataFrame is registered as a temp view.
        max_items: Maximum number of items kept per transaction. Defaults to
            10, the cap this function has always applied; ``None`` keeps all.
        frame: DataFrame to read from. Defaults to the notebook-level ``df``.

    Returns:
        A list of ``(str(key), [item, ...])`` tuples, one per distinct key.
    """
    source = df if frame is None else frame

    # The body no longer queries the view, but it is still registered so the
    # function's side effect is unchanged for existing callers.
    source.createOrReplaceTempView(tb)

    keys = [row[0] for row in source.select(a).distinct().collect()]

    transactions = []
    for key in keys:
        # Filter through the DataFrame API instead of concatenating the value
        # into SQL text: a key containing a single quote (e.g. "O'Neil") would
        # otherwise terminate the string literal and break the query.
        matches = source.filter(source[a] == key).select(b).distinct().collect()
        items = [row[0] for row in matches if row[0] is not None]
        # The basket is truncated purely for speed.
        transactions.append((str(key), items[:max_items]))
    return transactions

In [None]:
# Distinct Genre values as a plain Python list.
[row[0] for row in df.select('Genre').distinct().collect()]

In [None]:
# Select the single column before converting, so only the Genre values are
# collected to the driver rather than every column of every row.
df.select('Genre').toPandas()['Genre'].unique()

In [7]:
# Build one (genre, publishers) transaction per distinct Genre.
# NOTE(review): this rebinds `df` from the Spark DataFrame to a plain Python
# list. ARMformat reads the notebook-level `df`, so it and the earlier cells
# cannot be re-run afterwards without reloading the CSV.
df = ARMformat('Genre','Publisher','vgsales')
len(df)

12

In [None]:
# At this point `df` is the list of (id, items) tuples returned by ARMformat.
print(df)

In [8]:
# Turn the (id, items) tuples into a two-column DataFrame for FPGrowth.
data = spark.createDataFrame(df, schema=["id", "items"])

In [None]:
# Print the DataFrame object itself (not its rows).
print(data)

In [None]:
# Display the transactions as a text table.
data.show()

In [9]:
from pyspark.ml.fpm import FPGrowth

# Configure FP-growth over the "items" column with a 0.5 minimum support and
# a 0.6 minimum rule confidence, then fit it on the transactions.
fpGrowth = FPGrowth(
    itemsCol="items",
    minSupport=0.5,
    minConfidence=0.6,
    numPartitions=10,
)
model = fpGrowth.fit(data)

In [10]:
# Frequent itemsets found by the fitted model, with their occurrence counts.
model.freqItemsets.show()

+--------------------+----+
|               items|freq|
+--------------------+----+
|        [Infogrames]|  11|
|           [Ubisoft]|  10|
|[Ubisoft, Infogra...|   9|
|               [3DO]|   9|
|      [3DO, Ubisoft]|   7|
|[3DO, Ubisoft, In...|   6|
|   [3DO, Infogrames]|   8|
|  [Storm City Games]|   8|
|[Storm City Games...|   6|
|[Storm City Games...|   7|
|[Storm City Games...|   6|
|[Storm City Games...|   7|
+--------------------+----+



In [11]:
# Association rules from the fitted model: antecedent, consequent, confidence and lift.
model.associationRules.show()

+--------------------+------------------+------------------+------------------+
|          antecedent|        consequent|        confidence|              lift|
+--------------------+------------------+------------------+------------------+
|           [Ubisoft]|      [Infogrames]|               0.9|0.9818181818181819|
|           [Ubisoft]|             [3DO]|               0.7|0.9333333333333332|
|           [Ubisoft]|[Storm City Games]|               0.7|              1.05|
|      [3DO, Ubisoft]|      [Infogrames]|0.8571428571428571| 0.935064935064935|
|               [3DO]|         [Ubisoft]|0.7777777777777778|0.9333333333333333|
|               [3DO]|      [Infogrames]|0.8888888888888888|0.9696969696969697|
|               [3DO]|[Storm City Games]|0.6666666666666666|               1.0|
|[Storm City Games...|         [Ubisoft]|0.8571428571428571|1.0285714285714285|
|[Ubisoft, Infogra...|             [3DO]|0.6666666666666666|0.8888888888888888|
|[Ubisoft, Infogra...|[Storm City Games]

In [12]:
# Run the fitted model over the transactions; the result adds a `prediction` column.
model.transform(data).show()

+------------+--------------------+--------------------+
|          id|               items|          prediction|
+------------+--------------------+--------------------+
|   Adventure|[Funbox Media, 3D...|           [Ubisoft]|
|      Sports|[Media Rings, 3DO...|                  []|
|      Racing|[Media Rings, bit...|               [3DO]|
|Role-Playing|[Media Rings, Ice...|[Ubisoft, Storm C...|
|     Shooter|[3DO, id Software...|  [Storm City Games]|
|        Misc|[Funbox Media, Te...|               [3DO]|
|    Platform|[Jack of All Game...|                  []|
|      Puzzle|[Telegames, 3DO, ...|        [Infogrames]|
|    Fighting|[3DO, Infogrames,...|  [Storm City Games]|
|      Action|[Funbox Media, Ti...|                  []|
|    Strategy|[Iceberg Interact...|                  []|
|  Simulation|[bitComposer Game...|[3DO, Storm City ...|
+------------+--------------------+--------------------+

