-
-
Notifications
You must be signed in to change notification settings - Fork 362
/
Copy pathpd-melt.py
131 lines (104 loc) · 4.47 KB
/
pd-melt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- coding: utf-8 -*-
"""
#Understanding Pandas Melt - pd.melt()
* Tutorial: https://news.towardsai.net/pdm
* Github: https://github.com/towardsai/tutorials/tree/master/pandas/pd-melt.py
"""
#Import Required Libraries:
import pandas as pd
# Raw data as a dictionary: one key per column, one list entry per person.
data = {
    "Person": ["Alan", "Berta", "Charlie", "Danielle"],  # Name of the person
    "House": ["A", "B", "A", "C"],  # Name of the house they live in
    "Age": [32, 46, 35, 28],  # Age of the person
    "Books": [100, 30, 20, 40],  # Number of books owned
    "Movies": [10, 20, 80, 60],  # Number of movies watched
}

# Convert the raw data into a wide-format pandas DataFrame
# (one row per person, one column per attribute).
data_wide = pd.DataFrame(data)

# Print the DataFrame. A bare `data_wide` expression is only echoed by a
# notebook/REPL; when this file runs as a script the value would be
# silently discarded, so it has to be printed explicitly.
print(data_wide)
# Melting the DataFrame from wide to long format.
# Each result is passed to print(): as bare expressions the melted frames
# would be computed and then discarded when this file is run as a script.

# 1) No parameters: with no identifier columns given, every column is
#    unpivoted into the default "variable" / "value" pair.
print(data_wide.melt())

# 2) id_vars only: "Person" and "House" are kept as identifier columns and
#    all remaining columns are melted.
print(data_wide.melt(id_vars=["Person", "House"]))

# 3) id_vars + value_vars: explicitly list the columns to be melted.
print(
    data_wide.melt(
        id_vars=["Person", "House"],  # Identifier columns
        value_vars=["Age", "Books", "Movies"],  # Columns to be melted
    )
)

# 4) id_vars + value_vars with a smaller selection: columns named in
#    neither list ("House", "Age") are left out of the result.
print(
    data_wide.melt(
        id_vars=["Person"],  # Identifier columns
        value_vars=["Books", "Movies"],  # Columns to be melted
    )
)

# 5) var_name / value_name: replace the default "variable" / "value"
#    column headers of the melted output.
print(
    data_wide.melt(
        id_vars=["Person", "House"],  # Identifier columns
        value_vars=["Age", "Books", "Movies"],  # Columns to be melted
        var_name="Info",  # Name of the variable column
        value_name="Numerical",  # Name of the value column
    )
)

# 6) Same renaming, applied to the smaller column selection.
print(
    data_wide.melt(
        id_vars=["Person"],  # Identifier columns
        value_vars=["Books", "Movies"],  # Columns to be melted
        var_name="Info",  # Name of the variable column
        value_name="Numerical",  # Name of the value column
    )
)

# 7) ignore_index=False: keep the original row index (its labels repeat
#    once per melted column) instead of a fresh 0..n-1 index.
print(
    data_wide.melt(
        id_vars=["Person", "House"],  # Identifier columns
        value_vars=["Age", "Books", "Movies"],  # Columns to be melted
        var_name="Info",  # Name of the variable column
        value_name="Numerical",  # Name of the value column
        ignore_index=False,  # Use the original index
    )
)
# Create multi-level column labels: level 0 keeps the original names and
# level 1 gives every column a second, alternative label.
# NOTE: this replaces the column labels of `data_wide` in place; the melts
# below use `col_level` to select which level their column names refer to.
data_wide.columns = [
    ["Person", "House", "Age", "Books", "Movies"],
    ["Name", "Flat", "Old", "Text", "Video"],
]

# Print the DataFrame with its new two-level header. (A bare expression
# would be discarded when this file is run as a script, so every result
# in this section is printed explicitly.)
print(data_wide)

# Melting the DataFrame from wide to long format.

# 1) col_level=0: id_vars / value_vars are looked up in the first level
#    of the column labels.
print(
    data_wide.melt(
        id_vars=["Person", "House"],  # Identifier columns
        value_vars=["Age", "Books", "Movies"],  # Columns to be melted
        var_name="Info",  # Name of the variable column
        value_name="Numerical",  # Name of the value column
        col_level=0,  # Use the 0th column level
    )
)

# 2) col_level=1: the same melt, addressed through the second level.
print(
    data_wide.melt(
        id_vars=["Name", "Flat"],  # Identifier columns
        value_vars=["Old", "Text", "Video"],  # Columns to be melted
        var_name="Info",  # Name of the variable column
        value_name="Numerical",  # Name of the value column
        col_level=1,  # Use the 1st column level
    )
)

# 3) col_level=1 combined with ignore_index=False: melt through the second
#    level while keeping the original row index.
print(
    data_wide.melt(
        id_vars=["Name", "Flat"],  # Identifier columns
        value_vars=["Old", "Text", "Video"],  # Columns to be melted
        var_name="Info",  # Name of the variable column
        value_name="Numerical",  # Name of the value column
        ignore_index=False,  # Use the original index
        col_level=1,  # Use the 1st column level
    )
)