<a href="https://colab.research.google.com/github/venkatesh1226/CMPE255-DataMining/blob/main/eda_d3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import json
from string import Template

import IPython.display as dp
import pandas as pd


In [15]:
# Read the billionaires CSV from the mounted Google Drive into a DataFrame.
df_billionaires = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Billionaires Statistics Dataset.csv"
)


In [19]:
# Mean `finalWorth` per `country`, returned as a two-column frame
# (country, finalWorth) so it can be exported to CSV for the chart.
average_networth_by_country = (
    df_billionaires[['country', 'finalWorth']]
    .groupby('country')
    .mean()
    .reset_index()
)


In [20]:
# Skeleton HTML/JS template for a D3 visualization.
# Placeholders (string.Template syntax):
#   $id - DOM id shared by the CSS rule and the container <div>
#   $df - CSV text that the browser parses with d3.csvParse
# The script body only parses the data; the drawing code is left as a
# placeholder comment. Nothing in this cell renders the template, and the
# name `s` is rebound by the bar-chart cell further down.
s = Template(
    r"""
<style>
    /* Add your styles here */
    #$id {
        text-align: center;
    }
</style>

<div id="$id"></div>

<script type="module">
    import * as d3 from 'https://cdn.skypack.dev/d3';

    // Parse the data
    const data = d3.csvParse(`$df`, d3.autoType);

    // D3.js code for the bar chart goes here
    // ...

</script>
    """
)


In [22]:
# Bar chart of the mean `finalWorth` per country, drawn with D3 in the cell output.
# Placeholders filled by `safe_substitute` at the bottom of the cell:
#   $id - DOM id of the container <div>; used by the CSS rule, the <div> itself
#         and the d3.select() call, so all three always agree
#   $df - CSV text (columns: country, finalWorth) parsed in the browser
s = Template(
    r"""
<style>
    /* Add your styles here */
    #$id {
        text-align: center;
    }
    .bar {
        fill: steelblue;
    }
</style>

<div id="$id"></div>

<script type="module">
    import * as d3 from 'https://cdn.skypack.dev/d3';

    const data = d3.csvParse(`$df`, d3.autoType);

    const margin = { top: 20, right: 20, bottom: 70, left: 40 },
          width = 960 - margin.left - margin.right,
          height = 500 - margin.top - margin.bottom;

    // scaleBand.rangeRound takes a single [start, stop] array; spacing is set by padding().
    const x = d3.scaleBand().rangeRound([0, width]).padding(0.1);
    const y = d3.scaleLinear().range([height, 0]);

    const xAxis = d3.axisBottom(x);
    const yAxis = d3.axisLeft(y).ticks(10);

    const svg = d3.select("#$id")
                  .append("svg")
                  .attr("width", width + margin.left + margin.right)
                  .attr("height", height + margin.top + margin.bottom)
                  .append("g")
                  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

    x.domain(data.map(d => d.country));
    y.domain([0, d3.max(data, d => d.finalWorth)]);

    // X axis with the country names rotated to be vertical.
    svg.append("g")
       .attr("class", "x axis")
       .attr("transform", "translate(0," + height + ")")
       .call(xAxis)
       .selectAll("text")
       .style("text-anchor", "end")
       .attr("dx", "-.8em")
       .attr("dy", "-.55em")
       .attr("transform", "rotate(-90)" );

    // Y axis plus its title. The axis group carries fill="none", so the title
    // needs an explicit fill or it is rendered invisibly.
    svg.append("g")
       .attr("class", "y axis")
       .call(yAxis)
       .append("text")
       .attr("transform", "rotate(-90)")
       .attr("y", 6)
       .attr("dy", ".71em")
       .attr("fill", "currentColor")
       .style("text-anchor", "end")
       .text("Net Worth (in billions)");

    // One rect per country; colour comes from the .bar CSS rule above.
    svg.selectAll(".bar")
       .data(data)
       .enter().append("rect")
       .attr("class", "bar")
       .attr("x", d => x(d.country))
       .attr("width", x.bandwidth())
       .attr("y", d => y(d.finalWorth))
       .attr("height", d => height - y(d.finalWorth));

</script>
    """
)

# Render the visualization
dp.HTML(s.safe_substitute({"df": average_networth_by_country.to_csv(index=False), "id": "bar_chart_id"}))


**Force Layout (Common Organisations between different billionaires)**

In [29]:
import pandas as pd

# Load the dataset
df_billionaires = pd.read_csv("/content/drive/MyDrive/Billionaires Statistics Dataset.csv")

# One node per distinct billionaire name
nodes = []
for person in df_billionaires['personName'].unique():
    nodes.append({"id": person})

# One link for every unordered pair of billionaires that appear under the
# same organization (pairs keep the row order within each organization)
links = [
    {"source": roster[first], "target": roster[second]}
    for roster in (
        org_rows['personName'].tolist()
        for _, org_rows in df_billionaires.groupby('organization')
    )
    for first in range(len(roster))
    for second in range(first + 1, len(roster))
]

# Print the first few nodes and links as a sample
print(nodes[:5])
print(links[:5])


[{'id': 'Bernard Arnault & family'}, {'id': 'Elon Musk'}, {'id': 'Jeff Bezos'}, {'id': 'Larry Ellison'}, {'id': 'Warren Buffett'}]
[{'source': 'Brian Chesky', 'target': 'Joe Gebbia'}, {'source': 'Alain Bouchard', 'target': "Jacques D'Amours"}, {'source': 'Larry Page', 'target': 'Sergey Brin'}, {'source': 'Warren Buffett', 'target': 'Charles Munger'}, {'source': 'Bill Gates', 'target': 'Melinda French Gates'}]


In [35]:
# Build and display a D3 force-directed graph of the `nodes` / `links`
# prepared in the previous cell (billionaires linked by shared organization).

# Serialize with JSON rather than Python's repr: repr prints a missing name as
# `nan`, which is an undefined identifier in JavaScript, while JSON output is
# always a valid JS literal. "</" is escaped so a value can never terminate
# the surrounding <script> element.
nodes_json = json.dumps(nodes).replace("</", "<\\/")
links_json = json.dumps(links).replace("</", "<\\/")

# Literal braces of the CSS/JS are doubled because this is an f-string.
d3_script_with_labels = f"""
<style>
    .node {{
        fill: #ccc;
        stroke: #fff;
        stroke-width: 1.5px;
    }}
    .link {{
        stroke: #999;
        stroke-opacity: 0.6;
    }}
    .label {{
        font-size: 10px;
        font-family: Arial;
    }}
</style>

<div id="forceGraphWithLabels"></div>

<script type="module">
    import * as d3 from 'https://cdn.skypack.dev/d3';

    const nodes = {nodes_json};
    const links = {links_json};

    const width = 800;
    const height = 600;

    const simulation = d3.forceSimulation(nodes)
        .force("link", d3.forceLink(links).id(d => d.id))
        .force("charge", d3.forceManyBody().strength(-200))
        .force("center", d3.forceCenter(width / 2, height / 2));

    const svg = d3.select("#forceGraphWithLabels").append("svg")
        .attr("width", width)
        .attr("height", height);

    const link = svg.append("g")
        .attr("class", "links")
        .selectAll("line")
        .data(links)
        .enter().append("line")
        .attr("class", "link");

    const node = svg.append("g")
        .attr("class", "nodes")
        .selectAll("circle")
        .data(nodes)
        .enter().append("circle")
        .attr("class", "node")
        .attr("r", 5);

    const label = svg.append("g")
        .attr("class", "labels")
        .selectAll("text")
        .data(nodes)
        .enter().append("text")
        .attr("class", "label")
        .text(d => d.id);

    simulation
        .nodes(nodes)
        .on("tick", ticked);

    simulation.force("link")
        .links(links);

    // Copy the simulated positions onto the SVG elements on every tick.
    function ticked() {{
        link
            .attr("x1", d => d.source.x)
            .attr("y1", d => d.source.y)
            .attr("x2", d => d.target.x)
            .attr("y2", d => d.target.y);

        node
            .attr("cx", d => d.x)
            .attr("cy", d => d.y);

        label
            .attr("x", d => d.x)
            .attr("y", d => d.y);
    }}
</script>
"""

# Display the D3.js visualization in the Colab output cell.
# Uses the `dp` alias imported in the first cell; a bare `HTML` is not
# imported until a later cell, so it is undefined here on a fresh kernel.
dp.display(dp.HTML(d3_script_with_labels))


**Sankey Diagram representing the flow from a billionaire's source of wealth to their country**

In [49]:
from IPython.display import display, HTML

# 1. Prepare the Data
import pandas as pd

# Load the dataset
df_billionaires = pd.read_csv("/content/drive/MyDrive/Billionaires Statistics Dataset.csv")

# A flow needs both endpoints, so rows missing either value are left out
# (they would otherwise surface as NaN nodes in the diagram).
flows = df_billionaires.dropna(subset=['source', 'country'])

# Left column: sources of wealth. Right column: countries.
sources = flows['source'].unique().tolist()
countries = flows['country'].unique().tolist()

# Node list for the Sankey diagram: all sources first, then all countries.
nodes_data = sources + countries
nodes = [{"name": name} for name in nodes_data]

# Name -> node index lookups. Countries are offset by len(sources) so a label
# that occurs in both columns still maps to two distinct nodes.
source_index = {name: position for position, name in enumerate(sources)}
country_index = {name: len(sources) + position for position, name in enumerate(countries)}

# One link per (source, country) pair; `value` is the number of billionaires in it.
links_data = flows.groupby(['source', 'country']).size().reset_index(name='value')
links = [
    {
        "source": source_index[source_name],
        "target": country_index[country_name],
        "value": int(count),  # plain int so the links stay JSON-serializable
    }
    for source_name, country_name, count in links_data.itertuples(index=False, name=None)
]



In [51]:
from IPython.display import display, HTML

# HTML/JS for the Sankey diagram (wealth source -> country).
# A raw-string Template is used so that JavaScript escapes such as "\n" reach
# the browser unchanged; in a normal Python string "\n" becomes a real line
# break inside the JS string literal, which is a syntax error.
# Placeholders: $nodes (array of {name}) and $links (array of
# {source, target, value} with node indices), both injected as JSON.
sankey_template = Template(
    r"""
<style>
    .node rect {
      fill-opacity: .9;
      shape-rendering: crispEdges;
      stroke-width: 0;
    }
    .node text {
      text-shadow: 0 1px 0 #fff;
      pointer-events: none;
    }
    .link {
      fill: none;
      stroke: #000;
      stroke-opacity: .2;
    }
</style>

<div id="sankey"></div>

<script src="https://d3js.org/d3.v5.min.js"></script>
<!-- Version-pinned bare URL: unpkg resolves the package's own bundle path. -->
<script src="https://unpkg.com/d3-sankey@0.7.1"></script>
<script>
    // Sankey data
    var data = {
      nodes: $nodes,
      links: $links
    };

    var units = "Billionaires";

    var margin = {top: 10, right: 10, bottom: 10, left: 10},
        width = 700 - margin.left - margin.right,
        height = 400 - margin.top - margin.bottom;

    var formatNumber = d3.format(",.0f"),
        format = function(d) { return formatNumber(d) + " " + units; },
        color = d3.scaleOrdinal(d3.schemeCategory10);

    var svg = d3.select("#sankey").append("svg")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
      .append("g")
        .attr("transform",
              "translate(" + margin.left + "," + margin.top + ")");

    // Count the distinct node indices used on one side of the links.
    // Must run before the layout, which replaces the indices with node objects.
    function distinctCount(key) {
      var seen = {};
      data.links.forEach(function(l) { seen[l[key]] = true; });
      return Object.keys(seen).length;
    }

    // Keep the total padding of the fullest column below the drawing height;
    // padding larger than that leaves no room for the nodes themselves.
    var fullestColumn = Math.max(distinctCount("source"), distinctCount("target"), 2);
    var nodePadding = Math.min(10, 0.5 * height / (fullestColumn - 1));

    var sankey = d3.sankey()
        .nodeWidth(36)
        .nodePadding(nodePadding)
        .size([width, height])
        .iterations(32);

    // d3-sankey 0.7 API: calling the generator computes the layout in place
    // and returns {nodes, links}; link paths come from sankeyLinkHorizontal.
    var graph = sankey(data);
    var path = d3.sankeyLinkHorizontal();

    var link = svg.append("g").selectAll(".link")
        .data(graph.links)
      .enter().append("path")
        .attr("class", "link")
        .attr("d", path)
        .style("stroke-width", function(d) { return Math.max(1, d.width); })
        .sort(function(a, b) { return b.width - a.width; });

    link.append("title")
          .text(function(d) {
          return d.source.name + " → " +
                  d.target.name + "\n" + format(d.value); });

    var node = svg.append("g").selectAll(".node")
        .data(graph.nodes)
      .enter().append("g")
        .attr("class", "node")
        .attr("transform", function(d) {
          return "translate(" + d.x0 + "," + d.y0 + ")"; })
      .call(d3.drag()
        // Expose the node's top-left corner as the drag subject so that
        // d3.event.y tracks the node position instead of the raw pointer.
        .subject(function(d) {
          return {x: d.x0, y: d.y0};
        })
        .on("start", function() {
          this.parentNode.appendChild(this);
        })
        .on("drag", dragmove));

    node.append("rect")
        .attr("height", function(d) { return d.y1 - d.y0; })
        .attr("width", sankey.nodeWidth())
        .style("fill", function(d) {
          // Colour by the first word of the name; String() guards non-string names.
          return d.color = color(String(d.name).replace(/ .*/, "")); })
        .style("stroke", function(d) {
          return d3.rgb(d.color).darker(2); })
      .append("title")
        .text(function(d) {
          return d.name + "\n" + format(d.value); });

    // Labels sit left of a node by default and right of it for the left half.
    node.append("text")
        .attr("x", -6)
        .attr("y", function(d) { return (d.y1 - d.y0) / 2; })
        .attr("dy", ".35em")
        .attr("text-anchor", "end")
        .attr("transform", null)
        .text(function(d) { return d.name; })
      .filter(function(d) { return d.x0 < width / 2; })
        .attr("x", 6 + sankey.nodeWidth())
        .attr("text-anchor", "start");

    // Move a node vertically within the drawing area and re-route its links.
    function dragmove(d) {
      var nodeHeight = d.y1 - d.y0;
      d.y0 = Math.max(0, Math.min(height - nodeHeight, d3.event.y));
      d.y1 = d.y0 + nodeHeight;
      d3.select(this)
        .attr("transform", "translate(" + d.x0 + "," + d.y0 + ")");
      sankey.update(graph);
      link.attr("d", path);
    }
</script>
"""
)

# Inject the data as JSON. `nodes` (objects with a `name`) is used rather than
# the bare string list `nodes_data`, because the script reads `d.name`.
# "</" is escaped so a value can never close the surrounding <script> element.
sankey_code = sankey_template.safe_substitute(
    nodes=json.dumps(nodes).replace("</", "<\\/"),
    links=json.dumps(links).replace("</", "<\\/"),
)

display(HTML(sankey_code))


In [52]:
# Save the Sankey markup as a standalone HTML file and download it from Colab.
html_file_name = "sankey.html"

# The markup contains non-ASCII text (e.g. the "→" in the link tooltips), so
# write UTF-8 explicitly and declare the charset for browsers opening the file
# directly instead of relying on platform or browser defaults.
with open(html_file_name, "w", encoding="utf-8") as f:
    f.write('<!DOCTYPE html>\n<meta charset="utf-8">\n')
    f.write(sankey_code)

# Now, download the file and open it in a browser
from google.colab import files
files.download(html_file_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>