In [12]:
#r "nuget: System.Text.Json"
#r "nuget: Microsoft.ML"

In [13]:
using Microsoft.ML;
using Microsoft.ML.Data;
using System.Text.Json;
using System.IO;

using System.Text.Json.Serialization;

using System.Linq;



In [14]:

/// <summary>
/// One feedback item as read from the JSON export. Every property is mapped to a JSON
/// property of the same name; <see cref="Embedding"/> carries the vector for the user story.
/// </summary>
public class FeedbackRecord
{
    [JsonPropertyName("Id")]
    public string Id { get; set; }

    [JsonPropertyName("PartnerShortName")]
    public string PartnerShortName { get; set; }

    [JsonPropertyName("ServiceName")]
    public string ServiceName { get; set; }

    [JsonPropertyName("Type")]
    public string Type { get; set; }

    [JsonPropertyName("Title")]
    public string Title { get; set; }

    [JsonPropertyName("Blocking")]
    public string Blocking { get; set; }

    [JsonPropertyName("Description")]
    public string Description { get; set; }

    [JsonPropertyName("WorkaroundAvailable")]
    public string WorkaroundAvailable { get; set; }

    [JsonPropertyName("Priority")]
    public string Priority { get; set; }

    [JsonPropertyName("CustomerName")]
    public string CustomerName { get; set; }

    [JsonPropertyName("CustomerTpid")]
    public string CustomerTpid { get; set; }

    [JsonPropertyName("WorkaroundDescription")]
    public string WorkaroundDescription { get; set; }

    [JsonPropertyName("UserStory")]
    public string UserStory { get; set; }

    /// <summary>Embedding of the user story, stored as a float array.</summary>
    [JsonPropertyName("Embedding")]
    public float[] Embedding { get; set; }

    /// <summary>Returns the embedding vector of this record.</summary>
    /// <returns>The same array instance held by <see cref="Embedding"/>.</returns>
    /// <exception cref="InvalidOperationException">Thrown when no embedding has been set.</exception>
    public float[] GetVector()
    {
        if (Embedding == null)
        {
            throw new InvalidOperationException("Embedding vector is not set.");
        }

        return Embedding;
    }

    /// <summary>
    /// Creates a copy of this record that carries every text field but no embedding.
    /// </summary>
    /// <returns>A new <see cref="FeedbackRecord"/> whose <see cref="Embedding"/> is null.</returns>
    public FeedbackRecord GetSafeVersion()
    {
        var safeCopy = new FeedbackRecord();

        safeCopy.Id = Id;
        safeCopy.PartnerShortName = PartnerShortName;
        safeCopy.ServiceName = ServiceName;
        safeCopy.Type = Type;
        safeCopy.Title = Title;
        safeCopy.Blocking = Blocking;
        safeCopy.Description = Description;
        safeCopy.WorkaroundAvailable = WorkaroundAvailable;
        safeCopy.Priority = Priority;
        safeCopy.CustomerName = CustomerName;
        safeCopy.CustomerTpid = CustomerTpid;
        safeCopy.WorkaroundDescription = WorkaroundDescription;
        safeCopy.UserStory = UserStory;

        // The embedding is deliberately left out of the safe version.
        safeCopy.Embedding = null;

        return safeCopy;
    }
}

In [16]:
// Serializable summary of one cluster of feedback; each property is mapped to a JSON
// property of the same name.
// NOTE(review): this type is not referenced by the visible cells, so the field meanings
// below are inferred from their names — confirm against the code that produces it.
public class ServiceCluster
{
    // Presumably the service the clustered feedback refers to.
    [JsonPropertyName("ServiceName")]
    public string ServiceName { get; set; }

    // Presumably a display name for the cluster.
    [JsonPropertyName("ClusterName")]
    public string ClusterName { get; set; }

    // Presumably a description of what the clustered feedback items have in common.
    [JsonPropertyName("CommonElement")]
    public string CommonElement { get; set; }

    // Presumably the number of feedback items in the cluster.
    [JsonPropertyName("SimilarFeedbacks")]
    public int SimilarFeedbacks { get; set; }

    // Presumably the number of distinct customers behind those feedback items.
    [JsonPropertyName("DistinctCustomers")]
    public int DistinctCustomers { get; set; }
}

// Input row type handed to ML.NET: one embedding vector per feedback record.
public class EmbeddingData
{
    // Declares the column as a fixed-size vector of 1536 floats.
    // NOTE(review): presumably 1536 matches the output size of the embedding model that
    // produced the data — confirm, since arrays of any other length will not fit this schema.
    [VectorType(1536)]
    public float[] Embedding { get; set; }
}

// Row type used to read clustering predictions back out of a transformed data view.
// NOTE(review): a later cell declares another ClusterPrediction class (without Distances);
// presumably whichever declaration was executed last is the one in effect — confirm.
public class ClusterPrediction
{
    // Bound to the "PredictedLabel" output column: the cluster assigned to the row.
    [ColumnName("PredictedLabel")]
    public uint PredictedCluster { get; set; }
    // Bound to the "Score" output column; presumably the row's distance to each cluster
    // centroid — confirm against the ML.NET KMeans documentation.
    [ColumnName("Score")]
    public float[] Distances { get; set; }
}

In [22]:
// Location of the exported feedback records (a JSON array of FeedbackRecord objects).
// NOTE(review): this is an absolute path on one developer's machine — adjust it before
// running the notebook anywhere else.
var jsonFilePath = "/Users/yoavdobrin/workspace/fta/feedback-ai-lens/sample-data/cosmosdb.json";
var jsonString = File.ReadAllText(jsonFilePath);

// Deserialize returns null when the document is the JSON literal `null`. Fail here with a
// clear message instead of letting a later cell hit a NullReferenceException.
var feedbackRecords = JsonSerializer.Deserialize<List<FeedbackRecord>>(jsonString)
    ?? throw new InvalidOperationException($"'{jsonFilePath}' did not contain a list of feedback records.");


In [28]:
// Number of clusters KMeans is asked to produce.
var clusterCount = 50;

// A fixed seed makes ML.NET's random number generation repeatable, so re-running the
// notebook gives the same cluster assignments instead of a different split every time.
var mlContext = new MLContext(seed: 0);

// Wrap each record's embedding in the row type ML.NET loads from. Order is preserved,
// which the later cells rely on when they zip predictions back onto feedbackRecords.
var embeddingData = feedbackRecords.Select(f => new EmbeddingData { Embedding = f.Embedding }).ToList();
var dataView = mlContext.Data.LoadFromEnumerable(embeddingData);

// Cluster the embeddings with KMeans into clusterCount clusters.
var pipeline = mlContext.Clustering.Trainers.KMeans(featureColumnName: "Embedding", numberOfClusters: clusterCount);
var model = pipeline.Fit(dataView);

// Predict the cluster for each feedback record. `clusters` holds one prediction per
// input row, so its Count is the number of records, not the number of clusters.
var predictions = model.Transform(dataView);
var clusters = mlContext.Data.CreateEnumerable<ClusterPrediction>(predictions, reuseRowObject: false).ToList();

// Row type used by this cell to read the predicted cluster of each row.
// NOTE(review): this redeclares the ClusterPrediction class from an earlier cell, here
// without the Distances/"Score" member — consider keeping a single declaration.
public class ClusterPrediction
{
    // Bound to the "PredictedLabel" output column.
    [ColumnName("PredictedLabel")]
    public uint PredictedCluster { get; set; }  // Cluster number (1, 2, 3, etc.)
}

// Pair every feedback record with the cluster predicted for it; both sequences are
// walked in step, so the n-th record is matched with the n-th prediction.
var feedbackWithClusters = feedbackRecords
    .Zip(clusters, (record, prediction) => new
    {
        Feedback = record,
        Cluster = prediction.PredictedCluster
    })
    .ToList();

// Count how many different cluster ids were actually assigned, then report it.
var numberOfClusters = feedbackWithClusters
    .GroupBy(pair => pair.Cluster)
    .Count();
Console.WriteLine($"Number of clusters: {numberOfClusters}");

Number of clusters: 50


In [32]:
// Reports how many user stories were assigned to the given cluster, or a
// "none found" message when no record belongs to it.
void DisplayUserStoriesForCluster(uint clusterNumber)
{
    // Collect the user story of every record whose predicted cluster matches.
    var matchingStories = new List<string>();
    foreach (var entry in feedbackWithClusters)
    {
        if (entry.Cluster == clusterNumber)
        {
            matchingStories.Add(entry.Feedback.UserStory);
        }
    }

    if (matchingStories.Count == 0)
    {
        Console.WriteLine($"No user stories found in cluster {clusterNumber}.");
        return;
    }

    // Only the count is printed; the story texts themselves are not listed.
    Console.WriteLine($"Cluster {clusterNumber} contains {matchingStories.Count} user stories:");
}

// Report every cluster in turn, from id 1 up to and including clusterCount.
// (To inspect a single cluster, call DisplayUserStoriesForCluster with its id instead.)
for (uint clusterId = 1; clusterId <= clusterCount; ++clusterId)
{
    DisplayUserStoriesForCluster(clusterId);
}

Cluster 1 contains 8 user stories:
Cluster 2 contains 33 user stories:
Cluster 3 contains 16 user stories:
Cluster 4 contains 21 user stories:
Cluster 5 contains 13 user stories:
Cluster 6 contains 3 user stories:
Cluster 7 contains 11 user stories:
Cluster 8 contains 2 user stories:
Cluster 9 contains 16 user stories:
Cluster 10 contains 2 user stories:
Cluster 11 contains 2 user stories:
Cluster 12 contains 14 user stories:
Cluster 13 contains 6 user stories:
Cluster 14 contains 4 user stories:
Cluster 15 contains 16 user stories:
Cluster 16 contains 12 user stories:
Cluster 17 contains 3 user stories:
Cluster 18 contains 1 user stories:
Cluster 19 contains 13 user stories:
Cluster 20 contains 7 user stories:
Cluster 21 contains 20 user stories:
Cluster 22 contains 24 user stories:
Cluster 23 contains 13 user stories:
Cluster 24 contains 14 user stories:
Cluster 25 contains 15 user stories:
Cluster 26 contains 3 user stories:
Cluster 27 contains 2 user stories:
Cluster 28 contains 16

In [10]:
// Pair each user story with its predicted cluster, group the stories by cluster, and keep
// the five clusters that contain the most stories (largest first).
// ToList() materialises the result once. Without it the query is deferred, so the whole
// zip/group/sort pipeline would be re-executed on every enumeration and would observe any
// later mutation of the underlying lists.
var groupedClusters = feedbackRecords
    .Zip(clusters, (feedback, cluster) => new { feedback.UserStory, cluster.PredictedCluster })
    .GroupBy(x => x.PredictedCluster)
    .OrderByDescending(g => g.Count())
    .Take(5)
    .Select(g => new
    {
        Cluster = g.Key,
        UserStories = g.Select(x => x.UserStory).ToList()
    })
    .ToList();


In [11]:
// Render the grouped clusters as the cell's output (presumably via the notebook's display helper).
display(groupedClusters);

index,value
,
,
,
,
,
0,"{ Cluster = 3, UserStories = System.Collections.Generic.List`1[System.String] }Cluster3UserStories[ As a cloud infrastructure administrator, I want to configure the Mem.ShareForceSalting parameter to 0 in Azure VMware Solution, so that I can enable memory sharing across all VMs within the same host and reduce RAM usage, thereby staying within the allocated budget., As a cloud infrastructure manager, I want to deploy Azure VMware Solution Stretched Cluster in the Italy North region, so that I can ensure high availability and disaster recovery for our VMware workloads., As a cloud infrastructure manager, I want to enable Azure VMware Solutions in the Brazil South region, so that I can deploy and manage VMware workloads locally., As a network administrator, I want to configure an HTTP Proxy for Azure Spring Apps, so that I can ensure all traffic is routed through our on-premise security infrastructure and meet our security requirements., As a cloud infrastructure manager, I want to automatically reschedule pods on spot node pools when capacity becomes available, so that I can optimize resource utilization and reduce total cost of ownership (TCO)., As an IT manager, I want to evaluate Azure VMware Solution with add-on storage of 200TB++, so that I can determine if it is a cost-effective and viable alternative to our current on-premise Nutanix solution., As an IT administrator, I want to monitor detailed metrics on time taken at every stage of VM migration using HCX, so that I can have an alert mechanism in place and ensure smooth migration processes., As a cloud administrator, I want to view my current subnet usage for App Services/ASEs in Fairfax, so that I can prevent subnet IP exhaustion and ensure continuous production workloads., As an IT manager, I want Azure Automation available in the Spain Central region, so that I can fully automate my environment and progress with my planned projects without delays., As a cloud infrastructure manager, I want to replicate 
AppStacks between on-prem and AVS without using an NFS share, so that I can ensure seamless application delivery and maintain consistency across environments., As a Kubernetes administrator, I want to create role assignments based on namespace labels, so that I can manage access permissions more efficiently across a large number of namespaces. ]"
,
Cluster,3
UserStories,"[ As a cloud infrastructure administrator, I want to configure the Mem.ShareForceSalting parameter to 0 in Azure VMware Solution, so that I can enable memory sharing across all VMs within the same host and reduce RAM usage, thereby staying within the allocated budget., As a cloud infrastructure manager, I want to deploy Azure VMware Solution Stretched Cluster in the Italy North region, so that I can ensure high availability and disaster recovery for our VMware workloads., As a cloud infrastructure manager, I want to enable Azure VMware Solutions in the Brazil South region, so that I can deploy and manage VMware workloads locally., As a network administrator, I want to configure an HTTP Proxy for Azure Spring Apps, so that I can ensure all traffic is routed through our on-premise security infrastructure and meet our security requirements., As a cloud infrastructure manager, I want to automatically reschedule pods on spot node pools when capacity becomes available, so that I can optimize resource utilization and reduce total cost of ownership (TCO)., As an IT manager, I want to evaluate Azure VMware Solution with add-on storage of 200TB++, so that I can determine if it is a cost-effective and viable alternative to our current on-premise Nutanix solution., As an IT administrator, I want to monitor detailed metrics on time taken at every stage of VM migration using HCX, so that I can have an alert mechanism in place and ensure smooth migration processes., As a cloud administrator, I want to view my current subnet usage for App Services/ASEs in Fairfax, so that I can prevent subnet IP exhaustion and ensure continuous production workloads., As an IT manager, I want Azure Automation available in the Spain Central region, so that I can fully automate my environment and progress with my planned projects without delays., As a cloud infrastructure manager, I want to replicate AppStacks between on-prem and AVS without using an NFS share, so that I can ensure 
seamless application delivery and maintain consistency across environments., As a Kubernetes administrator, I want to create role assignments based on namespace labels, so that I can manage access permissions more efficiently across a large number of namespaces. ]"
1,"{ Cluster = 5, UserStories = System.Collections.Generic.List`1[System.String] }Cluster5UserStories[ As a business user, I want to filter Power Automate runs by datetime of execution, so that I can quickly locate specific runs without scrolling extensively., As a developer, I want to use a VSCode extension for Azure API Management, so that I can manage APIs efficiently within my development environment., As a system administrator, I want to enable gMSA v2 support on Windows AKS, so that I can ensure our API pods can access a domain-joined SQL server without disruptions., As a system administrator, I want to upload static images to a backend storage like Azure Blob instead of using Git for Static Web Apps, so that I can avoid hitting the gitpacks hard limit and ensure continuous updates to the business-critical app., As a developer, I want to receive comprehensive guidance and support on using Azure Functions for background processes in Dataverse, so that I can confidently justify the need for permissions and demonstrate the benefits to my IT managers., As a DevOps engineer, I want to have extended support for older Kubernetes versions on Azure Kubernetes Service, so that I can have more time to migrate to fully supported versions and maintain consistency across multi-cloud environments. ]"

Unnamed: 0,Unnamed: 1
Cluster,3
UserStories,"[ As a cloud infrastructure administrator, I want to configure the Mem.ShareForceSalting parameter to 0 in Azure VMware Solution, so that I can enable memory sharing across all VMs within the same host and reduce RAM usage, thereby staying within the allocated budget., As a cloud infrastructure manager, I want to deploy Azure VMware Solution Stretched Cluster in the Italy North region, so that I can ensure high availability and disaster recovery for our VMware workloads., As a cloud infrastructure manager, I want to enable Azure VMware Solutions in the Brazil South region, so that I can deploy and manage VMware workloads locally., As a network administrator, I want to configure an HTTP Proxy for Azure Spring Apps, so that I can ensure all traffic is routed through our on-premise security infrastructure and meet our security requirements., As a cloud infrastructure manager, I want to automatically reschedule pods on spot node pools when capacity becomes available, so that I can optimize resource utilization and reduce total cost of ownership (TCO)., As an IT manager, I want to evaluate Azure VMware Solution with add-on storage of 200TB++, so that I can determine if it is a cost-effective and viable alternative to our current on-premise Nutanix solution., As an IT administrator, I want to monitor detailed metrics on time taken at every stage of VM migration using HCX, so that I can have an alert mechanism in place and ensure smooth migration processes., As a cloud administrator, I want to view my current subnet usage for App Services/ASEs in Fairfax, so that I can prevent subnet IP exhaustion and ensure continuous production workloads., As an IT manager, I want Azure Automation available in the Spain Central region, so that I can fully automate my environment and progress with my planned projects without delays., As a cloud infrastructure manager, I want to replicate AppStacks between on-prem and AVS without using an NFS share, so that I can ensure 
seamless application delivery and maintain consistency across environments., As a Kubernetes administrator, I want to create role assignments based on namespace labels, so that I can manage access permissions more efficiently across a large number of namespaces. ]"

Unnamed: 0,Unnamed: 1
Cluster,5
UserStories,"[ As a business user, I want to filter Power Automate runs by datetime of execution, so that I can quickly locate specific runs without scrolling extensively., As a developer, I want to use a VSCode extension for Azure API Management, so that I can manage APIs efficiently within my development environment., As a system administrator, I want to enable gMSA v2 support on Windows AKS, so that I can ensure our API pods can access a domain-joined SQL server without disruptions., As a system administrator, I want to upload static images to a backend storage like Azure Blob instead of using Git for Static Web Apps, so that I can avoid hitting the gitpacks hard limit and ensure continuous updates to the business-critical app., As a developer, I want to receive comprehensive guidance and support on using Azure Functions for background processes in Dataverse, so that I can confidently justify the need for permissions and demonstrate the benefits to my IT managers., As a DevOps engineer, I want to have extended support for older Kubernetes versions on Azure Kubernetes Service, so that I can have more time to migrate to fully supported versions and maintain consistency across multi-cloud environments. ]"

Unnamed: 0,Unnamed: 1
Cluster,1
UserStories,"[ As a cloud infrastructure architect, I want to extend the bandwidth of AVS ExpressRoute GlobalReach to more than 25Gbps, so that I can support the planned ramp-up of 70 AVS hosts and ensure optimal network performance. ]"

Unnamed: 0,Unnamed: 1
Cluster,2
UserStories,"[ As a system administrator, I want to extend the current 15 ASEv1/v2 environments until the end of September 2024, so that I can ensure critical banking services continue to operate without disruption. ]"

Unnamed: 0,Unnamed: 1
Cluster,4
UserStories,"[ As a developer, I want to monitor HTTP result codes for my function on a Linux consumption plan, so that I can create alerts when the function returns any 4xx status code. ]"
