<a href="https://colab.research.google.com/github/yeabwang/tensorflow/blob/main/Sparse_Tensors_strings_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import tensorflow as tf

In [32]:
# Sparse tensor
# A sparse tensor is a memory-efficient representation of a tensor where only the non-zero elements and their indices are stored.
# It is used to represent tensors that contain a large number of zero values.

# Dense Tensor
# A dense tensor is a regular tensor where every element in the tensor is explicitly stored, including zeros.
# We use it when the tensor is small and mostly filled with non zero elements

# Define a 5x5 dense tensor
original_dense_tensor = tf.constant([
    [1, 0, 0, 3, 0],
    [0, 0, 4, 0, 0],
    [5, 0, 0, 6, 0],
    [0, 0, 0, 0, 7],
    [8, 0, 0, 9, 0]
])

print("Original tensor" , original_dense_tensor)

# Sparse tensor
# The indices refer to the specific locations in the matrix where the non-zero values are stored. Each index is a pair of coordinates, indicating the row and column where a non-zero value exists in the matrix.
indices = tf.where(original_dense_tensor != 0)
print("Indices",indices)

# The values are the actual non-zero values from the matrix, and they correspond to the indices. Each non-zero value in the matrix has a matching index indicating where it belongs in the original matrix.
values = tf.gather_nd(original_dense_tensor,indices)
print("values", values)

# The dense shape refers to the full shape of the tensor, as if we were to write out all the values (including zeros). This is important because the sparse tensor represents the same matrix, but only stores the non-zero elements and their positions.
shape = original_dense_tensor.shape
print("Dense Shape" , shape)

# Getting our sparse tensor
sparse_tensor = tf.SparseTensor(indices,values,shape)
print("Sparse Tensor" , sparse_tensor)

# Turing it back to dense tensor

dense_tensor = tf.sparse.to_dense(sparse_tensor)
print("Dense Tensor" , dense_tensor)

Original tensor tf.Tensor(
[[1 0 0 3 0]
 [0 0 4 0 0]
 [5 0 0 6 0]
 [0 0 0 0 7]
 [8 0 0 9 0]], shape=(5, 5), dtype=int32)
Indices tf.Tensor(
[[0 0]
 [0 3]
 [1 2]
 [2 0]
 [2 3]
 [3 4]
 [4 0]
 [4 3]], shape=(8, 2), dtype=int64)
values tf.Tensor([1 3 4 5 6 7 8 9], shape=(8,), dtype=int32)
Dense Shape (5, 5)
Sparse Tensor SparseTensor(indices=tf.Tensor(
[[0 0]
 [0 3]
 [1 2]
 [2 0]
 [2 3]
 [3 4]
 [4 0]
 [4 3]], shape=(8, 2), dtype=int64), values=tf.Tensor([1 3 4 5 6 7 8 9], shape=(8,), dtype=int32), dense_shape=tf.Tensor([5 5], shape=(2,), dtype=int64))
Dense Tensor tf.Tensor(
[[1 0 0 3 0]
 [0 0 4 0 0]
 [5 0 0 6 0]
 [0 0 0 0 7]
 [8 0 0 9 0]], shape=(5, 5), dtype=int32)


In [64]:
# String tensors - ontain variable-length strings.

string_tensor = tf.constant(["Hello", "This", "Is", "Yeabsira"])
string_numbers_tensor = tf.constant(["3.14", "2.32", "0.22"])
print(string_tensor)

#Common operations
# String contatination
concatenated_string = tf.strings.join(string_tensor, separator = " ")
print(concatenated_string)

# String length
string_length = tf.strings.length(string_tensor)
print(string_length)

# String split
string_split = tf.strings.split(string_tensor, sep = ",")
print(string_split)

# Substrings
substring = tf.strings.substr(string_tensor, pos = 0, len = 2)
print(substring)

# matching patterns - searching the pattern we want from the tensors
pattern_match = tf.strings.regex_full_match(string_tensor, pattern = "Yeabsira")
print(pattern_match)

# replcating patterns
replace_pattern = tf.strings.regex_replace("Yeabsira", "This", "Hello")
print(replace_pattern)

# Upper casing
uppper_case = tf.strings.upper(string_tensor)
print(uppper_case)

# lower casing
lower_case = tf.strings.lower(string_tensor)
print(lower_case)

# Casting to numerical values
numbers = tf.strings.to_number(string_numbers_tensor)
print(numbers)

# Replacing Patterns Using Regex - modify strings in a flexible and automated way. This is useful for text preprocessing tasks like normalizing data.
replaced = tf.strings.regex_replace("Hello", "l", "L")
print(replaced)

#combining all strings into one tensor
combined = tf.strings.reduce_join(string_tensor, separator = " ")
print(combined)


tf.Tensor([b'Hello' b'This' b'Is' b'Yeabsira'], shape=(4,), dtype=string)
tf.Tensor(b'Hello This Is Yeabsira', shape=(), dtype=string)
tf.Tensor([5 4 2 8], shape=(4,), dtype=int32)
<tf.RaggedTensor [[b'Hello'],
 [b'This'],
 [b'Is'],
 [b'Yeabsira']]>
tf.Tensor([b'He' b'Th' b'Is' b'Ye'], shape=(4,), dtype=string)
tf.Tensor([False False False  True], shape=(4,), dtype=bool)
tf.Tensor(b'Yeabsira', shape=(), dtype=string)
tf.Tensor([b'HELLO' b'THIS' b'IS' b'YEABSIRA'], shape=(4,), dtype=string)
tf.Tensor([b'hello' b'this' b'is' b'yeabsira'], shape=(4,), dtype=string)
tf.Tensor([3.14 2.32 0.22], shape=(3,), dtype=float32)
tf.Tensor(b'HeLLo', shape=(), dtype=string)
tf.Tensor(b'Hello This Is Yeabsira', shape=(), dtype=string)
