In [3]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1 (from tensorflow)
  Downloading tensorflow_io_gcs_filesystem-0.37.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Collecting wheel<1.0,>=0.23.0 (from astunparse>=1.6.0->tensorflow

In [4]:
# Before this import kaggle.json file first

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

!kaggle datasets download -d arjuntejaswi/plant-village

import zipfile
zip_ref = zipfile.ZipFile('/content/plant-village.zip', 'r')
zip_ref.extractall('/content')
zip_ref.close()

Dataset URL: https://www.kaggle.com/datasets/arjuntejaswi/plant-village
License(s): unknown
plant-village.zip: Skipping, found more recently modified local copy (use --force to force download)


In [5]:
import tensorflow as tf

<h3 style='color:purple'>Create tf dataset from a list</h3>

In [6]:
# Toy input for the tf.data walkthrough; note that it contains negative values.
daily_sales_numbers = [21, 22, -108, 31, -1, 32, 34,31]

# Build a dataset with one scalar element per list entry.
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset  # last expression of the cell, so the dataset's repr is displayed

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

<h3 style='color:purple'>Iterate through tf dataset</h3>

In [7]:
# Each element comes back as a tensor; .numpy() unwraps it to a plain value.
for sale_tensor in tf_dataset:
    print(sale_tensor.numpy())

21
22
-108
31
-1
32
34
31


<h3 style='color:purple'>Iterate through elements as numpy elements</h3>

In [8]:
# as_numpy_iterator() already yields NumPy values, so no .numpy() call is needed.
for sale_value in tf_dataset.as_numpy_iterator():
    print(sale_value)

21
22
-108
31
-1
32
34
31


<h3 style='color:purple'>Iterate through first n elements in tf dataset</h3>

In [9]:
# take(3) limits the iteration to the first three elements.
for sale_tensor in tf_dataset.take(3):
    print(sale_tensor.numpy())

21
22
-108


<h3 style='color:purple'>Filter out invalid sales numbers (keep only values &gt; 0)</h3>

In [10]:
# Keep only strictly positive sales figures (negatives and zero are dropped).
tf_dataset = tf_dataset.filter(lambda amount: amount > 0)
for sale_value in tf_dataset.as_numpy_iterator():
    print(sale_value)

21
22
31
32
34
31


<h3 style='color:purple'>Convert sales numbers from US dollars ($) to Indian rupees (INR), assuming a conversion rate of 1 USD = 72 INR</h3>

In [11]:
# Multiply every element by 72 (USD -> INR at the rate assumed in the heading).
# Note: this rebinds tf_dataset, so re-running the cell converts the values again.
tf_dataset = tf_dataset.map(lambda amount: amount * 72)
for sale_value in tf_dataset.as_numpy_iterator():
    print(sale_value)

1512
1584
2232
2304
2448
2232


<h3 style='color:purple'>Shuffle</h3>

In [12]:
# Shuffle with a buffer of two elements; with such a small buffer, elements
# only move a short distance from their original position.
tf_dataset = tf_dataset.shuffle(2)
for sale_value in tf_dataset.as_numpy_iterator():
    print(sale_value)

1584
1512
2304
2448
2232
2232


Further reading on how `batch`, `repeat`, and `shuffle` interact: https://stackoverflow.com/questions/53514495/what-does-batch-repeat-and-shuffle-do-with-tensorflow-dataset

<h3 style='color:purple'>Batching</h3>

In [13]:
# batch(2) groups consecutive elements into arrays of two.
for pair in tf_dataset.batch(2):
    print(pair.numpy())

[1512 1584]
[2232 2448]
[2304 2232]


<h3 style='color:purple'>Perform all of the above operations in one shot</h3>

In [14]:
# Rebuild the whole pipeline from the raw list in a single chained expression.
tf_dataset = (
    tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
    .filter(lambda amount: amount > 0)  # keep positive figures only
    .map(lambda amount: amount * 72)    # USD -> INR at 72 per dollar
    .shuffle(2)
    .batch(2)
)
for batch_values in tf_dataset.as_numpy_iterator():
    print(batch_values)

[1584 2232]
[2304 2448]
[2232 1512]


<h3>Images</h3>

In [None]:
import tensorflow as tf

# Alternative to the manual file-list pipeline built with
#   tf.data.Dataset.list_files('/content/PlantVillage/*/*', shuffle=True)
# Keras builds a batched, shuffled image dataset directly from the
# one-folder-per-class layout, inferring the labels from the directory structure.
# NOTE(review): this cell has no execution count, and `train_ds` / `class_names`
# are assigned again further down, so on a full run its results are overwritten.
img_height = 128
img_width = 128
train_ds = tf.keras.utils.image_dataset_from_directory(
    '/content/PlantVillage',
    labels='inferred',
    label_mode='categorical',
    image_size=(img_height, img_width),
    interpolation='nearest',
    batch_size=64,
    shuffle=True
)

# Class names as reported by the dataset object.
class_names = train_ds.class_names
print(class_names)

In [23]:
# Dataset of file paths matching <class_dir>/<file> under the dataset root;
# shuffle=True asks for the paths in random order.
images_ds = tf.data.Dataset.list_files('/content/PlantVillage/*/*', shuffle=True)

In [24]:
# Total number of matched files; used later to size the train/test split.
image_count = len(images_ds)
image_count

20639

In [25]:
# Inspect which concrete dataset class list_files returned.
type(images_ds)

In [26]:
# Peek at three entries; each element is a byte-string file path.
for path_tensor in images_ds.take(3):
    print(path_tensor.numpy())

b'/content/PlantVillage/Tomato_healthy/6270efe7-db0f-4953-a7c7-eb86bb8967dd___RS_HL 9761.JPG'
b'/content/PlantVillage/Potato___Late_blight/547d3544-5e37-484b-a853-254376c49081___RS_LB 2556.JPG'
b'/content/PlantVillage/Tomato__Target_Spot/d7b51750-838a-4abb-9d0a-d56bc5a64df2___Com.G_TgS_FL 0021.JPG'


In [27]:
# Add a shuffle stage with a 200-element buffer, then peek at three paths.
images_ds = images_ds.shuffle(200)
for path_tensor in images_ds.take(3):
    print(path_tensor.numpy())

b'/content/PlantVillage/Tomato__Tomato_YellowLeaf__Curl_Virus/5fb985f8-4599-4275-aa1c-f9accb2d0bd3___UF.GRC_YLCV_Lab 08527.JPG'
b'/content/PlantVillage/Tomato_Septoria_leaf_spot/04840559-16ed-4f17-908b-2048ba83ab80___Keller.St_CG 1780.JPG'
b'/content/PlantVillage/Tomato_Late_blight/a3deb552-d098-4b15-992a-d6f2fc49ae4c___GHLB2 Leaf 109.1.JPG'


In [30]:
import os

# Class labels are the names of the sub-directories of the dataset root.
# Skip plain files and hidden entries so that stray items in the folder
# cannot be mistaken for classes.
class_names = sorted(
    entry.name
    for entry in os.scandir('/content/PlantVillage')
    if entry.is_dir() and not entry.name.startswith('.')
)

In [31]:
# Show the sorted list of class folder names.
print(class_names)

['Pepper__bell___Bacterial_spot', 'Pepper__bell___healthy', 'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy', 'Tomato_Bacterial_spot', 'Tomato_Early_blight', 'Tomato_Late_blight', 'Tomato_Leaf_Mold', 'Tomato_Septoria_leaf_spot', 'Tomato_Spider_mites_Two_spotted_spider_mite', 'Tomato__Target_Spot', 'Tomato__Tomato_YellowLeaf__Curl_Virus', 'Tomato__Tomato_mosaic_virus', 'Tomato_healthy']


In [32]:
# Number of classes.
len(class_names)

15

In [33]:
# Number of file paths in the (shuffled) dataset.
len(images_ds)

20639

In [39]:
train_size = int(image_count*0.8)

# `images_ds` is reshuffled every time it is iterated, so `images_ds.take(n)`
# and `images_ds.skip(n)` would each see a different ordering: the split would
# change between passes and test files would leak into training.
# Materialise one ordering and slice it so both splits are fixed and disjoint.
file_paths = tf.constant(list(images_ds.as_numpy_iterator()), dtype=tf.string)
train_ds = tf.data.Dataset.from_tensor_slices(file_paths[:train_size])
test_ds = tf.data.Dataset.from_tensor_slices(file_paths[train_size:])
# For per-epoch shuffling during training, apply `.shuffle(...)` to `train_ds` only.

In [40]:
# Number of file paths in the training split.
len(train_ds)

16511

In [41]:
# Number of file paths in the test split.
len(test_ds)

4128

In [42]:
def get_label(file_path):
    """Return the name of the directory that directly contains `file_path`.

    The path is split on the OS path separator and the second-to-last
    component is returned as a string tensor. For paths shaped like
    `<root>/<class_name>/<file>` this is the class name.
    """
    import os

    path_components = tf.strings.split(file_path, os.path.sep)
    label = path_components[-2]
    return label

In [50]:
# Derive the label of three training files to check get_label.
for label_tensor in train_ds.map(get_label).take(3):
  print(label_tensor)

tf.Tensor(b'Tomato_Bacterial_spot', shape=(), dtype=string)
tf.Tensor(b'Tomato_Late_blight', shape=(), dtype=string)
tf.Tensor(b'Tomato__Target_Spot', shape=(), dtype=string)


In [44]:
def process_image(file_path, image_size=(128, 128)):
    """Load one JPEG file and pair the decoded image with its class label.

    Args:
        file_path: Scalar string tensor holding the path of the image file.
        image_size: Target `(height, width)` of the resized image. Defaults to
            the 128x128 size this notebook uses.

    Returns:
        A tuple `(img, label)`: `img` is a float32 tensor of shape
        `(height, width, 3)` whose values are not rescaled (still 0-255), and
        `label` is the string tensor returned by `get_label`.
    """
    label = get_label(file_path)
    img = tf.io.read_file(file_path)  # raw file contents as a byte string
    # Force three channels: without `channels`, the channel count depends on
    # each file (grayscale JPEGs decode to one channel) and is unknown when the
    # function is traced inside `Dataset.map`, which breaks batching.
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, image_size)
    return img, label

In [51]:
# Decode three training files and print each image tensor with its label.
for image_tensor, label_tensor in train_ds.map(process_image).take(3):
  print(image_tensor, label_tensor)

tf.Tensor(
[[[177.   171.   175.  ]
  [181.5  175.5  179.5 ]
  [179.   173.   177.  ]
  ...
  [180.   171.   176.  ]
  [169.75 160.75 165.75]
  [178.75 169.75 174.75]]

 [[188.   182.   186.  ]
  [181.75 175.75 179.75]
  [176.25 170.25 174.25]
  ...
  [174.25 165.25 170.25]
  [170.75 161.75 166.75]
  [179.5  170.5  175.5 ]]

 [[179.75 173.75 177.75]
  [181.25 175.25 179.25]
  [181.75 175.75 179.75]
  ...
  [168.75 159.75 164.75]
  [170.75 161.75 166.75]
  [174.5  165.5  170.5 ]]

 ...

 [[101.25  89.25  89.25]
  [103.5   91.5   91.5 ]
  [114.25 102.25 102.25]
  ...
  [115.   103.   103.  ]
  [102.5   90.5   90.5 ]
  [104.25  92.25  92.25]]

 [[108.75  96.75  96.75]
  [116.75 104.75 104.75]
  [115.5  103.5  103.5 ]
  ...
  [ 99.25  87.25  87.25]
  [108.    96.    96.  ]
  [ 98.75  86.75  86.75]]

 [[100.75  88.75  88.75]
  [117.   105.   105.  ]
  [116.5  104.5  104.5 ]
  ...
  [104.25  92.25  92.25]
  [ 93.25  81.25  81.25]
  [ 99.75  87.75  87.75]]], shape=(128, 128, 3), dtype=float32