In [3]:
# This is just the tutorial from Colab with notes

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
print("Tensorflow version:", tf.__version__)

# Seed Python, NumPy and TensorFlow in one call so that weight initialisation,
# dropout masks and batch shuffling -- and therefore the reported loss and
# accuracy -- are repeatable from one run of this cell to the next.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# MNIST is the handwritten-digit dataset that ships with Keras.
mnist = tf.keras.datasets.mnist

# load_data() returns an (images, labels) pair for the training split and
# another pair for the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Pixel values are between 0 and 255, so dividing by 255.0 rescales them to
# floats between 0 and 1.
x_train, x_test = x_train / 255.0, x_test / 255.0

# Build the actual machine learning model.
# Notes on the vocabulary used here:
#   * A tensor is an n-dimensional array (vector / matrix / ...); every
#     computation in TensorFlow consumes and produces tensors.
#   * A layer is a reusable function with a known mathematical structure and
#     (usually) trainable variables. Models are made out of layers.
#   * Sequential takes a list of layers (and optionally a name) and stacks
#     them; it fits the case where every layer has exactly one input tensor
#     and one output tensor.
model = tf.keras.models.Sequential([
    # Flatten turns each 28x28 image into a vector of 784 values. Passing
    # input_shape here also tells the model what shape to expect as input.
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    # Fully connected layer with 128 units and ReLU activation; its output
    # shape is (batch_size, 128).
    tf.keras.layers.Dense(128, activation='relu'),
    # Dropout randomly sets 20% of its inputs to zero on each training step
    # (and scales the remainder up to compensate). It is only active during
    # training and is a regularisation technique that reduces overfitting.
    tf.keras.layers.Dropout(0.2),
    # Output layer: one raw score (logit) per digit class 0-9, so the output
    # shape is (batch_size, 10).
    tf.keras.layers.Dense(10)
])

# Optional sanity checks on the untrained model (left disabled, as before):
#
#   predictions = model(x_train[:1]).numpy()
#   print(predictions)
#
# For each example the model returns a vector of logits (log-odds scores), one
# per class. Logits are the raw, non-normalised predictions of a classifier and
# are ordinarily passed on to a normalisation function such as softmax.
# There are ten of them because there are ten digit classes.
# .numpy() converts the tensor into a numpy.ndarray.
#
#   prob_predictions = tf.nn.softmax(predictions).numpy()
#   print(prob_predictions)
#
# softmax converts the logits into probabilities for each class. Before
# training these are all close to 10%, i.e. the model is guessing.

# Define the loss function used for training.
# It takes the ground-truth labels and the logits and returns a scalar loss for
# each example, equal to the negative log probability of the true class: the
# loss is 0 when the model is certain of the correct class.
# An untrained model gives roughly 1/10 to each class, so the starting loss is
# about -tf.math.log(1/10) ~= 2.3.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

#   print(loss_fn(y_train[:1], predictions).numpy())

# Before training, configure and compile the model with Model.compile: use the
# Adam optimizer, the loss defined above, and report accuracy as the metric.
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

# TRAINING THE MODEL
# Model.fit adjusts the model parameters to minimise the loss
# (a loss closer to 0 is better).
model.fit(x_train, y_train, epochs=5)

# Model.evaluate checks the model's performance, usually on a validation or
# test set. Here it is the test split that was loaded together with the
# training data. It returns [loss, accuracy]; the recorded run reached roughly
# 0.978 test accuracy (the exact figure depends on the seed).
model.evaluate(x_test,  y_test, verbose=2)

Tensorflow version: 2.13.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 0s - loss: 0.0749 - accuracy: 0.9780 - 121ms/epoch - 385us/step


[0.07487266510725021, 0.9779999852180481]

In [4]:
# Reload MNIST in order to look at the raw, unscaled pixel values of one image.
# NOTE(review): this rebinds x_train / x_test to the raw arrays returned by
# load_data(), replacing any copies that were divided by 255.0 earlier in the
# session -- redo the scaling step before training on them again.
mnist = tf.keras.datasets.mnist

train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Pixel grid of the training image at index 10 (no division by 255 here, so the
# values are the original 0-255 integers).
print(x_train[10])


[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0  42 118 219 166 118 118   6
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 103 242 254 254 254 254 254  66
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0  18 232 254 254 254 254 254 238
   70   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0 104 244 254 224 254 254 254
  141   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0 207 254 210 25

In [10]:
from PIL import Image
# Open one sample scan and print the shape of its pixel array.
im = Image.open("0028C_S.gif")
pix = np.array(im)
print(pix.shape)

# Size that every image is resized to on load, and the number of images per batch.
im_ht, im_wd = 256, 256
bat_sz = 2

# Folder that holds the image files, read by both dataset calls below.
dataset_dir = '/Users/zachderse//Desktop/cross_sectional_data/datasets/'

# Arguments shared by the training and the validation split. Both calls must
# use the same seed and validation_split so that the two subsets are carved
# out of the same shuffled file list and do not overlap.
_split_args = dict(
    labels="inferred",
    label_mode="categorical",
    color_mode="grayscale",
    batch_size=bat_sz,
    seed=123,
    validation_split=.1,
    image_size=(im_ht, im_wd),
)

# Training set (the 90% not held out for validation).
ds_train = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_dir, subset="training", **_split_args)

# Validation set (the 10% held out by validation_split).
ds_valid = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_dir, subset="validation", **_split_args)

(208, 176)
Found 18 files belonging to 4 classes.
Using 17 files for training.
Found 18 files belonging to 4 classes.
Using 1 files for validation.


In [7]:

import os
import shutil  
from PIL import Image



        
        
# Folder that holds one sub-folder per session, named OAS1_<id>_MR1, each with
# a text summary OAS1_<id>_MR1.txt.
DISC1_DIR = "/Users/zachderse//Desktop/cross_sectional_data/disc1"

# Where the CDR value sits inside a session text file.
# NOTE(review): fixed line/column positions taken over from the original code;
# confirm they hold for every session file.
CDR_LINE_INDEX = 6           # zero-based index of the line read for the CDR value
CDR_COLUMNS = slice(14, 17)  # character columns of the value on that line

# CDR values that are reported as-is; anything else counts as missing.
KNOWN_CDR_SCORES = ("0", "0.5", "1", "2")


def read_cdr(path):
    """Read the CDR field from the session text file at ``path``.

    Returns the stripped text found at the fixed CDR position (an empty string
    when the field is blank), or ``None`` when the file has too few lines to
    contain that position.

    Raises ``OSError`` (e.g. ``FileNotFoundError``) when the file cannot be opened.
    """
    # ``with`` guarantees the handle is closed even if reading fails.
    with open(path) as session_file:
        textlines = session_file.readlines()
    if len(textlines) <= CDR_LINE_INDEX:
        return None
    return textlines[CDR_LINE_INDEX][CDR_COLUMNS].strip()


def cdr_label(cdr):
    """Return ``cdr`` unchanged if it is a known score, else ``"none available"``."""
    return cdr if cdr in KNOWN_CDR_SCORES else "none available"


package_num = 1  # not used in this cell; kept in case another cell reads it
for i in range(1, 42):
    x = str(i).zfill(4)  # subject id, zero-padded to four digits
    session = "OAS1_" + x + "_MR1"
    file_name = DISC1_DIR + "/" + session + "/" + session + ".txt"

    try:
        CDR_num = read_cdr(file_name)
    except OSError:
        # No readable session file for this subject id: skip it silently, as
        # before. Any other kind of error is a real bug and is allowed to surface.
        continue
    if CDR_num is None:
        # File too short to contain the CDR line: skip it silently, as before.
        continue

    print(cdr_label(CDR_num))

    # Disabled copy step: sorts each subject's segmentation image into a
    # per-CDR folder.
    #orgfile = "/Users/zachderse//Desktop/cross_sectional_data/disc1/OAS1_"+x+"_MR1/FSL_SEG/OAS1_"+x+"_MR1_mpr_n4_anon_111_t88_masked_gfc_fseg_tra_90.gif"
    #dest = "/Users/zachderse//Desktop/cross_sectional_data/CDR_"+str(CDR_num)+"/"+x+"C_S.gif"
    #try:
    #    shutil.copy(orgfile, dest)
    #    print("copied successfully")
    #except shutil.SameFileError:
    #    print("Source and destination represents the same file.")



0
0
0.5
none available
none available
none available
none available
none available
0
0
none available
0
none available
0.5
0.5
none available
0
0
0
0.5
0.5
0.5
none available
0
none available
1
none available
0
1
0
0
0
1
none available
none available
0.5
none available
0.5
