In [3]:
import cv2
import tensorflow as tf

In [2]:
!pip install opencv-python==4.6.0.66

Collecting opencv-python==4.6.0.66
  Using cached opencv_python-4.6.0.66-cp37-abi3-macosx_11_0_arm64.whl (30.0 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.6.0.66


In [4]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
import os
import numpy as np
from typing import List

In [3]:
path2='/Users/giannitallarita/datalips/data/s1/lrbe9a.mpg'

In [6]:
def load_video(path:str) -> List[float]: 

    cap = cv2.VideoCapture(path)
    frames = []
    for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))): 
        ret, frame = cap.read()
        frame = tf.image.rgb_to_grayscale(frame)
        frames.append(frame[190:236,80:220,:])
    cap.release()
    
    mean = tf.math.reduce_mean(frames)
    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
    return tf.cast((frames - mean), tf.float32) / std

In [7]:
def load_video_frames(path:str) -> List[float]: 

    cap = cv2.VideoCapture(path)
    frames = []
    for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))): 
        ret, frame = cap.read()
        frame = tf.image.rgb_to_grayscale(frame)
        frames.append(frame[190:236,80:220,:])
    cap.release()
    
    mean = tf.math.reduce_mean(frames)
    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
    return frame

In [4]:
import face_recognition

In [5]:
frames=(cv2.VideoCapture(path2)).read()

In [6]:
frames[1].shape

(288, 360, 3)

In [7]:
face_locations=face_recognition.face_locations(frames[1])

In [8]:
for (top, right, bottom, left) in face_locations:
        
        # Draw a box around the face
        cv2.rectangle(frames[1], (left, top), (right, bottom), (0, 0, 255), 2)


In [10]:
cv2.imshow('Video', frames[1])
cv2.waitKey(0)
if cv2.waitKey(1) & 0xFF == ord('q'):
    cv2.destroyAllWindows()



# cv2.destroyAllWindows()

In [10]:
load_video(path2)

Metal device set to: Apple M1 Max


<tf.Tensor: shape=(75, 46, 140, 1), dtype=float32, numpy=
array([[[[1.4380248 ],
         [1.4380248 ],
         [1.4011523 ],
         ...,
         [0.29497942],
         [0.29497942],
         [0.29497942]],

        [[1.4380248 ],
         [1.4380248 ],
         [1.4011523 ],
         ...,
         [0.29497942],
         [0.29497942],
         [0.29497942]],

        [[1.4748971 ],
         [1.4748971 ],
         [1.4380248 ],
         ...,
         [0.40559673],
         [0.33185187],
         [0.33185187]],

        ...,

        [[1.1061729 ],
         [1.0693004 ],
         [1.0693004 ],
         ...,
         [0.07374486],
         [0.03687243],
         [0.03687243]],

        [[1.032428  ],
         [1.032428  ],
         [1.032428  ],
         ...,
         [0.07374486],
         [0.03687243],
         [0.03687243]],

        [[1.032428  ],
         [1.032428  ],
         [1.032428  ],
         ...,
         [0.07374486],
         [0.03687243],
         [0.        ]]],


  

In [8]:
def load_data(path: str): 
    path = bytes.decode(path.numpy())
    #file_name = path.split('/')[-1].split('.')[0]
    # File name splitting for windows
    file_name = path.split('/')[-1].split('.')[0]
    video_path = os.path.join('/Users/giannitallarita/datalips/data/','s1',f'{file_name}.mpg')
    alignment_path = os.path.join('/Users/giannitallarita/datalips/data/','alignments','s1',f'{file_name}.align')
    frames = load_video(video_path) 
    alignments = load_alignments(alignment_path)
    
    return frames, alignments

## Vocabulary 

In [9]:
vocab = [x for x in "abcdefghijklmnopqrstuvwxyz'?!123456789 "]

In [10]:
char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")
num_to_char = tf.keras.layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), oov_token="", invert=True
)

print(
    f"The vocabulary is: {char_to_num.get_vocabulary()} "
    f"(size ={char_to_num.vocabulary_size()})"
)

The vocabulary is: ['', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "'", '?', '!', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' '] (size =40)


2023-06-09 09:41:53.366262: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2023-06-09 09:41:53.366295: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2023-06-09 09:41:53.366313: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2023-06-09 09:41:53.366353: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-06-09 09:41:53.366373: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [11]:

def load_alignments(path:str) -> List[str]: 
    with open(path, 'r') as f: 
        lines = f.readlines() 
    tokens = []
    for line in lines:
        line = line.split()
        if line[2] != 'sil': 
            tokens = [*tokens,' ',line[2]]
    return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]

In [12]:

def mappable_function(path:str) ->List[str]:
    result = tf.py_function(load_data, [path], (tf.float32, tf.int64))
    return result

In [26]:
data = tf.data.Dataset.list_files('/Users/giannitallarita/datalips/data/s1/*.mpg')
data = data.shuffle(500, reshuffle_each_iteration=False)
data = data.map(mappable_function)
data = data.padded_batch(10, padded_shapes=([75,None,None,None],[40]))
data = data.prefetch(tf.data.AUTOTUNE)

In [14]:
frames, alignments = data.as_numpy_iterator().next()

In [16]:
alignments.shape

(2, 40)

In [27]:
train = data.take(450)
test = data.skip(450)

In [16]:

sample = data.as_numpy_iterator()

2023-06-08 13:47:12.316488: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [17]:

val = sample.next()

In [None]:
## DNN

In [28]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Conv3D, LSTM, Dense, Dropout, Bidirectional, MaxPool3D, Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler

In [29]:
model = Sequential()
model.add(Conv3D(128, 3, input_shape=(75,46,140,1), padding='same'))
model.add(Activation('relu'))
model.add(MaxPool3D((1,2,2)))

model.add(Conv3D(256, 3, padding='same'))
model.add(Activation('relu'))
model.add(MaxPool3D((1,2,2)))

model.add(Conv3D(75, 3, padding='same'))
model.add(Activation('relu'))
model.add(MaxPool3D((1,2,2)))

model.add(TimeDistributed(Flatten()))

#model.add(Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)))
model.add(Dropout(.5))

#model.add(Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)))
model.add(Dropout(.5))

model.add(Dense(char_to_num.vocabulary_size()+1, kernel_initializer='he_normal', activation='softmax'))

In [46]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_9 (Conv3D)           (None, 75, 46, 140, 128)  3584      
                                                                 
 activation_9 (Activation)   (None, 75, 46, 140, 128)  0         
                                                                 
 max_pooling3d_9 (MaxPooling  (None, 75, 23, 70, 128)  0         
 3D)                                                             
                                                                 
 conv3d_10 (Conv3D)          (None, 75, 23, 70, 256)   884992    
                                                                 
 activation_10 (Activation)  (None, 75, 23, 70, 256)   0         
                                                                 
 max_pooling3d_10 (MaxPoolin  (None, 75, 11, 35, 256)  0         
 g3D)                                                 

In [18]:
yhat=model.predict(val[0])



In [24]:
tf.strings.reduce_join([num_to_char(x) for x in tf.argmax(yhat[0],axis=1)])

<tf.Tensor: shape=(), dtype=string, numpy=b'cccccff!!!!!!!!!dddffffffddddeeeeeeeeeeffffffffff!!!!!!!ddddddddddddddddddd'>

In [None]:
## Loss function and training

In [18]:
def scheduler(epoch, lr):
    if epoch < 30:
        return lr
    else:
        return lr * tf.math.exp(-0.1)

In [19]:
def CTCLoss(y_true, y_pred):
    batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
    input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
    label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

    input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
    label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

    loss = tf.keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)
    return loss

In [20]:
class ProduceExample(tf.keras.callbacks.Callback): 
    def __init__(self, dataset) -> None: 
        self.dataset = dataset.as_numpy_iterator()
    
    def on_epoch_end(self, epoch, logs=None) -> None:
        data = self.dataset.next()
        yhat = self.model.predict(data[0])
        decoded = tf.keras.backend.ctc_decode(yhat, [75,75], greedy=False)[0][0].numpy()
        for x in range(len(yhat)):           
            print('Original:', tf.strings.reduce_join(num_to_char(data[1][x])).numpy().decode('utf-8'))
            print('Prediction:', tf.strings.reduce_join(num_to_char(decoded[x])).numpy().decode('utf-8'))
            print('~'*100)

In [31]:
model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0001), loss=CTCLoss)

In [22]:

checkpoint_callback = ModelCheckpoint(os.path.join('/Users/giannitallarita/datalips/models','checkpoint'), monitor='loss', save_weights_only=True) 

In [23]:
schedule_callback = LearningRateScheduler(scheduler)

In [24]:
example_callback = ProduceExample(test); example_callback

<__main__.ProduceExample at 0x298973910>

In [None]:
!pip uninstall 

In [33]:


with tf.device('/CPU:0'):
    model.fit(train, validation_data=test, epochs=1)

2023-06-09 09:55:14.977404: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.



  1/100 [..............................] - ETA: 1:36:46 - loss: 171.9605

KeyboardInterrupt: 

In [1]:
import tensorflow as tf

cifar = tf.keras.datasets.cifar100
(x_train, y_train), (x_test, y_test) = cifar.load_data()
model = tf.keras.applications.ResNet50(
    include_top=True,
    weights=None,
    input_shape=(32, 32, 3),
    classes=100,)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])
#with tf.device('/GPU:0'):
model.fit(x_train, y_train, epochs=1, batch_size=64)

2023-06-09 09:39:59.052944: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2023-06-09 09:39:59.052966: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2023-06-09 09:39:59.052970: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2023-06-09 09:39:59.053032: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-06-09 09:39:59.053065: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-06-09 09:40:01.488845: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




<keras.src.callbacks.History at 0x2c9e1b310>