### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
# A timeout keeps this check from hanging indefinitely when there is no connectivity;
# without one, requests waits forever for a response.
response = requests.get("https://oracle.com", timeout=10)
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

# Numpy
#### NumPy is a general-purpose array-processing package. It provides a high-performance multi-dimensional array object
#### and tools for working with these arrays. It is the fundamental package for scientific computing with Python.

### What is an array? 
#### An array is a data structure that stores values of the same data type. 
#### In Python, this is the main difference between arrays and lists.
#### While Python **lists** can contain values of **different data types**, 
#### arrays in Python can only contain values of the __same data type__.

In [None]:
# Installing NumPy with conda is a shell command: run it in a terminal, not in a
# Python cell (inside Jupyter, the `%conda install numpy` magic can be used instead).
# conda install numpy


In [2]:
# install numpy on jupyter notebooks
!pip install numpy



In [5]:
# Import NumPy in this cell so it also works on a fresh top-to-bottom run,
# then print the installed NumPy version.
import numpy as np
print(np.__version__)

2.3.4


In [6]:
import numpy as np

In [16]:
my_lst=[1,2,3,4,5]
my_arr=np.array(my_lst)


In [18]:
type(my_arr)

numpy.ndarray

In [21]:
my_arr.shape

(5,)

In [20]:
my_arr

array([1, 2, 3, 4, 5])

In [43]:
# Multi-dimensional arrays built from nested lists
lst1=[1,2,3,4,5]
lst2=[6,7,8,9,10]
lst3=[11,12,13,14,15]

#Define array 
arr1=np.array([lst1,lst2,lst3])

In [44]:
arr1

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [45]:
# shape is (rows, columns): here 3 rows and 5 columns
arr1.shape

(3, 5)

In [46]:
# reshape rearranges the array; the total number of elements must stay the same (5*2=10 is not 15, so this raises ValueError)
arr1.reshape(5,2)

ValueError: cannot reshape array of size 15 into shape (5,2)

In [47]:
arr1.reshape(5,3)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12],
       [13, 14, 15]])

In [48]:
arr1.reshape(1,15)

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15]])

In [49]:
arr1.reshape(15,1)

array([[ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15]])

In [50]:
# Indexing a one-dimensional array

In [51]:
array2=np.array([1,2,3,4,5,6,7,8,9])

In [52]:
array2[4]

np.int64(5)

In [53]:
arr1

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [54]:
arr1[0:2]

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [55]:
arr1[1:2]

array([[ 6,  7,  8,  9, 10]])

In [56]:
arr1[2]

array([11, 12, 13, 14, 15])

In [57]:
arr1[2:3]

array([[11, 12, 13, 14, 15]])

In [58]:
arr1[1:,3:]

array([[ 9, 10],
       [14, 15]])

In [61]:
arr1

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [70]:
arr1[0:2,2:4] # 0:2 is row part,2:4 is col part

array([[3, 4],
       [8, 9]])

In [74]:
arr1[0:2,1:3]

array([[2, 3],
       [7, 8]])

In [78]:
array2=np.arange(0,10)

In [79]:
array2

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [80]:
array2=np.arange(0,30,2) # start, stop (exclusive), step size

In [81]:
array2

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [84]:
array2=np.arange(0,50,step=5) # start, stop (exclusive), step size

In [85]:
array2

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45])

# linspace 
#### numpy.linspace() creates an array of evenly spaced numbers over a specified interval.

In [86]:
np.linspace(1,5,10) # start, stop (inclusive), number of samples to generate (default = 50)

array([1.        , 1.44444444, 1.88888889, 2.33333333, 2.77777778,
       3.22222222, 3.66666667, 4.11111111, 4.55555556, 5.        ])

In [89]:
np.linspace(0,5,5)

array([0.  , 1.25, 2.5 , 3.75, 5.  ])

In [91]:
np.linspace(1,10,50) # similar calls are useful in EDA (exploratory data analysis) to quickly generate sample values

array([ 1.        ,  1.18367347,  1.36734694,  1.55102041,  1.73469388,
        1.91836735,  2.10204082,  2.28571429,  2.46938776,  2.65306122,
        2.83673469,  3.02040816,  3.20408163,  3.3877551 ,  3.57142857,
        3.75510204,  3.93877551,  4.12244898,  4.30612245,  4.48979592,
        4.67346939,  4.85714286,  5.04081633,  5.2244898 ,  5.40816327,
        5.59183673,  5.7755102 ,  5.95918367,  6.14285714,  6.32653061,
        6.51020408,  6.69387755,  6.87755102,  7.06122449,  7.24489796,
        7.42857143,  7.6122449 ,  7.79591837,  7.97959184,  8.16326531,
        8.34693878,  8.53061224,  8.71428571,  8.89795918,  9.08163265,
        9.26530612,  9.44897959,  9.63265306,  9.81632653, 10.        ])

In [92]:
1.36734694 - 1.18367347

0.18367347

In [93]:
1.55102041 - 1.36734694

0.18367347

In [94]:
# copy() function and broadcasting
array2

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45])

In [95]:
array2[3:]=0 # broadcasting: the scalar 0 is assigned to every element from index 3 onward

In [96]:
array2

array([ 0,  5, 10,  0,  0,  0,  0,  0,  0,  0])

In [97]:
array2[5:]=100

In [98]:
array2

array([  0,   5,  10,   0,   0, 100, 100, 100, 100, 100])

In [99]:
arr1=array2 # assignment does not copy: arr1 and array2 now refer to the same array

In [100]:
arr1

array([  0,   5,  10,   0,   0, 100, 100, 100, 100, 100])

In [101]:
array1=[1,2,3,4,5]
array2=[6,7,8,9,10]
array1=array2

In [102]:
array1

[6, 7, 8, 9, 10]

In [103]:
array2

[6, 7, 8, 9, 10]

In [104]:
# value type and reference type in arrays

arr3 = np.array([1, 2, 3])
arr4 = arr3.copy() #.copy creates a new array in memory

In [105]:
arr3

array([1, 2, 3])

In [106]:
arr4

array([1, 2, 3])

In [107]:
arr4[0]=100

In [108]:
arr4

array([100,   2,   3])

In [109]:
arr3

array([1, 2, 3])

In [110]:
x=10
y=x
y=20

In [111]:
x,y # x and y are independent: "y=20" rebinds y to a new int and leaves x untouched (ints are immutable)

(10, 20)

In [112]:
a=[1,2,3]
b=a


In [113]:
a

[1, 2, 3]

In [114]:
b

[1, 2, 3]

In [118]:
# NumPy arrays are mutable objects accessed by reference, just like lists: assignment creates an alias, not a copy.

arr1=np.array([1,2,3])
arr2=arr1
arr2[0]=100

In [119]:
arr2

array([100,   2,   3])

In [120]:
arr1

array([100,   2,   3])

In [121]:
# both arr1 & arr2 pointing to same array in memory

### If you want an independent copy, use .copy() method.

In [122]:

arr3=np.array([1,2,3])
arr4=arr3.copy()
arr4[0]=100

In [123]:
arr3

array([1, 2, 3])

In [124]:
arr4

array([100,   2,   3])

In [132]:
# Create arrays and reshape
np.arange(0,10).reshape(5,2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [137]:
# Only the last expression in a cell is displayed, so just the (20,5) result appears below
np.arange(0,100).reshape(5,20) 
np.arange(0,100).reshape(20,5) 

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44],
       [45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54],
       [55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64],
       [65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74],
       [75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84],
       [85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94],
       [95, 96, 97, 98, 99]])

In [139]:
arr1=np.arange(0,10).reshape(2,5)
arr2=np.arange(0,10).reshape(2,5)
arr1

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [140]:
arr2

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [142]:
arr1*arr2

array([[ 0,  1,  4,  9, 16],
       [25, 36, 49, 64, 81]])

In [143]:
np.ones((2,5),dtype=int)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [147]:
np.ones(4) # Default dtype is float

array([1., 1., 1., 1.])

In [150]:
np.ones((2,5),dtype=np.int8) # int8 is requested explicitly; plain "int" also works but gives the default integer dtype instead of int8

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]], dtype=int8)

In [151]:
np.ones((2,5),dtype=np.float64) # or just do not mention. Default is float

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [152]:
np.ones((2,5),dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [157]:
# Random samples from a uniform distribution over [0, 1)
# Re-running this cell gives different values because the samples are random
np.random.rand(3,3)

array([[0.8469849 , 0.43144173, 0.92541588],
       [0.85590935, 0.4507669 , 0.51537521],
       [0.39132099, 0.20576738, 0.37735869]])

In [154]:
# Return a sample (or samples) from the "standard normal" distribution
arr_ex=np.random.randn(4,4)

In [155]:
arr_ex

array([[ 1.78859097, -0.04399673,  0.72228637,  1.52493816],
       [ 0.97904319,  1.05427148,  0.44043214, -0.3978892 ],
       [-0.01443619, -1.29549984, -1.35864779,  0.33438456],
       [-0.69366527, -1.34924573, -0.16567732, -1.11238172]])