date: 2024-10-04
title: ML-As-1
status: DONE
author:
  - AllenYGY
tags:
  - MachineLearing
  - Assignment
publish: True
ML-As-1
When 
Assume the probability of a certain disease is 
Gradient descent is the primary algorithm used to search for the optimal parameters of our models. Typically, we want to solve optimization problems stated as
where 
The simplest parametric model entails learning a single-parameter constant function, where we set $\hat{y}_i = \theta$.
when
Where  
Thus, the gradient descent update for a single data point is:
In SGD, this single-sample gradient is used to update $\theta$ after each data point.
Instead of constant functions, we now consider a single-parameter linear model $\hat{y}_i = \theta x_i$.
when
Where  
No, not all points get the same vote in the gradient update.
Each data point is weighted by  
If  
Whereas if  
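Consider the Ridge Regression estimator
$$\hat{w} = \arg\min_{w} \|Xw - y\|_2^2 + \lambda \|w\|_2^2$$
We know this is solved by
$$\hat{w} = (X^\top X + \lambda I)^{-1} X^\top y$$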
One interpretation of Ridge Regression is to find the Maximum A Posteriori (MAP) estimate on 
Note that each entry of vector 
The MAP estimate maximizes the posterior distribution:
From Bayes' rule:
Substituting the likelihood and prior:
Maximizing this expression with respect to 
# You should return your result. 
import numpy as np 
def insertSecond(a, b):
    """Return a new array equal to ``a`` with ``b`` placed at position 1.

    The insertion is delegated to ``numpy.insert`` without an ``axis``
    argument, so ``b`` ends up directly after the first element of ``a``.
    ``a`` itself is left untouched.
    """
    second_slot = 1  # index right after the first element
    with_b = np.insert(a, obj=second_slot, values=b)
    return with_b
# Sanity checks for insertSecond, one row per case:
# (input values, value to insert, expected values).
for _values, _item, _expected in (
    ([-5, -10, -12, -6], 5, [-5, 5, -10, -12, -6]),
    ([1, 2, 3], 7, [1, 7, 2, 3]),
    ([-5, -10, -12, -6], 8, [-5, 8, -10, -12, -6]),
    ([1, 2, 3], 12, [1, 12, 2, 3]),
):
    assert np.array_equal(insertSecond(np.array(_values), _item), np.array(_expected))
import numpy as np 
def mergeArrays(a, b):
    """Return the sorted, de-duplicated values found in ``a`` or ``b``.

    Parameters
    ----------
    a, b : array_like
        Arrays to merge; they are joined with ``numpy.concatenate``.

    Returns
    -------
    numpy.ndarray
        The distinct values of both inputs in ascending order.
    """
    # np.unique already returns its result sorted, so no separate
    # np.sort pass is needed afterwards.
    return np.unique(np.concatenate((a, b)))
# Test cases for mergeArrays, one row per case:
# (first input, second input, expected sorted unique values).
for _first, _second, _expected in (
    ([1, 1, 4, 8, 1], [2, 3], [1, 2, 3, 4, 8]),
    ([-5, -10, -10, -6], [-5, 8, -10, -12, -6], [-12, -10, -6, -5, 8]),
    ([1, 1, 6, 8, 1], [2, 3], [1, 2, 3, 6, 8]),
):
    assert np.array_equal(
        mergeArrays(np.array(_first), np.array(_second)),
        np.array(_expected),
    )
import numpy as np
import matplotlib.pyplot as plt
# Grouped bar chart of the men's and women's scores for five groups.
# data to plot
n_groups = 5
men_means = (22, 30, 33, 30, 26)
women_means = (25, 32, 30, 35, 29)
alpha = 0.5  # bar transparency, shared by both series

fig, ax = plt.subplots()
index = np.arange(n_groups)  # x position of the first bar in each group
bar_width = 0.4

# Draw on the axes created above. The women's bars are shifted right by one
# bar width so that the two series sit side by side within each group.
rects1 = ax.bar(index, men_means, bar_width,
                alpha=alpha,
                color='g',
                label='Men')
rects2 = ax.bar(index + bar_width, women_means, bar_width,
                alpha=alpha,
                color='r',
                label='Women')

ax.set_xlabel('Person')
ax.set_ylabel('Scores')
ax.set_title('Scores by person')
# Centre each tick label between the two bars of its group.
ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
ax.legend()
fig.tight_layout()
plt.show()

import pandas as pd
def setDataFrameZeros(df):
    """Zero out every row and every column of ``df`` that contains a 0.

    Both masks are taken from the frame as it is on entry, before anything
    is overwritten, so zeros written by this function do not trigger
    further rows or columns. ``df`` is modified in place and also returned.
    """
    is_zero = df.isin([0])
    hit_rows = is_zero.any(axis="columns")  # rows holding at least one 0
    hit_cols = is_zero.any(axis="index")    # columns holding at least one 0

    df.loc[hit_rows, :] = 0
    df.loc[:, hit_cols] = 0
    return df
# Checks for setDataFrameZeros. Each case pairs an input frame with the frame
# expected once every row and column containing a 0 has been zeroed.
# Every comparison is asserted, so a wrong result fails loudly instead of
# being computed and then discarded.
_zero_cases = (
    (
        pd.DataFrame({'c1': [1, 4, 7], 'c2': [2, 0, 8], 'c3': [3, 6, 9]}),
        pd.DataFrame({'c1': [1, 0, 7], 'c2': [0, 0, 0], 'c3': [3, 0, 9]}),
    ),
    (
        pd.DataFrame({'c1': [0, 3, 1], 'c2': [1, 4, 3], 'c3': [2, 5, 1], 'c4': [0, 2, 5]}),
        pd.DataFrame({
            'c1': [0, 0, 0],
            'c2': [0, 4, 3],
            'c3': [0, 5, 1],
            'c4': [0, 0, 0]
        }),
    ),
)
for df1, df2 in _zero_cases:
    # setDataFrameZeros overwrites df1 in place, so each case owns its frame.
    assert df2.equals(setDataFrameZeros(df1))