02 — Learning
Machine Learning Topics
A curated path from foundational regression models to advanced unsupervised learning. Each topic includes core concepts, sample implementation, and a link to the full notebook.
01
Cost Function · MSE · R² Score · Matplotlib
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the linear model on the training portion only.
model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out samples.
y_pred = model.predict(X_test)

# Report the error and the goodness of fit on the test split.
mse = mean_squared_error(y_test, y_pred)
print("MSE:", mse)
r2 = r2_score(y_test, y_pred)
print("R² Score:", r2)
02
Feature Scaling · StandardScaler · Multicollinearity · Coefficients
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training split only, then reuse
# the same fitted scaler for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train and predict on the scaled features.
model = LinearRegression().fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
03
Optimization · Learning Rate · Gradients · NumPy
import numpy as np

# Batch gradient descent on a linear model.
# The following names must already be defined (they are not part of this snippet):
#   X_b           - design matrix
#   y             - targets
#   m             - divisor used to average the gradient (presumably the row count of X_b)
#   n_features    - length of the parameter vector
#   learning_rate - step size
# NOTE(review): theta has shape (n_features, 1), so X_b needs n_features columns
# (counting any bias column) and y should be a column vector; a 1-D y would
# broadcast against the (rows, 1) predictions - confirm against the full notebook.
theta = np.zeros((n_features, 1))
for iteration in range(1000):
    # Gradient of the squared-error cost with respect to theta.
    gradients = (2/m) * X_b.T.dot(X_b.dot(theta) - y)
    # Move against the gradient, scaled by the learning rate.
    theta = theta - learning_rate * gradients

# Prediction
y_pred = X_b.dot(theta)
04
Logistic Regression · KNN · Confusion Matrix · F1-score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
# Train with the iteration cap raised to 1000, then label the test split.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Print the confusion matrix first, then the classification report.
for summarize in (confusion_matrix, classification_report):
    print(summarize(y_test, y_pred))
05
Gaussian NB · Multinomial NB · Probabilistic ML · Text Classification
from sklearn.naive_bayes import GaussianNB
# Train Gaussian Naive Bayes, then label the test samples.
model = GaussianNB().fit(X_train, y_train)
y_pred = model.predict(X_test)
06
Hyperplane · Kernels · Soft Margin · SVC
from sklearn.svm import SVC
# RBF-kernel support vector classifier, trained on the training split.
# probability=True is requested here although only hard labels are used below.
model = SVC(
    kernel='rbf',
    C=1.0,
    gamma='scale',
    probability=True,
).fit(X_train, y_train)
y_pred = model.predict(X_test)
07
Pruning · Entropy · Information Gain · Visualization
from sklearn.tree import DecisionTreeClassifier, plot_tree
# Depth is capped at 5 and the seed is fixed so repeated runs build the same tree.
model = DecisionTreeClassifier(
    max_depth=5,
    random_state=42,
).fit(X_train, y_train)
y_pred = model.predict(X_test)
08
Bagging · Random Forest · Feature Importance · Hyperparameter Tuning
from sklearn.ensemble import RandomForestClassifier
# Forest of 200 trees, each limited to depth 10, with a fixed seed.
rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42,
).fit(X_train, y_train)
y_pred = rf.predict(X_test)
09
Voting Classifier · Stacking · Boosting · AdaBoost
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
# Base learners; the SVC is built with probability=True because the
# ensemble below uses voting='soft'.
clf1, clf2 = LogisticRegression(), SVC(probability=True)

# Combine both learners in a soft-voting ensemble and train it.
eclf = VotingClassifier(
    estimators=[('lr', clf1), ('svc', clf2)],
    voting='soft',
).fit(X_train, y_train)
10
Variance · Eigenvalues · Feature Reduction · Visualization
from sklearn.decomposition import PCA
# Reduce dimensionality with PCA: fit on X_scaled and project it in one call.
# NOTE(review): X_scaled is not defined in this snippet - presumably the
# output of a StandardScaler; confirm against the full notebook.
pca = PCA(n_components=0.95) # Keep 95% variance
X_reduced = pca.fit_transform(X_scaled)
11
Elbow Method · Silhouette Score · Centroids · Unsupervised
from sklearn.cluster import KMeans
# Three clusters, ten initialisations, fixed seed.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)

# Fit the model and obtain each sample's cluster index in a single call.
labels = kmeans.fit_predict(X_scaled)

# Coordinates of the fitted cluster centres.
centers = kmeans.cluster_centers_
12
Density-based · Outliers · Epsilon · Min Samples
from sklearn.cluster import DBSCAN
# Cluster X_scaled with eps=0.5 and min_samples=5.
dbscan = DBSCAN(eps=0.5, min_samples=5).fit(X_scaled)

# Per-sample cluster labels assigned during fitting.
labels = dbscan.labels_