## Learning source code
```python
import numpy as np
import matplotlib.pyplot as plt

def true_fun(X):  # the real function we set, i.e. the ground-truth model
    return 1.5 * X + 0.2

np.random.seed(0)  # set the random seed
n_samples = 30     # number of sampled data points

# Generate random data as the training set and add some noise
X_train = np.sort(np.random.rand(n_samples))
y_train = (true_fun(X_train) + np.random.randn(n_samples) * 0.05).reshape(n_samples, 1)

# Define the model
from sklearn.linear_model import LinearRegression  # import the linear regression model
model = LinearRegression()
model.fit(X_train[:, np.newaxis], y_train)      # train the model
print("Output parameter w:", model.coef_)       # model parameter w
print("Output parameter b:", model.intercept_)  # model parameter b

# Test the model and compare it with the true function
X_test = np.linspace(0, 1, 100)
plt.plot(X_test, model.predict(X_test[:, np.newaxis]), label="Model")
plt.plot(X_test, true_fun(X_test), label="True function")
plt.scatter(X_train, y_train)  # draw the points of the training set
plt.legend(loc="best")
plt.show()
```
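With the seed fixed to 0 and only mild noise (0.05), the fitted parameters printed here should land close to the true values w = 1.5 and b = 0.2.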
## np.random.seed()
```python
import numpy as np

def abc():
    for i in range(5):
        np.random.seed(1)
        print(np.random.rand(2))

abc()
```

Output:

```
[0.417022 0.72032449]
[0.417022 0.72032449]
[0.417022 0.72032449]
[0.417022 0.72032449]
[0.417022 0.72032449]
```

```python
import numpy as np

def abc_n():
    np.random.seed(1)
    for i in range(5):
        print(np.random.rand(2))

abc_n()
```

Output:

```
[0.417022 0.72032449]
[1.14374817e-04 3.02332573e-01]
[0.14675589 0.09233859]
[0.18626021 0.34556073]
[0.39676747 0.53881673]
```
Setting the same np.random.seed() before each draw yields the same random numbers, as in abc above. In abc_n the seed is set only once, so from the second loop iteration onward the generator simply continues its sequence and the numbers differ.
## np.sort()
```python
np.sort(a, axis=-1, kind=None, order=None)
```

- a: the array to be sorted.
- axis: -1 or 1; -1 is the default. For a 2-D array both sort along the last axis, i.e. within each row.
```python
import numpy as np

a = np.array([[1, 2, 3],
              [4, 5, 6],
              [9, 8, 7]])
a = np.sort(a)  # default axis=-1: each row is sorted
print(a)
```

Output:

```
[[1 2 3]
 [4 5 6]
 [7 8 9]]
```
Set axis=0 to sort along each column:
```python
import numpy as np

a = np.array([[2, 1, 3],
              [4, 5, 6],
              [0, 8, 7]])
a = np.sort(a, axis=0)  # each column is sorted
print(a)
```

Output:

```
[[0 1 3]
 [2 5 6]
 [4 8 7]]
```

## model.fit()

```python
def fit(self, X, y, sample_weight=None):
    """
    Fit linear model.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training data

    y : array-like of shape (n_samples,) or (n_samples, n_targets)
        Target values. Will be cast to X's dtype if necessary

    sample_weight : array-like of shape (n_samples,), default=None
        Individual weights for each sample
    """
```
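As a small usage sketch of fit, here is a linear fit that also passes the optional sample_weight argument; the data values are made up for illustration:

```python
import numpy as np
from sklearn.linear_model import LinearRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])  # shape (n_samples, n_features)
y = np.array([0.2, 1.7, 3.2, 4.7])          # roughly y = 1.5x + 0.2
w = np.array([1.0, 1.0, 2.0, 2.0])          # weight the later samples more heavily
model = LinearRegression().fit(X, y, sample_weight=w)
print(model.coef_, model.intercept_)
```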
## plt.scatter()
```python
def scatter(x, y, s=None, c=None, marker=None, cmap=None, norm=None,
            vmin=None, vmax=None, alpha=None, linewidths=None,
            verts=cbook.deprecation._deprecated_parameter,
            edgecolors=None, *, plotnonfinite=False, data=None, **kwargs):
```
The scatter function draws a scatter plot.
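A minimal sketch of scatter (the data and styling values are illustrative):

```python
import numpy as np
import matplotlib.pyplot as plt

x = np.random.rand(20)
y = 2 * x + np.random.randn(20) * 0.1
plt.scatter(x, y, s=30, c='red', marker='o')  # s: point size, c: color, marker: point shape
plt.show()
```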
## 2. Polynomial regression
### Source code
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline  # chains different processing steps together
from sklearn.preprocessing import PolynomialFeatures  # computes polynomial features
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

def true_fun(X):  # the real function we set, i.e. the ground-truth model
    return np.cos(1.5 * np.pi * X)

np.random.seed(0)  # set the random seed
n_samples = 30     # number of sampled data points
X = np.sort(np.random.rand(n_samples))
y = true_fun(X) + np.random.randn(n_samples) * 0.1

degrees = [1, 4, 15]  # highest polynomial degrees to compare
plt.figure(figsize=(14, 5))
for i in range(len(degrees)):
    ax = plt.subplot(1, len(degrees), i + 1)
    plt.setp(ax, xticks=(), yticks=())  # set axes properties; xticks=() hides the x-axis ticks

    polynomial_features = PolynomialFeatures(degree=degrees[i], include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([("polynomial_features", polynomial_features),
                         ("linear_regression", linear_regression)])  # chain the steps with a Pipeline
    pipeline.fit(X[:, np.newaxis], y)

    scores = cross_val_score(pipeline, X[:, np.newaxis], y,
                             scoring="neg_mean_squared_error", cv=10)  # 10-fold cross-validation

    X_test = np.linspace(0, 1, 100)
    plt.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), label="Model")
    plt.plot(X_test, true_fun(X_test), label="True function")
    plt.scatter(X, y, edgecolor='b', s=20, label="Samples")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim((0, 1))   # displayed plotting range
    plt.ylim((-2, 2))
    plt.legend(loc="best")  # add a legend to the plot
    plt.title("Degree {}\nMSE = {:.2e}(+/- {:.2e})".format(
        degrees[i], -scores.mean(), scores.std()))
plt.show()
```
## np.random.randn(d0, d1, ..., dn)
1. With no arguments, np.random.randn() returns a single float.
2. With one argument, it returns a rank-1 array, which does not distinguish a row vector from a column vector and therefore cannot properly represent vectors and matrices.
3. With two or more arguments, it returns an array of the corresponding shape, which can represent a vector or a matrix.
4. np.random.standard_normal() is similar to np.random.randn(), but np.random.standard_normal() takes the shape as a tuple.
5. The arguments of np.random.randn() are usually integers; a floating-point argument is automatically truncated to an integer.
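A short demo of these cases, assuming a fixed seed so the output is reproducible:

```python
import numpy as np

np.random.seed(0)
print(np.random.randn())                  # no arguments: a single float
print(np.random.randn(3))                 # one argument: rank-1 array of shape (3,)
print(np.random.randn(2, 3))              # two arguments: a 2x3 array, i.e. a matrix
print(np.random.standard_normal((2, 3)))  # same distribution, but the shape is passed as a tuple
```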
---

Copyright notice: the np.random.randn() summary above is from the original article by the CSDN blogger "signal excavator", licensed under CC 4.0 BY-SA; reprints must include the original source link and this notice.
Original link: https://blog.csdn.net/qq_40130759/article/details/79535575
## plt.subplot()
```python
plt.subplot(nrows, ncols, index, **kwargs)
```
- nrows: number of rows in the subplot grid
- ncols: number of columns in the subplot grid
- index: index of the subplot within the grid
- projection: {None, 'aitoff', 'hammer', 'lambert', 'mollweide', 'polar', 'rectilinear', str}, optional. The projection type of the subplot (Axes). A str is the name of a custom projection (see projections). The default None results in a 'rectilinear' projection.

The optional projection parameter selects the type of subplot: for example, 'polar' gives a polar plot, while the default None gives an ordinary rectilinear plot.
For example:

```python
import matplotlib.pyplot as plt

plt.subplot(3, 2, 1)  # divide the figure into a 3x2 grid; 1 is the index of this subplot
```
## plt.setp()

plt.setp() sets a property on an artist, or queries the allowed values of a property. It lets you draw first and modify the properties afterwards:

```python
import matplotlib.pyplot as plt

line, = plt.plot([1, 2, 3])
plt.setp(line, linestyle='--')  # change the line to a dashed line
```
## xticks()

Sets the x-axis tick locations and their label properties.
Signature:

```python
xticks(ticks, [labels], **kwargs)
```

- ticks: array-like, sets the x-axis tick positions.
- labels: array-like, sets the label displayed at each tick.
- `**kwargs`: appearance properties such as the label rotation and color.
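A minimal sketch of xticks with illustrative positions, labels, and appearance settings:

```python
import matplotlib.pyplot as plt

plt.plot([1, 2, 3, 4])
plt.xticks([0, 1, 2, 3], ['a', 'b', 'c', 'd'], rotation=45, color='blue')  # positions, labels, appearance
plt.show()
```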
## PolynomialFeatures()
```python
sklearn.preprocessing.PolynomialFeatures(degree=2, *, interaction_only=False, include_bias=True, order='C')
```
Generates polynomial and interaction features. If an input sample has the two features [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].

- degree: controls the degree of the polynomial.
- interaction_only: default False. If True, features that combine an input with itself are excluded, so the pure powers a^2 and b^2 disappear from the degree-2 features above.
- include_bias: default True. If True, the bias column of ones (the leading 1 above) is included.
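A short demonstration of these parameters on the [a, b] example above, with a=2 and b=3:

```python
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2, 3]])  # one sample with features a=2, b=3
print(PolynomialFeatures(degree=2).fit_transform(X))
# [[1. 2. 3. 4. 6. 9.]] -> [1, a, b, a^2, ab, b^2]
print(PolynomialFeatures(degree=2, interaction_only=True).fit_transform(X))
# [[1. 2. 3. 6.]] -> [1, a, b, ab]: the pure powers a^2 and b^2 are dropped
print(PolynomialFeatures(degree=2, include_bias=False).fit_transform(X))
# [[2. 3. 4. 6. 9.]] -> the leading 1 is dropped
```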
## cross_val_score()
```python
cross_val_score(estimator, X, y=None, *, groups=None,
                scoring=None,             # scoring metric
                cv=None,                  # number of folds
                n_jobs=None,              # number of jobs run in parallel; fitting the estimator
                                          # and computing the score are parallelized over the
                                          # cross-validation splits. None means 1 unless inside a
                                          # joblib parallel_backend context; -1 uses all processors
                verbose=0,                # verbosity level
                fit_params=None,
                pre_dispatch='2*n_jobs',
                error_score=np.nan)
```
With cv=10, the data set is split into 10 folds; each fold serves once as the test set while the remaining nine folds form the training set.
Advantages of cross-validation:

1. It evaluates the model's predictive performance, in particular how the trained model behaves on new data, which reduces overfitting to some extent.
2. It extracts as much useful information as possible from limited data.
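A minimal sketch of 10-fold cross-validation on a synthetic data set (make_regression is used here only to have some data to score):

```python
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=100, n_features=3, noise=0.1, random_state=0)
scores = cross_val_score(LinearRegression(), X, y,
                         scoring="neg_mean_squared_error", cv=10)  # 10 folds
print(-scores.mean(), scores.std())  # average MSE over the folds and its spread
```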
## plt.legend()
Here is a brief summary of some common options:
1. Set the legend position: `plt.legend(loc='xxx')`
| Location string | Location code |
|---|---|
| 'best' | 0 |
| 'upper right' | 1 |
| 'upper left' | 2 |
| 'lower left' | 3 |
| 'lower right' | 4 |
| 'right' | 5 |
| 'center left' | 6 |
| 'center right' | 7 |
| 'lower center' | 8 |
| 'upper center' | 9 |
| 'center' | 10 |
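For instance, the string and code forms are interchangeable (a minimal sketch):

```python
import matplotlib.pyplot as plt

plt.plot([1, 2, 3], label='line')
plt.legend(loc='upper left')  # equivalent to plt.legend(loc=2)
plt.show()
```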
2. Set the legend font size

fontsize: int or float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}

The font size of the legend. A numeric value is an absolute font size in points; a string value is relative to the current default font size. This argument is only used if prop is not specified.
3. Set the legend border and background

```python
plt.legend(loc='best', frameon=False)     # remove the legend border
plt.legend(loc='best', edgecolor='blue')  # set the legend border color
plt.legend(loc='best', facecolor='blue')  # set the legend background color; ignored if there is no border
```
4. Set the legend title: `plt.legend(title='xxx')`

For example, with curves labelled 'sinx' and 'cosx', passing a title to plt.legend() draws it above the legend entries; the resulting legend is shown in the figure of the original post (not reproduced here).
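A minimal sketch of a titled legend for the sinx/cosx example (the title string is illustrative):

```python
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 2 * np.pi, 100)
plt.plot(x, np.sin(x), label='sinx')
plt.plot(x, np.cos(x), label='cosx')
plt.legend(title='trig functions', loc='best')  # 'trig functions' is an illustrative title
plt.show()
```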
---

Copyright notice: the plt.legend() summary above is from the original article by the CSDN blogger "humingzhu_97", licensed under CC 4.0 BY-SA; reprints must include the original source link and this notice.
Original link: https://blog.csdn.net/humingzhu_97/article/details/104899572
## 3. Logistic regression
### Source code
```python
# Add the project root to the system path so that modules can be imported.
# The root directory of the project is ".../machine learning toy code".
import sys
from pathlib import Path  # path handling

curr_path = str(Path().absolute())
parent_path = str(Path().absolute().parent)
p_parent_path = str(Path().absolute().parent.parent)
sys.path.append(p_parent_path)
print(f"The home directory is: {p_parent_path}")

from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import numpy as np

train_dataset = datasets.MNIST(root=p_parent_path + '/datasets/', train=True,
                               transform=transforms.ToTensor(), download=False)
test_dataset = datasets.MNIST(root=p_parent_path + '/datasets/', train=False,
                              transform=transforms.ToTensor(), download=False)

batch_size = len(train_dataset)  # defined for reference; the loaders below use batches of 100
train_loader = DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=True)
X_train, y_train = next(iter(train_loader))
X_test, y_test = next(iter(test_loader))

# Show the first 100 images
images, labels = X_train[:100], y_train[:100]
# Arrange the images in a grid that is 10 images wide
img = torchvision.utils.make_grid(images, nrow=10)
# imshow() expects the shape (size1, size2, channels), while img has the shape
# (channels, size1, size2), so transpose() moves the color channels to the last axis
img = img.numpy().transpose(1, 2, 0)
print(images.shape)
print(labels.reshape(10, 10))
print(img.shape)
plt.imshow(img)
plt.show()

X_train, y_train = X_train.cpu().numpy(), y_train.cpu().numpy()  # tensor to array
X_test, y_test = X_test.cpu().numpy(), y_test.cpu().numpy()      # tensor to array

X_train = X_train.reshape(X_train.shape[0], 784)  # flatten each 28x28 image into 784 features
X_test = X_test.reshape(X_test.shape[0], 784)

# solver: the optimizer to use; lbfgs: quasi-Newton method, sag: stochastic average gradient
model = LogisticRegression(solver='lbfgs', max_iter=400)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))  # print the classification report

ones_col = [[1] for i in range(len(X_train))]  # a 2-D nested list of ones, i.e. [[1], [1], ..., [1]]
X_train = np.append(X_train, ones_col, axis=1)
x_train = np.mat(X_train)
X_test = np.append(X_test, ones_col, axis=1)
x_test = np.mat(X_test)

# MNIST has the ten labels 0-9. To turn this into a binary task, map the label 1 to 1
# and every other digit to 0, i.e. recognize whether an image shows a 1
y_train = np.array([1 if y_train[i] == 1 else 0 for i in range(len(y_train))])
y_test = np.array([1 if y_test[i] == 1 else 0 for i in range(len(y_test))])

model = LogisticRegression(solver='lbfgs', max_iter=100)  # lbfgs: quasi-Newton method
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))  # print the classification report
```
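One design note: the manually appended ones_col turns each sample x into [x, 1], which lets the bias term be absorbed into the weight vector, since w·x + b = [w, b]·[x, 1]; the matrices x_train and x_test prepared with np.mat are presumably intended for a hand-written implementation that is not shown here.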