...
Create a new Python3 notebook
Import a Dataset
...
Code Block |
---|
import pandas as pd from sklearn.tree import DecisionTreeClassifier from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score df = pd.read_csv('music.csv') X = df.drop(columns="genre") y = df["genre"] #split our data into train and test DataFrames (20% for testing) X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2) model = DecisionTreeClassifier() # train model model.fit(X_train,y_train) # run predict using test data predictions = model.predict(X_test) score = accuracy_score(y_test, predictions) score |
Model Persistence
Saving a Trained Model
Code Block |
---|
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
df = pd.read_csv('music.csv')
X = df.drop(columns="genre")
y = df["genre"]
#split our data into train and test DataFrames (20% for testing)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)
model = DecisionTreeClassifier()
# train model
model.fit(X_train,y_train)
# run predict using test data
# predictions = model.predict(X_test)
# score = accuracy_score(y_test, predictions)
#save our model
joblib.dump(model,"music-recomender.joblib") |
Predictions from a Saved Model
Code Block |
---|
import joblib
#load our model
model = joblib.load("music-recomender.joblib")
# run predict using test data
predictions = model.predict([[20,1],[21,0]])
predictions |
Visualizing Decision Trees
Code Block |
---|
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
# Fit a decision tree on the full dataset and export it in Graphviz DOT format.
df = pd.read_csv('music.csv')
X = df.drop(columns="genre")
y = df["genre"]
model = DecisionTreeClassifier()
# train model on every row (no hold-out split; this cell is for visualization only)
model.fit(X, y)
# export graph of data in dot format
tree.export_graphviz(
    model,
    out_file='music_recomender.dot',
    # take the names from the training columns so node labels always match
    # the order of the features the model was actually fitted on
    feature_names=list(X.columns),
    # class names in ascending order
    class_names=sorted(y.unique()),
    label='all',
    rounded=True,
    filled=True,
)
|
This writes the decision tree to a .dot file. To visualize it, open the file in VS Code with a Graphviz (DOT) preview extension installed.
References
Reference | URL |
---|---|
Python Machine Learning Tutorial (Data Science) | https://www.youtube.com/watch?v=7eh4d6sabA0 |
...