import pandas as pd
# Load the dataset. read_csv has to be *called* with a path: binding the bare
# function to `df` makes every call below fail with AttributeError.
# 'file_name.csv' is a placeholder, like the column names used further down.
df = pd.read_csv('file_name.csv')

# Quick inspection of the loaded frame.
df.info()          # prints column dtypes and non-null counts
df.head()          # first rows
df.tail()          # last rows
df.isnull().sum()  # number of missing values per column
df.describe()      # summary statistics
df.corr()          # pairwise correlation between columns
# Fill missing values in a column. The result is assigned back rather than
# calling fillna(inplace=True) on the selected column: that form operates on an
# intermediate object and is not guaranteed to update `df`.
# `value` is a placeholder for the fill value.
df['column_name'] = df['column_name'].fillna(value)
df['column_name'].mode()    # most frequent value(s)
df['column_name'].median()  # median
# Remove several columns from the frame in place.
df.drop(['column_name1', 'column_name2', 'column_name3'], axis=1, inplace=True)

# Conditional overwrite: set column_name1 to 0 on the rows where
# column_name2 > 1. A single .loc[rows, column] assignment writes into `df`
# itself; the chained df[col].loc[mask] = ... form may write to a temporary.
df.loc[df['column_name2'] > 1, 'column_name1'] = 0

# Overwrite by integer position. `row_position`, `column_position` and `value`
# are placeholders for the caller's own values.
df.iloc[row_position, column_position] = value
# Keep only the second piece (index 1) of each string after splitting on ', '.
df['column_name1'] = df['column_name1'].str.split(', ', expand=True)[1]

# Frequency of each distinct value.
df['column_name1'].value_counts()

# Mean of column_name2 per column_name1 group, largest mean first.
df[['column_name1', 'column_name2']].groupby(['column_name1']).mean().sort_values(by='column_name2', ascending=False)

# Encode the categorical codes as integers. The source frame is `df`; no
# `dataset` name exists in this file. Values missing from the mapping become
# NaN, which makes astype(int) raise.
df['column_name1'] = df['column_name1'].map({'S': 0, 'C': 1, 'Q': 2}).astype(int)
from sklearn import svm, tree, linear_model, neighbors, naive_bayes, ensemble, discriminant_analysis, gaussian_process
from xgboost import XGBClassifier
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn import feature_selection
from sklearn import model_selection
from sklearn import metrics
import seaborn as sns
# NOTE(review): bare attribute reference - the function is not called here, so
# this line produces no train/test split. Presumably a reminder of the helper
# that should create the X_train / X_test / Y_train values used further down;
# confirm and replace with an actual call.
model_selection.train_test_split
# Candidate classifiers to compare, grouped by family. Each entry is a newly
# constructed, unfitted estimator using default settings unless shown otherwise.
MLA = [
    # Ensemble methods
    ensemble.AdaBoostClassifier(),
    ensemble.BaggingClassifier(),
    ensemble.ExtraTreesClassifier(),
    ensemble.GradientBoostingClassifier(),
    ensemble.RandomForestClassifier(),

    # Gaussian processes
    gaussian_process.GaussianProcessClassifier(),

    # Generalized linear models
    linear_model.LogisticRegressionCV(),
    linear_model.PassiveAggressiveClassifier(),
    linear_model.RidgeClassifierCV(),
    linear_model.SGDClassifier(),
    linear_model.Perceptron(),

    # Naive Bayes
    naive_bayes.BernoulliNB(),
    naive_bayes.GaussianNB(),

    # Nearest neighbors
    neighbors.KNeighborsClassifier(),

    # Support vector machines (probability=True enables probability estimates)
    svm.SVC(probability=True),
    svm.NuSVC(probability=True),
    svm.LinearSVC(),

    # Decision trees
    tree.DecisionTreeClassifier(),
    tree.ExtraTreeClassifier(),

    # Discriminant analysis
    discriminant_analysis.LinearDiscriminantAnalysis(),
    discriminant_analysis.QuadraticDiscriminantAnalysis(),

    # XGBoost
    XGBClassifier(),
]
# Fit a support vector classifier and predict labels for X_test.
# The file imports the `svm` module rather than the bare `SVC` name, so the
# class has to be reached as svm.SVC (a plain SVC() raises NameError).
svc = svm.SVC()
svc.fit(X_train, Y_train)
Y_pred = svc.predict(X_test)
# Score on the *training* data, expressed as a percentage rounded to two
# decimals. This is not an estimate of performance on unseen data.
acc_svc = round(svc.score(X_train, Y_train) * 100, 2)
# Build the two-column submission table (PassengerId, Survived) and write it
# to StackingSubmission.csv without the index column.
# NOTE(review): `PassengerId` and `predictions` are not defined in the visible
# part of the file - confirm where they come from.
StackingSubmission = pd.DataFrame(
    {
        'PassengerId': PassengerId,
        'Survived': predictions,
    }
)
StackingSubmission.to_csv("StackingSubmission.csv", index=False)