아이공의 AI 공부 도전기

Pandas 간단

2022. 4. 15. 21:09

Pandas 간단

 

 

Pandas

 

import pandas as pd

df = pd.read_csv('file_path.csv')

df.info()

df.head()

df.tail()

 

df.isnull().sum()

df.describe()

df.corr()

 

df['column_name'] = df['column_name'].fillna(value)

df['column_name'].mode()  # 최빈값

df['column_name'].median()  # 중앙값

df.drop(['column_name1', 'column_name2', 'column_name3'], axis=1, inplace=True)

 

df.loc[df['column_name2'] > 1, 'column_name1'] = 0

df.iloc[value, value ...] = value

df['column_name1'] = df['column_name1'].str.split(', ', expand=True)[1]

 

df['column_name1'].value_counts()

 

df[['column_name1','column_name2']].groupby(['column_name1']).mean().sort_values(by='column_name2', ascending=False)

df['column_name1'] = df['column_name1'].map( {'S': 0, 'C': 1, 'Q': 2} ).astype(int)

 

 

from sklearn import svm, tree, linear_model, neighbors, naive_bayes, ensemble, discriminant_analysis, gaussian_process
from xgboost import XGBClassifier
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn import feature_selection
from sklearn import model_selection
from sklearn import metrics
import seaborn as sns

 

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y)

MLA = [
    #Ensemble Methods
    ensemble.AdaBoostClassifier(),
    ensemble.BaggingClassifier(),
    ensemble.ExtraTreesClassifier(),
    ensemble.GradientBoostingClassifier(),
    ensemble.RandomForestClassifier(),

    #Gaussian Processes
    gaussian_process.GaussianProcessClassifier(),
    
    #GLM
    linear_model.LogisticRegressionCV(),
    linear_model.PassiveAggressiveClassifier(),
    linear_model.RidgeClassifierCV(),
    linear_model.SGDClassifier(),
    linear_model.Perceptron(),
    
    #Naive Bayes
    naive_bayes.BernoulliNB(),
    naive_bayes.GaussianNB(),
    
    #Nearest Neighbor
    neighbors.KNeighborsClassifier(),
    
    #SVM
    svm.SVC(probability=True),
    svm.NuSVC(probability=True),
    svm.LinearSVC(),
    
    #Trees    
    tree.DecisionTreeClassifier(),
    tree.ExtraTreeClassifier(),
    
    #Discriminant Analysis
    discriminant_analysis.LinearDiscriminantAnalysis(),
    discriminant_analysis.QuadraticDiscriminantAnalysis(),

    
    #xgboost
    XGBClassifier()    
    ]

 

svc = svm.SVC()
svc.fit(X_train, Y_train)
Y_pred = svc.predict(X_test)
acc_svc = round(svc.score(X_train, Y_train) * 100, 2)

 

# Generate Submission File 
StackingSubmission = pd.DataFrame({ 'PassengerId': PassengerId,
                            'Survived': predictions })
StackingSubmission.to_csv("StackingSubmission.csv", index=False)

 

 

공유하기

facebook twitter kakaoTalk kakaostory naver band
loading