import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the Kaggle 2020 survey responses from the current working directory.
# NOTE(review): the default header handling is used, so the first data row
# presumably still carries the question text (later cells skip it with a
# [1:] slice) -- confirm against the CSV.
data = pd.read_csv(filepath_or_buffer="kaggle_survey_2020_responses.csv")
data
Data Science 실무 경력과 관련된 column
# Question columns grouped by theme: education-related and data-science-related.
edu_columns = ["Q4", "Q6", "Q15"]
ds_columns = ["Q5", "Q20", "Q22"]

# Restrict the survey frame to the six selected questions.
selected_columns = edu_columns + ds_columns
edit_data = data[selected_columns]
edit_data

# Inspect the rows that have a missing value in at least one selected column.
has_missing = edit_data.isna().any(axis="columns")
edit_data[has_missing]

# Drop every row with a missing value, keeping only fully answered rows.
final_data = edit_data.dropna(how="any")
final_data
교육 수준과 관련된 column
Q4 column
- "What is the highest level of formal education that you have attained or plan to attain within the next 2 years?"
# Q4 answers, skipping the first remaining row.
# NOTE(review): row 0 presumably holds the question text rather than a
# response (the CSV is read with the default header) -- confirm.
Q4 = final_data["Q4"].iloc[1:]
Q4

# Frequency of each answer; computed once and reused by the pie chart below.
q4_counts = Q4.value_counts()
q4_counts

# Histogram of the raw Q4 answers.
plt.figure(figsize=(16, 16))
plt.title("Histogram of Q4 column")
plt.hist(Q4)
plt.show()

# Share of each answer as a pie chart.
plt.figure(figsize=(16, 16))
plt.pie(
    q4_counts,
    labels=q4_counts.index,
    # One decimal place: "%d" truncates, so 39.9 would read "39%" and any
    # slice under 1% would read "0%".
    autopct="%.1f%%",
    startangle=90,
    textprops={"fontsize": 12},
)
plt.axis("equal")
plt.title("Pie chart for Q4 column", fontsize=16)
# Render this figure now instead of leaving it pending until a later show().
plt.show()
Q6 column
- For how many years have you been writing code and/or programming?
# Q6 answers, skipping the first remaining row.
# NOTE(review): row 0 presumably holds the question text rather than a
# response (the CSV is read with the default header) -- confirm.
Q6 = final_data["Q6"].iloc[1:]
Q6

# Frequency of each answer; computed once and reused by the pie chart below.
q6_counts = Q6.value_counts()
q6_counts

# Histogram of the raw Q6 answers; tick labels are rotated to vertical.
plt.figure(figsize=(12, 16))
plt.hist(Q6)
plt.xticks(rotation="vertical")
plt.title("Histogram for Q6 column")
plt.show()

# Share of each answer as a pie chart.
plt.figure(figsize=(24, 24))
plt.pie(
    q6_counts,
    labels=q6_counts.index,
    # One decimal place: "%d" truncates, so 39.9 would read "39%" and any
    # slice under 1% would read "0%".
    autopct="%.1f%%",
    textprops={"fontsize": 24},
)
plt.axis("equal")
plt.title("Pie chart for Q6 column", fontsize=48)
# Render this figure now instead of leaving it pending until a later show().
plt.show()
Q15 column
- For how many years have you used machine learning methods?
# Q15 answers, skipping the first remaining row.
# NOTE(review): row 0 presumably holds the question text rather than a
# response (the CSV is read with the default header) -- confirm.
Q15 = final_data["Q15"].iloc[1:]
Q15

# Count plot of the answers; passing the column as y= puts the categories on
# the vertical axis. The same first row is skipped here as for Q15 above.
plt.figure(figsize=(16, 12))
sns.countplot(y="Q15", data=final_data.iloc[1:])
plt.show()

# Frequency of each answer; computed once and reused by the pie chart below.
q15_counts = Q15.value_counts()

# Share of each answer as a pie chart, one "hls" palette colour per category.
plt.figure(figsize=(8, 8))
plt.pie(
    q15_counts,
    labels=q15_counts.index,
    # One decimal place: "%d" truncates, so 1.9 would read "1%" and any
    # slice under 1% would read "0%".
    autopct="%.1f%%",
    colors=sns.color_palette("hls", len(q15_counts)),
    textprops={"fontsize": 12},
)
plt.axis("equal")
plt.title("Pie chart for Q15 column", fontsize=16, pad=50)
plt.show()
'빅데이터 분석가 양성과정 > Python' 카테고리의 다른 글
공공데이터 분석(커피전문점) - Load Data (0) | 2024.07.08 |
---|---|
Seaborn - Kaggle Survey EDA(2) (0) | 2024.07.08 |
Seaborn - Mushrooms DataSet (0) | 2024.07.08 |
Seaborn - Penguins Dataset ( Heatmap ) (0) | 2024.07.08 |
Seaborn - Penguins Dataset ( Pairplot ) (0) | 2024.07.08 |