1. 데이터 타입 확인
import pandas as pd
# Load the access log; read_csv uses a comma separator by default, so it is
# equivalent to read_table(..., sep=',').
sparta_data = pd.read_csv('/content/access_detail.csv')
sparta_data.head()
# type() - check what kind of value the 'access_date' column holds
print(type(sparta_data['access_date'][1]))
2. 요일별, 시간별 수강생 수 구하기
- 문자를 날짜/시간 데이터로 변환
- 요일 데이터, 시간 데이터 추가
import pandas as pd
# Load the access log; read_csv uses a comma separator by default.
sparta_data = pd.read_csv('/content/access_detail.csv')

# Convert the text timestamps into datetime values.  The pattern gets its own
# name so the built-in format() is not shadowed.
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%f'
sparta_data['access_date_time'] = pd.to_datetime(
    sparta_data['access_date'], format=DATETIME_FORMAT
)
sparta_data.tail(5)

# Add weekday-name and hour-of-day columns derived from the timestamp.
sparta_data['access_date_time_weekday'] = sparta_data['access_date_time'].dt.day_name()
sparta_data['access_date_time_hour'] = sparta_data['access_date_time'].dt.hour
sparta_data.tail(5)

weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Group by weekday and count the user_id entries in each group.
weekdata = sparta_data.groupby('access_date_time_weekday')['user_id'].count()
# Arrange the counts in calendar order.  reindex() selects by index label;
# agg(weeks) treats each day name as a function/attribute name instead and
# fails when a weekday has no rows.  Missing weekdays become a count of 0.
weekdata = weekdata.reindex(weeks, fill_value=0)

# Same count per hour of day, in ascending hour order.
hourdata = sparta_data.groupby('access_date_time_hour')['user_id'].count()
hourdata = hourdata.sort_index()

weekdata, hourdata
3. 요일별 수강생 막대그래프
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the access log; read_csv uses a comma separator by default.
sparta_data = pd.read_csv('/content/access_detail.csv')

# Convert the text timestamps into datetime values.  The pattern gets its own
# name so the built-in format() is not shadowed.
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%f'
sparta_data['access_date_time'] = pd.to_datetime(
    sparta_data['access_date'], format=DATETIME_FORMAT
)

# Add weekday-name and hour-of-day columns derived from the timestamp.
sparta_data['access_date_time_weekday'] = sparta_data['access_date_time'].dt.day_name()
sparta_data['access_date_time_hour'] = sparta_data['access_date_time'].dt.hour

weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Group by weekday and count the user_id entries in each group.
weekdata = sparta_data.groupby('access_date_time_weekday')['user_id'].count()
# Arrange the counts in calendar order.  reindex() selects by index label;
# agg(weeks) treats each day name as a function/attribute name instead and
# fails when a weekday has no rows.  Missing weekdays become a count of 0.
weekdata = weekdata.reindex(weeks, fill_value=0)

# Same count per hour of day, in ascending hour order.
hourdata = sparta_data.groupby('access_date_time_hour')['user_id'].count()
hourdata = hourdata.sort_index()

# Bar chart of the per-weekday counts.
# NOTE(review): the Korean labels need a Hangul-capable font configured in
# matplotlib, otherwise they are drawn as empty boxes.
plt.figure(figsize=(10, 5))
plt.bar(weekdata.index, weekdata)
plt.title('요일별 수강 완료 수강생 수')
plt.xlabel('요일')
plt.ylabel('수강생(명)')
plt.xticks(rotation=90)
plt.show()
4. 시간별 수강생 선 그래프
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the access log; read_csv uses a comma separator by default.
sparta_data = pd.read_csv('/content/access_detail.csv')

# Convert the text timestamps into datetime values.  The pattern gets its own
# name so the built-in format() is not shadowed.
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%f'
sparta_data['access_date_time'] = pd.to_datetime(
    sparta_data['access_date'], format=DATETIME_FORMAT
)

# Add weekday-name and hour-of-day columns derived from the timestamp.
sparta_data['access_date_time_weekday'] = sparta_data['access_date_time'].dt.day_name()
sparta_data['access_date_time_hour'] = sparta_data['access_date_time'].dt.hour

weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Group by weekday and count the user_id entries in each group.
weekdata = sparta_data.groupby('access_date_time_weekday')['user_id'].count()
# Arrange the counts in calendar order.  reindex() selects by index label;
# agg(weeks) treats each day name as a function/attribute name instead and
# fails when a weekday has no rows.  Missing weekdays become a count of 0.
weekdata = weekdata.reindex(weeks, fill_value=0)

# Same count per hour of day, in ascending hour order.
hourdata = sparta_data.groupby('access_date_time_hour')['user_id'].count()
hourdata = hourdata.sort_index()

# Line chart of the per-hour counts, with one x tick for every hour 0-23.
# NOTE(review): the Korean labels need a Hangul-capable font configured in
# matplotlib, otherwise they are drawn as empty boxes.
plt.figure(figsize=(10, 5))
plt.plot(hourdata.index, hourdata)
plt.title('시간별 수강 완료 사용자 수')
plt.xlabel('시간')
plt.ylabel('사용자(명)')
plt.xticks(np.arange(24))
plt.show()
5. 요일별 접속 시간 히트맵
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the access log; read_csv uses a comma separator by default.
sparta_data = pd.read_csv('/content/access_detail.csv')

# Convert the text timestamps into datetime values.  The pattern gets its own
# name so the built-in format() is not shadowed.
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%f'
sparta_data['access_date_time'] = pd.to_datetime(
    sparta_data['access_date'], format=DATETIME_FORMAT
)

# Add weekday-name and hour-of-day columns derived from the timestamp.
sparta_data['access_date_time_weekday'] = sparta_data['access_date_time'].dt.day_name()
sparta_data['access_date_time_hour'] = sparta_data['access_date_time'].dt.hour

weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Group by weekday and count the user_id entries in each group.
weekdata = sparta_data.groupby('access_date_time_weekday')['user_id'].count()
# Arrange the counts in calendar order.  reindex() selects by index label;
# agg(weeks) treats each day name as a function/attribute name instead and
# fails when a weekday has no rows.  Missing weekdays become a count of 0.
weekdata = weekdata.reindex(weeks, fill_value=0)

# Same count per hour of day, in ascending hour order.
hourdata = sparta_data.groupby('access_date_time_hour')['user_id'].count()
hourdata = hourdata.sort_index()

# Build the weekday x hour pivot table.
#   values  : column whose entries are aggregated
#   index   : key that becomes the rows
#   columns : key that becomes the columns
#   aggfunc : function used to summarise each cell
# Empty weekday/hour cells are stored as a count of 0, and the rows are put in
# calendar order with reindex() (label-based) rather than agg(weeks).
sparta_data_pivot_table = pd.pivot_table(
    sparta_data,
    values='user_id',
    index=['access_date_time_weekday'],
    columns=['access_date_time_hour'],
    aggfunc="count",
    fill_value=0,
).reindex(weeks, fill_value=0)

# Draw the heatmap.  Ticks sit at +0.5 so each label is centred on its cell.
# NOTE(review): the Korean labels need a Hangul-capable font configured in
# matplotlib, otherwise they are drawn as empty boxes.
plt.figure(figsize=(14, 5))
plt.pcolor(sparta_data_pivot_table)
plt.xticks(
    np.arange(0.5, len(sparta_data_pivot_table.columns), 1),
    sparta_data_pivot_table.columns,
)
plt.yticks(
    np.arange(0.5, len(sparta_data_pivot_table.index), 1),
    sparta_data_pivot_table.index,
)
plt.title('요일별 종료 시간 히트맵')
plt.xlabel('시간')
plt.ylabel('요일')
plt.colorbar()  # colour bar showing which colour maps to which count
plt.show()
6. 분석 결과
화요일 저녁 시간에 수강 독려 문자 전송
'Python' 카테고리의 다른 글
파이썬 내장함수 enumerate()와 for 반복문 (0) | 2024.01.10 |
---|---|
231228 THU 파이썬 함수 vs 메소드 (0) | 2023.12.28 |
파이썬 반복문 - for, while (0) | 2023.12.28 |
파이썬 맛보기 (1) | 2023.12.27 |
231221 THU 파이썬 복습 (1) 파이썬 데이터 분석 및 시각화 기초 (1) | 2023.12.21 |