说明: 以下对之前使用pandas过程中用到的数据基本增删改查处理方法进行梳理
1.数据读取
import pandas as pd
pd.read_文件类型(path)
read_csv
pd.read_csv('filepath(可加载网络数据)', names=column_names)
column_names:指定类别名字,['Sample code number', 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape', 'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin', 'Normal Nucleoli', 'Mitoses', 'Class']
return:数据
replace(to_replace='', value=):返回数据
dropna():返回数据
2.数据保存
msg = pd.read_文件类型(path)
msg.to_文件类型(path)
3.pandas常用数据格式处理函数
import numpy as np
import pandas as pd
# Sample data shared by the examples in the sections that follow.
data = {
    'city': ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Chongqing'],
    'year': [2016, 2017, 2016, 2017, 2016, 2016],
    'population': [2100, 2300, 1000, 700, 500, 500],
}
# 'debt' is not a key of `data`, so that column is created and filled with NaN.
frame = pd.DataFrame(data, columns=['year', 'city', 'population', 'debt'])

# df1..df3 share the columns 'apts' and 'cars' and are indexed by city name;
# they are used to demonstrate concatenation.
df1 = pd.DataFrame(
    {'apts': [55000, 60000], 'cars': [200000, 300000]},
    index=['Shanghai', 'Beijing'],
)
df2 = pd.DataFrame(
    {'apts': [25000, 20000], 'cars': [150000, 120000]},
    index=['Hangzhou', 'Nanjing'],  # was misspelled 'Najing'
)
df3 = pd.DataFrame(
    {'apts': [30000, 10000], 'cars': [180000, 100000]},
    index=['Guangzhou', 'Chongqing'],
)

# df4 and df5 both carry a 'cities' column and are used to demonstrate merge;
# only 'Shanghai' and 'Beijing' appear in both.
df4 = pd.DataFrame({
    'apts': [55000, 60000, 58000],
    'cars': [200000, 300000, 250000],
    'cities': ['Shanghai', 'Beijing', 'Shenzhen'],
})
df5 = pd.DataFrame({
    'salaries': [10000, 30000, 30000, 20000, 15000],
    'cities': ['Suzhou', 'Beijing', 'Shanghai', 'Guangzhou', 'Tianjin'],
})
4.增
# Overwrite the row labelled 0 with the values 0..3, one per column.
# (`.ix` was removed in pandas 1.0; `.loc` is the label-based replacement.)
frame.loc[0] = np.arange(4)
# Insert a copy of the 'year' column at position 0 under the name 'temp'.
frame.insert(0, 'temp', frame.year)
# Add a new column 'xx' holding 0..5, one value per row.
frame.loc[:, 'xx'] = np.arange(6)
# reindex returns a new frame with an extra all-NaN column 'E';
# the result is not assigned, so `frame` itself is unchanged.
frame.reindex(index=list(frame.index), columns=list(frame.columns) + ['E'])
# Stack df2 below df1 (DataFrame.append was removed in pandas 2.0).
pd.concat([df1, df2])
# Stack the three frames row-wise.
pd.concat([df1, df2, df3])
# Place the three frames side by side, aligned on the index; cells with no
# matching index label are NaN.
pd.concat([df1, df2, df3], axis=1)
# Inner join on 'cities': keeps only cities present in both frames.
result = pd.merge(df4, df5, on='cities')
# Outer join on 'cities': keeps every city, with NaN where one side has no match.
result2 = pd.merge(df4, df5, on='cities', how='outer')
5.删
# Remove the 'year' column from the frame in place.
del frame['year']
# drop returns a new frame, so rebind the name to keep the result.
# Remove the columns 'city' and 'debt'.
frame = frame.drop(columns=['city', 'debt'])
# Remove the rows labelled 0, 1 and 2.
frame = frame.drop(index=[0, 1, 2])
# The dropna calls below each return a new frame; the results are not
# assigned, so `frame` is left untouched.
# Default: drop every row that contains at least one missing value.
frame.dropna()
# Drop columns in which every value is missing.
frame.dropna(axis='columns', how='all')
# Drop columns that contain at least one missing value.
frame.dropna(axis='columns', how='any')
# Drop rows in which every value is missing.
frame.dropna(axis='index', how='all')
# Drop rows that contain at least one missing value.
frame.dropna(axis='index', how='any')
6.改
元素赋值
# Set a single cell by row label and column label.
frame.loc[0, 'city'] = 'YunCheng'
# Set a single cell by integer position (first row, first column).
frame.iloc[0, 0] = 2011
# Scalar-only label-based access; same target cell as the .loc line above.
frame.at[0, 'city'] = 'YunCheng'
# Scalar-only position-based access; same target cell as the .iloc line above.
frame.iat[0, 0] = 2010
# Returns a new frame with missing values replaced by 1; the result is not
# assigned here, so `frame` itself is unchanged.
frame.fillna(value = 1)
列赋值
# Assigning a scalar fills the whole column with that value.
frame['year'] = 2000
# Attribute-style assignment to the 'debt' column with the values 0..5.
# NOTE(review): this form only targets a column that already exists; use
# frame['debt'] = ... to create a new one.
frame.debt = np.arange(6)
# A Series with the default integer index 0, 1, 2.
val = pd.Series([200, 300, 500])
# Assignment aligns on the index: rows 0-2 receive the values, rows whose
# label is absent from `val` become NaN.
frame['debt'] = val
行赋值
# A Series whose index holds column names, so each value is matched to a column.
val = pd.Series(['aa', 2000, 500], index = ['city', 'year', 'population'])
# Overwrite the row labelled 0; values are aligned on the column labels, and
# columns absent from `val` (e.g. 'debt') are set to NaN.
frame.loc[0] = val
7.查
索引、值查看
# Row labels of the frame.
frame.index
# Column labels of the frame.
frame.columns
# The underlying data as a NumPy array.
frame.values
元素查找
# Scalar labels return the single cell value.
xx = frame.loc[0, 'city']
# List labels return a 1x1 DataFrame instead of a scalar.
xx = frame.loc[[0], ['city']]
行查找
# Label-based slice: both endpoints are included, so this selects labels 0, 1 and 2.
df = frame.loc[0:2]
# Position-based slice: the end is exclusive, so this selects the first two rows.
df = frame.iloc[0:2]
# Slicing the frame directly is positional as well: the first three rows.
df = frame[0:3]
# A single row by label, returned as a Series.
# (`.ix` was removed in pandas 1.0; `.loc` is the label-based replacement.)
df = frame.loc[0]
列查找
# One column by label, returned as a Series.
df = frame.loc[:, 'city']
# Several columns by label, returned as a DataFrame.
df = frame.loc[:, ['city', 'population']]
# The first two columns by position.
df = frame.iloc[:, 0:2]
# Bracket and attribute access both return the 'year' column as a Series.
df = frame['year']
df = frame.year
# A list of labels selects several columns in the given order.
df = frame[['population', 'year']]
# Keep the columns whose name matches the regular expression.
df = frame.filter(regex='population|year')
# Rows whose year is greater than 2016.
frame[frame.year > 2016]
# Element-wise mask: values not greater than 2016 become NaN. The comparison
# is restricted to numeric columns because comparing the string column
# 'city' with an int raises TypeError.
numeric = frame.select_dtypes(include='number')
numeric[numeric > 2016]
# Rows whose year is one of the listed values. 'year' holds integers, so the
# candidates must be integers too; the strings '2016'/'2015' never match.
frame[frame.year.isin([2016, 2015])]
# Selecting several columns needs a list of labels; frame['city', 'year']
# looks up the single tuple key ('city', 'year') and raises KeyError.
frame[['city', 'year']]
块查找
# Position-based block: the first two rows of the first two columns.
df = frame.iloc[0:2, 0:2]
条件查找
# Boolean Series: True where 'year' is not missing (both spellings are equivalent).
df = frame.year.notnull()
df = frame['year'].notnull()
# Rows whose 'year' is not missing.
df = frame[frame.year.notnull()]
# The same rows as a NumPy array.
df = frame[frame.year.notnull()].values
# Combine both conditions into one mask with `&`. Chaining two [] filters
# applies a full-length mask to an already-filtered frame, which relies on
# implicit reindexing and emits a UserWarning.
mask = (frame.year == 2016) & (frame.city == 'Beijing')
df = frame[mask]
# The 'debt' values of the rows matching both conditions.
df = frame.loc[mask, 'debt']
模糊查找(正则匹配)
# Keep the rows whose 'h_type' string matches a regular expression.
# NOTE(review): `tmp_msg` and the 'h_type' column are not defined in the
# visible part of this file, and the pattern literal is a placeholder for a
# real regular expression.
tmp_msg[tmp_msg['h_type'].str.contains('正则表达式')]
8.其他
# Transpose: rows become columns and columns become rows.
df = frame.T
# Number of non-missing values in the 'year' column.
frame.year.count()
# How many times each distinct 'year' value occurs.
frame.year.value_counts()
# For each 'year' group, the number of non-missing values in every other column.
frame.groupby('year').count()