import pandas as pd
# Location of the single CSV file explored throughout this script.
# NOTE(review): '.data' is a dot-prefixed directory name; confirm that
# './data' was not what was meant.
data_name = '.data/1.csv'
# pandas has one reader per format: read_excel for Excel workbooks,
# read_table for delimited text, read_csv for CSV.
df_csv = pd.read_csv(filepath_or_buffer=data_name)
# Preview the first five rows.
print(df_csv.head(n=5))
# Read every CSV file found in one folder and merge them into a single table.
import glob
path = '.data/Bearing1_1'
# glob returns matches in arbitrary, OS-dependent order and the merged rows
# follow the file order, so sort to make the result reproducible.
all_files = sorted(glob.glob(path+'/*.csv'))
if not all_files:
    # Without this check pd.concat fails on the empty list with an error
    # that does not mention the folder at all.
    raise FileNotFoundError('No CSV files found in ' + path)
all_data = []
for filename in all_files:
    # header=0: the first line of each file supplies the column names.
    df = pd.read_csv(filename, index_col=None, header=0)
    all_data.append(df)
# Stack the frames row-wise and renumber the rows 0..n-1.
data_123_outerrace = pd.concat(all_data, axis=0, ignore_index=True)
print(data_123_outerrace)
# Basic inspection of the loaded table.
# `data` is bound to the same DataFrame object as `df_csv` (no copy is made),
# so in-place changes made through one name are visible through the other.
data = df_csv
print(data.shape)  # (number of rows, number of columns)
print(data.dtypes)  # dtype of every column
# DataFrame.info() writes its summary to stdout itself and returns None, so
# it is called directly. Printing the un-called attribute only shows the
# bound method's repr, not the summary.
data.info()
print(data.describe())  # summary statistics such as the mean
print(data.columns)  # column names
print(data['Horizontal_vibration_signals'].unique())  # distinct values in the column
print(data['Horizontal_vibration_signals'].values)  # every value in the column
print(data.head())  # first five rows
print(data.tail(3))  # last three rows (tail() without an argument gives five)
# --- Missing values ---
# Cell-by-cell mask of missing entries, reused by the three checks below.
_missing_mask = data.isna()
print(_missing_mask.any(axis=0))  # axis=0: does each column contain a missing value?
print(_missing_mask.any(axis=1))  # axis=1: does each row contain a missing value?
# print(data.loc[data.isnull().any(axis=0)])  # locate the missing entries
print(_missing_mask.sum(axis=0))  # number of missing values per column
# Option 1: drop every row that contains a missing value.
data1 = data.dropna()
print(data1)
# Option 2: replace missing values with zero; a new frame is returned and
# `data` itself is left untouched.
data2 = data.fillna(value=0)
print(data2)
# The string literal below is inert (never executed). It sketches a third
# option: fill each column with its own statistic (mode / mean).
# NOTE(review): its 'Vertical_vibration_signals' entry takes the mean of the
# Horizontal column - looks like a copy-paste slip; confirm before enabling.
'''data2 = data.fillna(value={'Horizontal_vibration_signals':data['Horizontal_vibration_signals'].mode()[0], # 用众数代替空值
'Vertical_vibration_signals':data['Horizontal_vibration_signals'].mean()}, # 用平均数代替空值
inplace=False)'''
# --- Duplicates ---
# True when at least one row is an exact repeat of an earlier row.
_duplicate_flags = df_csv.duplicated()
print(_duplicate_flags.any())
# data3 = data.drop_duplicates()  # drop duplicated rows
# Restrict the comparison to chosen columns with subset=; keep='first' keeps
# the first occurrence; inplace=True would modify the original frame.
# data4 = data.drop_duplicates(subset=['Horizontal_vibration_signals'], keep='first', inplace=False)
# --- Adding and dropping columns / rows ---
# Expose the row index as an ordinary column named 'index'.
data['index'] = data.index
print(data.head())
# Add a column 'New' filled from the horizontal-signal column.
data['New'] = data['Horizontal_vibration_signals']
print(data.head())
# drop() returns a new frame, so the result has to be assigned to a name.
data3 = data.drop('New', axis=1)  # remove one column
print(data3.head())
data4 = data.drop(2, axis=0)  # remove the row whose index label is 2
print(data4.head())
# Remove several rows at once; any list of labels works, e.g. [1, 3, 6].
data5 = data.drop(index=[0, 2, 4, 6, 8, 10, 12, 14])
print(data5.head(20))
# Load the file again into a fresh frame that is independent of `data`.
# pandas has one reader per format: read_excel, read_table, read_csv.
df_csv1 = pd.read_csv(data_name)
# Keep only the rows whose horizontal signal lies in [-1.0, 1.0]
# (both bounds included).
_horizontal = df_csv1['Horizontal_vibration_signals']
data6 = df_csv1[_horizontal.between(-1.0, 1.0)]
_first_30 = data6.head(30)
print(_first_30)
# Sort the preview by one column, smallest value first ...
print(_first_30.sort_values(by='Vertical_vibration_signals'))
# ... and largest value first.
print(_first_30.sort_values(by='Vertical_vibration_signals', ascending=False))
# Grouped summary: data7 = df_csv1.groupby(by='Horizontal_vibration_signals')['Vertical_vibration_signals'].std()
print(df_csv1.describe())
# Standard deviation of the two signal columns.
_signal_columns = ['Horizontal_vibration_signals', 'Vertical_vibration_signals']
print(df_csv1[_signal_columns].std())
# 数据标准化
import numpy as np
# Min-max (0-1) normalisation
def MaxMinNormalization(x):
    """Rescale ``x`` linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        x: Numeric array-like, e.g. a NumPy array or a pandas Series.

    Returns:
        ``(x - min(x)) / (max(x) - min(x))`` computed element-wise. When all
        values are identical (zero range) the result is all zeros instead of
        the NaN that dividing 0 by 0 would give.
    """
    lowest = np.min(x)
    value_range = np.max(x) - lowest
    # Constant input has zero range; dividing by 1 instead keeps the result
    # finite (every element becomes 0.0).
    if np.all(value_range == 0):
        value_range = 1
    return (x - lowest) / value_range
# Min-max scale the horizontal signal, then reset_index() so the former row
# index becomes a regular column next to the scaled values.
_horizontal_scaled = MaxMinNormalization(df_csv1['Horizontal_vibration_signals'])
x1 = _horizontal_scaled.reset_index()
print(x1)
# Z-score standardisation
def ZescoreNormalization(x):
    """Centre ``x`` on its mean and scale it by its standard deviation.

    Args:
        x: Numeric array-like, e.g. a NumPy array or a pandas Series.

    Returns:
        ``(x - mean(x)) / std(x)`` computed element-wise. When the standard
        deviation is zero (all values identical) the result is all zeros
        instead of the NaN that dividing 0 by 0 would give.
    """
    centred = x - np.mean(x)
    spread = np.std(x)
    # NOTE(review): for a pandas Series np.std is believed to defer to
    # Series.std, whose default ddof differs from the ndarray default -
    # confirm which definition callers expect.
    # Constant input has zero spread; dividing by 1 instead keeps the result
    # finite (every element becomes 0.0).
    if np.all(spread == 0):
        spread = 1
    return centred / spread
# Standardise the horizontal signal with the z-score helper.
_horizontal_signal = df_csv1['Horizontal_vibration_signals']
x2 = ZescoreNormalization(_horizontal_signal)
print(x2)
# Saving the result
# x2.to_excel('./Test_data/x2.xls', sheet_name='Horizontal_vibration_signals')
# Python 数据操作
# 最新推荐文章于 2025-01-06 18:37:25 发布
# 该博客展示了使用Python进行数据处理的基本操作,包括读取CSV文件、合并数据、检查数据维度、数据类型、缺失值处理、重复值检测、数据排序及分组统计。还涉及数据标准化方法,如0-1标准化和Z-score标准化,并演示了如何存储处理后的数据。

# 753

# 被折叠的 条评论
# 为什么被折叠?



