added documentation

This commit is contained in:
Quentin Roussel
2024-05-07 00:36:41 +02:00
parent ac4d95bd07
commit 3fd37213e1
4 changed files with 322 additions and 24 deletions

View File

@@ -2,6 +2,21 @@ import pandas as pd
import matplotlib.pyplot as plt
def error(df, df_original, column_name):
"""
Calculate the error between the values in a column of a DataFrame and the last value before each timestamp.
Args:
df (pandas.DataFrame): The DataFrame containing the values.
df_original (pandas.DataFrame): The original DataFrame containing the timestamps and values.
column_name (str): The name of the column to calculate the error for.
Returns:
list: A list of absolute differences between the values in the specified column and the last value before each timestamp.
Raises:
ValueError: If the specified column does not exist in the DataFrame.
"""
diff = []
# Check if the column exists in the DataFrame
if column_name not in df.columns:
@@ -23,6 +38,19 @@ def error(df, df_original, column_name):
def plot_histogram(data_series, bins=10, title="Distribution of Absolute Differences"):
"""
Plots a histogram of the given data series.
Parameters:
- data_series (array-like): The data series to plot the histogram for.
- bins (int): The number of bins to use for the histogram. Default is 10.
- title (str): The title of the histogram plot. Default is "Distribution of Absolute Differences".
Returns:
None
"""
import matplotlib.pyplot as plt
plt.figure(figsize=(8, 4)) # Set the figure size for better readability
plt.hist(data_series, bins=bins, color='blue', alpha=0.7, edgecolor='black')
plt.title(title)
@@ -32,15 +60,40 @@ def plot_histogram(data_series, bins=10, title="Distribution of Absolute Differe
plt.show()
def compute_efficiency(df):
    """
    Compute the efficiency of a data frame, i.e. the average time taken to
    collect each data point.

    The elapsed time between the first and the last entry of the "time"
    column is divided by the total number of rows in the frame.

    Parameters:
    df (pandas.DataFrame): The input data frame. It must have a "time" column
        whose entries subtract to an object exposing ``total_seconds()``.

    Returns:
    float: Elapsed seconds between the first and last row, divided by the
        number of rows.
    """
    timestamps = df["time"]
    # Elapsed time between the first and last point, expressed in seconds
    elapsed_seconds = (timestamps.iloc[-1] - timestamps.iloc[0]).total_seconds()
    # Average over the number of rows (not the number of intervals between them)
    return elapsed_seconds / len(df)
def hourly_rate_of_change(df):
"""
Calculate the average absolute rate of change per hour for a given DataFrame.
Args:
df (pandas.DataFrame): The DataFrame containing the data.
Returns:
pandas.Series: A Series containing the average absolute rate of change per hour.
Raises:
ValueError: If the DataFrame does not include 'time' and 'value' columns, or if it is empty.
ValueError: If the 'time' column is not of datetime type.
"""
# Check if required columns exist
if 'time' not in df.columns or 'value' not in df.columns:
raise ValueError("DataFrame must include 'time' and 'value' columns.")