Source code for pyradon.pyradon

# -*- coding: utf-8 -*-

import os
import pandas as pd
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt
import scipy.stats as stats
import cartopy.crs as ccrs

class Pyradon():
    """A collection of methods for the correlation of soil radon measurements
    as a potential tracer of tectonic and volcanic activity.

    Every public method is a ``staticmethod``; the class only acts as a
    namespace and holds no state.
    """

    # ------------------------------------------------------------------
    # Private helpers shared by the plotting methods
    # ------------------------------------------------------------------
    @staticmethod
    def _text_options(options):
        """Return ``(labelsize, title, titlesize, fontcolor)`` from a kwargs dict.

        Missing keys fall back to ``11.``, ``None``, ``13.`` and ``'black'``.
        """
        return (
            options.get('labelsize', 11.),
            options.get('title'),
            options.get('titlesize', 13.),
            options.get('fontcolor', 'black'),
        )

    @staticmethod
    def _twin_axis_chart(df1, df2, x_label, y_label, options, bar_first,
                         first_style, second_style):
        """Draw ``df1`` (bars or a line) and ``df2`` (a line) on twin y axes.

        :param options: The caller's ``**kwargs`` (text styling is read from it)
        :param bar_first: Draw ``df1`` with ``Axes.bar`` instead of ``Axes.plot``
        :param first_style: Extra keyword arguments for the ``df1`` artist
        :param second_style: Extra keyword arguments for the ``df2`` artist
        :raises ValueError: If ``y_label`` does not hold exactly two entries
        """
        # Validate before touching pyplot so that a bad call does not leave an
        # empty figure registered with matplotlib.
        if len(y_label) != 2:
            raise ValueError("A y axis label must be provided for both dataframes.")

        labelsize, title, titlesize, fontcolor = Pyradon._text_options(options)
        font = {'color': fontcolor}
        # A missing label falls back to the name of the plotted column.
        first_label = df1.columns[1] if y_label[0] is None else y_label[0]
        second_label = df2.columns[1] if y_label[1] is None else y_label[1]
        bottom_label = df1.columns[0] if x_label is None else x_label

        fig = plt.figure()
        ax1 = fig.add_subplot(1, 1, 1)
        ax1.set_xlabel(bottom_label, fontsize=labelsize, fontdict=font)
        ax1.set_ylabel(first_label, fontsize=labelsize, fontdict=font)
        draw_first = ax1.bar if bar_first else ax1.plot
        draw_first(df1.iloc[:, 0], df1.iloc[:, 1], label=first_label, **first_style)

        # Second axes sharing the same x-axis (its x-label is handled by ax1).
        ax2 = ax1.twinx()
        ax2.set_ylabel(second_label, fontsize=labelsize, fontdict=font)
        ax2.plot(df2.iloc[:, 0], df2.iloc[:, 1], label=second_label, **second_style)

        fig.autofmt_xdate()
        fig.tight_layout()  # otherwise the right y-label is slightly clipped
        fig.legend(loc="upper right", bbox_to_anchor=(1, 1),
                   bbox_transform=ax1.transAxes, frameon=False)
        for axes in (ax1, ax2):
            axes.tick_params(direction='out', labelsize=labelsize, labelcolor=fontcolor)
        if title is not None:
            plt.title(title, fontdict={'fontsize': titlesize, 'color': fontcolor})
        plt.show()

    # ------------------------------------------------------------------
    # Data handling
    # ------------------------------------------------------------------
    @staticmethod
    def clear_data(df):
        """Remove the rows that contain at least one missing value.

        :param df: The data in pandas dataframe format
        :type df: pandas.DataFrame
        :return: A dataframe holding only the rows of ``df`` without NaN
            values; ``df`` itself is not modified
        :rtype: pandas.DataFrame

        :Example:

        >>> import pandas as pd
        >>> from pyradon import Pyradon
        >>> gas_data = pd.read_csv("./Data/test_radon2.csv")
        >>> gas_data = Pyradon.clear_data(gas_data)
        """
        # Keep a row only when every one of its cells is populated.
        complete_rows = df.notnull().all(axis=1)
        return df[complete_rows]

    @staticmethod
    def check_coords(lat, lon, file):
        """Check that the coordinates lie inside the valid geographic range.

        Progress messages are printed to standard output.

        :param lat: Latitude data in pandas series
        :type lat: pandas.Series
        :param lon: Longitude data in pandas series
        :type lon: pandas.Series
        :param file: Path of the file the data came from; only its base name
            is used, in the error message
        :type file: str
        :raises ValueError: If latitude is not between -90 and 90 degrees
        :raises ValueError: If longitude is not between -180 and 180 degrees

        :Example:

        >>> import pandas as pd
        >>> from pyradon import Pyradon
        >>> gas_file = "./Data/test_radon2.csv"
        >>> gas_data = pd.read_csv(gas_file)
        >>> Pyradon.check_coords(gas_data['Lat'], gas_data['Lon'], gas_file)
        """
        print('Checking coordinates validity.')
        file_name = os.path.basename(file)
        if lat.gt(90).any() or lat.lt(-90).any():
            raise ValueError(' In {} file: Latitude should be between -90 and 90 degrees.'.format(file_name))
        if lon.gt(180).any() or lon.lt(-180).any():
            raise ValueError(' In {} file: Longitude should be between -180 and 180 degrees.'.format(file_name))
        print('Done!')

    @staticmethod
    def disp_results(pred, vars, coef, intercept, r2):
        """Print the fitted prediction function and the R2 score.

        Only the first target is displayed: the first entry of ``pred``, the
        first row of ``coef`` and the first entry of ``intercept``.

        :param pred: Name(s) of the dependent variable, e.g. ``Y.columns``
        :type pred: pandas.Index or list
        :param vars: Names of the independent variables, e.g. ``X.columns``
        :type vars: pandas.Index or list
        :param coef: Coefficients of the model, either 2-D
            ``(targets, features)`` or 1-D ``(features,)``
        :type coef: numpy.ndarray
        :param intercept: Intercept of the model (array or scalar)
        :type intercept: numpy.ndarray or float
        :param r2: R2 score of the model
        :type r2: float

        :Example:

        >>> import pandas as pd
        >>> from pyradon import Pyradon
        >>> gas_data = pd.read_csv("./Data/test_radon2.csv")
        >>> X = gas_data[['Temperature', 'Pressure']]
        >>> Y = gas_data[['Radon']]
        >>> Y_pred, intercept, coef, score = Pyradon.regression(X, Y)
        >>> Pyradon.disp_results(Y.columns, X.columns, coef, intercept, score)
        """
        names = list(vars)
        # A model fitted on a 1-D target exposes a 1-D coefficient vector and a
        # scalar intercept; normalise both so the indexing below always works.
        weights = np.atleast_2d(coef)
        offset = np.atleast_1d(intercept)[0]

        disp = "{} = {}".format(pred[0], np.around(offset, decimals=2))
        for index, name in enumerate(names):
            weight = weights[0, index]
            rounded = np.around(weight, decimals=2)
            # A negative coefficient already carries its own sign.
            template = " {} * {}" if weight < 0 else " + {} * {}"
            disp += template.format(rounded, name)

        print("The prediction function is: {}".format(disp))
        print("R2 score is: {}".format(round(r2, 2)))

    @staticmethod
    def regression(X, Y):
        """Fit an ordinary least squares linear model of ``Y`` on ``X``.

        The model is a ``sklearn.linear_model.LinearRegression`` and is
        evaluated on the same data it was fitted on.

        :param X: Training data (independent variables)
        :type X: DataFrame
        :param Y: Target values (dependent variable)
        :type Y: DataFrame, array-like
        :return: A tuple with the predicted values, the intercept, the
            coefficients and the R2 score of the model
        :rtype: tuple

        :Example:

        >>> import pandas as pd
        >>> from pyradon import Pyradon
        >>> gas_data = pd.read_csv("./Data/test_radon2.csv")
        >>> X = gas_data[['Temperature', 'Pressure']]
        >>> Y = gas_data[['Radon']]
        >>> Y_pred, intercept, coef, score = Pyradon.regression(X, Y)
        """
        model = linear_model.LinearRegression()
        model.fit(X, Y)
        predicted = model.predict(X)
        return predicted, model.intercept_, model.coef_, model.score(X, Y)

    # ------------------------------------------------------------------
    # Plotting
    # ------------------------------------------------------------------
    @staticmethod
    def plotlines(df1, df2, x_label=None, y_label=(None, None), **kwargs):
        """Plot two lines with different y axes and a shared x axis.

        :param df1: Data of the first line. X data must be on column 0 and
            Y data must be on column 1
        :type df1: DataFrame
        :param df2: Data of the second line. X data must be on column 0 and
            Y data must be on column 1
        :type df2: DataFrame
        :param x_label: Label of x-axis, defaults to None, in which case the
            name of the first column of ``df1`` is used
        :type x_label: str, optional
        :param y_label: Labels of the two y axes; a ``None`` entry falls back
            to the name of the plotted column, defaults to ``(None, None)``
        :type y_label: list or tuple, optional
        :raises ValueError: If ``y_label`` does not hold exactly two entries

        :Keyword Arguments:
            * **l1c** (*str*) -- Color of line 1, default ``'r'``
            * **l2c** (*str*) -- Color of line 2, default ``'b'``
            * **l1w** (*float*) -- Width of line 1, default ``1.``
            * **l2w** (*float*) -- Width of line 2, default ``1.``
            * **labelsize** (*float*) -- Size of the labels, default ``11.``
            * **title** (*str*) -- Title of the chart, default no title
            * **titlesize** (*float*) -- Size of the title, default ``13.``
            * **fontcolor** (*str*) -- Color of the fonts, default ``'black'``

        Based on matplotlib (matplotlib.org/)

        :Example:

        >>> import pandas as pd
        >>> from pyradon import Pyradon
        >>> gas_data = pd.read_csv("./Data/test_radon2.csv")
        >>> gas_data['Date / Hour'] = pd.to_datetime(gas_data['Date / Hour'], format='%m/%d/%Y %H:%M')
        >>> df1 = gas_data[['Date / Hour', 'Temperature']]
        >>> df2 = gas_data[['Date / Hour', 'Pressure']]
        >>> Pyradon.plotlines(df1, df2)
        >>> Pyradon.plotlines(df1, df2, x_label='Date', y_label=['Temperature (C)', None], l1c='g', l1w=2.)
        >>> Pyradon.plotlines(df1, df2, x_label='Date', y_label=['Temperature (C)', 'Pressure (mBar)'],
        ...                   l1c='g', l1w=2., l2c='orange', l2w=4., fontcolor='red', title='Test', labelsize=9.)
        """
        Pyradon._twin_axis_chart(
            df1, df2, x_label, y_label, kwargs,
            bar_first=False,
            first_style={'color': kwargs.get('l1c', 'r'), 'linewidth': kwargs.get('l1w', 1.)},
            second_style={'color': kwargs.get('l2c', 'b'), 'linewidth': kwargs.get('l2w', 1.)},
        )

    @staticmethod
    def plot_different_sized_cicles(x, y, grouped, values, labels, x_label=None, y_label=None, **kwargs):
        """Plot differently sized circles of a grouped dataframe on a map.

        The map uses an orthographic projection centred on the mean of the
        per-group mean coordinates. Each group is drawn with the marker size
        equal to its group key and the label paired with that key in
        ``values``/``labels``; a group whose key is not in ``values`` falls
        back to the size and label found at the group's position.

        :param x: Column name of the DataFrame that contains the longitude values
        :type x: str
        :param y: Column name of the DataFrame that contains the latitude values
        :type y: str
        :param grouped: Grouped dataset
        :type grouped: pandas.DataFrameGroupBy
        :param values: Values the data are grouped by, which are also the
            circle size of each category
        :type values: list
        :param labels: Legend labels, ``labels[i]`` describing ``values[i]``
        :type labels: list
        :param x_label: Label of x-axis, defaults to None (no label)
        :type x_label: str, optional
        :param y_label: Label of y-axis, defaults to None (no label)
        :type y_label: str, optional

        :Keyword Arguments:
            * **c** (*str*) -- Color of the circles, default ``'r'``
            * **alpha** (*float*) -- The degree of transparency, default ``0.1``
            * **legendtitle** (*str*) -- Title of the legend, default no title
            * **labelsize** (*float*) -- Size of the labels, default ``11.``
            * **title** (*str*) -- Title of the chart, default no title
            * **titlesize** (*float*) -- Size of the title, default ``13.``
            * **fontcolor** (*str*) -- Color of the fonts, default ``'black'``

        Based on matplotlib (matplotlib.org/) and cartopy

        :Example:

        >>> import pandas as pd
        >>> import numpy as np
        >>> from pyradon import Pyradon
        >>> earthquake_data = pd.read_csv("./Data/02_Earthquakes_Etna_UTM.csv")
        >>> magnitude = earthquake_data['Magnitude']
        >>> conditions = [magnitude >= 4.0,
        ...               (magnitude >= 3.0) & (magnitude < 4.0),
        ...               (magnitude >= 2.0) & (magnitude < 3.0),
        ...               magnitude < 2.0]
        >>> values = [400, 200, 100, 50]
        >>> labels = ['>= 4.0', '3.0 - 3.9', '2.0 - 2.9', '<= 1.9']
        >>> earthquake_data['Category'] = np.select(conditions, values)
        >>> grouped = earthquake_data.groupby('Category')
        >>> Pyradon.plot_different_sized_cicles('Lon', 'Lat', grouped, values, labels)
        >>> Pyradon.plot_different_sized_cicles('Lon', 'Lat', grouped, values, labels,
        ...                                     x_label='Longitude', y_label='Latitude', c='g', alpha=0.4,
        ...                                     title='Earthquake Magnitude', legendtitle='Magnitude')
        """
        color = kwargs.get('c', 'r')
        alpha = kwargs.get('alpha', 0.1)
        legendtitle = kwargs.get('legendtitle')
        labelsize, title, titlesize, fontcolor = Pyradon._text_options(kwargs)
        font = {'color': fontcolor}

        # Average only the coordinate columns: averaging the whole frame
        # fails on recent pandas when a non-numeric column is present.
        central_lon = grouped[x].mean().mean()
        central_lat = grouped[y].mean().mean()

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1, projection=ccrs.Orthographic(
            central_longitude=central_lon, central_latitude=central_lat))
        ax.stock_img()
        ax.coastlines()
        ax.gridlines()

        label_of = dict(zip(values, labels))
        for position, (key, group) in enumerate(grouped):
            if key in label_of:
                # Pair by group key so a missing category or an unsorted
                # ``values`` list cannot shift sizes and labels.
                size, label = key, label_of[key]
            else:
                size, label = values[position], labels[position]
            # The data are lon/lat degrees, not orthographic map coordinates.
            ax.scatter(group[x], group[y], c=color, s=size, label=label, alpha=alpha,
                       transform=ccrs.PlateCarree())

        if x_label is not None:
            plt.xlabel(x_label, fontsize=labelsize, fontdict=font)
        if y_label is not None:
            plt.ylabel(y_label, fontsize=labelsize, fontdict=font)
        if title is not None:
            plt.title(title, fontdict={'fontsize': titlesize, 'color': fontcolor})
        plt.tight_layout()
        # ``title=None`` is matplotlib's own default, i.e. no legend title.
        plt.legend(loc="upper right", bbox_to_anchor=(1, 1), bbox_transform=ax.transAxes,
                   frameon=False, title=legendtitle)
        plt.show()

    @staticmethod
    def scatterplot(x, y, x_label=None, y_label=None, invert_y_axis=True, **kwargs):
        """Draw a simple scatterplot of two variables, optionally inverting y.

        The points are drawn on matplotlib's current axes.

        :param x: Data to be plotted on x axis
        :type x: Series
        :param y: Data to be plotted on y axis
        :type y: Series
        :param x_label: Label of x axis, defaults to None (no label)
        :type x_label: str, optional
        :param y_label: Label of y axis, defaults to None (no label)
        :type y_label: str, optional
        :param invert_y_axis: Invert y axis, defaults to True
        :type invert_y_axis: bool, optional

        :Keyword Arguments:
            * **c** (*str*) -- Color of the markers, default ``'r'``
            * **alpha** (*float*) -- The degree of transparency, default ``0.1``
            * **s** (*float*) -- Size of the markers, default ``9``
            * **labelsize** (*float*) -- Size of the labels, default ``11.``
            * **title** (*str*) -- Title of the chart, default no title
            * **titlesize** (*float*) -- Size of the title, default ``13.``
            * **fontcolor** (*str*) -- Color of the fonts, default ``'black'``

        Based on matplotlib (matplotlib.org/)

        :Example:

        >>> import pandas as pd
        >>> from pyradon import Pyradon
        >>> earthquake_data = pd.read_csv("./Data/02_Earthquakes_Etna_UTM.csv")
        >>> Pyradon.scatterplot(earthquake_data['Lon'], earthquake_data['Depth'])
        >>> Pyradon.scatterplot(earthquake_data['Lon'], earthquake_data['Depth'],
        ...                     x_label='Longitude', y_label='Depth (km)', invert_y_axis=True,
        ...                     s=20., c='g', alpha=0.4, labelsize=9., fontcolor='black',
        ...                     title='Scatterplot with Depth and Longitude', titlesize=13.)
        """
        labelsize, title, titlesize, fontcolor = Pyradon._text_options(kwargs)
        font = {'color': fontcolor}

        plt.scatter(x, y, c=kwargs.get('c', 'r'), s=kwargs.get('s', 9),
                    alpha=kwargs.get('alpha', 0.1))
        if x_label is not None:
            plt.xlabel(x_label, fontsize=labelsize, fontdict=font)
        if y_label is not None:
            plt.ylabel(y_label, fontsize=labelsize, fontdict=font)
        if title is not None:
            plt.title(title, fontdict={'fontsize': titlesize, 'color': fontcolor})
        plt.tight_layout()

        ax = plt.gca()
        ax.tick_params(direction='out', labelsize=labelsize, labelcolor=fontcolor)
        if invert_y_axis:
            ax.invert_yaxis()
        plt.show()

    @staticmethod
    def plotlinebar(df1, df2, x_label=None, y_label=(None, None), **kwargs):
        """Plot bars and a line with different y axes and a shared x axis.

        :param df1: Data of the bars. X data must be on column 0 and Y data
            must be on column 1
        :type df1: DataFrame
        :param df2: Data of the line. X data must be on column 0 and Y data
            must be on column 1
        :type df2: DataFrame
        :param x_label: Label of x-axis, defaults to None, in which case the
            name of the first column of ``df1`` is used
        :type x_label: str, optional
        :param y_label: Labels of the two y axes (bars first, line second); a
            ``None`` entry falls back to the name of the plotted column,
            defaults to ``(None, None)``
        :type y_label: list or tuple, optional
        :raises ValueError: If ``y_label`` does not hold exactly two entries

        :Keyword Arguments:
            * **lc** (*str*) -- Color of the line, default ``'r'``
            * **bc** (*str*) -- Color of the bars, default ``'b'``
            * **lw** (*float*) -- Width of the line, default ``1.``
            * **bw** (*float*) -- Passed to the bars as ``linewidth``, i.e. the
              width of the bar edges, default ``1.``
            * **labelsize** (*float*) -- Size of the labels, default ``11.``
            * **title** (*str*) -- Title of the chart, default no title
            * **titlesize** (*float*) -- Size of the title, default ``13.``
            * **fontcolor** (*str*) -- Color of the fonts, default ``'black'``

        Based on matplotlib (matplotlib.org/)

        :Example:

        >>> import pandas as pd
        >>> import numpy as np
        >>> from pyradon import Pyradon
        >>> earthquake_data = pd.read_csv("./Data/02_Earthquakes_Etna_UTM.csv")
        >>> # Energy in Joules^1/2
        >>> earthquake_data['Energy'] = earthquake_data['Magnitude'].apply(lambda m: np.sqrt(10 ** (4.8 + 1.5 * m)))
        >>> earthquake_data['Date'] = pd.to_datetime(earthquake_data.DateTime, format='%m/%d/%Y %H:%M')
        >>> earthquake_data = earthquake_data.sort_values(by='Date')
        >>> earthquake_data['Cumsum'] = earthquake_data.Energy.cumsum()
        >>> earthquake_data['Date_only'] = earthquake_data.Date.dt.date
        >>> daily = earthquake_data.groupby('Date_only').agg(**{'Number of events': ('Cumsum', 'size'),
        ...                                                     'Cumsum': ('Cumsum', 'max')})
        >>> daily['Index'] = daily.index
        >>> df1 = daily[['Index', 'Number of events']]
        >>> df2 = daily[['Index', 'Cumsum']]
        >>> Pyradon.plotlinebar(df1, df2, x_label='Date', y_label=['Number of Events', 'J^(1/2)'],
        ...                     lc='g', lw=2., bc='orange', bw=4., fontcolor='red', title='Test', labelsize=9.)
        """
        Pyradon._twin_axis_chart(
            df1, df2, x_label, y_label, kwargs,
            bar_first=True,
            first_style={'color': kwargs.get('bc', 'b'), 'linewidth': kwargs.get('bw', 1.)},
            second_style={'color': kwargs.get('lc', 'r'), 'linewidth': kwargs.get('lw', 1.)},
        )

    @staticmethod
    def normal_probplot(data, x_label=None, y_label=None, **kwargs):
        """Plot sample data against the quantiles of a normal distribution.

        The plot is generated by ``scipy.stats.probplot`` (with the fitted
        line and the R2 annotation) and then restyled.

        :param data: Sample data from which probplot creates the plot
        :type data: array-like
        :param x_label: Label of x-axis, defaults to None, in which case
            ``"Expected Normal Value"`` is used
        :type x_label: str, optional
        :param y_label: Label of y-axis, defaults to None, in which case
            ``"Ordered Values"`` is used
        :type y_label: str, optional

        :Keyword Arguments:
            * **lc** (*str*) -- Color of the fitted line, default ``'r'``
            * **mc** (*str*) -- Color of the markers, default ``'b'``
            * **lw** (*float*) -- Width of the fitted line, default ``1.``
            * **mw** (*float*) -- Size of the markers, default ``1.``
            * **labelsize** (*float*) -- Size of the labels, default ``11.``
            * **title** (*str*) -- Title of the chart; when omitted the title
              set by ``probplot`` is kept
            * **titlesize** (*float*) -- Size of the title, default ``13.``
            * **fontcolor** (*str*) -- Color of the fonts, default ``'black'``

        Based on matplotlib (matplotlib.org/)

        :Example:

        >>> import pandas as pd
        >>> from pyradon import Pyradon
        >>> gas_data = pd.read_csv("./Data/test_radon2.csv")
        >>> X = gas_data[['Temperature', 'Pressure']]
        >>> Y = gas_data[['Radon']]
        >>> Y_pred, intercept, coef, score = Pyradon.regression(X, Y)
        >>> gas_data['Predicted Radon'] = pd.DataFrame(Y_pred)[0]
        >>> gas_data['Filtered Radon'] = gas_data['Radon'] - gas_data['Predicted Radon']
        >>> Pyradon.normal_probplot(gas_data['Filtered Radon'], lc='g', lw=3., labelsize=28.)
        >>> Pyradon.normal_probplot(gas_data['Filtered Radon'], mc='r', mw=4., lc='g', lw=1.2, labelsize=9.,
        ...                         title='Probability Plot of Filtered Radon', titlesize=13., fontcolor='r')
        """
        labelsize, title, titlesize, fontcolor = Pyradon._text_options(kwargs)
        font = {'color': fontcolor}

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        stats.probplot(data, dist="norm", fit=True, rvalue=True, plot=ax)

        # probplot draws the sample markers first and the fitted line second.
        drawn = ax.get_lines()
        markers, fit_line = drawn[0], drawn[1]
        markers.set_color(kwargs.get('mc', 'b'))
        markers.set_markersize(kwargs.get('mw', 1.))
        fit_line.set_linewidth(kwargs.get('lw', 1.))
        fit_line.set_color(kwargs.get('lc', 'r'))

        # The R2 annotation is the text probplot adds when ``rvalue=True``.
        for annotation in ax.texts:
            annotation.set_fontsize(labelsize)
            annotation.set_color(fontcolor)

        ax.tick_params(direction='out', labelsize=labelsize, labelcolor=fontcolor)
        ax.set_xlabel("Expected Normal Value" if x_label is None else x_label,
                      fontsize=labelsize, fontdict=font)
        ax.set_ylabel("Ordered Values" if y_label is None else y_label,
                      fontsize=labelsize, fontdict=font)
        if title is not None:
            plt.title(title, fontdict={'fontsize': titlesize, 'color': fontcolor})
        plt.show()

    @staticmethod
    def plotline(x, y, x_label=None, y_label=None, **kwargs):
        """Plot a simple line.

        :param x: Data to be plotted on x axis
        :type x: Series
        :param y: Data to be plotted on y axis
        :type y: Series
        :param x_label: Label of x axis, defaults to None (no label)
        :type x_label: str, optional
        :param y_label: Label of y axis, defaults to None (no label)
        :type y_label: str, optional

        :Keyword Arguments:
            * **lc** (*str*) -- Color of the line, default ``'r'``
            * **lw** (*float*) -- Width of the line, default ``1.``
            * **labelsize** (*float*) -- Size of the labels, default ``11.``
            * **title** (*str*) -- Title of the chart, default no title
            * **titlesize** (*float*) -- Size of the title, default ``13.``
            * **fontcolor** (*str*) -- Color of the fonts, default ``'black'``

        Based on matplotlib (matplotlib.org/)

        :Example:

        >>> import pandas as pd
        >>> from pyradon import Pyradon
        >>> gas_data = pd.read_csv("./Data/test_radon2.csv")
        >>> X = gas_data[['Temperature', 'Pressure']]
        >>> Y = gas_data[['Radon']]
        >>> Y_pred, intercept, coef, score = Pyradon.regression(X, Y)
        >>> gas_data['Predicted Radon'] = pd.DataFrame(Y_pred)[0]
        >>> gas_data['Filtered Radon'] = gas_data['Radon'] - gas_data['Predicted Radon']
        >>> x = pd.to_datetime(gas_data['Date / Hour'], format='%m/%d/%Y %H:%M')
        >>> y = gas_data[['Filtered Radon']]
        >>> Pyradon.plotline(x, y)
        >>> Pyradon.plotline(x, y, x_label='Date', y_label='Filtered Radon', lc='g', lw=0.1, labelsize=7.,
        ...                  title='Filtered Radon Time Series', titlesize=13., fontcolor='black')
        """
        labelsize, title, titlesize, fontcolor = Pyradon._text_options(kwargs)
        font = {'color': fontcolor}

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        if x_label is not None:
            ax.set_xlabel(x_label, fontsize=labelsize, fontdict=font)
        if y_label is not None:
            ax.set_ylabel(y_label, fontsize=labelsize, fontdict=font)
        ax.plot(x, y, color=kwargs.get('lc', 'r'), linewidth=kwargs.get('lw', 1.))
        if title is not None:
            plt.title(title, fontdict={'fontsize': titlesize, 'color': fontcolor})
        fig.autofmt_xdate()
        ax.tick_params(direction='out', labelsize=labelsize, labelcolor=fontcolor)
        fig.tight_layout()  # otherwise the right y-label is slightly clipped
        plt.show()