#!/usr/bin/python -tt
import csv
import math
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
class AnalyzeStudents:
    """Analysis of student grades and ECTS credits read from a CSV file.

    The work is organised as four exercise problems (histograms, mean and
    quantiles, sample variance, significance of the female/male grade
    difference) plus a set of small statistical helper methods.
    """

    def __init__(self, filename):
        """Create the analysis and parse the CSV file ``filename``."""
        self.parse_input(filename)

    def parse_input(self, filename):
        """Read the student records from the CSV file ``filename``.

        The three result lists are reset to empty before the file is opened.
        """
        # prepare arrays for further processing
        self.gradesm = []  # grades of male students
        self.gradesf = []  # grades of female students
        self.credits = []  # ECTS credits

        # open and parse CSV with student information
        # 0: gender
        # 1: ECTS credits
        # 2: grade * 100
        #
        # Plain 'r' replaces the former 'rU': the 'U' flag was removed in
        # Python 3.11 (open() raises ValueError), and on Python 3 text mode
        # already applies universal newlines, so behaviour is unchanged.
        # NOTE(review): the csv docs recommend newline='' on Python 3 --
        # consider it if fields may contain embedded newlines.
        with open(filename, 'r') as csvfile:
            ...

        # concatenate female and male grades for full list
        ...

    def plot_histograms(self):
        """Problem 1: histograms of the grades and of the ECTS credits."""
        #
        # problem 1: histogramming
        #
        plt.figure(1, (15, 5))

        # histograms of grades (m and f stacked)
        plt.subplot(1, 3, 1)
        ...
        plt.xlabel('Grade')
        plt.ylabel('Number of Students')

        # histogram of credits
        plt.subplot(1, 3, 2)
        plt.xlabel('ECTS Credits')
        ...

    def print_mean_quantiles(self):
        """Problem 2: mean values, 10% and 50% quantiles."""
        #
        # problem 2: mean values, 10% and 50% quantiles
        #
        ...

    def print_variance(self):
        """Problem 3: sample variance."""
        #
        # problem 3: sample variance
        #
        ...

    def print_significance(self):
        """Problem 4: significance of the female/male grade deviation."""
        #
        # problem 4: significance of deviation between grades of female and male students
        #
        ...

    #
    # helper methods to compute mean, quantiles, variance etc.
    #

    # mean
    def mean(self, sample):
        """Mean value of ``sample``."""
        ...

    # quantiles
    def quantile(self, sample, percentage):
        """Quantile of ``sample`` for the given ``percentage``."""
        # convert percentage into index
        # sorted() builds a new list, so the caller's sample keeps its order.
        sorted_sample = sorted(sample)
        ...

    # sample variance
    def sample_variance(self, sample):
        """Sample variance of ``sample``."""
        ...

    # unbiased sample variance with Bessel correction
    def unbiased_variance(self, sample):
        """Unbiased sample variance of ``sample`` (Bessel correction)."""
        ...
if __name__ == '__main__':
    # Script entry point: load 'studierende.csv', then run the four
    # exercise steps in order (histograms, mean/quantiles, variance,
    # significance). The calls must stay inside the guard so that importing
    # this module does not trigger the analysis.
    ana = AnalyzeStudents('studierende.csv')
    ana.plot_histograms()
    ana.print_mean_quantiles()
    ana.print_variance()
    ana.print_significance()