# Coverage for test_util.py: 100%
# 54 statements
import os
import sys

import pandas as pd

# The star import supplies the functions under test that this module calls
# (location, process_research, find_dirty, process_practicum, join_ds).
from util import *
# Directory holding the fixture files, resolved against the first entry of
# sys.path.
# NOTE(review): sys.path[0] depends on how the test run is launched; confirm
# it is the directory that contains "test_data" for every supported invocation.
test_data_dir = os.path.join(sys.path[0], "test_data")

# Raw research inputs and their expected results. The two lists are paired by
# position: research_datasets[i] is checked against correct_research_datasets[i].
research_datasets = ['mockResearch2.xlsx', 'mockResearch.csv']
correct_research_datasets = ['testRightResult_mr2.xlsx', 'testRightResult_mr.xlsx']

# Raw practicum input and the expected result of processing it.
practicum_dataset = 'mockPracticum.xlsx'
correct_practicum_dataset = 'testRightResult_Practicum.xlsx'

# Fixtures used around find_dirty: the cleaned rows that the tests feed back
# into process_practicum, and the expected "to be cleaned" output.
correct_cleaned_dataset = 'test_cleaned.xlsx'
correct_dirty_dataset = 'test_dirty.xlsx'

# Expected result of the full pipeline (research joined with practicum).
correct_final_dataset = 'testRightResult.xlsx'
def read_dataset(file_name, *args, **kwargs):
    """Load a fixture file from ``test_data_dir`` into a DataFrame.

    The reader is chosen from the text after the last ``.`` in ``file_name``:
    ``csv`` is read with ``pandas.read_csv``; ``xlsx`` and ``xls`` are read
    with ``pandas.read_excel``.

    Args:
        file_name: Name of the file, relative to ``test_data_dir``.
        *args: Extra positional arguments forwarded to the pandas reader.
        **kwargs: Extra keyword arguments forwarded to the pandas reader.

    Returns:
        Whatever the selected pandas reader returns for the file.

    Raises:
        ValueError: If the extension is not ``csv``, ``xlsx`` or ``xls``.
    """
    file_type = file_name.split('.')[-1]
    if file_type == 'csv':
        reader = pd.read_csv
    elif file_type in ('xlsx', 'xls'):
        reader = pd.read_excel
    else:
        # Fail loudly here instead of handing None back to the caller, which
        # would only surface later as an unrelated-looking error.
        raise ValueError(
            f"Unsupported dataset type for {file_name!r}; "
            "expected a .csv, .xlsx or .xls file"
        )
    return reader(os.path.join(test_data_dir, file_name), *args, **kwargs)
def test_location():
    """Check ``location`` against the expected result for each research file.

    Each raw research dataset is loaded, passed to ``location``, and then
    compared with its expected frame on the expected frame's columns only.
    """
    for raw_name, expected_name in zip(research_datasets, correct_research_datasets):
        data = read_dataset(raw_name)
        # The return value is ignored and ``data`` itself is compared below,
        # so ``location`` presumably modifies the frame in place -- confirm in util.
        location(data)
        correct = read_dataset(expected_name)
        pd.testing.assert_frame_equal(data[correct.columns], correct)
def test_process_research():
    """Check ``process_research`` on the full list of research files.

    ``process_research`` receives the full paths of every raw research file;
    its result is compared with the concatenation of the expected per-file
    frames, restricted to the expected columns.
    """
    research_paths = [os.path.join(test_data_dir, name) for name in research_datasets]
    data_all_research = process_research(research_paths)
    # The expected frames are concatenated in reverse order of the input list.
    correct_all_research = pd.concat(
        [read_dataset(name) for name in correct_research_datasets[::-1]]
    )
    # Compare only the columns present in the expected result.
    pd.testing.assert_frame_equal(
        data_all_research[correct_all_research.columns], correct_all_research
    )
def test_find_dirty():
    """Check the "to be cleaned" frame returned by ``find_dirty``.

    Only the second element of the returned pair is verified here; the first
    (cleaned) element is not asserted on in this test.
    """
    _, data_to_be_cleaned = find_dirty(read_dataset(practicum_dataset))
    # The expected frame is read as strings with empty cells kept as '' rather
    # than NaN (dtype=str, keep_default_na=False).
    correct_to_be_cleaned = read_dataset(
        correct_dirty_dataset, dtype=str, keep_default_na=False
    )
    pd.testing.assert_frame_equal(data_to_be_cleaned, correct_to_be_cleaned)
def test_process_practicum():
    """Check ``process_practicum`` over two successive batches of rows.

    The first batch is the cleaned frame returned by ``find_dirty``; the
    second is the ``correct_cleaned_dataset`` fixture. Both calls thread the
    same accumulator frames through, and the accumulated practicum frame is
    compared with the expected result on ``columns``.
    """
    columns = ['Type', 'County', 'City', 'State', 'Country', 'Latitude', 'Longitude']
    data_cleaned, _ = find_dirty(read_dataset(practicum_dataset))

    # Empty accumulators with the expected column layout.
    data_all_practicum = pd.DataFrame(columns=columns)
    data_unusable = pd.DataFrame(columns=columns)

    data_all_practicum, data_unusable = process_practicum(
        data_cleaned, data_all_practicum, data_unusable
    )
    cleaned = read_dataset(correct_cleaned_dataset)
    data_all_practicum, data_unusable = process_practicum(
        cleaned, data_all_practicum, data_unusable
    )

    correct_all_practicum = read_dataset(correct_practicum_dataset)
    pd.testing.assert_frame_equal(
        data_all_practicum[columns], correct_all_practicum[columns]
    )
def test_integration():
    """Run the research and practicum pipelines and check the joined result.

    Research files go through ``process_research``; practicum rows go through
    ``find_dirty`` and two ``process_practicum`` calls (the cleaned output of
    ``find_dirty``, then the ``correct_cleaned_dataset`` fixture). The two
    results are combined with ``join_ds`` and compared with the expected
    final frame.
    """
    columns = [
        'Type', 'County', 'City', 'State', 'Country',
        'Latitude', 'Longitude', 'Organization',
    ]
    research_paths = [os.path.join(test_data_dir, name) for name in research_datasets]
    data_all_research = process_research(research_paths)

    data_cleaned, _ = find_dirty(read_dataset(practicum_dataset))

    # Empty accumulators with the expected column layout.
    data_all_practicum = pd.DataFrame(columns=columns)
    data_unusable = pd.DataFrame(columns=columns)

    data_all_practicum, data_unusable = process_practicum(
        data_cleaned, data_all_practicum, data_unusable
    )
    cleaned = read_dataset(correct_cleaned_dataset)
    data_all_practicum, data_unusable = process_practicum(
        cleaned, data_all_practicum, data_unusable
    )

    final = join_ds(data_all_research, data_all_practicum)

    correct_final = read_dataset(correct_final_dataset)
    pd.testing.assert_frame_equal(final, correct_final)