# test_util_py

import os
import sys

import pandas as pd

from util import *

5 

# Directory holding the fixture files, resolved against the first entry of
# sys.path.
# NOTE(review): this presumes sys.path[0] is the folder that contains this
# test module (true when the file is run as a script) — confirm for the test
# runner actually in use.
test_data_dir = os.path.join(sys.path[0], "test_data")

# Research inputs and, position for position, the files with their expected
# output (the tests pair the two lists by index).
research_datasets = ['mockResearch2.xlsx', 'mockResearch.csv']
correct_research_datasets = ['testRightResult_mr2.xlsx', 'testRightResult_mr.xlsx']

# Practicum input and the expected frames used by the practicum tests.
practicum_dataset = 'mockPracticum.xlsx'
correct_practicum_dataset = 'testRightResult_Practicum.xlsx'
correct_cleaned_dataset = 'test_cleaned.xlsx'
correct_dirty_dataset = 'test_dirty.xlsx'

# Expected result of the full research + practicum pipeline.
correct_final_dataset = 'testRightResult.xlsx'

14 

def read_dataset(file_name, *args, **kwargs):
    """Load a fixture file from ``test_data_dir`` into a DataFrame.

    The reader is chosen from the file extension: ``csv`` uses
    ``pd.read_csv``; ``xlsx`` and ``xls`` use ``pd.read_excel``. Any extra
    positional or keyword arguments are forwarded to that reader.

    Args:
        file_name: Name of the file, relative to ``test_data_dir``.
        *args: Positional arguments passed through to the pandas reader.
        **kwargs: Keyword arguments passed through to the pandas reader.

    Returns:
        Whatever the selected pandas reader returns.

    Raises:
        ValueError: If the extension is missing or not one of the
            supported types.
    """
    # splitext looks only at the last dot, so names such as "mock.v2.csv"
    # resolve to "csv" instead of the first dotted segment.
    extension = os.path.splitext(file_name)[1].lstrip('.').lower()

    if extension == 'csv':
        reader = pd.read_csv
    elif extension in ('xlsx', 'xls'):
        reader = pd.read_excel
    else:
        # Fail loudly instead of returning None to the caller.
        raise ValueError(
            "Unsupported dataset file type for {!r}; expected csv, xlsx or xls".format(file_name)
        )

    return reader(os.path.join(test_data_dir, file_name), *args, **kwargs)

21 

def test_location():
    """Check location() against the stored expected frame of each research file."""
    for idx, source_name in enumerate(research_datasets):
        frame = read_dataset(source_name)
        # The return value is ignored; the assertion below inspects ``frame``.
        location(frame)
        expected = read_dataset(correct_research_datasets[idx])
        # Compare only the columns that the expected file defines.
        pd.testing.assert_frame_equal(frame[expected.columns], expected)

28 

29 

def test_process_research():
    """Run process_research on all research fixtures and compare with the expected frames."""
    input_paths = [os.path.join(test_data_dir, name) for name in research_datasets]
    data_all_research = process_research(input_paths)

    # Expected frames are concatenated in list order.
    # NOTE(review): an earlier comment here called for reverse order, but the
    # slice in use was [::1] (a same-order copy, not [::-1]) — confirm which
    # order process_research is meant to produce.
    expected_frames = [read_dataset(name) for name in correct_research_datasets]
    correct_all_research = pd.concat(expected_frames)

    # Compare only the columns present in the expected data.
    pd.testing.assert_frame_equal(
        data_all_research[correct_all_research.columns],
        correct_all_research,
    )

36 

def test_find_dirty():
    """Check the second frame returned by find_dirty() against the stored expectation."""
    practicum_path = os.path.join(test_data_dir, practicum_dataset)
    # find_dirty returns a pair; only its second element is verified here.
    _, needs_cleaning = find_dirty(pd.read_excel(practicum_path))

    # Load the expectation as strings with pandas' default NaN markers disabled.
    expected = read_dataset(correct_dirty_dataset, dtype=str, keep_default_na=False)
    pd.testing.assert_frame_equal(needs_cleaning, expected)

41 

def test_process_practicum():
    """Feed two batches through process_practicum and compare the accumulated frame."""
    columns = ['Type', 'County', 'City', 'State', 'Country', 'Latitude', 'Longitude']

    practicum_path = os.path.join(test_data_dir, practicum_dataset)
    # Only the first frame returned by find_dirty is used in this test.
    data_cleaned, _ = find_dirty(pd.read_excel(practicum_path))

    # Two empty accumulators sharing the same column layout.
    accumulated = pd.DataFrame(columns=columns)
    rejected = pd.DataFrame(columns=columns)

    # First batch: the frame from find_dirty. Second batch: the stored
    # cleaned fixture.
    accumulated, rejected = process_practicum(data_cleaned, accumulated, rejected)
    accumulated, rejected = process_practicum(
        read_dataset(correct_cleaned_dataset), accumulated, rejected
    )

    expected = read_dataset(correct_practicum_dataset)
    # Restrict both sides to the listed columns before comparing.
    pd.testing.assert_frame_equal(accumulated[columns], expected[columns])

52 

def test_integration():
    """Run the research and practicum steps, join them, and compare with the stored result."""
    columns = [
        'Type', 'County', 'City', 'State', 'Country',
        'Latitude', 'Longitude', 'Organization',
    ]

    # Research side: process every research fixture in one call.
    research_paths = [os.path.join(test_data_dir, name) for name in research_datasets]
    data_all_research = process_research(research_paths)

    # Practicum side: only the first frame returned by find_dirty is used.
    practicum_path = os.path.join(test_data_dir, practicum_dataset)
    data_cleaned, _ = find_dirty(pd.read_excel(practicum_path))

    # Two empty accumulators sharing the same column layout.
    accumulated = pd.DataFrame(columns=columns)
    rejected = pd.DataFrame(columns=columns)

    # First batch: the frame from find_dirty. Second batch: the stored
    # cleaned fixture.
    accumulated, rejected = process_practicum(data_cleaned, accumulated, rejected)
    accumulated, rejected = process_practicum(
        read_dataset(correct_cleaned_dataset), accumulated, rejected
    )

    final = join_ds(data_all_research, accumulated)

    expected_final = read_dataset(correct_final_dataset)
    pd.testing.assert_frame_equal(final, expected_final)