386 lines
9.0 KiB
C++
386 lines
9.0 KiB
C++
// THIS IS THE PROVIDED CODE FOR PROGRAM #2, DSA 1, SPRING 2021
|
|
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <list>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <algorithm>
|
|
#include <ctime>
|
|
#include <cmath>
|
|
#include <cstring>
|
|
#include <cctype>
|
|
#include <cstdlib>
|
|
#include <iterator>
|
|
#include <vector>
|
|
#include <map>
|
|
|
|
using namespace std;
|
|
|
|
// A simple class; each object holds four public fields
|
|
class Data {
|
|
public:
|
|
string lastName;
|
|
string firstName;
|
|
string ssn;
|
|
};
|
|
|
|
// Load the data from a specified input file
|
|
void loadDataList(list<Data *> &l, const string &filename) {
|
|
|
|
ifstream input(filename);
|
|
if (!input) {
|
|
cerr << "Error: could not open " << filename << "\n";
|
|
exit(1);
|
|
}
|
|
|
|
// The first line indicates the size
|
|
string line;
|
|
getline(input, line);
|
|
stringstream ss(line);
|
|
int size;
|
|
ss >> size;
|
|
|
|
// Load the data
|
|
for (int i = 0; i < size; i++) {
|
|
getline(input, line);
|
|
stringstream ss2(line);
|
|
Data *pData = new Data();
|
|
ss2 >> pData->lastName >> pData->firstName >> pData->ssn;
|
|
l.push_back(pData);
|
|
}
|
|
|
|
input.close();
|
|
}
|
|
|
|
// Output the data to a specified output file
|
|
void writeDataList(const list<Data *> &l, const string &filename) {
|
|
|
|
ofstream output(filename);
|
|
if (!output) {
|
|
cerr << "Error: could not open " << filename << "\n";
|
|
exit(1);
|
|
}
|
|
|
|
// Write the size first
|
|
int size = l.size();
|
|
output << size << "\n";
|
|
|
|
// Write the data
|
|
for (auto pData:l) {
|
|
output << pData->lastName << " "
|
|
<< pData->firstName << " "
|
|
<< pData->ssn << "\n";
|
|
}
|
|
|
|
output.close();
|
|
}
|
|
|
|
// Sort the data according to a specified field
|
|
// (Implementation of this function will be later in this file)
|
|
void sortDataList(list<Data *> &);
|
|
|
|
// The main function calls routines to get the data, sort the data,
|
|
// and output the data. The sort is timed according to CPU time.
|
|
int main() {
|
|
string filename;
|
|
cout << "Enter name of input file: ";
|
|
cin >> filename;
|
|
list<Data *> theList;
|
|
loadDataList(theList, filename);
|
|
|
|
cout << "Data loaded.\n";
|
|
|
|
cout << "Executing sort...\n";
|
|
clock_t t1 = clock();
|
|
sortDataList(theList);
|
|
clock_t t2 = clock();
|
|
double timeDiff = ((double) (t2 - t1)) / CLOCKS_PER_SEC;
|
|
|
|
cout << "Sort finished. CPU time was " << timeDiff << " seconds.\n";
|
|
|
|
cout << "Enter name of output file: ";
|
|
cin >> filename;
|
|
writeDataList(theList, filename);
|
|
|
|
return 0;
|
|
}
|
|
|
|
// -------------------------------------------------
|
|
// YOU MAY NOT CHANGE OR ADD ANY CODE ABOVE HERE !!!
|
|
// -------------------------------------------------
|
|
|
|
// You may add global variables, functions, and/or
|
|
// class defintions here if you wish.
|
|
|
|
bool compareFirstName(Data* first, Data* last) {
|
|
return first->firstName.compare(last->firstName) < 0;
|
|
}
|
|
|
|
bool compareLastName(Data* first, Data* last) {
|
|
return first->lastName < last->lastName;
|
|
}
|
|
|
|
bool compareSSN(Data* first, Data* last) {
|
|
return first->ssn < last->ssn;
|
|
}
|
|
|
|
bool compare(Data* first, Data* last) {
|
|
int comp = first->lastName.compare(last->lastName);
|
|
if (comp != 0)
|
|
return comp < 0;
|
|
comp = first->firstName.compare(last->firstName);
|
|
if (comp != 0)
|
|
return comp < 0;
|
|
return first->ssn.compare(last->ssn) < 0;
|
|
}
|
|
|
|
bool compareFirstNameSSNOnly(Data* first, Data* last) {
|
|
int comp = first->firstName.compare(last->firstName);
|
|
if (comp != 0)
|
|
return comp < 0;
|
|
return first->ssn.compare(last->ssn) < 0;
|
|
}
|
|
|
|
void sortBySSN(list<Data *> &l) {
|
|
int BUCKET_SIZE = 10;
|
|
list<Data *> buckets[BUCKET_SIZE];
|
|
for (int i = 10; i >= 0; i--) {
|
|
if (i == 3 || i == 6)
|
|
continue;
|
|
for (auto it = l.begin(); it != l.end(); it++) {
|
|
buckets[(*it)->ssn.at(i) - '0'].push_back(*it);
|
|
}
|
|
l.clear();
|
|
for (int k = 0; k < BUCKET_SIZE; k++) {
|
|
auto nl = buckets[k];
|
|
for (auto it = nl.begin(); it != nl.end(); it++) {
|
|
l.push_back(*it);
|
|
}
|
|
nl.clear();
|
|
}
|
|
}
|
|
}
|
|
|
|
int longestLastName(list<Data *> &l) {
|
|
int max = 0;
|
|
for (auto it = l.begin(); it != l.end(); it++) {
|
|
int value = (*it)->lastName.length();
|
|
if (value > max)
|
|
max = value;
|
|
}
|
|
return max;
|
|
}
|
|
|
|
void sortByLastName(list<Data *> &l) {
|
|
int BUCKET_SIZE = 27;
|
|
list<Data *> buckets[BUCKET_SIZE];
|
|
int MAX_LENGTH = longestLastName(l);
|
|
for (int i = MAX_LENGTH - 1; i >= 0; i--) {
|
|
for (auto it = l.begin(); it != l.end(); it++) {
|
|
string lastName = (*it)->lastName;
|
|
if (lastName.size() <= i) {
|
|
buckets[27].push_back(*it);
|
|
continue;
|
|
}
|
|
buckets[lastName.at(i) - 'A'].push_back(*it);
|
|
}
|
|
l.clear();
|
|
for (int k = 0; k < BUCKET_SIZE; k++) {
|
|
auto nl = buckets[k];
|
|
for (auto it = nl.begin(); it != nl.end(); it++) {
|
|
l.push_back(*it);
|
|
}
|
|
nl.clear();
|
|
}
|
|
}
|
|
}
|
|
|
|
int longestFirstName(list<Data *> &l) {
|
|
int max = 0;
|
|
for (auto it = l.begin(); it != l.end(); it++) {
|
|
int value = (*it)->firstName.length();
|
|
if (value > max)
|
|
max = value;
|
|
}
|
|
return max;
|
|
}
|
|
|
|
|
|
|
|
void sortByFirstName(list<Data *> &l) {
|
|
int BUCKET_SIZE = 27;
|
|
list<Data *> buckets[BUCKET_SIZE];
|
|
int MAX_LENGTH = longestFirstName(l);
|
|
for (int i = MAX_LENGTH - 1; i >= 0; i--) {
|
|
for (auto it = l.begin(); it != l.end(); it++) {
|
|
string firstName = (*it)->firstName;
|
|
if (firstName.size() <= i) {
|
|
buckets[27].push_back(*it);
|
|
continue;
|
|
}
|
|
buckets[firstName.at(i) - 'A'].push_back(*it);
|
|
}
|
|
l.clear();
|
|
for (int k = 0; k < BUCKET_SIZE; k++) {
|
|
auto nl = buckets[k];
|
|
for (auto it = nl.begin(); it != nl.end(); it++) {
|
|
l.push_back(*it);
|
|
}
|
|
nl.clear();
|
|
}
|
|
}
|
|
}
|
|
|
|
const int AFS_RADIX=27;
|
|
const int LONGEST_NAME = 20;
|
|
|
|
enum field {
|
|
lastName = 1,
|
|
firstName = 2,
|
|
ssn = 3,
|
|
};
|
|
|
|
struct nameLengths {
|
|
int firstName = 0;
|
|
int lastName = 0;
|
|
};
|
|
|
|
string* getVal(Data * d, field f) {
|
|
switch (f) {
|
|
case lastName:
|
|
return &d->lastName;
|
|
case firstName:
|
|
return &d->firstName;
|
|
}
|
|
return &d->ssn;
|
|
}
|
|
|
|
char valueAt(Data * d, field type, int pos) {
|
|
string *str = getVal(d, type);
|
|
if (str->length() > pos) {
|
|
return str->at(pos) - 'A' + 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
nameLengths longestName(list<Data *> &l) {
|
|
nameLengths n;
|
|
for (auto it = l.begin(); it != l.end(); it++) {
|
|
int fn = (*it)->firstName.length();
|
|
int ln = (*it)->lastName.length();
|
|
if (fn > n.firstName)
|
|
n.firstName = fn;
|
|
if(ln > n.lastName)
|
|
n.lastName = ln;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
size_t longestNameType(list<Data *> &l, field type) {
|
|
int max = 0;
|
|
for (auto it = l.begin(); it != l.end(); it++) {
|
|
int value = getVal(*it, type)->length();
|
|
if (value > max)
|
|
max = value;
|
|
}
|
|
return max;
|
|
}
|
|
|
|
void _afsSwapAll(vector<Data *> &v, int* offsets, int start, int digit, field type) {
|
|
int i = start;
|
|
int nf[AFS_RADIX] = {};
|
|
int current_block = 0;
|
|
copy(offsets, offsets + AFS_RADIX, begin(nf));
|
|
while (current_block < AFS_RADIX - 1) {
|
|
if (i >= start + offsets[current_block + 1]) {
|
|
current_block += 1;
|
|
continue;
|
|
}
|
|
char val = valueAt(v[i], type, digit);
|
|
if (val == current_block) {
|
|
i += 1;
|
|
continue;
|
|
}
|
|
auto swap_to = start + nf[val];
|
|
auto tmp = v[swap_to];
|
|
v[swap_to] = v[i];
|
|
v[i] = tmp;
|
|
nf[val] += 1;
|
|
}
|
|
}
|
|
|
|
void _afsOffsets (vector<Data *> &v, int start, int end, int digit, int* offsets, field type) {
|
|
int counts[AFS_RADIX] = {};
|
|
for (int i = start; i < end; i++) {
|
|
counts[valueAt(v[i], type, digit)] += 1;
|
|
}
|
|
long sum = 0;
|
|
for (int i = 0; i < AFS_RADIX + 1; i++) {
|
|
offsets[i] = sum;
|
|
sum += counts[i];
|
|
}
|
|
}
|
|
|
|
void _recurseAmericanFlagSort(vector <Data *> &v, map<int, bool> *allOffsets, int start, int end, int digit, int max, field type) {
|
|
int offsets[AFS_RADIX + 1] = {};
|
|
_afsOffsets(v, start, end, digit, offsets, type);
|
|
_afsSwapAll(v, offsets, start, digit, type);
|
|
if (allOffsets != nullptr) {
|
|
for (int i = 0; i < AFS_RADIX; i++) {
|
|
(*allOffsets)[offsets[i] + start] = true;
|
|
}
|
|
}
|
|
if (digit == max) {
|
|
return;
|
|
}
|
|
for (int i = 0; i < AFS_RADIX; i++) {
|
|
if (offsets[i + 1] - 1000 < offsets[i]) {
|
|
if (offsets[i + 1] != offsets[i]) {
|
|
auto b = v.begin() + start + offsets[i];
|
|
auto l = v.begin() + start + offsets[i + 1];
|
|
sort(b, l, compareLastName);
|
|
for (auto it = b + 1; it != l; it++) {
|
|
if (getVal(*it, type)->compare(*getVal(*prev(it), type)) != 0) {
|
|
(*allOffsets)[it - v.begin()] = true;
|
|
}
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
_recurseAmericanFlagSort(v, allOffsets, start + offsets[i], start + offsets[i + 1], digit + 1, max, type);
|
|
}
|
|
}
|
|
|
|
|
|
void americanFlagSort(list <Data *> &l, vector<Data *> &v, map<int, bool> *offsets, int start, int end, field type) {
|
|
int max = longestNameType(l, type);
|
|
_recurseAmericanFlagSort(v, offsets, start, end, 0, max, type);
|
|
}
|
|
|
|
void sortDataList(list<Data *> &l) {
|
|
// Fill this in
|
|
|
|
|
|
// Radix sort try
|
|
// sortBySSN(l);
|
|
// sortByFirstName(l);
|
|
// sortByLastName(l);
|
|
|
|
// American Flag Sort first try
|
|
|
|
// Straight l.sort
|
|
l.sort(compare);
|
|
|
|
// Vector sort
|
|
// vector<Data *> v;
|
|
// v.reserve(l.size());
|
|
// copy(begin(l), end(l), back_inserter(v));
|
|
// sort(v.begin(), v.end(), compare);
|
|
// l.clear();
|
|
// copy(v.begin(), v.end(), back_inserter(l));
|
|
}
|
|
|
|
|