help in C language

/* File: indexPage.c */
/* Author: Britton Wolfe */
/* Date: September 3rd, 2010 */

/* This program indexes a web page, printing out the counts of words on that page */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>   /* for tolower */

/* Isaiah: this is the trie struct. If you have any questions feel free to contact me! */
typedef struct trieNode{
  int count;
  struct trieNode* children[26];
} node;

/* NOTE: int return values can be used to indicate errors (typically non-zero)
   or success (typically zero return value) */

/* Dalal: I changed the return type. indexPage now returns the cleaned-up text buffer. */
char* indexPage(const char* url, node* tNode);

int addWordOccurrence(const char* word, const int wordLength, node* tNode);

void printTrieContents(node* tNode, char* word, int curr);

int freeTrieMemory(node* tNode);

int getText(const char* srcAddr, char* buffer, const int bufSize);

/* Allocate a node with a zero count and all 26 children set to NULL. */
node* newNode(){
  node* n = malloc(sizeof(node));
  int i;
  n->count = 0;
  for(i = 0; i < 26; i++){
    n->children[i] = NULL;
  }
  return n;
}

/* Isaiah: use indexPage to get a char* and loop over the buffer, adding words with
   addWordOccurrence. Finally call printTrieContents and freeTrieMemory. */
int main(int argc, char** argv){
  node* tNode = newNode();
  char* word;
  char* temp;
  int curr = 0;

  /* argv[1] will be the URL to index, if argc > 1 */
  if(argc <= 1){
    fprintf(stderr, "USAGE: %s url\n", argv[0]);
    freeTrieMemory(tNode);
    return 1;
  }

  word = indexPage(argv[1], tNode);
  printf("%s\n", argv[1]);

  temp = strtok(word, " ");
  while(temp != NULL){
    addWordOccurrence(temp, strlen(temp), tNode);
    printf("\t%s\n", temp);
    temp = strtok(NULL, " ");
  }

  printTrieContents(tNode, word, curr);
  freeTrieMemory(tNode);
  free(word);

  return 0;
}

/* Isaiah: this function takes a URL, downloads its text, lowercases it, and
   collapses every non-letter into a space (and runs of spaces into one space).
   It returns the cleaned buffer, which the caller must free. tNode is unused
   here because main adds the words after tokenizing. */
char* indexPage(const char* url, node* tNode){
  const int bufferSize = 10000;
  char* buffer = malloc(sizeof(char) * bufferSize);
  int i;
  int j = 0;
  char c;

  if(getText(url, buffer, bufferSize) == 0){
    buffer[0] = '\0';
    return buffer;
  }

  /* Lowercase letters; replace everything else with a space.
     Needs >= and <= so 'a' and 'z' themselves are not blanked. */
  for(i = 0; buffer[i]; i++){
    if((buffer[i] >= 'a' && buffer[i] <= 'z') ||
       (buffer[i] >= 'A' && buffer[i] <= 'Z')){
      buffer[i] = tolower(buffer[i]);
    } else {
      buffer[i] = ' ';
    }
  }

  /* Collapse each run of spaces down to a single space. */
  for(i = 0; buffer[i]; i++, j++){
    c = buffer[i];
    if(c == ' '){
      while(buffer[i+1] == ' '){
        i++;
      }
    }
    buffer[j] = c;
  }
  buffer[j] = '\0';

  return buffer;
}

/* Isaiah: walks the trie one letter at a time, creating children as needed,
   and increments the count at the node for the word's last letter.
   Returns 0 on success. If you have any questions about how this function
   works please let me know! */
int addWordOccurrence(const char* word, const int wordLength, node* tNode)
{
  int cc = 0;
  while(cc < wordLength){
    int curr = word[cc] - 'a';
    if(curr > 25 || curr < 0){
      cc++;
      continue;
    }
    if(tNode->children[curr] == NULL){
      tNode->children[curr] = newNode();
    }
    tNode = tNode->children[curr];
    cc++;
  }
  tNode->count++;
  return 0;
}

/* Isaiah: recursively prints every word stored in the trie with its count.
   word is scratch space used to build up the current prefix. */
void printTrieContents(node* tNode, char *word, int curr){
  int i;
  /* Print the word ending at this node before descending into longer words,
     so output comes out in alphabetical order. */
  if(tNode->count){
    for(i = 0; i < curr; i++){
      printf("%c", word[i]);
    }
    printf(": %d\n", tNode->count);
  }
  for(i = 0; i < 26; i++){
    if(tNode->children[i] != NULL){
      word[curr] = i + 'a';
      printTrieContents(tNode->children[i], word, curr + 1);
    }
  }
}

/* Isaiah: this function will recursively free the trie and its children.
   WARNING: ONLY CALL WHEN YOU ARE SURE YOU NO LONGER NEED THE TRIE;
   THIS FUNCTION WILL COMPLETELY DESTROY ALL OF ITS DATA. */
int freeTrieMemory(node* tNode)
{
  if(tNode != NULL){
    int i;
    for(i = 0; i < 26; i++){
      if(tNode->children[i] != NULL){
        freeTrieMemory(tNode->children[i]);
      }
    }
    free(tNode);
  }
  return 0;
}

/* You should not need to modify this function */
int getText(const char* srcAddr, char* buffer, const int bufSize){
  FILE *pipe;
  int bytesRead;

  snprintf(buffer, bufSize, "curl -s \"%s\" | python getText.py", srcAddr);

  pipe = popen(buffer, "r");
  if(pipe == NULL){
    fprintf(stderr, "ERROR: could not open the pipe for command %s\n", buffer);
    return 0;
  }

  bytesRead = fread(buffer, sizeof(char), bufSize-1, pipe);
  buffer[bytesRead] = '\0';

  pclose(pipe);

  return bytesRead;
}
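For anyone trying to run this: a minimal build-and-run sketch. It assumes gcc and curl are installed, that getText.py sits in the working directory (the popen command inside getText invokes it by that relative name), and that the default python is a Python 2 interpreter with bs4 and html5lib installed. The URL is one of the test pages from runTestCases.sh below.

    gcc -o indexPage indexPage.c
    ./indexPage "http://users.ipfw.edu/chenz/testweb/page_000001.html"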

 

project3/p3/in

project3/p3/indexPage

project3/p3/solution

project3/p3/runTestCases.sh

#!/bin/bash
for i in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
do
    if [[ ${i} -lt 10 ]]
    then
        url="http://users.ipfw.edu/chenz/testweb/page_00000${i}.html"
    else
        url="http://users.ipfw.edu/chenz/testweb/page_0000${i}.html"
    fi

    ./indexPage ${url} > test${i}Output.txt
    ./solution ${url} > test${i}CorrectOutput.txt

    diff -q test${i}CorrectOutput.txt test${i}Output.txt > /dev/null
    if [[ $? == 0 ]]
    then
        echo Test ${i} was passed successfully.
        rm test${i}Output.txt test${i}CorrectOutput.txt
    else
        diff -q -b test${i}CorrectOutput.txt test${i}Output.txt > /dev/null
        if [[ $? == 0 ]]
        then
            echo Test ${i} differed in its whitespace.
        else
            echo Test ${i} failed.
        fi
        echo "Here is a side-by-side comparison for test ${i}, with the correct output on the left, showing only the mismatched lines:"
        diff -y --suppress-common-lines test${i}CorrectOutput.txt test${i}Output.txt
    fi
done
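A sketch of how the test script is meant to be invoked, assuming indexPage has been built as above and the provided solution binary sits alongside it in the same directory:

    chmod +x runTestCases.sh
    ./runTestCases.sh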

 

project3/p3/getText.py

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
import sys
import re
import pprint
import string
import socket
import errno

doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")
strings = soup.findAll(text=True)

try:
    for s in strings:
        cleanStr = s.strip()
        if(len(cleanStr) > 0):
            print cleanStr.encode("ascii", "replace")
            #pprint.pprint(cleanStr)

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error, e:
    # A socket error: that's okay
    x = 7
except IOError, e:
    if e.errno == errno.EPIPE:
        x = 7
    else:
        print "IOError"
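A quick way to exercise getText.py on its own is to replicate the pipeline that indexPage.c builds with popen; this is a sketch assuming the same Python 2 / bs4 / html5lib setup as above. It reads HTML on stdin and prints the visible text strings, one per line.

    curl -s "http://users.ipfw.edu/chenz/testweb/page_000001.html" | python getText.py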

 

project3/project3.tar

p3/solution

p3/getText.py

(identical to project3/p3/getText.py above)

 

p3/indexPage.c

/* File: indexPage.c */
/* Author: Britton Wolfe */
/* Date: September 3rd, 2010 */

/* This program indexes a web page, printing out the counts of words on that page */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* TODO: structure definitions */

/* NOTE: int return values can be used to indicate errors (typically non-zero)
   or success (typically zero return value) */

/* TODO: change this return type */
void indexPage(const char* url);

int addWordOccurrence(const char* word, const int wordLength
                      /* TODO: other parameters you need */);

void printTrieContents(/* TODO: any parameters you need */);

int freeTrieMemory(/* TODO: any parameters you need */);

int getText(const char* srcAddr, char* buffer, const int bufSize);

int main(int argc, char** argv){
  /* TODO: write the (simple) main function */
  /* argv[1] will be the URL to index, if argc > 1 */

  return 0;
}

/* TODO: define the functions corresponding to the above prototypes */

/* TODO: change this return type */
void indexPage(const char* url)
{}

int addWordOccurrence(const char* word, const int wordLength
                      /* TODO: other parameters you need */)
{}

void printTrieContents(/* TODO: any parameters you need */)
{}

int freeTrieMemory(/* TODO: any parameters you need */)
{}

/* You should not need to modify this function */
int getText(const char* srcAddr, char* buffer, const int bufSize){
  FILE *pipe;
  int bytesRead;

  snprintf(buffer, bufSize, "curl -s \"%s\" | python getText.py", srcAddr);

  pipe = popen(buffer, "r");
  if(pipe == NULL){
    fprintf(stderr, "ERROR: could not open the pipe for command %s\n", buffer);
    return 0;
  }

  bytesRead = fread(buffer, sizeof(char), bufSize-1, pipe);
  buffer[bytesRead] = '\0';

  pclose(pipe);

  return bytesRead;
}

 

project3/project/getLinks.py

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
#import requests
import sys
import re
import socket
import errno

doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")

# this version excludes anchor links, but includes relative links
#links = soup.findAll('a', href=re.compile("^[^#]"))

# this version only includes absolute http addresses
links = soup.findAll('a', href=re.compile("^http://"))

try:
    print len(links)
    for l in links:
        if l.has_attr('href'):
            print l['href']

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error, e:
    # A socket error: that's okay
    x = 7
except IOError, e:
    if e.errno == errno.EPIPE:
        x = 7
    else:
        print "IOError"
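getLinks.py reads HTML on stdin and prints the number of absolute http links followed by the links themselves, one per line. A usage sketch, under the same Python 2 / bs4 assumptions as getText.py:

    curl -s "http://users.ipfw.edu/chenz/testweb/page_000001.html" | python getLinks.py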

 

project3/project/test_valgrind

valgrind --leak-check=yes ./webSearch webSearchSol/urlFiles/train1.txt 10 10 < webSearchSol/searchFiles/train1.txt 2> valgrind_results.txt
valgrind --leak-check=yes ./webSearch webSearchSol/urlFiles/train2.txt 10 10 < webSearchSol/searchFiles/train2.txt 2>> valgrind_results.txt
valgrind --leak-check=yes ./webSearch ./webSearchSol/urlFiles/test0.txt 100 1427195 < ./webSearchSol/searchFiles/test0.txt 2>> valgrind_results.txt
valgrind --leak-check=yes ./webSearch ./webSearchSol/urlFiles/test1.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt 2>> valgrind_results.txt
valgrind --leak-check=yes ./webSearch ./webSearchSol/urlFiles/test2.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt 2>> valgrind_results.txt
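Since each run appends its report to valgrind_results.txt, one way to skim the accumulated output for leaks afterwards (my own suggestion, not part of the provided script) is:

    grep "definitely lost" valgrind_results.txt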

 

project3/project/test_all

./webSearch webSearchSol/urlFiles/train1.txt 10 10 < webSearchSol/searchFiles/train1.txt > result1.txt
./webSearch webSearchSol/urlFiles/train2.txt 10 10 < webSearchSol/searchFiles/train2.txt > result2.txt
./webSearch ./webSearchSol/urlFiles/test0.txt 100 1427195 < ./webSearchSol/searchFiles/test0.txt > test_result_0.txt
./webSearch ./webSearchSol/urlFiles/test1.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > test_result_1.txt
./webSearch ./webSearchSol/urlFiles/test2.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > test_result_2.txt
./webSearchSol/webSearch webSearchSol/urlFiles/train1.txt 10 10 < webSearchSol/searchFiles/train1.txt > ./webSearchSol/result1.txt
./webSearchSol/webSearch webSearchSol/urlFiles/train2.txt 10 10 < webSearchSol/searchFiles/train2.txt > ./webSearchSol/result2.txt
./webSearchSol/webSearch ./webSearchSol/urlFiles/test0.txt 100 1427195 < ./webSearchSol/searchFiles/test0.txt > ./webSearchSol/test_result_0.txt
./webSearchSol/webSearch ./webSearchSol/urlFiles/test1.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > ./webSearchSol/test_result_1.txt
./webSearchSol/webSearch ./webSearchSol/urlFiles/test2.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > ./webSearchSol/test_result_2.txt
diff ./result1.txt ./webSearchSol/result1.txt > results_all.txt
diff ./result2.txt ./webSearchSol/result2.txt >> results_all.txt
diff ./test_result_0.txt ./webSearchSol/test_result_0.txt >> results_all.txt
diff ./test_result_1.txt ./webSearchSol/test_result_1.txt >> results_all.txt
diff ./test_result_2.txt ./webSearchSol/test_result_2.txt >> results_all.txt
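All the diffs land in results_all.txt, so an empty file means every output matched the solution binary. A quick check (my own suggestion, not part of the script):

    [ -s results_all.txt ] && echo "differences found" || echo "all outputs match"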

 

project3/project/project4.tar

./webSearchSol/searchFiles/test0.txt

sport sports computer science science computer ipfw with highlights some ipfw news other science another

 

./webSearchSol/searchFiles/train1.txt

computer science science computer ipfw computer science sports scores no capitalS no punctuation! definitely NOT capitals AND punctuation!!! link page

 

./webSearchSol/searchFiles/train2.txt

research computer research science IPFW ipfw science sports highlights links

 

./webSearchSol/searchFiles/test1.txt

sport sports sports! IPFW IPFW computer science ipfw computer science ipfw science computer ipfw with highlights some ipfw news other science another

 

./webSearchSol/getText.py

(identical to project3/p3/getText.py above)

 

./webSearchSol/webSearch

./webSearchSol/getLinks.py

(identical to project3/project/getLinks.py above)

 

./webSearchSol/urlFiles/test2.txt

http://www.yahoo.com 7
http://www.cnn.com 8
http://www.ipfw.edu 12

 

./webSearchSol/urlFiles/test0.txt

http://users.ipfw.edu/chenz/testWeb/page_000017.html 15
http://users.ipfw.edu/chenz/testWeb/page_000028.html 15
http://users.ipfw.edu/chenz/testWeb/page_000001.html 20
http://users.ipfw.edu/chenz/testWeb/page_000015.html 15
http://users.ipfw.edu/chenz/testWeb/page_000004.html 18

 

./webSearchSol/urlFiles/double2.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 2
http://users.ipfw.edu/chenz/testWeb/page_000001.html 2

 

./webSearchSol/urlFiles/train1.txt

http://users.ipfw.edu/chenz/testWeb/page_000014.html 15
http://users.ipfw.edu/chenz/testWeb/page_000020.html 5
http://users.ipfw.edu/chenz/testWeb/page_000007.html 12

 

./webSearchSol/urlFiles/single1.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 1

 

./webSearchSol/urlFiles/single2.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 2

 

./webSearchSol/urlFiles/train2.txt

http://users.ipfw.edu/chenz/testWeb/page_000010.html 5
http://users.ipfw.edu/chenz/testWeb/page_000001.html 1
http://users.ipfw.edu/chenz/testWeb/page_000018.html 12

 

./webSearchSol/urlFiles/test1.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 15
http://users.ipfw.edu/chenz/ 15
http://users.ipfw.edu/chenz/testWeb/page_000001.html 20

 

./webSearchSol/urlFiles/double1.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 1
http://users.ipfw.edu/chenz/testWeb/page_000001.html 1

 

./test_all

(identical to project3/project/test_all above)

 

./getText.py

(identical to project3/p3/getText.py above)

 

./getLinks.py

(identical to project3/project/getLinks.py above)

 

./test_valgrind

(identical to project3/project/test_valgrind above)

 

project3/project/webSearchSol/getLinks.py

(identical to project3/project/getLinks.py above)

 

project3/project/webSearchSol/urlFiles/double2.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 2
http://users.ipfw.edu/chenz/testWeb/page_000001.html 2

 

project3/project/webSearchSol/urlFiles/single1.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 1

 

project3/project/webSearchSol/urlFiles/test0.txt

http://users.ipfw.edu/chenz/testWeb/page_000017.html 15
http://users.ipfw.edu/chenz/testWeb/page_000028.html 15
http://users.ipfw.edu/chenz/testWeb/page_000001.html 20
http://users.ipfw.edu/chenz/testWeb/page_000015.html 15
http://users.ipfw.edu/chenz/testWeb/page_000004.html 18

 

project3/project/webSearchSol/urlFiles/train1.txt

http://users.ipfw.edu/chenz/testWeb/page_000014.html 15
http://users.ipfw.edu/chenz/testWeb/page_000020.html 5
http://users.ipfw.edu/chenz/testWeb/page_000007.html 12

 

project3/project/webSearchSol/urlFiles/single2.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 2

 

project3/project/webSearchSol/urlFiles/test1.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 15
http://users.ipfw.edu/chenz/ 15
http://users.ipfw.edu/chenz/testWeb/page_000001.html 20

 

project3/project/webSearchSol/urlFiles/train2.txt

http://users.ipfw.edu/chenz/testWeb/page_000010.html 5
http://users.ipfw.edu/chenz/testWeb/page_000001.html 1
http://users.ipfw.edu/chenz/testWeb/page_000018.html 12

 

project3/project/webSearchSol/urlFiles/test2.txt

http://www.yahoo.com 7
http://www.cnn.com 8
http://www.ipfw.edu 12

 

project3/project/webSearchSol/urlFiles/double1.txt

http://users.ipfw.edu/chenz/testWeb/page_000003.html 1
http://users.ipfw.edu/chenz/testWeb/page_000001.html 1

 

project3/project/webSearchSol/searchFiles/test0.txt

sport sports computer science science computer ipfw with highlights some ipfw news other science another

 

project3/project/webSearchSol/searchFiles/train1.txt

computer science science computer ipfw computer science sports scores no capitalS no punctuation! definitely NOT capitals AND punctuation!!! link page

 

project3/project/webSearchSol/searchFiles/test1.txt

sport sports sports! IPFW IPFW computer science ipfw computer science ipfw science computer ipfw with highlights some ipfw news other science another

 

project3/project/webSearchSol/searchFiles/train2.txt

research computer research science IPFW ipfw science sports highlights links

 

project3/project/webSearchSol/webSearch

project3/project/webSearchSol/getText.py

(identical to project3/p3/getText.py above)

 

project3/project/getText.py

(identical to project3/p3/getText.py above)
