help in C language

help in C language

/* File: indexPage.c */
/* Author: Britton Wolfe */
/* Date: September 3rd, 2010 */
/* This program indexes a web page, printing out the counts of words on that page */

/* popen/pclose are POSIX, not ISO C; ask the headers to declare them. */
#define _POSIX_C_SOURCE 200809L

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
    ALPHABET_SIZE = 26,      /* one child slot per lowercase ASCII letter */
    PAGE_BUFFER_SIZE = 10000 /* bytes reserved for the page text (and the shell command) */
};

/* Isaiah: this is the trie struct.
 * count    - number of times the word ending at this node was added
 * children - children[k] continues the word with the letter 'a' + k, or is NULL */
typedef struct trieNode {
    int count;
    struct trieNode* children[26];
} node;

/* NOTE: int return values can be used to indicate errors (typically non-zero)
   or success (typically zero return value) */

/* Dalal: I changed the return type. */
char* indexPage(const char* url, node* tNode);
int addWordOccurrence(const char* word, const int wordLength, node* tNode);
void printTrieContents(node* tNode, char* word, int curr);
int freeTrieMemory(node* tNode);
int getText(const char* srcAddr, char* buffer, const int bufSize);

/*
 * Allocates an empty trie node (count 0, no children).
 * Returns NULL if the allocation fails; the caller owns the node.
 */
node* newNode(void) {
    node* fresh = malloc(sizeof *fresh);
    if (fresh == NULL) {
        return NULL;
    }
    fresh->count = 0;
    for (int i = 0; i < ALPHABET_SIZE; i++) {
        fresh->children[i] = NULL;
    }
    return fresh;
}

/*
 * Indexes the page named by argv[1]: prints the URL, then every word found
 * (tab-indented, in page order), then each distinct word with its count.
 * Returns 0 on success and 1 on a usage or allocation error.
 */
int main(int argc, char** argv) {
    /* Without a URL there is nothing to index (and argv[1] is NULL). */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <url>\n", argc > 0 ? argv[0] : "indexPage");
        return 1;
    }

    node* root = newNode();
    if (root == NULL) {
        fprintf(stderr, "ERROR: out of memory\n");
        return 1;
    }

    char* text = indexPage(argv[1], root);
    if (text == NULL) {
        fprintf(stderr, "ERROR: out of memory\n");
        freeTrieMemory(root);
        return 1;
    }

    int status = 0;
    printf("%s\n", argv[1]);
    for (char* tok = strtok(text, " "); tok != NULL; tok = strtok(NULL, " ")) {
        if (addWordOccurrence(tok, (int)strlen(tok), root) != 0) {
            fprintf(stderr, "ERROR: could not index word \"%s\"\n", tok);
            status = 1;
            break;
        }
        printf("\t%s\n", tok);
    }

    if (status == 0) {
        /* Every word came out of `text`, so no trie path is longer than that
           buffer; it is safe to reuse it as the scratch space for printing. */
        printTrieContents(root, text, 0);
    }

    freeTrieMemory(root);
    free(text);
    return status;
}

/*
 * Downloads the text of `url` and normalises it for tokenising: letters are
 * lowercased, every other character becomes a separator, and each run of
 * separators is collapsed to a single space.
 *
 * url   - address handed to getText
 * tNode - unused here; the caller inserts the words into the trie
 *
 * Returns a malloc'd NUL-terminated string that the caller must free, or NULL
 * if the buffer cannot be allocated. The string is empty when no text could
 * be fetched.
 */
char* indexPage(const char* url, node* tNode) {
    (void)tNode;

    char* buffer = malloc(PAGE_BUFFER_SIZE);
    if (buffer == NULL) {
        return NULL;
    }

    /* getText builds its shell command inside `buffer` and returns 0 when
       popen fails, leaving that command behind; never index it as page text. */
    if (getText(url, buffer, PAGE_BUFFER_SIZE) <= 0) {
        buffer[0] = '\0';
        return buffer;
    }

    /* Single in-place pass. `out` never overtakes `in`, and the scan stops at
       the terminator instead of walking the whole allocation. */
    size_t out = 0;
    for (size_t in = 0; buffer[in] != '\0'; in++) {
        unsigned char ch = (unsigned char)buffer[in];
        int isLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
        if (isLetter) {
            buffer[out++] = (char)tolower(ch);
        } else if (out > 0 && buffer[out - 1] != ' ') {
            buffer[out++] = ' ';
        }
    }
    buffer[out] = '\0';
    return buffer;
}

/*
 * Records one occurrence of `word` in the trie rooted at tNode, creating
 * nodes as needed. Characters outside 'a'..'z' are skipped.
 *
 * Returns 0 on success. Returns 1 if an argument is NULL, a node cannot be
 * allocated, or the word contains no letters (so the root is never counted
 * as a word).
 */
int addWordOccurrence(const char* word, const int wordLength, node* tNode) {
    if (word == NULL || tNode == NULL) {
        return 1;
    }

    node* cursor = tNode;
    int letters = 0;
    for (int i = 0; i < wordLength; i++) {
        int slot = word[i] - 'a';
        if (slot < 0 || slot >= ALPHABET_SIZE) {
            continue;
        }
        if (cursor->children[slot] == NULL) {
            cursor->children[slot] = newNode();
            if (cursor->children[slot] == NULL) {
                return 1;
            }
        }
        cursor = cursor->children[slot];
        letters++;
    }

    if (letters == 0) {
        return 1;
    }
    cursor->count++;
    return 0;
}

/*
 * Prints every word stored below tNode as "word: count", one per line, in
 * alphabetical order. A node is printed before its children so that a word
 * comes before the longer words it prefixes. The trie is not modified.
 *
 * word - scratch buffer holding the letters on the path to tNode; it must
 *        have room for the longest word in the trie
 * curr - number of letters of `word` that lead to tNode (0 at the root)
 */
void printTrieContents(node* tNode, char* word, int curr) {
    if (tNode == NULL || word == NULL) {
        return;
    }

    if (tNode->count > 0) {
        printf("%.*s: %d\n", curr, word, tNode->count);
    }

    for (int i = 0; i < ALPHABET_SIZE; i++) {
        if (tNode->children[i] != NULL) {
            word[curr] = (char)('a' + i);
            printTrieContents(tNode->children[i], word, curr + 1);
        }
    }
}

/*
 * Recursively frees tNode and every node below it. A NULL argument is a no-op.
 * WARNING: only call this once the trie is no longer needed; the caller's
 * pointer is dangling afterwards. Always returns 0.
 */
int freeTrieMemory(node* tNode) {
    if (tNode == NULL) {
        return 0;
    }
    for (int i = 0; i < ALPHABET_SIZE; i++) {
        freeTrieMemory(tNode->children[i]);
    }
    free(tNode);
    return 0;
}

/* You should not need to modify this function */
/*
 * Runs a shell pipeline that fetches srcAddr with curl and reads up to
 * bufSize-1 bytes of its output into buffer, NUL-terminating it.
 * Returns the number of bytes read, or 0 if the pipe could not be opened
 * (in which case buffer still holds the command string).
 *
 * NOTE(review): the command pipes into a bare `python`; the name of the
 * helper script appears to be missing from this copy - confirm.
 * NOTE(review): srcAddr is pasted into a shell command unescaped, so it must
 * come from a trusted source.
 */
int getText(const char* srcAddr, char* buffer, const int bufSize) {
    FILE* pipe;
    int bytesRead;

    snprintf(buffer, bufSize, "curl -s \"%s\" | python", srcAddr);

    pipe = popen(buffer, "r");
    if (pipe == NULL) {
        fprintf(stderr, "ERROR: could not open the pipe for command %s\n", buffer);
        return 0;
    }

    bytesRead = (int)fread(buffer, sizeof(char), bufSize - 1, pipe);
    buffer[bytesRead] = '\0';

    pclose(pipe);

    return bytesRead;
}






#!/bin/bash
# Runs ./indexPage and the reference ./solution on pages 0.html .. 19.html and
# reports, per page, whether the two outputs match exactly, match apart from
# whitespace, or differ. Output files are removed only for exact matches.

for i in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
do
    # NOTE(review): the original chose the URL with an `if [[ ${i} -lt 10 ]]`
    # whose two branches were identical; a prefix for single-digit pages may
    # have been lost from this copy - confirm.
    url="${i}.html"

    ./indexPage "${url}" > "test${i}Output.txt"
    ./solution "${url}" > "test${i}CorrectOutput.txt"

    if diff -q "test${i}CorrectOutput.txt" "test${i}Output.txt" > /dev/null
    then
        echo "Test ${i} was passed successfully."
        rm "test${i}Output.txt" "test${i}CorrectOutput.txt"
    else
        # -b ignores changes in the amount of whitespace
        if diff -q -b "test${i}CorrectOutput.txt" "test${i}Output.txt" > /dev/null
        then
            echo "Test ${i} differed in its whitespace."
        else
            echo "Test ${i} failed."
        fi
        echo "Here is a side-by-side comparison for test ${i}, with the correct output on the left, showing only the mismatched lines:"
        diff -y --suppress-common-lines "test${i}CorrectOutput.txt" "test${i}Output.txt"
    fi
done



# Prints every non-empty text node of an HTML document, one per line, with
# non-ASCII characters replaced by '?'. Written for Python 2 (the syntax below
# also parses under Python 3, where the encoded text would print as b'...').

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
import sys
import re
import pprint
import string
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent (the C caller pipes curl output into python) - confirm.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")
strings = soup.findAll(text=True)

try:
    for s in strings:
        cleanStr = s.strip()
        if len(cleanStr) > 0:
            print(cleanStr.encode("ascii", "replace"))
            #pprint.pprint(cleanStr)

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")





# Prints every non-empty text node of an HTML document, one per line, with
# non-ASCII characters replaced by '?'. Written for Python 2 (the syntax below
# also parses under Python 3, where the encoded text would print as b'...').

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
import sys
import re
import pprint
import string
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent (the C caller pipes curl output into python) - confirm.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")
strings = soup.findAll(text=True)

try:
    for s in strings:
        cleanStr = s.strip()
        if len(cleanStr) > 0:
            print(cleanStr.encode("ascii", "replace"))
            #pprint.pprint(cleanStr)

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")



/* File: indexPage.c */
/* Author: Britton Wolfe */
/* Date: September 3rd, 2010 */
/* This program indexes a web page, printing out the counts of words on that page */

/* popen/pclose are POSIX, not ISO C; ask the headers to declare them. */
#define _POSIX_C_SOURCE 200809L

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* TODO: structure definitions */

/* NOTE: int return values can be used to indicate errors (typically non-zero)
   or success (typically zero return value) */

/* TODO: change this return type */
void indexPage(const char* url);

int addWordOccurrence(const char* word, const int wordLength
                      /* TODO: other parameters you need */);

void printTrieContents(/* TODO: any parameters you need */);

int freeTrieMemory(/* TODO: any parameters you need */);

int getText(const char* srcAddr, char* buffer, const int bufSize);

int main(int argc, char** argv) {
    /* TODO: write the (simple) main function */
    /* argv[1] will be the URL to index, if argc > 1 */

    return 0;
}

/* TODO: define the functions corresponding to the above prototypes */

/* TODO: change this return type */
void indexPage(const char* url) {}

/* Stub: returns 0 (success) until implemented. */
int addWordOccurrence(const char* word, const int wordLength
                      /* TODO: other parameters you need */) {
    return 0;
}

void printTrieContents(/* TODO: any parameters you need */) {}

/* Stub: returns 0 (success) until implemented. */
int freeTrieMemory(/* TODO: any parameters you need */) {
    return 0;
}

/* You should not need to modify this function */
/*
 * Runs a shell pipeline that fetches srcAddr with curl and reads up to
 * bufSize-1 bytes of its output into buffer, NUL-terminating it.
 * Returns the number of bytes read, or 0 if the pipe could not be opened
 * (in which case buffer still holds the command string).
 *
 * NOTE(review): the command pipes into a bare `python`; the name of the
 * helper script appears to be missing from this copy - confirm.
 */
int getText(const char* srcAddr, char* buffer, const int bufSize) {
    FILE* pipe;
    int bytesRead;

    snprintf(buffer, bufSize, "curl -s \"%s\" | python", srcAddr);

    pipe = popen(buffer, "r");
    if (pipe == NULL) {
        fprintf(stderr, "ERROR: could not open the pipe for command %s\n", buffer);
        return 0;
    }

    bytesRead = fread(buffer, sizeof(char), bufSize - 1, pipe);
    buffer[bytesRead] = '\0';

    pclose(pipe);

    return bytesRead;
}



# Prints the number of absolute http:// links in an HTML document, followed by
# the href of each such link, one per line. Written for Python 2 (the syntax
# below also parses under Python 3).

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
#import requests
import sys
import re
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent - confirm against the caller.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")

# this version excludes anchor links, but includes relative links
#links = soup.findAll('a', href=re.compile("^[^#]"))

# this version only includes absolute http addresses
links = soup.findAll('a', href=re.compile("^http://"))

try:
    print(len(links))
    for l in links:
        if l.has_attr('href'):
            print(l['href'])

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")



# Runs ./webSearch under valgrind on each URL file, feeding the matching search
# file on stdin. valgrind reports on stderr, which is collected in
# valgrind_results.txt (the first run truncates the file, the rest append).
# NOTE(review): the test2 run reads searchFiles/test1.txt, not test2.txt -
# confirm whether that is intentional.
valgrind --leak-check=yes ./webSearch webSearchSol/urlFiles/train1.txt 10 10 < webSearchSol/searchFiles/train1.txt 2> valgrind_results.txt
valgrind --leak-check=yes ./webSearch webSearchSol/urlFiles/train2.txt 10 10 < webSearchSol/searchFiles/train2.txt 2>> valgrind_results.txt
valgrind --leak-check=yes ./webSearch ./webSearchSol/urlFiles/test0.txt 100 1427195 < ./webSearchSol/searchFiles/test0.txt 2>> valgrind_results.txt
valgrind --leak-check=yes ./webSearch ./webSearchSol/urlFiles/test1.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt 2>> valgrind_results.txt
valgrind --leak-check=yes ./webSearch ./webSearchSol/urlFiles/test2.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt 2>> valgrind_results.txt



# Runs ./webSearch and the reference ./webSearchSol/webSearch on the same
# inputs, then collects the differences between their outputs in
# results_all.txt (an empty file means every output matched).
# NOTE(review): both test2 runs read searchFiles/test1.txt, not test2.txt -
# confirm whether that is intentional.

# Program under test
./webSearch webSearchSol/urlFiles/train1.txt 10 10 < webSearchSol/searchFiles/train1.txt > result1.txt
./webSearch webSearchSol/urlFiles/train2.txt 10 10 < webSearchSol/searchFiles/train2.txt > result2.txt
./webSearch ./webSearchSol/urlFiles/test0.txt 100 1427195 < ./webSearchSol/searchFiles/test0.txt > test_result_0.txt
./webSearch ./webSearchSol/urlFiles/test1.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > test_result_1.txt
./webSearch ./webSearchSol/urlFiles/test2.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > test_result_2.txt

# Reference solution
./webSearchSol/webSearch webSearchSol/urlFiles/train1.txt 10 10 < webSearchSol/searchFiles/train1.txt > ./webSearchSol/result1.txt
./webSearchSol/webSearch webSearchSol/urlFiles/train2.txt 10 10 < webSearchSol/searchFiles/train2.txt > ./webSearchSol/result2.txt
./webSearchSol/webSearch ./webSearchSol/urlFiles/test0.txt 100 1427195 < ./webSearchSol/searchFiles/test0.txt > ./webSearchSol/test_result_0.txt
./webSearchSol/webSearch ./webSearchSol/urlFiles/test1.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > ./webSearchSol/test_result_1.txt
./webSearchSol/webSearch ./webSearchSol/urlFiles/test2.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > ./webSearchSol/test_result_2.txt

# Compare (the first diff truncates results_all.txt, the rest append)
diff ./result1.txt ./webSearchSol/result1.txt > results_all.txt
diff ./result2.txt ./webSearchSol/result2.txt >> results_all.txt
diff ./test_result_0.txt ./webSearchSol/test_result_0.txt >> results_all.txt
diff ./test_result_1.txt ./webSearchSol/test_result_1.txt >> results_all.txt
diff ./test_result_2.txt ./webSearchSol/test_result_2.txt >> results_all.txt




sport sports computer science science computer ipfw with highlights some ipfw news other science another



computer science science computer ipfw computer science sports scores no capitalS no punctuation! definitely NOT capitals AND punctuation!!! link page



research computer research science IPFW ipfw science sports highlights links



sport sports sports! IPFW IPFW computer science ipfw computer science ipfw science computer ipfw with highlights some ipfw news other science another



# Prints every non-empty text node of an HTML document, one per line, with
# non-ASCII characters replaced by '?'. Written for Python 2 (the syntax below
# also parses under Python 3, where the encoded text would print as b'...').

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
import sys
import re
import pprint
import string
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent (the C caller pipes curl output into python) - confirm.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")
strings = soup.findAll(text=True)

try:
    for s in strings:
        cleanStr = s.strip()
        if len(cleanStr) > 0:
            print(cleanStr.encode("ascii", "replace"))
            #pprint.pprint(cleanStr)

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")




# Prints the number of absolute http:// links in an HTML document, followed by
# the href of each such link, one per line. Written for Python 2 (the syntax
# below also parses under Python 3).

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
#import requests
import sys
import re
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent - confirm against the caller.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")

# this version excludes anchor links, but includes relative links
#links = soup.findAll('a', href=re.compile("^[^#]"))

# this version only includes absolute http addresses
links = soup.findAll('a', href=re.compile("^http://"))

try:
    print(len(links))
    for l in links:
        if l.has_attr('href'):
            print(l['href'])

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")


./webSearchSol/urlFiles/test2.txt 7 8 12


./webSearchSol/urlFiles/test0.txt 15 15 20 15 18


./webSearchSol/urlFiles/double2.txt 2 2


./webSearchSol/urlFiles/train1.txt 15 5 12


./webSearchSol/urlFiles/single1.txt 1


./webSearchSol/urlFiles/single2.txt 2


./webSearchSol/urlFiles/train2.txt 5 1 12


./webSearchSol/urlFiles/test1.txt 15 15 20


./webSearchSol/urlFiles/double1.txt 1 1



# Runs ./webSearch and the reference ./webSearchSol/webSearch on the same
# inputs, then collects the differences between their outputs in
# results_all.txt (an empty file means every output matched).
# NOTE(review): both test2 runs read searchFiles/test1.txt, not test2.txt -
# confirm whether that is intentional.

# Program under test
./webSearch webSearchSol/urlFiles/train1.txt 10 10 < webSearchSol/searchFiles/train1.txt > result1.txt
./webSearch webSearchSol/urlFiles/train2.txt 10 10 < webSearchSol/searchFiles/train2.txt > result2.txt
./webSearch ./webSearchSol/urlFiles/test0.txt 100 1427195 < ./webSearchSol/searchFiles/test0.txt > test_result_0.txt
./webSearch ./webSearchSol/urlFiles/test1.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > test_result_1.txt
./webSearch ./webSearchSol/urlFiles/test2.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > test_result_2.txt

# Reference solution
./webSearchSol/webSearch webSearchSol/urlFiles/train1.txt 10 10 < webSearchSol/searchFiles/train1.txt > ./webSearchSol/result1.txt
./webSearchSol/webSearch webSearchSol/urlFiles/train2.txt 10 10 < webSearchSol/searchFiles/train2.txt > ./webSearchSol/result2.txt
./webSearchSol/webSearch ./webSearchSol/urlFiles/test0.txt 100 1427195 < ./webSearchSol/searchFiles/test0.txt > ./webSearchSol/test_result_0.txt
./webSearchSol/webSearch ./webSearchSol/urlFiles/test1.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > ./webSearchSol/test_result_1.txt
./webSearchSol/webSearch ./webSearchSol/urlFiles/test2.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt > ./webSearchSol/test_result_2.txt

# Compare (the first diff truncates results_all.txt, the rest append)
diff ./result1.txt ./webSearchSol/result1.txt > results_all.txt
diff ./result2.txt ./webSearchSol/result2.txt >> results_all.txt
diff ./test_result_0.txt ./webSearchSol/test_result_0.txt >> results_all.txt
diff ./test_result_1.txt ./webSearchSol/test_result_1.txt >> results_all.txt
diff ./test_result_2.txt ./webSearchSol/test_result_2.txt >> results_all.txt



# Prints every non-empty text node of an HTML document, one per line, with
# non-ASCII characters replaced by '?'. Written for Python 2 (the syntax below
# also parses under Python 3, where the encoded text would print as b'...').

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
import sys
import re
import pprint
import string
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent (the C caller pipes curl output into python) - confirm.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")
strings = soup.findAll(text=True)

try:
    for s in strings:
        cleanStr = s.strip()
        if len(cleanStr) > 0:
            print(cleanStr.encode("ascii", "replace"))
            #pprint.pprint(cleanStr)

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")



# Prints the number of absolute http:// links in an HTML document, followed by
# the href of each such link, one per line. Written for Python 2 (the syntax
# below also parses under Python 3).

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
#import requests
import sys
import re
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent - confirm against the caller.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")

# this version excludes anchor links, but includes relative links
#links = soup.findAll('a', href=re.compile("^[^#]"))

# this version only includes absolute http addresses
links = soup.findAll('a', href=re.compile("^http://"))

try:
    print(len(links))
    for l in links:
        if l.has_attr('href'):
            print(l['href'])

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")



# Runs ./webSearch under valgrind on each URL file, feeding the matching search
# file on stdin. valgrind reports on stderr, which is collected in
# valgrind_results.txt (the first run truncates the file, the rest append).
# NOTE(review): the test2 run reads searchFiles/test1.txt, not test2.txt -
# confirm whether that is intentional.
valgrind --leak-check=yes ./webSearch webSearchSol/urlFiles/train1.txt 10 10 < webSearchSol/searchFiles/train1.txt 2> valgrind_results.txt
valgrind --leak-check=yes ./webSearch webSearchSol/urlFiles/train2.txt 10 10 < webSearchSol/searchFiles/train2.txt 2>> valgrind_results.txt
valgrind --leak-check=yes ./webSearch ./webSearchSol/urlFiles/test0.txt 100 1427195 < ./webSearchSol/searchFiles/test0.txt 2>> valgrind_results.txt
valgrind --leak-check=yes ./webSearch ./webSearchSol/urlFiles/test1.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt 2>> valgrind_results.txt
valgrind --leak-check=yes ./webSearch ./webSearchSol/urlFiles/test2.txt 100 1427195 < ./webSearchSol/searchFiles/test1.txt 2>> valgrind_results.txt



# Prints the number of absolute http:// links in an HTML document, followed by
# the href of each such link, one per line. Written for Python 2 (the syntax
# below also parses under Python 3).

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
#import requests
import sys
import re
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent - confirm against the caller.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")

# this version excludes anchor links, but includes relative links
#links = soup.findAll('a', href=re.compile("^[^#]"))

# this version only includes absolute http addresses
links = soup.findAll('a', href=re.compile("^http://"))

try:
    print(len(links))
    for l in links:
        if l.has_attr('href'):
            print(l['href'])

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")


project3/project/webSearchSol/urlFiles/double2.txt 2 2


project3/project/webSearchSol/urlFiles/single1.txt 1


project3/project/webSearchSol/urlFiles/test0.txt 15 15 20 15 18


project3/project/webSearchSol/urlFiles/train1.txt 15 5 12


project3/project/webSearchSol/urlFiles/single2.txt 2


project3/project/webSearchSol/urlFiles/test1.txt 15 15 20


project3/project/webSearchSol/urlFiles/train2.txt 5 1 12


project3/project/webSearchSol/urlFiles/test2.txt 7 8 12


project3/project/webSearchSol/urlFiles/double1.txt 1 1



sport sports computer science science computer ipfw with highlights some ipfw news other science another



computer science science computer ipfw computer science sports scores no capitalS no punctuation! definitely NOT capitals AND punctuation!!! link page



sport sports sports! IPFW IPFW computer science ipfw computer science ipfw science computer ipfw with highlights some ipfw news other science another



research computer research science IPFW ipfw science sports highlights links




# Prints every non-empty text node of an HTML document, one per line, with
# non-ASCII characters replaced by '?'. Written for Python 2 (the syntax below
# also parses under Python 3, where the encoded text would print as b'...').

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
import sys
import re
import pprint
import string
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent (the C caller pipes curl output into python) - confirm.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")
strings = soup.findAll(text=True)

try:
    for s in strings:
        cleanStr = s.strip()
        if len(cleanStr) > 0:
            print(cleanStr.encode("ascii", "replace"))
            #pprint.pprint(cleanStr)

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")



# Prints every non-empty text node of an HTML document, one per line, with
# non-ASCII characters replaced by '?'. Written for Python 2 (the syntax below
# also parses under Python 3, where the encoded text would print as b'...').

#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
import sys
import re
import pprint
import string
import socket
import errno

# NOTE(review): this copy read `doc = soup = BeautifulSoup(doc, "html5lib")`,
# which uses `doc` before it is assigned. Reading the page from standard input
# is the presumed intent (the C caller pipes curl output into python) - confirm.
doc = sys.stdin.read()
soup = BeautifulSoup(doc, "html5lib")
strings = soup.findAll(text=True)

try:
    for s in strings:
        cleanStr = s.strip()
        if len(cleanStr) > 0:
            print(cleanStr.encode("ascii", "replace"))
            #pprint.pprint(cleanStr)

    # We close these in the "try" block to avoid
    # broken pipe errors when the program quits
    sys.stdout.close()
    sys.stderr.close()
    sys.stdin.close()
except socket.error as e:
    # A socket error: that's okay
    x = 7
except IOError as e:
    if e.errno == errno.EPIPE:
        # The reader closed the pipe early: that's okay too
        x = 7
    else:
        print("IOError")

Engage Quality Experts

Excellent Client Support

Get it before Your Deadline

You Need a Professional Writer To Work On Your Paper?

Privacy Policy


Do you have an Urgent Assignment?




Disclaimer: The services that we offer are meant to help the buyer by providing a guideline.
The product provided is intended to be used for research or study purposes only.

©2005-2023  All rights reserved