Ok, I have been trying to create a simple search engine with Python and I've hit a few errors which I'm not sure how to fix.
The program lets the user enter a URL, lists the URLs linked from that page, and brings back all the keywords. It then asks the user to search for a specific keyword, and that's when the error occurs.
I have used some of my own code and some I got off the web, and I can't seem to get it to work!
Basically the error is the following: 'dict' object has no attribute 'append'.
Is it because I'm using lists and not a dictionary? How would I fix it?
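To show what I mean, here is a minimal sketch of the difference (made-up names, not my actual code):

Code:

# a list-based index supports .append
index = []
index.append(["python", ["http://example.com/a"]])

# a dict-based index has no .append; you assign to a key instead
index = {}
index.setdefault("python", []).append("http://example.com/a")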
Code:
	
	import urllib2
max_limit=5
def get_page(url):
        # fetch a page's HTML, returning "" on any failure
        try:
                f = urllib2.urlopen(url)   # urllib2 is what was imported above
                page = f.read()
                f.close()
                return page
        except:
                return ""
def getAllNewLinksOnPage(page,prevLinks):
        # very naive link extraction: scan the raw HTML for <a href="..."> tags
        response = urllib2.urlopen(page)
        html = response.read()
        links,pos,allFound=[],0,False
        while not allFound:
                aTag=html.find("<a href=",pos)
                if aTag>-1:
                        href=html.find('"',aTag+1)
                        endHref=html.find('"',href+1)
                        url=html[href+1:endHref]
                        if url[:7]=="http://":
                                if url[-1]=="/":
                                        url=url[:-1]
                                if not url in links and not url in prevLinks:
                                        links.append(url)
                                        print url
                        # advance past this href; stepping from endHref avoids an
                        # infinite loop on anchors with no closing </a>
                        pos=endHref+1
                else:
                        allFound=True
        return links
def getLinks(url):
        # crawl outward from url, collecting every page reached
        toCrawl=[url]
        crawled=[]
        while toCrawl:
                url=toCrawl.pop()
                crawled.append(url)
                newLinks=getAllNewLinksOnPage(url,crawled)
                toCrawl=list(set(toCrawl)|set(newLinks))
        return crawled
url=raw_input("Enter a URL to search: ")
linksCrawled = getLinks(url)
print("----------------------")
def addToIndex(index,keyword,url):
        # index is expected to be a LIST of [keyword, [urls]] pairs,
        # which is why .append works here
        for entry in index:
                if entry[0]==keyword:
                        if not url in entry[1]:
                                entry[1].append(url)
                        return
        index.append([keyword,[url]])
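# (illustrative) after indexing, index ends up shaped like:
#   [["python", ["http://site/page1"]], ["engine", ["http://site/page1", "http://site/page2"]]]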
      
index=[]
# index just the seed page: fetch it again and strip the HTML down to text
response = urllib2.urlopen(url)
html = response.read()
pageText=""
html=html[html.find("<body")+5:html.find("</body>")]
finished=False
while not finished:
        nextCloseTag=html.find(">")
        nextOpenTag=html.find("<")
        if nextOpenTag>-1:
                content=" ".join(html[nextCloseTag+1:nextOpenTag].strip().split())
                pageText=pageText+" "+content
                html=html[nextOpenTag+1:]
        else:
                finished=True

# index the words only after the full page text has been collected,
# not once per pass of the while loop
for word in pageText.split():
        if word[0].isalnum() and len(word)>4:
                addToIndex(index,word,url)
print "{} unique words found".format(len(index))
print index
print("----------------------")
def compute_ranks(graph):
        # simplified PageRank; d is the damping factor
        d=0.8
        numloops=10
        ranks={}
        npages=len(graph)
        for page in graph:
                ranks[page]=1.0/npages
        for i in range(0,numloops):
                newranks={}
                for page in graph:
                        newrank=(1-d)/npages
                        for node in graph:
                                if page in graph[node]:
                                        newrank=newrank+d*ranks[node]/len(graph[node])
                        newranks[page]=newrank
                ranks=newranks
        return ranks
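# (illustrative) for graph = {"A": ["B"], "B": ["A"]} and d = 0.8, each page
# keeps rank 0.5: newrank = (1 - 0.8)/2 + 0.8*0.5/1 = 0.1 + 0.4 = 0.5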
        
def Crawl_web(seed):
        tocrawl=[seed]
        crawled=[]
        index={}
        graph={}
        global max_limit
        while tocrawl:
                p=tocrawl.pop()
                if p not in crawled:
                        max_limit-=1
                        print max_limit
                        if max_limit<=0:
                                break
                        c=get_page(p)
                        # index here is a DICT, but addToIndex calls index.append(...),
                        # so this line is where "'dict' object has no attribute 'append'"
                        # is raised (the arguments also look swapped compared with the
                        # addToIndex(index,word,url) call above)
                        addToIndex(index,p,c)
                        f=get_all_links(c)
                        union(tocrawl,f)
                        graph[p]=f
                        crawled.append(p)
        return crawled,index,graph
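# (illustrative) graph ends up like {"http://site/a": ["http://site/b", ...]},
# which is the shape compute_ranks expects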
def QuickSort(pages,ranks,lo=0,hi=None):
        # in-place quicksort on the index range [lo, hi), highest rank first;
        # recursing on slices such as pages[1:i] would only sort a copy,
        # so the recursion works on index ranges instead
        if hi is None:
                hi=len(pages)
        if hi-lo>1:
                piv=ranks[pages[lo]]
                i=lo+1
                for j in range(lo+1,hi):
                        if ranks[pages[j]]>piv:
                                pages[i],pages[j]=pages[j],pages[i]
                                i+=1
                pages[lo],pages[i-1]=pages[i-1],pages[lo]
                QuickSort(pages,ranks,lo,i-1)
                QuickSort(pages,ranks,i,hi)
def Look_up_new(index,ranks,keyword):
        pages=Look_up(index,keyword)
        print '\nPrinting the results as is with page rank\n'
        for i in pages:
                print i+" --> "+str(ranks[i])#Displaying the lists, so that you can see the page rank along side
        QuickSort(pages,ranks)
        print "\nAfter Sorting the results by page rank\n"
        it=0
        for i in pages:
                it+=1
                print str(it)+'.\t'+i+'\n' 
#print index
print "Enter What you want to search"
search_term=raw_input()
try:
        print "Enter the depth you wanna go"
        max_limit=int(raw_input())
except:
        f=None
print '\nStarted crawling, presently at depth..'
crawled,index,graph=Crawl_web(url)      # crawl and build the index and link graph
ranks=compute_ranks(graph)              # calculate the page ranks
Look_up_new(index,ranks,search_term)    # search and print the ranked results
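The code above also calls get_all_links, union and Look_up, which I haven't pasted in; they were part of the code I got off the web. Roughly, they look like this (reconstructed from memory, so treat it as a sketch rather than exactly what I'm running):

Code:

def get_all_links(page):
        # naive scrape of every href="..." target in the raw HTML
        links=[]
        pos=0
        while True:
                aTag=page.find("<a href=",pos)
                if aTag==-1:
                        return links
                start=page.find('"',aTag)+1
                end=page.find('"',start)
                if end==-1:
                        return links
                links.append(page[start:end])
                pos=end+1

def union(a,b):
        # add everything in b to a, skipping duplicates
        for e in b:
                if e not in a:
                        a.append(e)

def Look_up(index,keyword):
        # return the list of URLs stored against keyword, or an empty list
        for entry in index:
                if entry[0]==keyword:
                        return entry[1]
        return []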

I'm quite new to Python, so apologies.

P.S. I have been running it on one of my own small, closed websites, not out on the open web.

Cheers